<a href="https://colab.research.google.com/github/ijazahmad-star/Colab-Workspace/blob/main/oct%2027%202025/customize_gpt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain.tools import tool
from langgraph.graph import StateGraph, MessagesState
from langgraph.prebuilt import ToolNode
import os
from pathlib import Path
import bs4

In [None]:
from langchain_core.messages import SystemMessage

In [None]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key used by the
# embeddings and chat model created in later cells — confirm.
load_dotenv()

True

In [None]:
# 1. Load PDFs from directory
def load_pdfs_from_directory(directory_path):
    """Load every PDF found directly inside ``directory_path``.

    Only entries matching ``*.pdf`` at the top level of the directory are
    considered (the glob is not recursive). Each match is read with
    ``PyPDFLoader`` and everything the loaders return is gathered into a
    single flat list, in the order the glob yields the files.

    Args:
        directory_path: Directory to scan for PDF files.

    Returns:
        A list of the loaded documents; empty when no PDF matched.
    """
    return [
        page
        for pdf_path in Path(directory_path).glob("*.pdf")
        for page in PyPDFLoader(str(pdf_path)).load()
    ]

# 2. Load data from websites
def load_from_websites(urls):
    """Fetch each URL with ``WebBaseLoader`` and collect the results.

    Args:
        urls: Iterable of page URLs; one loader is created per URL.

    Returns:
        A flat list of everything the loaders returned, in URL order;
        empty when ``urls`` is empty.
    """
    return [
        page_doc
        for address in urls
        for page_doc in WebBaseLoader(address).load()
    ]

In [None]:
# 3. Gather the source material (local PDFs plus company web pages) and
#    cut it into chunks for embedding.
web_urls = [
    "https://strategisthub.com/services/",
    "https://strategisthub.com/about/",
    "https://strategisthub.com/case-studies/",
    "https://strategisthub.com/blogs/",
]

pdf_docs = load_pdfs_from_directory("/data")
web_docs = load_from_websites(web_urls)
all_docs = pdf_docs + web_docs

# 500-character chunks with a 50-character overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
doc_splits = text_splitter.split_documents(all_docs)

In [None]:
# pdf_docs

In [None]:
# doc_splits

In [None]:
# 4. Create vector store
# Embed the chunks with a default-configured OpenAIEmbeddings client and
# index them in a FAISS vector store.
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(doc_splits, embeddings)
# search_kwargs k=2: the retriever is asked for two results per query.
retriever = vector_store.as_retriever(search_kwargs={"k": 2})

In [None]:
# 5. Create retriever tool
@tool(response_format="content_and_artifact")
def retrieve_documents(query: str):
    """Retrieve relevant documents."""
    # NOTE: the docstring above is also the tool description exposed to the
    # model, so it is deliberately left unchanged; details live in comments.
    #
    # Returns a (content, artifact) pair as required by
    # response_format="content_and_artifact": the text handed to the model and
    # the raw retrieved documents.
    docs = retriever.invoke(query)
    if not docs:
        # The system prompt tells the model to watch for this exact marker
        # when the knowledge base has nothing relevant; emit it instead of an
        # empty string so that instruction can actually trigger.
        return "NO_RELEVANT_INFORMATION_FOUND", docs
    # Real newlines here: the previous "\\n" wrote a literal backslash-n into
    # the context instead of separating the entries.
    serialized = "\n\n".join(
        f"Source: {doc.metadata}\nContent: {doc.page_content}"
        for doc in docs
    )
    return serialized, docs


# Tools offered to the model, and the graph node that executes its tool calls.
tools = [retrieve_documents]
tool_node = ToolNode(tools)

In [None]:
# 6. Enhanced system prompt for email responses
# Prepended to the conversation on every model call (see call_model). It pins
# the reply to a business-email layout and restricts the model to retrieved
# knowledge-base content. The signature placeholder reads "Sales Team"
# (previously misspelled "Sale Team").
EMAIL_SYSTEM_PROMPT = """
You are a professional email assistant for our company's sales team. Your role is to respond to customer inquiries using ONLY information from our knowledge base.

CRITICAL RULES:
1. You MUST respond in proper business email format with subject line, salutation, body, and signature
2. If the customer's question can be answered using the provided context, write a helpful, professional email response
3. If the information is NOT in the knowledge base (context shows "NO_RELEVANT_INFORMATION_FOUND"), respond with a polite email explaining this
4. Never invent information or use external knowledge
5. Maintain a professional, helpful tone in all communications
6. Format your response as a ready-to-send email
7. Always start the subject with "Re: " followed by the original subject or an appropriate title

EMAIL FORMAT:
Subject: Re: [Original Subject or Appropriate Title]

Dear [Customer Name],

[Professional email body acknowledging their query and providing information or explaining limitations]

[Clear next steps or contact information if needed]

Best regards,
[Sales Team]
[Strategisthub]
"""

In [None]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.store.memory import InMemoryStore

In [None]:
# 6. Build LangGraph nodes
# Chat model with the retriever tool bound to it, so a reply can carry tool
# calls for the graph to execute.
model = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
).bind_tools(tools)

def call_model(state: MessagesState):
    """Agent node: run the tool-bound model on the conversation so far.

    The email system prompt is placed in front of the messages stored in
    ``state`` for this call only; it is not written back into the state.

    Args:
        state: Graph state whose ``"messages"`` entry holds the conversation.

    Returns:
        A state update containing just the model's reply under ``"messages"``.
    """
    system_message = SystemMessage(content=EMAIL_SYSTEM_PROMPT)
    reply = model.invoke([system_message] + state["messages"])
    return {"messages": [reply]}

def should_continue(state: MessagesState):
    """Route after the agent node based on the newest message.

    Args:
        state: Graph state whose ``"messages"`` entry holds the conversation.

    Returns:
        ``"tools"`` when the last message carries tool calls, otherwise
        ``"__end__"`` to finish the run.
    """
    newest = state["messages"][-1]
    return "tools" if newest.tool_calls else "__end__"

# 7. Build graph
workflow = StateGraph(MessagesState)
workflow.add_node("agent", call_model)
workflow.add_node("tools", tool_node)
# Every run enters at the agent node.
workflow.add_edge("__start__", "agent")
# After each model reply, should_continue returns "tools" or "__end__".
workflow.add_conditional_edges("agent", should_continue)
# Tool results always go back to the agent so it can write the final answer.
workflow.add_edge("tools", "agent")

# Compile with a MemorySaver checkpointer (rather than a bare
# workflow.compile()) so later invocations under the same thread_id can see
# the earlier conversation.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [None]:
# Sample inbound customer email used to exercise the agent. This is a plain
# string (no f-prefix): the text has no placeholders to interpolate. The
# wording, including its typos, is kept as-is since it is test input.
user_input = """
from: john@exmaple.com

subject: need info about the Modernizing Global

Hi Sale Tean, i am john, i will be thankful if you can share some insights on the following topic:

'Modernizing Global Real Estate Experiences'

i will be waiting for your response

regards
john
"""

In [None]:
# The graph was already compiled with a checkpointer in the build cell, so it
# is not recompiled here.

# Use with thread
# The thread_id names the conversation; the follow-up cells reuse "1" to
# continue this same exchange.
config = {"configurable": {"thread_id": "1"}}
response = app.invoke(
    {"messages": [{"role": "user", "content": user_input}]},
    config
)
# The last message in the returned state is the agent's final reply.
print(response["messages"][-1].content)

Subject: Re: need info about the Modernizing Global

Dear John,

Thank you for reaching out with your inquiry about "Modernizing Global Real Estate Experiences." 

Keller Williams stands out as a leader in the global real estate market, leveraging advanced technology to enhance real estate experiences. Their innovative product, Command, effectively manages financial transactions on an international scale, overseeing a transaction volume of $4.5 billion across 35 countries. 

Key features of their approach include:
- Development of the Front Door app, which focuses on front-end user experience.
- Comprehensive reporting dashboards, invoicing, and payment modules.
- Multilingual functionality that supports multiple currencies, making it accessible for a diverse clientele.

If you have any further questions or need additional insights, please feel free to ask.

Best regards,  
Sales Team  
Strategisthub


In [None]:
# Follow-up turn on the same thread_id ("1"): only the new question is sent,
# so answering it depends on the checkpointer supplying the earlier email.
config = {"configurable": {"thread_id": "1"}}
response = app.invoke(
    {"messages": [{"role": "user", "content": "provide 2 line summary of user previous query"}]},
    config
)
print(response["messages"][-1].content)

John is seeking insights on the topic of "Modernizing Global Real Estate Experiences." He is looking for information related to advancements and technologies in the global real estate sector.


In [None]:
# Another follow-up on thread_id "1": the sender's name appears only in the
# first message, so this checks that the stored conversation is still visible.
config = {"configurable": {"thread_id": "1"}}
response = app.invoke(
    {"messages": [{"role": "user", "content": "what is the name of the email sender"}]},
    config
)
print(response["messages"][-1].content)

The name of the email sender is John.
