In [53]:
from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain.tools import tool
from langgraph.graph import StateGraph, MessagesState
from langgraph.prebuilt import ToolNode
import os
from pathlib import Path
import bs4

In [54]:
from langchain_core.messages import SystemMessage

In [55]:
from dotenv import load_dotenv

load_dotenv()

True

In [4]:

from pathlib import Path
from langchain_community.document_loaders import PyPDFLoader

def load_pdfs_from_directory(directory_path: str) -> list:
    """Recursively load every ``*.pdf`` file found under ``directory_path``.

    Files are visited in sorted path order so the resulting document list
    (and anything built from it) is the same on every run, instead of
    following whatever order the filesystem enumerates entries in.

    Loading is best-effort: a file that fails to load is reported on stdout
    and skipped; the remaining files are still processed.

    Args:
        directory_path: Root directory to search; sub-directories are included.

    Returns:
        A flat list with whatever ``PyPDFLoader(...).load()`` returned for
        each file, concatenated in sorted path order. Empty when the
        directory does not exist or no PDF could be loaded.
    """
    root = Path(directory_path)
    if not root.is_dir():
        # rglob() on a missing path just yields nothing; report it so an
        # empty result is not mistaken for "the folder has no PDFs".
        print(f"PDF directory not found: {root}")
        return []

    docs = []
    # sorted(): rglob() order is filesystem-dependent.
    for pdf_file in sorted(root.rglob("*.pdf")):
        try:
            print("Reading data from: ", pdf_file)
            loader = PyPDFLoader(str(pdf_file))
            docs.extend(loader.load())
        except Exception as e:
            # Deliberate best-effort: one unreadable PDF must not abort the run.
            print(f"Error loading {pdf_file}: {e}")
    return docs

pdf_data = load_pdfs_from_directory("../Sales")
# PyPDFLoader returns one Document per page by default, so len(pdf_data) is a
# page count, not a file count. Distinct "source" metadata values give the
# number of files that actually produced documents.
print(
    f"Loaded {len(pdf_data)} pages from "
    f"{len({doc.metadata.get('source') for doc in pdf_data})} PDFs."
)


Reading data from:  ../Sales/Profiles & Portfolio Documents/StrategistHub Rate Card.pdf
Reading data from:  ../Sales/Team CVs - outdated/Ali-Ahmad.pdf
Reading data from:  ../Sales/Team CVs - outdated/Arslan-Asghar.pdf
Reading data from:  ../Sales/Team CVs - outdated/Wajeeh-Ul-Hassan.pdf
Reading data from:  ../Sales/Team CVs - outdated/Ali-Usman.pdf
Reading data from:  ../Sales/Team CVs - outdated/Wasif-Farooq.pdf
Reading data from:  ../Sales/Case Studies/Mediguardian Case Study.pdf
Reading data from:  ../Sales/Case Studies/Creator AI.pdf
Reading data from:  ../Sales/Case Studies/Keller Williams.pdf
Reading data from:  ../Sales/Case Studies/TryHackMe.pdf
Reading data from:  ../Sales/Case Studies/Aura Health.pdf
Reading data from:  ../Sales/Case Studies/Distill.pdf
Reading data from:  ../Sales/Case Studies/TheGrowHub Social Network_market place - MVP Case Study.pdf
Reading data from:  ../Sales/Case Studies/Archive/MediGuardian - MVP.pdf
Reading data from:  ../Sales/Case Studies/Archive/A

In [None]:
# import weaviate
# from weaviate.classes.init import Auth
# import os

# # Best practice: store your credentials in environment variables
# weaviate_url = os.environ["WEAVIATE_URL"]
# weaviate_api_key = os.environ["WEAVIATE_API_KEY"]

# client = weaviate.connect_to_weaviate_cloud(
#     cluster_url=weaviate_url,
#     auth_credentials=Auth.api_key(weaviate_api_key),
# )

# print(client.is_ready())  # Should print: `True`

# client.close() 

True


In [None]:
# 1. Load PDFs from directory
# def load_pdfs_from_directory(directory_path):
#     docs = []
#     for pdf_file in Path(directory_path).glob("*.pdf"):
#         loader = PyPDFLoader(str(pdf_file))
#         docs.extend(loader.load())
#     return docs

# 2. Load data from websites
def load_from_websites(urls):
    """Fetch each URL with ``WebBaseLoader`` and collect the loaded documents.

    Args:
        urls: Iterable of page URLs; they are loaded one at a time, in order.

    Returns:
        One flat list holding the documents from every URL, in input order.
    """
    return [
        page_doc
        for page_url in urls
        for page_doc in WebBaseLoader(page_url).load()
    ]

In [57]:
# 3. Combine and split documents
# The PDF folder defaults to the original location but can be redirected with
# the CUSTOMIZEGPT_DATA_DIR environment variable (e.g. from the .env file that
# load_dotenv() reads), so the notebook is not tied to one machine's layout.
pdf_dir = os.environ.get(
    "CUSTOMIZEGPT_DATA_DIR", "/home/hp/Desktop/Workplace/CustomizeGPT/data"
)
pdf_docs = load_pdfs_from_directory(pdf_dir)

web_urls = [
    "https://strategisthub.com/services/",
    "https://strategisthub.com/about/",
    "https://strategisthub.com/case-studies/",
    "https://strategisthub.com/blogs/",
]
web_docs = load_from_websites(web_urls)

all_docs = pdf_docs + web_docs

# Split into 500-character chunks with a 50-character overlap between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500, chunk_overlap=50
)
doc_splits = text_splitter.split_documents(all_docs)

In [58]:
# pdf_docs

In [59]:
# doc_splits

In [60]:
# 4. Create vector store
# Embed every chunk with OpenAI embeddings and index the vectors in FAISS.
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(documents=doc_splits, embedding=embeddings)
# Each query returns the 2 nearest chunks.
retriever = vector_store.as_retriever(search_kwargs=dict(k=2))

In [61]:
# 5. Create retriever tool
@tool(response_format="content_and_artifact")
def retrieve_documents(query: str):
    """Retrieve relevant documents."""
    # The docstring above doubles as the tool description handed to the
    # model, so its wording is intentionally left unchanged.
    #
    # Returns a (content, artifact) pair, as required by
    # response_format="content_and_artifact": the text shown to the model and
    # the raw retrieved documents.
    docs = retriever.invoke(query)
    if not docs:
        # EMAIL_SYSTEM_PROMPT tells the model to look for this exact marker
        # when the knowledge base has nothing relevant.
        return "NO_RELEVANT_INFORMATION_FOUND", docs
    # Real newlines here: the previous "\\n" escapes produced literal
    # backslash-n text in the context passed to the model.
    serialized = "\n\n".join(
        f"Source: {doc.metadata}\nContent: {doc.page_content}"
        for doc in docs
    )
    return serialized, docs

tools = [retrieve_documents]
tool_node = ToolNode(tools)

In [62]:
# 6. Enhanced system prompt for email responses
# Prepended as a SystemMessage to every model call made by the agent node.
# NOTE(review): rule 3 relies on the retrieved context containing the literal
# marker NO_RELEVANT_INFORMATION_FOUND -- confirm the retriever tool emits it.
# NOTE(review): the signature placeholder reads "[Sale Team]" while the sample
# replies sign off as "Sales Team" -- consider aligning the wording.
EMAIL_SYSTEM_PROMPT = """
You are a professional email assistant for our company's sales team. Your role is to respond to customer inquiries using ONLY information from our knowledge base.

CRITICAL RULES:
1. You MUST respond in proper business email format with subject line, salutation, body, and signature
2. If the customer's question can be answered using the provided context, write a helpful, professional email response
3. If the information is NOT in the knowledge base (context shows "NO_RELEVANT_INFORMATION_FOUND"), respond with a polite email explaining this
4. Never invent information or use external knowledge
5. Maintain a professional, helpful tone in all communications
6. Format your response as a ready-to-send email
7. Always start the subject with "Re: " followed by the original subject or an appropriate title

EMAIL FORMAT:
Subject: Re: [Original Subject or Appropriate Title]

Dear [Customer Name],

[Professional email body acknowledging their query and providing information or explaining limitations]

[Clear next steps or contact information if needed]

Best regards,
[Sale Team]
[Strategisthub]
"""

In [None]:
from langgraph.checkpoint.memory import MemorySaver

In [64]:
# 6. Build LangGraph nodes
# Chat model with the retriever tool bound, so its replies may carry tool calls.
model = ChatOpenAI(model="gpt-4o-mini", temperature=0).bind_tools(tools)


def call_model(state: MessagesState):
    """Agent node: run the model on the system prompt plus the conversation.

    Args:
        state: Graph state; ``state["messages"]`` holds the conversation so far.

    Returns:
        A state update whose ``"messages"`` list contains only the model's
        new reply.
    """
    system_message = SystemMessage(content=EMAIL_SYSTEM_PROMPT)
    conversation = [system_message] + state["messages"]
    return {"messages": [model.invoke(conversation)]}

def should_continue(state: MessagesState):
    """Pick the next node after the agent has replied.

    Returns ``"tools"`` when the newest message carries tool calls, otherwise
    ``"__end__"`` to finish the run.
    """
    newest = state["messages"][-1]
    return "tools" if newest.tool_calls else "__end__"

# 7. Build graph
# Flow: __start__ -> agent; after each agent turn should_continue() chooses
# between running the tools (which loop back to the agent) and ending.
workflow = StateGraph(MessagesState)
workflow.add_node("agent", call_model)
workflow.add_node("tools", tool_node)
workflow.add_edge("tools", "agent")
workflow.add_edge("__start__", "agent")
workflow.add_conditional_edges("agent", should_continue)

# Compile with an in-memory checkpointer so later invoke() calls that reuse a
# thread_id continue the same conversation.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [71]:
# from IPython.display import Image, display
# display(Image(app.get_graph().draw_mermaid_png()))

In [65]:
# Sample inbound customer email used to exercise the agent. It is a plain
# (non-f) string because nothing is interpolated.
user_input = """
from: john@exmaple.com

subject: need info about the Modernizing Global

Hi Sale Tean, i am john, i will be thankful if you can share some insights on the following topic:

'Modernizing Global Real Estate Experiences'

i will be waiting for your response

regards
john
"""

In [67]:
# memory = MemorySaver()
# app = workflow.compile(checkpointer=memory)

# Use with thread
# All calls that share thread_id "1" are checkpointed as one conversation.
config = dict(configurable=dict(thread_id="1"))
response = app.invoke({"messages": [{"role": "user", "content": user_input}]}, config)
# The last message in the returned state is the model's final reply.
print(response["messages"][-1].content)

Subject: Re: need info about the Modernizing Global

Dear John,

Thank you for reaching out with your inquiry about "Modernizing Global Real Estate Experiences." 

Keller Williams stands out as a leader in the global real estate market, leveraging advanced technology to enhance real estate experiences. Their innovative product, Command, effectively manages financial transactions on an international scale, overseeing a transaction volume of $4.5 billion across 35 countries. 

Key features of their approach include:
- Development of the Front Door app, which focuses on front-end user experience.
- Comprehensive reporting dashboards, invoicing, and payment modules.
- Multilingual functionality that supports multiple currencies, making it accessible for a diverse clientele.

If you have any further questions or need additional insights, please feel free to ask.

Best regards,  
Sales Team  
Strategisthub


In [68]:
# Follow-up on thread "1": the question only makes sense with the earlier
# email still in the checkpointed history.
config = dict(configurable=dict(thread_id="1"))
response = app.invoke(
    {"messages": [{"role": "user", "content": "provide 2 line summary of user previous query"}]},
    config=config,
)
print(response["messages"][-1].content)

John is seeking insights on the topic of "Modernizing Global Real Estate Experiences." He is looking for information related to advancements and technologies in the global real estate sector.


In [69]:
# Second follow-up on thread "1", again answered from the stored conversation.
config = dict(configurable=dict(thread_id="1"))
response = app.invoke(
    {"messages": [{"role": "user", "content": "what is the name of the email sender"}]},
    config=config,
)
print(response["messages"][-1].content)

The name of the email sender is John.


In [None]:
# from langchain.messages import HumanMessage
# result = app.invoke({
#     "messages": [HumanMessage(content=user_input)]
# })
# print(result["messages"][-1].content)

Subject: Re: need info about the Modernizing Global

Dear John,

Thank you for reaching out. I appreciate your interest in the topic of "Modernizing Global Real Estate Experiences."

Keller Williams stands out as a leader in the global real estate market, leveraging advanced technology to enhance real estate experiences. Their innovative product, Command, is designed to manage financial transactions on an international scale, overseeing a transaction volume of $4.5 billion across 35 countries. 

Key features of their approach include:
- Front-end development of the Front Door app.
- Comprehensive reporting dashboard, invoicing, financial transactions, and payment modules.
- Multilingual functionality that accommodates multiple currencies.

If you have any further questions or need additional insights, please feel free to ask.

Best regards,

Sales Team  
Strategisthub


In [None]:
# 8. Run
from langchain.messages import HumanMessage

# The graph is compiled with a checkpointer, so invoke() must be given a
# thread_id; calling it without a config raises. A fresh thread id keeps this
# one-off query separate from the conversation stored on thread "1".
result = app.invoke(
    {
        "messages": [
            HumanMessage(content="Modernizing Global Real Estate Experiences with a Next-Generation Digital CRM Platform")
        ]
    },
    {"configurable": {"thread_id": "2"}},
)
print(result["messages"][-1].content)

Subject: Re: Modernizing Global Real Estate Experiences with a Next-Generation Digital CRM Platform

Dear [Customer Name],

Thank you for your inquiry regarding the modernization of global real estate experiences with our next-generation digital CRM platform. 

I would like to direct you to our case studies, where you can discover how our industry-leading specialists and innovative solutions have helped businesses succeed across multiple industries. These real-world examples showcase problem-solving, optimized workflows, and scalable architectures that leverage cutting-edge technology for measurable impact and efficiency.

For more detailed insights, please visit our case studies page [here](https://strategisthub.com/case-studies/).

If you have any further questions or need additional information, please feel free to reach out.

Best regards,  
Sales Team  
Strategisthub  
