In [1]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment
# (presumably the OpenAI API key the later cells rely on — TODO confirm).
load_dotenv()

True

In [2]:
from langchain.schema import Document
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate

In [3]:
# Small in-memory sample corpus about the IPL. Each Document carries a `source`
# metadata tag naming the (notional) file the text came from.
docs = [
    Document(
        page_content="The Chennai Super Kings (CSK) are one of the most successful teams in IPL history, having won the championship five times. The team is led by legendary captain MS Dhoni.",
        metadata={"source": "teams.txt"},
    ),
    Document(
        page_content="The 2024 IPL season featured 10 teams competing in a round-robin format followed by playoffs. The final was held at Chennai.",
        metadata={"source": "season_info.txt"},
    ),
    Document(
        page_content="Virat Kohli is the highest run-scorer in IPL history with over 7,000 runs. He plays for Royal Challengers Bangalore (RCB) and has several centuries to his name.",
        metadata={"source": "players.txt"},
    ),
    Document(
        page_content="The IPL generates significant revenue through broadcasting rights, sponsorships, and ticket sales. In 2023, the broadcasting rights were sold for over ₹48,000 crore.",
        metadata={"source": "economics.txt"},
    ),
    Document(
        page_content="Jasprit Bumrah is considered one of the top bowlers in the IPL, known for his death-over accuracy. He plays for the Mumbai Indians and has taken over 150 wickets in the league.",
        metadata={"source": "players.txt"},
    ),
    Document(
        page_content="The Mumbai Indians (MI) have won the IPL title five times, tying with Chennai Super Kings for the most championships. They are known for their strong core of Indian and international players.",
        metadata={"source": "teams.txt"},
    ),
]

In [4]:
# Embed the sample documents with OpenAI embeddings and index them in a Chroma vector store.
# NOTE(review): no collection name or persist directory is given; re-running this cell in the
# same kernel may add the same documents to the store again — TODO confirm.
embedding_function = OpenAIEmbeddings()
db = Chroma.from_documents(docs, embedding_function)

# Retriever over that store, configured to return 2 documents per query (k=2).
retriever = db.as_retriever(search_kwargs={"k": 2})

In [5]:
# Prompt for the answer-generation step. It has three placeholders that the caller
# must fill: `chat_history`, `context` and `question`.
template = """
Answer the question based on the following context and the Chathistory. Especially take the latest question into consideration:

Chathistory: {chat_history}
Context: {context}
Question: {question}
Answer:
"""
prompt = ChatPromptTemplate.from_template(template)

In [6]:
# Answer-generation chain: fill the prompt, then pass it to the chat model.
llm = ChatOpenAI(model="gpt-4o-mini")
rag_chain = prompt | llm 

In [7]:
# Baseline without LangGraph: retrieve supporting documents first and pass their
# text as context, so the chain answers from the indexed corpus rather than from
# the model's own knowledge.
baseline_question = "Who is the highest run-scorer in IPL history?"
baseline_docs = retriever.invoke(baseline_question)
val = rag_chain.invoke(
    {
        "chat_history": "",
        "context": "\n\n".join(doc.page_content for doc in baseline_docs),
        "question": baseline_question,
    }
)

In [8]:
# Show only the text of the model's reply.
print(val.content)

As of October 2023, the highest run-scorer in IPL history is Virat Kohli.


# With LangGraph

In [9]:
from typing import List, Literal
from pydantic import BaseModel, Field
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langgraph.graph import StateGraph, START, END
from langgraph.graph import MessagesState

In [10]:
class AgentState(MessagesState):
    """State passed between the graph nodes: MessagesState plus the fields below."""

    # Documents returned by the retriever, later narrowed to those graded relevant.
    documents: List[Document]
    # Lower-cased classifier verdict; the router treats "yes" as on-topic.
    on_topic: str
    # Standalone / refined wording of the question used for retrieval and generation.
    rephrased_question: str
    # Set by the grader: True when at least one retrieved document was graded relevant.
    proceed_to_generate: bool
    # Number of question refinements performed so far for the current question.
    rephrase_count: int
    # The latest user question, supplied by the caller when invoking the graph.
    question: HumanMessage

In [11]:
# Structured-output schema for the topic classifier: `score` holds the model's
# 'Yes'/'No' verdict. Documented with a comment rather than a docstring because a
# class docstring would become part of the schema description sent to the model.
class GradeQuestion(BaseModel):
    score: str = Field(
        description="Question is about the specified topics? If yes -> 'Yes' if not -> 'No'"
    )

In [12]:
def question_rewriter_node(state: AgentState) -> AgentState:
    """
    Reset the per-question fields and produce a standalone version of the question.

    The incoming question is appended to the message history if it is not already
    there. When earlier messages exist, the chat model is asked to rewrite the
    question so that it can be understood without them; otherwise the question
    text is used unchanged.

    Args:
        state: Graph state; must contain ``question`` (a message with ``.content``).
            ``messages`` may be missing or None, in which case a new history is started.

    Returns:
        The same state object with ``documents``, ``on_topic``,
        ``proceed_to_generate`` and ``rephrase_count`` reset and
        ``rephrased_question`` filled in.
    """
    # Start every question from a clean slate, whatever the state held before.
    state['documents'] = []
    state['on_topic'] = ""
    state['rephrased_question'] = ""
    state['proceed_to_generate'] = False
    state['rephrase_count'] = 0

    # Treat an explicit None like a missing history instead of failing on `in` below.
    state["messages"] = state.get("messages") or []

    if state['question'] not in state['messages']:
        state['messages'].append(state['question'])

    if len(state['messages']) > 1:
        conversation = state['messages'][:-1]
        current_question = state['question'].content

        # Send real chat messages rather than one flattened "System: ... Human: ..."
        # string, so the instruction keeps its system role and the history keeps
        # its human/AI turns.
        messages = [
            SystemMessage(
                content="You are a helpful assistant that rephrases the user's question to be a standalone question optimized for retrieval."
            ),
            *conversation,
            HumanMessage(content=current_question),
        ]
        llm = ChatOpenAI(model="gpt-4o-mini")

        response = llm.invoke(messages)
        better_question = response.content.strip()
        print("Rephrased question:", better_question)
        state['rephrased_question'] = better_question

    else:
        # First message of the conversation: nothing to resolve, use it verbatim.
        state['rephrased_question'] = state['question'].content

    return state

In [13]:
def question_classifier_node(state: AgentState) -> AgentState:
    """
    Ask the chat model whether the rephrased question is within the supported topics.

    The verdict is requested as a GradeQuestion structured output and stored,
    stripped and lower-cased, in ``state['on_topic']``.
    """

    print("Entering Question Classifier Node")

    classifier_instructions = """
        You are a strict classifier that answers only 'yes' or 'no' to determine whether a user's question is about any of the following three topics:
        
        1. Information about the Chennai Super Kings (CSK), including their achievements, titles, or captain MS Dhoni.
        2. Information about the Indian Premier League (IPL) as a tournament, including its format, venues, revenue, broadcasting rights, or general economics.
        3. Records or statistics specifically related to Virat Kohli in the IPL, such as runs scored, centuries, or the team he plays for.
        
        Respond with 'yes' **only** if the question clearly falls under one of these categories. Otherwise, respond with 'no'.
        Do not explain your answer. Just respond with 'yes' or 'no'.
        """

    # Both messages are already fully built, so the prompt has no variables to fill.
    classification_prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessage(content=classifier_instructions),
            HumanMessage(content=f"User question: {state['rephrased_question']}"),
        ]
    )
    classifier = classification_prompt | ChatOpenAI(model="gpt-4o-mini").with_structured_output(GradeQuestion)

    verdict = classifier.invoke({})
    state['on_topic'] = verdict.score.strip().lower()
    print(f"Question is about the topic: {state['on_topic']}")

    return state

In [14]:
def on_topic_node(state: AgentState) -> Literal["retrieve_node", "off_topic_response_node"]:
    """
    Routing function: choose the next node from the classifier verdict.

    It is used as a conditional-edge function, so it returns a node name and
    does not modify the state.

    Args:
        state: Graph state; ``on_topic`` holds the classifier verdict.

    Returns:
        ``"retrieve_node"`` when the verdict is "yes" (ignoring case and
        surrounding whitespace), otherwise ``"off_topic_response_node"``.
    """

    print("Entering on Topic Router")
    # A missing or None verdict is treated as off-topic instead of raising.
    verdict = (state.get("on_topic") or "").strip().lower()
    if verdict == "yes":
        print("Routing to Retriever")
        return "retrieve_node"

    print("Routing to off_topic response")
    return "off_topic_response_node"


In [15]:
def retrieve_node(state: AgentState)-> AgentState:
    """
    Query the module-level retriever with the rephrased question.

    The returned documents replace ``state['documents']``.
    """

    print("Entering Retriever Node")
    query = state['rephrased_question']
    hits = retriever.invoke(query)
    print(f"retriever returned {len(hits)} documents")
    state['documents'] = hits
    return state

In [16]:
# Structured-output schema for the document grader: `score` holds the model's
# 'Yes'/'No' relevance verdict. Documented with a comment rather than a docstring
# because a class docstring would become part of the schema description sent to the model.
class GradeDocument(BaseModel):
    score: str = Field(
        description="Document is relevant to question or not If yes -> 'Yes' if not-> 'No'"
    )

In [17]:
def retriever_grade_node(state: AgentState) -> AgentState:
    """
    Grade each retrieved document and keep only the relevant ones.

    Every document in ``state['documents']`` is sent to the chat model together
    with the rephrased question; the model answers through the GradeDocument
    structured output.

    Args:
        state: Graph state; reads ``documents`` and ``rephrased_question``.

    Returns:
        The same state object with ``documents`` narrowed to those graded "yes"
        and ``proceed_to_generate`` set to True when at least one remains.
    """

    print("Entering Retriever Grader Node")

    system_message = SystemMessage(
        content="""
        You are a grader assessing the relevance of a retrieved document to a user question.
        Only answer with 'yes' or 'no'.
        
        If the document contains information relevant to the user's question, respond with 'yes'.
        Otherwise, respond with 'no'."""
    )
    llm = ChatOpenAI(model="gpt-4o-mini")
    structured_llm = llm.with_structured_output(GradeDocument)

    relevant_docs = []
    for doc in state['documents']:

        # "\n\n" puts a blank line between the question and the document text.
        human_message = HumanMessage(
            content=f"User question: {state['rephrased_question']} \n\nRetrieved document: {doc.page_content}"
        )
        # Both messages are already fully built, so the prompt has no variables to fill.
        grader_prompt = ChatPromptTemplate.from_messages([system_message, human_message])
        grader_llm = grader_prompt | structured_llm
        result = grader_llm.invoke({})

        verdict = result.score.strip()
        print(f"Grading document: {doc.page_content[:30]}... is relevant to question? Result: {verdict}")

        if verdict.lower() == "yes":
            relevant_docs.append(doc)

    state['documents'] = relevant_docs
    state['proceed_to_generate'] = bool(relevant_docs)
    print(f"retriever_grade: Proceed to Generate = {state['proceed_to_generate']}")
    return state

In [18]:
def proceed_router_node(
    state: AgentState,
) -> Literal["generate_answer_node", "cannot_answer_node", "refine_question_node"]:
    """
    Routing function: decide what follows document grading.

    It is used as a conditional-edge function, so it returns a node name and
    does not modify the state.

    Args:
        state: Graph state; reads ``proceed_to_generate`` and ``rephrase_count``.

    Returns:
        ``"generate_answer_node"`` when relevant documents were found,
        ``"cannot_answer_node"`` when the question has already been refined
        twice, otherwise ``"refine_question_node"``.
    """

    print("Entering proceed Router")
    # A missing or None counter counts as "no refinements yet".
    rephrase_count = state.get("rephrase_count") or 0
    if state.get("proceed_to_generate", False):
        print("Routing to generate_answer")
        return "generate_answer_node"
    if rephrase_count >= 2:
        print("Maximum rephrase attempts reached. Cannot find relevant documents.")
        return "cannot_answer_node"

    print("Routing to refine_question")
    return "refine_question_node"

In [19]:
def refine_question_node(state: AgentState)-> AgentState:
    """
    Ask the chat model for a slightly reworded question to retry retrieval with.

    Args:
        state: Graph state; reads ``rephrased_question`` and ``rephrase_count``.

    Returns:
        The same state object. When fewer than two refinements have been made,
        ``rephrased_question`` is replaced by the model's rewording and
        ``rephrase_count`` is incremented; otherwise the state is returned
        unchanged.
    """

    print("Entering refine_question Node")
    # A missing or None counter counts as "no refinements yet".
    rephrase_count = state.get("rephrase_count") or 0

    if rephrase_count >= 2:
        print("Maximum rephrase attempts reached. Cannot find relevant documents.")
        return state

    question_to_refine = state['rephrased_question']

    # Send real chat messages rather than one flattened "System: ... Human: ..."
    # string, so the instruction keeps its system role.
    messages = [
        SystemMessage(
            content="""You are a helpful assistant that slightly refines the user's question to improve retrieval results.
Provide a slightly adjusted version of the question."""
        ),
        HumanMessage(
            content=f"Original question: {question_to_refine}\n\nProvide a slightly refined question."
        ),
    ]
    llm = ChatOpenAI(model="gpt-4o-mini")

    response = llm.invoke(messages)
    refined_question = response.content.strip()
    print(f"Refined question: {refined_question}")
    state['rephrased_question'] = refined_question
    state['rephrase_count'] = rephrase_count + 1
    return state

In [20]:
def generate_answer_node(state: AgentState)-> AgentState:
    """
    Generate the final answer with the module-level ``rag_chain`` and append it to the history.

    Args:
        state: Graph state; reads ``messages``, ``documents`` and ``rephrased_question``.

    Returns:
        The same state object with the answer appended to ``messages`` as an AIMessage.

    Raises:
        ValueError: if ``messages`` is missing from the state or is None.
    """

    print("Entering Generate Answer Node")

    if "messages" not in state or state['messages'] is None:
        raise ValueError("No messages found in state")

    history = state['messages']
    documents = state['documents']
    rephrased_question = state['rephrased_question']

    # The prompt is a plain-text template, so render the objects as readable text
    # instead of letting them be interpolated as Python reprs (ids, metadata, ...).
    history_text = "\n".join(f"{message.type}: {message.content}" for message in history)
    context_text = "\n\n".join(doc.page_content for doc in documents)

    response = rag_chain.invoke(
        {
            "chat_history": history_text,
            "context": context_text,
            "question": rephrased_question,
        }
    )

    generation = response.content.strip()
    state["messages"].append(AIMessage(content=generation))
    print(f"Generate Answer: {generation}")
    return state

In [21]:
def cannot_answer_node(state: AgentState)-> AgentState:
    """
    Append a fixed apology to the message history and return the state.

    A missing or None ``messages`` entry is replaced by an empty list first.
    """

    print("Entering Cannot Answer Node")
    history = state.get("messages")
    if history is None:
        history = state["messages"] = []

    apology = AIMessage(
        content="I'm sorry, but I cannot find the information you're looking for."
    )
    history.append(apology)
    return state
    

In [22]:
def off_topic_response_node(state: AgentState)-> AgentState:
    """
    Append a fixed refusal to the message history and return the state.

    A missing or None ``messages`` entry is replaced by an empty list first.
    """

    print("Entering Off Topic Response Node")
    history = state.get("messages")
    if history is None:
        history = state["messages"] = []
    refusal = AIMessage(content="I can't respond to that!")
    history.append(refusal)
    return state

In [23]:
from langgraph.checkpoint.memory import MemorySaver

# Checkpointer handed to the graph at compile time, used together with the
# per-invocation `thread_id` config.
checkpointer = MemorySaver()

In [24]:
# Workflow: rewrite -> classify -> (retrieve -> grade -> generate | refine | give up) or off-topic reply.

builder = StateGraph(AgentState)

# Nodes: each one is registered under the name of the function implementing it.
for node_fn in (
    question_rewriter_node,
    question_classifier_node,
    off_topic_response_node,
    retrieve_node,
    refine_question_node,
    generate_answer_node,
    cannot_answer_node,
    retriever_grade_node,
):
    builder.add_node(node_fn.__name__, node_fn)

# Fixed edges at the start of the flow.
builder.add_edge(START, "question_rewriter_node")
builder.add_edge("question_rewriter_node", "question_classifier_node")

# The routing functions return node names directly, so each route maps to itself.
classifier_routes = ("retrieve_node", "off_topic_response_node")
builder.add_conditional_edges(
    "question_classifier_node",
    on_topic_node,
    {route: route for route in classifier_routes},
)
builder.add_edge("retrieve_node", "retriever_grade_node")

grader_routes = ("generate_answer_node", "cannot_answer_node", "refine_question_node")
builder.add_conditional_edges(
    "retriever_grade_node",
    proceed_router_node,
    {route: route for route in grader_routes},
)

# A refined question goes back through retrieval; the three reply nodes end the run.
builder.add_edge("refine_question_node", "retrieve_node")
for terminal_node in ("generate_answer_node", "cannot_answer_node", "off_topic_response_node"):
    builder.add_edge(terminal_node, END)

# Trailing expression so the cell still echoes the builder object.
builder



<langgraph.graph.state.StateGraph at 0x1b5c7474d90>

In [25]:
# Compile the workflow into a runnable graph, attaching the checkpointer.
graph = builder.compile(checkpointer=checkpointer)

In [26]:
# Off-topic question on thread 1.
input_data = {
    "question": HumanMessage(
        content="How is the weather?"
    )
}
graph.invoke(
    input=input_data,
    config={"configurable": {"thread_id": 1}},
)

Entering Question Classifier Node
Question is about the topic: no
Entering on Topic Router
Routing to off_topic response
Entering Off Topic Response Node


{'messages': [HumanMessage(content='How is the weather?', additional_kwargs={}, response_metadata={}, id='a0265b5a-0446-484a-8817-7f9565c2ba3a'),
  AIMessage(content="I can't respond to that!", additional_kwargs={}, response_metadata={}, id='800a5b33-f2bd-46fe-9662-3ee21fb21169')],
 'documents': [],
 'on_topic': 'no',
 'rephrased_question': 'How is the weather?',
 'proceed_to_generate': False,
 'rephrase_count': 0,
 'question': HumanMessage(content='How is the weather?', additional_kwargs={}, response_metadata={}, id='a0265b5a-0446-484a-8817-7f9565c2ba3a')}

In [27]:

# On-topic question on thread 2.
input_data = {"question": HumanMessage(content="who is captain of CSK?")}
graph.invoke(
    input=input_data,
    config={"configurable": {"thread_id": 2}},
)

Entering Question Classifier Node
Question is about the topic: yes
Entering on Topic Router
Routing to Retriever
Entering Retriever Node
retriever returned 2 documents
Entering Retriever Grader Node
Grading document: The Chennai Super Kings (CSK) ... is relevant to question? Result: yes
Grading document: Virat Kohli is the highest run... is relevant to question? Result: no
retriever_grade: Proceed to Generate = True
Entering proceed Router
Routing to generate_answer
Entering Generate Answer Node
Generate Answer: The captain of CSK is MS Dhoni.


{'messages': [HumanMessage(content='who is captain of CSK?', additional_kwargs={}, response_metadata={}, id='8b06a616-05dc-4428-9eee-b9f5bfa531c5'),
  AIMessage(content='The captain of CSK is MS Dhoni.', additional_kwargs={}, response_metadata={}, id='c74e28bd-2657-433f-821d-50487a994ad9')],
 'documents': [Document(metadata={'source': 'teams.txt'}, page_content='The Chennai Super Kings (CSK) are one of the most successful teams in IPL history, having won the championship five times. The team is led by legendary captain MS Dhoni.')],
 'on_topic': 'yes',
 'rephrased_question': 'who is captain of CSK?',
 'proceed_to_generate': True,
 'rephrase_count': 0,
 'question': HumanMessage(content='who is captain of CSK?', additional_kwargs={}, response_metadata={}, id='8b06a616-05dc-4428-9eee-b9f5bfa531c5')}

In [28]:

# Broader on-topic request on thread 3.
input_data = {"question": HumanMessage(content="Tell about IPL and CSK")}
graph.invoke(
    input=input_data,
    config={"configurable": {"thread_id": 3}},
)

Entering Question Classifier Node
Question is about the topic: yes
Entering on Topic Router
Routing to Retriever
Entering Retriever Node
retriever returned 2 documents
Entering Retriever Grader Node
Grading document: The Chennai Super Kings (CSK) ... is relevant to question? Result: yes
Grading document: The Mumbai Indians (MI) have w... is relevant to question? Result: no
retriever_grade: Proceed to Generate = True
Entering proceed Router
Routing to generate_answer
Entering Generate Answer Node
Generate Answer: The Indian Premier League (IPL) is a professional Twenty20 cricket league in India, known for its exciting matches and a mix of international and local players. The Chennai Super Kings (CSK) are one of the most successful teams in IPL history, having won the championship five times. The team is led by legendary captain MS Dhoni, who is renowned for his leadership skills and finishing abilities in matches. CSK is celebrated for its loyal fan base and consistent performances thro

{'messages': [HumanMessage(content='Tell about IPL and CSK', additional_kwargs={}, response_metadata={}, id='82c4279f-5b7e-4ec0-a59a-9b6ec72a69c5'),
  AIMessage(content="The Indian Premier League (IPL) is a professional Twenty20 cricket league in India, known for its exciting matches and a mix of international and local players. The Chennai Super Kings (CSK) are one of the most successful teams in IPL history, having won the championship five times. The team is led by legendary captain MS Dhoni, who is renowned for his leadership skills and finishing abilities in matches. CSK is celebrated for its loyal fan base and consistent performances throughout the tournament's history.", additional_kwargs={}, response_metadata={}, id='cb1c644a-4b88-463b-a84c-ce2b2c408d65')],
 'documents': [Document(metadata={'source': 'teams.txt'}, page_content='The Chennai Super Kings (CSK) are one of the most successful teams in IPL history, having won the championship five times. The team is led by legendary 