In [2]:
import os
from dotenv import load_dotenv
from typing import TypedDict, Annotated, Sequence
import operator
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langgraph.graph import StateGraph, END

In [22]:
# Read variables from a .env file into the process environment so the
# os.getenv lookups below can find the API keys.
load_dotenv()

# Initialize LLM
# One Gemini chat model instance, reused by the supervisor and by all three
# answering nodes defined further down.
# NOTE(review): temperature=0.7 also applies to the supervisor's routing call,
# so the classification may vary between runs — confirm this is intended.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash-preview-04-17",
    temperature=0.7,
    google_api_key=os.getenv("GOOGLE_API_KEY")  # None when the variable is unset
)

# Initialize Web Search
# Tavily search tool used by web_node; configured to return at most 3 results.
web_search = TavilySearchResults(
    max_results=3,
    api_key=os.getenv("TAVILY_API_KEY")  # None when the variable is unset
)


In [23]:
# Shared state passed between the graph's nodes.
class AgentState(TypedDict):
    # Running list of strings: the caller's message(s), then the supervisor's
    # topic label, then the answering node's reply. operator.add is attached
    # as the reducer, so the list a node returns is concatenated onto the
    # existing list instead of replacing it.
    messages: Annotated[Sequence[str], operator.add]

In [24]:
from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser

# Schema for the supervisor's routing decision. The parser built from it
# supplies the format instructions injected into the supervisor prompt and
# parses the model's reply into an instance of this class.
# (Documented with comments rather than a docstring so the class itself,
# and anything derived from it, stays exactly as it was.)
class TopicSelectionParser(BaseModel):
    # Category label chosen by the model; supervisor() forwards this string
    # into the state and router() inspects it.
    Topic: str = Field(description="selected Topic")
    # The model's justification; printed by supervisor() as part of the parsed
    # response but not used for routing in the visible code.
    Reasoning: str = Field(description="reasoning for selection")

# Output parser placed at the end of the supervisor's chain.
parser = PydanticOutputParser(pydantic_object=TopicSelectionParser)

In [25]:
# Read the knowledge base (usa.txt) and cut it into overlapping chunks
kb_path = "usa.txt"
with open(kb_path, "r", encoding="utf-8") as f:
    kb_text = f.read()

# chunk_size=1000 with chunk_overlap=100, as configured on the splitter
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
kb_documents = splitter.create_documents([kb_text])

# Keep only the raw text of each chunk; rag_node joins these into its prompt
kb_chunks = []
for kb_document in kb_documents:
    kb_chunks.append(kb_document.page_content)
print(f"Knowledge base loaded with {len(kb_chunks)} chunks")

Knowledge base loaded with 10 chunks


In [26]:
# Supervisor - classifies the latest message so the router can pick a node
def supervisor(state: AgentState):
    """Label the most recent message as USA, Web or General.

    Builds the classification prompt, pipes it through the shared LLM and the
    Pydantic output parser, and prints both the question and the parsed reply.

    Args:
        state: Graph state; only the last entry of ``state["messages"]`` is read.

    Returns:
        A state update ``{"messages": [topic]}`` where ``topic`` is the
        ``Topic`` field of the parsed reply.
    """
    question = state["messages"][-1]
    print("Question:", question)

    # Prompt text is sent to the model as-is, including its indentation.
    classification_template = """
    Your task is to classify the given user query into one of the following categories: [USA, Web, General]. 
    
    - USA: If the query is about the U.S. economy and can be answered from knowledge base
    - Web: If the query needs current/recent information, latest developments, or real-time data
    - General: For other general questions that don't need specific knowledge or current info
    
    User query: {question}
    {format_instructions}
    """

    # The format instructions are fixed up front; only the question varies.
    format_instructions = parser.get_format_instructions()
    routing_prompt = PromptTemplate(
        template=classification_template,
        input_variables=["question"],
        partial_variables={"format_instructions": format_instructions},
    )

    # prompt -> model -> structured TopicSelectionParser instance
    classifier = routing_prompt | llm | parser
    decision = classifier.invoke({"question": question})
    print("Parsed response:", decision)

    topic_update = [decision.Topic]
    return {"messages": topic_update}

In [27]:
# Router function
def router(state: AgentState):
    """Map the supervisor's topic label to the name of the next node.

    The last entry of ``state["messages"]`` is expected to be the label the
    supervisor just appended. It is lower-cased and split into whole words, so
    a label only matches when "usa" or "web" appears as its own word. Plain
    substring matching would also fire on words that merely contain those
    letters, such as "usage" or "website".

    Args:
        state: Graph state; only the last entry of ``state["messages"]`` is read.

    Returns:
        "RAG" if the label contains the word "usa", "WEB" if it contains the
        word "web" (USA takes precedence when both appear), otherwise "LLM".
    """
    print("->ROUTER->")
    last_message = state["messages"][-1]
    print("last_message:", last_message)

    # Replace every non-alphanumeric character with a space, then split, so
    # punctuation around the label ("USA.", "Topic: Web") does not matter.
    normalized = "".join(ch if ch.isalnum() else " " for ch in last_message.lower())
    words = set(normalized.split())

    if "usa" in words:
        return "RAG"
    if "web" in words:
        return "WEB"
    return "LLM"

In [28]:
# RAG Node - Uses knowledge base chunks
def rag_node(state: AgentState):
    """Answer the routed question with the whole knowledge base as context.

    No retrieval or ranking happens here: every entry of ``kb_chunks`` is
    numbered and placed in the prompt.

    Args:
        state: Graph state. When this node runs inside the graph, the last
            message is the supervisor's topic label and the message before it
            is the question that was classified.

    Returns:
        A state update ``{"messages": [answer]}`` with the model's reply text.
    """
    print("->RAG Call->")
    messages = state["messages"]
    # The supervisor classifies the last message and then appends its label,
    # so the routed question sits second from the end. Reading index 0 would
    # answer a different message whenever the graph is invoked with more than
    # one message. A single-entry list (node called directly) still works.
    question = messages[-2] if len(messages) > 1 else messages[0]

    # Number the chunks from 1 and join them all into a single context string.
    chunks_text = "\n\n".join(
        f"Chunk {number}: {chunk}" for number, chunk in enumerate(kb_chunks, start=1)
    )

    template = """Use the following context to answer the question. Answer in natural language paragraph format without markdown, bullets, or headings.

Context: {context}
Question: {question}

Answer:"""

    prompt = PromptTemplate(template=template, input_variables=["context", "question"])
    response = llm.invoke(prompt.format(context=chunks_text, question=question))

    return {"messages": [response.content]}

In [29]:
# LLM Node - General questions
def llm_node(state: AgentState):
    """Answer the routed question directly with the shared LLM.

    Args:
        state: Graph state. When this node runs inside the graph, the last
            message is the supervisor's topic label and the message before it
            is the question that was classified.

    Returns:
        A state update ``{"messages": [answer]}`` with the model's reply text.
    """
    print("->LLM Call->")
    messages = state["messages"]
    # The supervisor classifies the last message and then appends its label,
    # so the routed question sits second from the end. Reading index 0 would
    # answer a different message whenever the graph is invoked with more than
    # one message. A single-entry list (node called directly) still works.
    question = messages[-2] if len(messages) > 1 else messages[0]

    template = """Answer the following question in natural language paragraph format. Do not use markdown, bullet points, or headings.

Question: {question}

Answer:"""

    prompt = PromptTemplate(template=template, input_variables=["question"])
    response = llm.invoke(prompt.format(question=question))

    return {"messages": [response.content]}

In [30]:
# Web Search Node
def web_node(state: AgentState):
    """Answer the routed question from web search results.

    Runs the Tavily search tool, feeds the text of every returned hit to the
    LLM, and returns the model's answer. Any failure along the way is
    reported with a printed message and a fixed fallback reply, so the graph
    run still completes.

    Args:
        state: Graph state. When this node runs inside the graph, the last
            message is the supervisor's topic label and the message before it
            is the question that was classified.

    Returns:
        A state update ``{"messages": [answer]}``; ``answer`` is either the
        model's reply or the fallback text when search or generation failed.
    """
    print("->WEB Call->")
    messages = state["messages"]
    # The supervisor classifies the last message and then appends its label,
    # so the routed question sits second from the end. Reading index 0 would
    # answer a different message whenever the graph is invoked with more than
    # one message. A single-entry list (node called directly) still works.
    question = messages[-2] if len(messages) > 1 else messages[0]

    try:
        results = web_search.invoke({"query": question})
        if not isinstance(results, (list, tuple)):
            # Anything other than a list of hits (for example an error string
            # handed back by the tool) is treated as a failed search.
            raise TypeError(f"unexpected search payload: {results!r}")

        # Use every hit that carries text, not just the first one; the tool
        # is configured to fetch several results.
        snippets = [hit["content"] for hit in results if hit.get("content")]
        search_content = "\n\n".join(snippets) if snippets else "No search results found"

        template = """Based on the web search results, answer the question in natural language paragraph format. Do not use markdown, bullet points, or headings.

Search Results: {results}
Question: {question}

Answer:"""

        prompt = PromptTemplate(template=template, input_variables=["results", "question"])
        response = llm.invoke(prompt.format(results=search_content, question=question))

        return {"messages": [response.content]}
    except Exception as e:
        # Deliberate best-effort: report the problem and return a fallback
        # answer instead of aborting the whole graph run.
        print(f"Web search failed: {e}")
        return {"messages": ["Unable to perform web search at this time."]}

In [31]:
# Graph builder whose shared state schema is AgentState.
workflow = StateGraph(AgentState)

In [32]:
# Register the classifier function under the node name "supervisor".
workflow.add_node("supervisor", supervisor)

<langgraph.graph.state.StateGraph at 0x15afb88d000>

In [33]:

# Register the three answering nodes; their names are the same strings that
# router() returns.
workflow.add_node("RAG", rag_node)
workflow.add_node("LLM", llm_node)
workflow.add_node("WEB", web_node)

<langgraph.graph.state.StateGraph at 0x15afb88d000>

In [34]:
# Every run of the graph starts at the supervisor node.
workflow.set_entry_point("supervisor")

<langgraph.graph.state.StateGraph at 0x15afb88d000>

In [35]:
# After the supervisor node, router() is called and its return value is looked
# up in the mapping below to choose the next node. Keys are router()'s return
# values; values are the node names registered with add_node.
workflow.add_conditional_edges(
    "supervisor",
    router,
    {
        "RAG": "RAG",
        "LLM": "LLM", 
        "WEB": "WEB"
    }
)


<langgraph.graph.state.StateGraph at 0x15afb88d000>

In [36]:
# Each answering node leads straight to END; nothing loops back to the
# supervisor.
workflow.add_edge("RAG", END)
workflow.add_edge("LLM", END)
workflow.add_edge("WEB", END)

<langgraph.graph.state.StateGraph at 0x15afb88d000>

In [37]:
# Compile the graph definition into `app`, the object invoked by the demo
# cells that follow.
app = workflow.compile()

In [38]:
# Demo 1: a U.S.-economy question. The recorded output below shows the
# supervisor labelling it "USA" and the router sending it to the RAG node.
state1 = {"messages": ["What are the key strengths of the U.S. economy?"]}
result1 = app.invoke(state1)
# The final entry of the message list is the answering node's reply.
result1["messages"][-1]

Question: What are the key strengths of the U.S. economy?
Parsed response: Topic='USA' Reasoning='The query is specifically about the U.S. economy and asks for its key strengths, which is information typically available in a knowledge base about the U.S. economy. It does not require current or real-time data.'
->ROUTER->
last_message: USA
->RAG Call->
Parsed response: Topic='USA' Reasoning='The query is specifically about the U.S. economy and asks for its key strengths, which is information typically available in a knowledge base about the U.S. economy. It does not require current or real-time data.'
->ROUTER->
last_message: USA
->RAG Call->


"The U.S. economy possesses several key strengths that solidify its position as the world's largest and most powerful. These include a highly flexible labor market, deep and liquid capital markets, and technological superiority underpinned by massive investment in research and development and top-tier universities. A legal system that strongly encourages innovation and protects property rights further supports this dynamic. The U.S. benefits from a large, highly educated workforce, access to vast natural resources, and a diverse immigration pipeline. It is a global innovation hub, home to many of the world's leading companies and a vibrant startup ecosystem, particularly strong in sectors like technology, software, biotechnology, and financial services. The U.S. dollar's status as the global reserve currency provides significant advantages, while robust consumer spending acts as a major driver of economic stability. The economy is also highly diversified across numerous strong sectors 

In [39]:
# Demo 2: a question about recent events. The recorded output below shows the
# supervisor labelling it "Web" and the router sending it to the WEB node.
state2 = {"messages": ["What are the latest AI developments in 2025?"]}
result2 = app.invoke(state2)
# The final entry of the message list is the answering node's reply.
result2["messages"][-1]

Question: What are the latest AI developments in 2025?
Parsed response: Topic='Web' Reasoning="The query asks for the 'latest' developments in AI for the year '2025'. This requires access to current information, recent research, or future predictions, which necessitates searching the web for up-to-date data."
->ROUTER->
last_message: Web
->WEB Call->
Parsed response: Topic='Web' Reasoning="The query asks for the 'latest' developments in AI for the year '2025'. This requires access to current information, recent research, or future predictions, which necessitates searching the web for up-to-date data."
->ROUTER->
last_message: Web
->WEB Call->


"Based on reports from 2025, significant developments in AI include Google's announcement of Gemini 2.0, described as their most capable model to date, featuring agentic capabilities for various users. Google also introduced new state-of-the-art AI video and image generation models named Veo 2 and Imagen 3. Concurrently, analysis highlights India and China as key AI innovation hubs, noting an intensifying divide between open-source and closed AI ecosystems and suggesting that collaboration and transparency will influence global AI leadership. China is actively accelerating its AI development, partly to counter trade restrictions, by rolling out models like Manus and DeepSeek to rival top US models, increasing collaboration between government, private sectors, and universities, reducing dependence on imported technology, and establishing a National Computing Power Grid to support these advancements."

In [40]:
# Demo 3: a general-knowledge question. The recorded output below shows the
# supervisor labelling it "General" and the router sending it to the LLM node.
state3 = {"messages": ["Explain how photosynthesis works"]}
result3 = app.invoke(state3)
# The final entry of the message list is the answering node's reply.
result3["messages"][-1]

Question: Explain how photosynthesis works
Parsed response: Topic='General' Reasoning='The query asks for an explanation of a fundamental biological process (photosynthesis), which falls under general scientific knowledge and does not require specific U.S. economy data or current information from the web.'
->ROUTER->
last_message: General
->LLM Call->
Parsed response: Topic='General' Reasoning='The query asks for an explanation of a fundamental biological process (photosynthesis), which falls under general scientific knowledge and does not require specific U.S. economy data or current information from the web.'
->ROUTER->
last_message: General
->LLM Call->


"Photosynthesis is the vital process by which plants, algae, and some bacteria convert light energy into chemical energy, essentially creating their own food. This complex process primarily takes place in the chloroplasts, organelles found mainly in the leaves of plants, which contain a green pigment called chlorophyll. The plant takes in three key ingredients from its environment: carbon dioxide from the air, which enters through small pores on the leaves called stomata; water from the soil, absorbed by the roots and transported up to the leaves; and light energy from the sun. Inside the chloroplasts, chlorophyll captures this light energy. Photosynthesis involves two main stages. In the first stage, the light-dependent reactions, light energy is used to split water molecules, producing oxygen as a byproduct and generating energy-carrying molecules. In the second stage, known as the light-independent reactions or Calvin cycle, the energy from the first stage is used to convert carbon 