In [1]:
"""
Agentic RAG (Retrieval-Augmented Generation) System
==================================================

This system demonstrates how to build an intelligent RAG agent that can:
1. Decide whether to use retrieval tools or answer directly
2. Retrieve relevant documents from a vector database
3. Grade document relevance to queries
4. Rewrite queries if documents aren't relevant
5. Generate final answers using retrieved context

The system uses LangGraph to orchestrate the workflow between different components.
"""



#### ENVIRONMENT SETUP

In [2]:
from dotenv import load_dotenv
import os
import warnings

# Load environment variables and suppress warnings for cleaner output
load_dotenv()
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
warnings.filterwarnings("ignore")

#### CORE COMPONENTS INITIALIZATION

In [3]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.tools import DuckDuckGoSearchRun

# Initialize the language model for reasoning and generation
llm = ChatOpenAI(temperature=0)  # Temperature=0 for consistent outputs

# Initialize embeddings model for vector similarity search
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Initialize web search tool for fallback when documents are irrelevant
web_search_tool = DuckDuckGoSearchRun()

print("Core LLM, embedding models, and web search tool initialized")

Core LLM, embedding models, and web search tool initialized


#### DOCUMENT LOADING & PROCESSING

In [4]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

def load_and_process_documents():
    """
    Load documents from web sources and split them into chunks for vector storage.
    
    This function demonstrates the document ingestion pipeline:
    1. Load web content using WebBaseLoader
    2. Split documents into manageable chunks
    3. Prepare documents for embedding and storage
    """
    # Define source URLs - using Lilian Weng's blog posts about AI agents
    urls = [
        "https://lilianweng.github.io/posts/2023-06-23-agent/",
        "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/"
    ]
    
    # Load documents from each URL
    docs = [WebBaseLoader(url).load() for url in urls]
    
    # Flatten the list of document lists into a single list
    docs_list = [item for sublist in docs for item in sublist]
    
    # Split documents into chunks for better retrieval
    # Using tiktoken encoder to count tokens accurately
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=100,    # Small chunks for precise retrieval
        chunk_overlap=25   # Overlap to maintain context between chunks
    )
    
    doc_splits = text_splitter.split_documents(docs_list)
    
    print(f"Loaded and split {len(doc_splits)} document chunks")
    return doc_splits


USER_AGENT environment variable not set, consider setting it to identify your requests.


#### VECTOR STORE SETUP

In [5]:
from langchain_community.vectorstores import Chroma
from langchain.tools.retriever import create_retriever_tool

def setup_vector_store_and_retriever(doc_splits):
    """
    Create a vector store from document chunks and set up retrieval tool.
    
    This demonstrates the RAG retrieval component:
    1. Create embeddings for all document chunks
    2. Store embeddings in Chroma vector database
    3. Create a retriever interface
    4. Wrap retriever in a tool for agent use
    """
    # Create vector store with embedded documents
    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name="rag-chroma",
        embedding=embeddings
    )
    
    # Create retriever interface
    retriever = vectorstore.as_retriever()
    
    # Wrap retriever in a tool that the agent can use
    retriever_tool = create_retriever_tool(
        retriever,
        "retriever_blog_post",
        """Search and return information about Lilian Weng blog posts on LLM agents, 
        prompt engineering, and adversarial attacks on LLMs. Use this tool when queries 
        relate to AI agents, planning, reflection, or prompt engineering concepts."""
    )
    
    print("Vector store and retriever tool created")
    return [retriever_tool]


#### AGENT STATE DEFINITION

In [6]:
from typing import Annotated, Sequence, TypedDict
from langchain_core.messages import BaseMessage, HumanMessage
from langgraph.graph.message import add_messages

class AgentState(TypedDict):
    """
    Defines the state structure for our agentic workflow.
    
    The state contains:
    - messages: A sequence of messages that gets updated as the agent processes
    """
    messages: Annotated[Sequence[BaseMessage], add_messages]

#### AGENT NODES (WORKFLOW COMPONENTS)

In [7]:
def llm_decision_maker(state: AgentState):
    """
    DECISION NODE: Determines whether to use tools or respond directly.
    
    This is the "brain" of the agentic system that decides:
    - Should I use the retrieval tool for this query?
    - Or can I respond directly without external knowledge?
    
    The LLM is bound with tools, allowing it to decide when to call them.
    """
    messages = state["messages"]
    
    # Bind tools to LLM so it can decide when to use them
    llm_with_tools = llm.bind_tools(tools)
    
    # Get the most recent message (user query)
    last_message = messages[-1]
    
    # Let LLM decide whether to use tools or respond directly
    response = llm_with_tools.invoke(last_message.content)
    
    return {"messages": [response]}

def grade_documents_node(state: AgentState):
    """
    EVALUATION NODE: Grades retrieved documents for relevance and stores result.
    
    This function demonstrates document relevance evaluation:
    1. Extracts the original question and retrieved documents
    2. Uses structured output to get a binary relevance score
    3. Adds the relevance score to the state for routing decisions
    """
    
    from pydantic import BaseModel, Field
    from langchain_core.prompts import PromptTemplate
    from langchain_core.messages import HumanMessage
    
    # Define structured output format for grading
    class DocumentGrade(BaseModel):
        binary_score: str = Field(description="Relevance score 'yes' or 'no'")
    
    # Create LLM with structured output
    llm_with_structure = llm.with_structured_output(DocumentGrade)
    
    # Grading prompt
    grading_prompt = PromptTemplate(
        template="""You are a grader assessing document relevance to a user question.
        
        Document: {context}
        Question: {question}
        
        If the document contains information relevant to answering the question, grade it as relevant.
        Give a binary score: 'yes' for relevant, 'no' for not relevant.""",
        input_variables=["context", "question"]
    )
    
    # Create grading chain
    grading_chain = grading_prompt | llm_with_structure
    
    # Extract question and documents from state
    messages = state['messages']
    question = messages[0].content  # Original question
    last_message = messages[-1]     # Retrieved documents
    docs = last_message.content
    
    # Grade the documents
    scored_result = grading_chain.invoke({"question": question, "context": docs})
    score = scored_result.binary_score
    
    # Add grading result to messages for routing
    grade_message = HumanMessage(content=f"GRADE_RESULT:{score}")

    return {"messages": [grade_message]}


In [8]:
def web_search_node(state: AgentState):
    """
    WEB SEARCH NODE: Searches the web when local documents are insufficient.
    
    This function provides a fallback mechanism:
    1. Takes the rewritten query from the previous step
    2. Searches the web for relevant information
    3. Returns web search results as context for answer generation
    """
    
    messages = state["messages"]
    
    # Get the most recent query (could be original or rewritten)
    query = None
    for msg in reversed(messages):
        if hasattr(msg, 'content') and not msg.content.startswith("GRADE_RESULT:"):
            # Check if this is a rewritten query or the original question
            if "improved question:" in msg.content.lower() or "reformulated" in msg.content.lower():
                # Extract the improved question from the rewrite response
                content = msg.content
                lines = content.split('\n')
                for line in lines:
                    if line.strip() and not any(skip in line.lower() for skip in ['original', 'consider', 'analyze']):
                        query = line.strip()
                        break
                break
            elif msg == messages[0]:  # Original question
                query = msg.content
                break
    
    if not query:
        query = messages[0].content  # Fallback to original question
    
    print(f"Searching for: {query}")
    
    try:
        # Perform web search
        search_results = web_search_tool.run(query)
        
        # Create a formatted response with search results
        web_context = f"Web Search Results for: {query}\n\n{search_results}"
        search_message = HumanMessage(content=web_context)
        
        print("Web search completed successfully")
        return {"messages": [search_message]}
        
    except Exception as e:
        print(f"Web search failed: {e}")
        # Return a fallback message
        fallback_message = HumanMessage(content=f"Web search unavailable. Using general knowledge to answer: {query}")
        return {"messages": [fallback_message]}

In [9]:
def route_after_grading(state: AgentState):
    """
    ROUTING FUNCTION: Routes based on document grading results.
    
    This function checks the grading result and routes accordingly:
    - If documents are relevant: go to answer generation
    - If documents are not relevant: go to query rewriting
    """
    from typing import Literal
    
    messages = state['messages']
    last_message = messages[-1]
    
    # Check if the last message contains grading result
    if hasattr(last_message, 'content') and last_message.content.startswith("GRADE_RESULT:"):
        grade = last_message.content.split(":")[1]
        if grade == "yes":
            return "answer_generator"
        else:
            return "query_rewriter"
    
    # Default routing if no grade found
    return "answer_generator"

In [10]:
def generate_answer(state: AgentState):
    """
    GENERATION NODE: Creates final answer using retrieved context or web search results.
    
    This is the final step in successful RAG:
    1. Takes the original question and available context (documents or web search)
    2. Uses a RAG prompt to generate a contextual answer
    3. Returns the generated response
    """
    
    from langchain import hub
    
    messages = state["messages"]
    question = messages[0].content    # Original question
    
    # Find the most recent context (either retrieved documents or web search results)
    context = None
    context_type = "documents"
    
    # Look for context in reverse order (most recent first)
    for msg in reversed(messages):
        if hasattr(msg, 'content') and not msg.content.startswith("GRADE_RESULT:"):
            if msg != messages[0]:  # Not the original question
                context = msg.content
                # Determine if this is web search results or document retrieval
                if "Web Search Results" in context:
                    context_type = "web_search"
                break
    
    if context is None:
        print("No context found, generating response from general knowledge")
        context = "No specific context available."

    print(f"Using {context_type} as context source")

    # Use pre-built RAG prompt from LangChain hub
    rag_prompt = hub.pull("rlm/rag-prompt")
    
    # Create RAG chain
    rag_chain = rag_prompt | llm
    
    # Generate answer using context
    response = rag_chain.invoke({"context": context, "question": question})

    print("Generated contextual answer")
    return {"messages": [response]}

In [11]:
def rewrite_query(state: AgentState):
    """
    QUERY REWRITING NODE: Improves query when documents aren't relevant.
    
    This demonstrates query optimization:
    1. Analyzes why the original query might not have retrieved good results
    2. Reformulates the query to be more specific or use different keywords
    3. Sends the improved query back for another retrieval attempt
    """
    print("Query Rewriter: Reformulating query for better retrieval...")
    
    messages = state["messages"]
    question = messages[0].content
    
    # Query rewriting prompt
    rewrite_input = [HumanMessage(content=f"""
    Analyze this question and rewrite it to be more specific and likely to retrieve relevant information:
    
    Original question: {question}
    
    Consider:
    - What are the key concepts that should be searched for?
    - Are there more specific terms or technical language that should be used?
    - How can the question be reformulated for better document matching?
    
    Provide an improved version of the question:
    """)]
    
    response = llm.invoke(rewrite_input)

    print("Query rewritten for better retrieval")
    return {"messages": [response]}


#### WORKFLOW ORCHESTRATION

In [12]:
from langgraph.graph import END, StateGraph, START
from langgraph.prebuilt import ToolNode, tools_condition

def create_agentic_rag_workflow(tools):
    """
    Creates the agentic RAG workflow using LangGraph.
    
    This function demonstrates workflow orchestration:
    1. Defines all nodes (processing steps)
    2. Sets up conditional edges for dynamic routing
    3. Creates a state machine that can handle different query types
    
    Workflow Flow:
    START → LLM Decision Maker → [Tools OR End]
    Tools → Document Grader → [Generator OR Rewriter]
    Generator → END
    Rewriter → Web Search → Generator → END
    """
    print("Building agentic RAG workflow...")
    
    # Create the workflow graph
    workflow = StateGraph(AgentState)
    
    # Add all processing nodes
    workflow.add_node("llm_decision_maker", llm_decision_maker)
    workflow.add_node("vector_retriever", ToolNode(tools))
    workflow.add_node("document_grader", grade_documents_node)
    workflow.add_node("answer_generator", generate_answer)
    workflow.add_node("query_rewriter", rewrite_query)
    workflow.add_node("web_search", web_search_node)  # New web search node
    
    # Set entry point
    workflow.add_edge(START, "llm_decision_maker")
    
    # Decision maker routes to tools OR ends directly
    workflow.add_conditional_edges(
        "llm_decision_maker",
        tools_condition,  # Built-in function that checks if tools should be called
        {
            "tools": "vector_retriever",  # Use retrieval if tools are needed
            END: END                      # End if no tools needed
        }
    )
    
    # After retrieval, grade documents for relevance
    workflow.add_edge("vector_retriever", "document_grader")
    
    # Document grader routes to generation OR query rewriting
    workflow.add_conditional_edges(
        "document_grader",
        route_after_grading,  # This function returns the routing decision
        {
            "answer_generator": "answer_generator",  # Generate if docs are relevant
            "query_rewriter": "query_rewriter"       # Rewrite if docs aren't relevant
        }
    )
    
    # Answer generator ends the workflow
    workflow.add_edge("answer_generator", END)
    
    # Query rewriter goes to web search (following the diagram)
    workflow.add_edge("query_rewriter", "web_search")
    
    # Web search results go to answer generator
    workflow.add_edge("web_search", "answer_generator")

    return workflow.compile()


#### SYSTEM INITIALIZATION

In [16]:
def initialize_agentic_rag_system():
    """
    Initialize the complete agentic RAG system.
    
    This function ties everything together:
    1. Loads and processes documents
    2. Sets up vector store and retrieval
    3. Creates the workflow
    4. Returns ready-to-use RAG agent
    """
    print("Initializing Agentic RAG System...")
    
    # Step 1: Load and process documents
    doc_splits = load_and_process_documents()
    
    # Step 2: Set up vector store and retriever
    global tools  # Make tools global so other functions can access
    tools = setup_vector_store_and_retriever(doc_splits)
    
    # Step 3: Create workflow
    app = create_agentic_rag_workflow(tools)

    return app


In [21]:
def demonstrate_system_capabilities(app):
    """
    Demonstrate different capabilities of the agentic RAG system.
    """
    
    # Test cases demonstrating different system behaviors
    test_cases = [
        {
            "name": "Complex Technical Query (Uses RAG)",
            "query": "What is LLM Powered Autonomous Agents? Explain planning and reflection in terms of agents and LangChain.",
            "expected": "Should use retrieval tool and generate contextual answer"
        },
        {
            "name": "Simple Greeting (Direct Response)",
            "query": "Hi, how are you?",
            "expected": "Should respond directly without using tools"
        },
        {
            "name": "Specific Concept Query (Uses RAG)",
            "query": "Can you explain task decomposition and why Chain of Thought (CoT) prompting enhances model performance?",
            "expected": "Should retrieve relevant documents and generate detailed answer"
        }
    ]
    
    for i, test_case in enumerate(test_cases, 1):
        print(f"\n--- Test Case {i}: {test_case['name']} ---")
        print(f"Query: {test_case['query']}")
        print(f"Expected: {test_case['expected']}")
        print("\nResponse:")
        
        try:
            result = app.invoke({"messages": [test_case['query']]})
            # Extract the final message content
            final_message = result['messages'][-1].content
            print(f"{final_message[:200]}..." if len(final_message) > 200 else f" {final_message}")
        except Exception as e:
            print(f"Error: {e}")
        

In [22]:
if __name__ == "__main__":
    # Initialize the system
    rag_agent = initialize_agentic_rag_system()
    
    # Demonstrate capabilities
    demonstrate_system_capabilities(rag_agent)

Initializing Agentic RAG System...
Loaded and split 287 document chunks
Vector store and retriever tool created
Building agentic RAG workflow...

--- Test Case 1: Complex Technical Query (Uses RAG) ---
Query: What is LLM Powered Autonomous Agents? Explain planning and reflection in terms of agents and LangChain.
Expected: Should use retrieval tool and generate contextual answer

Response:
Query Rewriter: Reformulating query for better retrieval...
Query rewritten for better retrieval
Searching for: Improved question: Can you provide an explanation of how LLM Powered Autonomous Agents utilize planning and reflection within the context of LangChain technology?
Web search completed successfully
Using web_search as context source
Generated contextual answer
LLM Powered Autonomous Agents are software applications that use large language models to perform tasks independently. Planning and reflection in terms of agents and LangChain involve leveraging natur...

--- Test Case 2: Simple Greetin