### Config the model

In [7]:
!pip install langgraph



In [8]:

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
import operator
from typing import List
from pydantic import BaseModel , Field
from langchain.prompts import PromptTemplate
from typing import TypedDict, Annotated, Sequence
from langchain_core.messages import BaseMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import HumanMessage, AIMessage
from langgraph.graph import StateGraph,END

In [9]:
import os

# Set your OpenAI API key here if not already set
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") 
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [10]:
def load_model(model_name):
    if model_name == "gpt-4o":
        return ChatOpenAI(model=model_name, temperature=0)
    elif model_name == "text-embedding-3-small":
        return  OpenAIEmbeddings(model= model_name, dimensions=512)
    else:
        raise ValueError(f"Unsupported model name: {model_name}")

In [11]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

model=load_model("gpt-4o")
embedding_model=load_model("text-embedding-3-small")

# Parsing the PDF Document for the RAG

In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
import os

def load_and_split_text(file_path, chunk_size=1000, chunk_overlap=100):
    """
    Load a text file and split it into chunks for RAG processing
    
    Args:
        file_path (str): Path to the text file
        chunk_size (int): Size of each text chunk
        chunk_overlap (int): Overlap between chunks
    
    Returns:
        list: List of document chunks
    """
    # Load the text file
    loader = TextLoader(file_path)
    documents = loader.load()
    
    # Initialize the text splitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        separators=["\n\n", "\n", " ", ""]  # Try to split on paragraphs first, then sentences, then words
    )
    
    # Split the documents into chunks
    chunks = text_splitter.split_documents(documents)
    
    print(f"Loaded {len(documents)} document(s)")
    print(f"Split into {len(chunks)} chunks")
    
    return chunks

# Load and split your dividend.txt file
file_path = "/home/ashok/Agentic_RAG/data/dividend.txt"
document_chunks = load_and_split_text(file_path, chunk_size=1000, chunk_overlap=200)

Loaded 1 document(s)
Split into 3 chunks


# DATA EXTRACTION

In [13]:
def create_vector_database(chunks, embedding_model):
    """
    Create a vector database from document chunks
    
    Args:
        chunks (list): List of document chunks
        embedding_model: The embedding model to use
    
    Returns:
        FAISS: Vector database
    """
    # Create FAISS vector store from document chunks
    vector_db = FAISS.from_documents(chunks, embedding_model)
    
    print(f"Created vector database with {len(chunks)} chunks")
    return vector_db

# Create vector database from the chunks
vector_db = create_vector_database(document_chunks, embedding_model)

Created vector database with 3 chunks


In [14]:
def retrieve_relevant_chunks(query, vector_db, k=3):
    """
    Retrieve relevant document chunks based on a query
    
    Args:
        query (str): The query string
        vector_db: The vector database
        k (int): Number of relevant chunks to retrieve
    
    Returns:
        list: List of relevant document chunks
    """
    # Perform similarity search
    relevant_docs = vector_db.similarity_search(query, k=k)
    
    print(f"Retrieved {len(relevant_docs)} relevant chunks for query: '{query}'")
    
    return relevant_docs

# Example usage
query = "What is dividend yield of Natco pharma on february month?"
relevant_chunks = retrieve_relevant_chunks(query, vector_db, k=3)

Retrieved 3 relevant chunks for query: 'What is dividend yield of Natco pharma on february month?'


In [15]:
relevant_chunks


[Document(id='4b4988d8-a180-4942-a97b-df3c7ed85fc1', metadata={'source': '/home/ashok/Agentic_RAG/data/dividend.txt'}, page_content='1. Natco Pharma will pay a dividend of 15 on February 26, 2024.\n2. Fineotex Chem will pay a dividend of 138 on February 26, 2024.\n3. Hero Motocorp will pay a dividend of 225 on February 21, 2024.\n4. Hero Motocorp will pay another dividend of 675 on February 21, 2024.\n5. Apollo Hospitals will pay a dividend of 54 on February 20, 2024.\n6. Sundaram Finance will pay a dividend of 84 on February 16, 2024.\n7. Manappuram Finance will pay a dividend of 16.2 on February 16, 2024.\n8. Gulf Oil Lubricants will pay a dividend of 384 on February 13, 2024.\n9. Symphony will pay a dividend of 2 on February 7, 2024.\n10. Shriram Finance will pay a dividend of 100 on February 6, 2024.\n11. Goa Carbon will pay a dividend of 100 on January 29, 2024.\n12. Natco Pharma paid a dividend of 15 on November 24, 2023.\n13. Manappuram Finance paid a dividend of 15.3 on Novembe

In [16]:
# Install required packages for enhanced LangGraph with tools
!pip install requests beautifulsoup4 duckduckgo-search langchain-community langgraph langchain-core



# Corrective RAG Agent System with LangGraph StateGraph

This section implements a comprehensive Corrective RAG agent system with:
- **Retriever Node**: Retrieves relevant information from documents
- **Grade/Evaluator Node**: Evaluate the relevancy of query and retrieved docs
- **Query Rewriter Node**: Rewrites the user query
- **Web Search Node**: Fetches real-time information from internet
- **Generate Node**: generates the final output from retrieved docs

![Alt text](agentic_rag_image.png)

In [17]:
# Install TavilySearchResults for web search
!pip install tavily-python langchain-community



In [18]:
# Imports for Corrective RAG Agent System
import os
import operator
from typing import List, Dict, Any, TypedDict, Annotated, Sequence
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langgraph.graph import StateGraph, END
from langchain_community.tools.tavily_search import TavilySearchResults

# Set Tavily API Key if needed
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
if not TAVILY_API_KEY:
    print("  Note: Set TAVILY_API_KEY environment variable for web search functionality")
    print("You can get a free API key from https://tavily.com")

print(" Corrective RAG imports completed!")

 Corrective RAG imports completed!


In [19]:
# Define the Corrective RAG State with Threshold Support
class CorrectiveRAGState(TypedDict):
    """
    State for the Corrective RAG Agent System with 70% threshold support
    """
    # Input
    question: str
    
    # Retrieved documents
    documents: List[Document]
    
    # Document grading with threshold
    documents_relevant: bool
    grade_scores: List[str]  # "relevant" or "not relevant" for each doc
    relevancy_percentage: float  # Percentage of relevant documents
    threshold_met: bool  # Whether 70% threshold is met
    
    # Query processing
    rewritten_question: str
    
    # Web search
    web_search_needed: bool
    web_documents: List[Document]
    
    # Generation
    generation: str
    
    # System tracking
    current_step: str
    retry_count: int
    
    # Message history
    messages: Annotated[Sequence[BaseMessage], operator.add]

print(" Corrective RAG State defined successfully!")

 Corrective RAG State defined successfully!


In [20]:
# 1. RETRIEVER NODE
def retriever_node(state: CorrectiveRAGState) -> CorrectiveRAGState:
    """
    Retrieves relevant documents from the vector database based on the question
    """
    question = state["question"]
    
    try:
        print(f" Retrieving documents for: {question}")
        
        # Use the existing vector database to retrieve relevant documents
        relevant_docs = vector_db.similarity_search(question, k=4)
        
        print(f" Retrieved {len(relevant_docs)} documents")
        
        return {
            **state,
            "documents": relevant_docs,
            "current_step": "retrieval_complete",
            "messages": state["messages"] + [AIMessage(content=f"Retrieved {len(relevant_docs)} relevant documents")]
        }
        
    except Exception as e:
        print(f" Error in retriever node: {e}")
        return {
            **state,
            "documents": [],
            "current_step": "retrieval_failed",
            "messages": state["messages"] + [AIMessage(content=f"Retrieval error: {e}")]
        }

print(" Retriever Node implemented!")

 Retriever Node implemented!


In [21]:
# 2. GRADE/EVALUATOR NODE WITH THRESHOLD
def grader_node(state: CorrectiveRAGState) -> CorrectiveRAGState:
    """
    Evaluates the relevancy of retrieved documents to the user question with 70% threshold
    """
    question = state["question"]
    documents = state["documents"]
    
    try:
        print(f"📊 Grading {len(documents)} documents for relevancy")
        
        grade_scores = []
        relevant_docs = []
        
        # Create grading prompt
        grading_prompt = ChatPromptTemplate.from_template("""
        You are a grader assessing relevance of retrieved documents to a user question.
        
        Retrieved document: {document}
        
        User question: {question}
        
        If the document contains information relevant to the user question, grade it as "relevant".
        If the document does not contain information relevant to the user question, grade it as "not relevant".
        
        Give a binary score 'relevant' or 'not relevant' to indicate whether the document is relevant to the question.
        
        Provide the grade as a single word: relevant or not relevant
        """)
        
        grading_chain = grading_prompt | model | StrOutputParser()
        
        for doc in documents:
            # Grade each document
            grade = grading_chain.invoke({
                "document": doc.page_content,
                "question": question
            }).strip().lower()
            
            # Normalize the grade
            if "relevant" in grade and "not relevant" not in grade:
                grade_scores.append("relevant")
                relevant_docs.append(doc)
            else:
                grade_scores.append("not relevant")
        
        # Calculate relevancy percentage
        relevant_count = sum(1 for score in grade_scores if score == "relevant")
        total_documents = len(documents)
        relevancy_percentage = (relevant_count / total_documents) * 100 if total_documents > 0 else 0
        
        # Apply 70% threshold rule
        threshold_met = relevancy_percentage >= 70.0
        
        print(f" Grading complete: {relevant_count}/{len(documents)} documents are relevant ({relevancy_percentage:.1f}%)")
        print(f" Threshold (70%): {' MET' if threshold_met else ' NOT MET'}")
        
        return {
            **state,
            "documents": relevant_docs if threshold_met else documents,  # Keep only relevant if threshold met
            "documents_relevant": threshold_met,
            "grade_scores": grade_scores,
            "relevancy_percentage": relevancy_percentage,
            "threshold_met": threshold_met,
            "current_step": "grading_complete",
            "messages": state["messages"] + [AIMessage(content=f"Document grading: {relevancy_percentage:.1f}% relevant ({relevant_count}/{len(documents)}), Threshold: {'Met' if threshold_met else 'Not Met'}")]
        }
        
    except Exception as e:
        print(f" Error in grader node: {e}")
        return {
            **state,
            "documents_relevant": False,
            "grade_scores": ["not relevant"] * len(documents),
            "relevancy_percentage": 0.0,
            "threshold_met": False,
            "current_step": "grading_failed",
            "messages": state["messages"] + [AIMessage(content=f"Grading error: {e}")]
        }

print(" Grader/Evaluator Node implemented!")

 Grader/Evaluator Node implemented!


In [22]:
# 3. QUERY REWRITER NODE
def query_rewriter_node(state: CorrectiveRAGState) -> CorrectiveRAGState:
    """
    Rewrites the user query to improve retrieval results
    """
    question = state["question"]
    
    try:
        print(f" Rewriting query: {question}")
        
        # Create query rewriting prompt
        rewriting_prompt = ChatPromptTemplate.from_template("""
        You are a query rewriter. Your task is to rewrite the user's question to improve document retrieval.
        
        Original question: {question}
        
        Rewrite this question to be more specific, clear, and likely to retrieve relevant documents.
        Add synonyms, expand abbreviations, and make the intent clearer.
        
        Improved question:
        """)
        
        rewriting_chain = rewriting_prompt | model | StrOutputParser()
        
        rewritten_question = rewriting_chain.invoke({"question": question}).strip()
        
        print(f" Query rewritten to: {rewritten_question}")
        
        return {
            **state,
            "rewritten_question": rewritten_question,
            "current_step": "query_rewritten",
            "messages": state["messages"] + [AIMessage(content=f"Query rewritten: {rewritten_question}")]
        }
        
    except Exception as e:
        print(f" Error in query rewriter node: {e}")
        return {
            **state,
            "rewritten_question": question,  # Fallback to original question
            "current_step": "rewriting_failed",
            "messages": state["messages"] + [AIMessage(content=f"Query rewriting error: {e}")]
        }

print(" Query Rewriter Node implemented!")

 Query Rewriter Node implemented!


In [23]:
# 4. WEB SEARCH NODE
def web_search_node(state: CorrectiveRAGState) -> CorrectiveRAGState:
    """
    Performs web search using TavilySearchResults to find additional relevant information
    """
    question = state.get("rewritten_question", state["question"])
    
    try:
        print(f"🌐 Performing web search for: {question}")
        
        # Initialize Tavily search (fallback if API key not available)
        if TAVILY_API_KEY:
            tavily_search = TavilySearchResults(
                max_results=3,
                search_depth="basic",
                include_answer=True,
                include_raw_content=True
            )
            
            # Perform web search
            search_results = tavily_search.invoke({"query": question})
            
            # Convert search results to Document objects
            web_documents = []
            for result in search_results:
                if isinstance(result, dict):
                    content = result.get("content", "")
                    if content:
                        web_documents.append(Document(
                            page_content=content,
                            metadata={
                                "source": result.get("url", "web_search"),
                                "title": result.get("title", "Web Search Result"),
                                "search_type": "tavily"
                            }
                        ))
            
            print(f" Found {len(web_documents)} web search results")
            
        else:
            # Fallback: Create mock web search results when Tavily API not available
            print("  Tavily API key not found. Using fallback web search simulation.")
            web_documents = [
                Document(
                    page_content=f"Web search result for '{question}'. This is a simulated result when Tavily API is not available.",
                    metadata={"source": "fallback_search", "title": "Simulated Web Result", "search_type": "fallback"}
                )
            ]
        
        return {
            **state,
            "web_documents": web_documents,
            "web_search_needed": False,
            "current_step": "web_search_complete",
            "messages": state["messages"] + [AIMessage(content=f"Web search completed: {len(web_documents)} results")]
        }
        
    except Exception as e:
        print(f" Error in web search node: {e}")
        return {
            **state,
            "web_documents": [],
            "web_search_needed": False,
            "current_step": "web_search_failed",
            "messages": state["messages"] + [AIMessage(content=f"Web search error: {e}")]
        }

print(" Web Search Node implemented!")

 Web Search Node implemented!


In [24]:
# 5. GENERATE NODE WITH COMBINED INFORMATION
def generate_node(state: CorrectiveRAGState) -> CorrectiveRAGState:
    """
    Generates the final answer using retrieved documents and/or web search results
    Now combines RAG documents with web search information when threshold < 70%
    """
    question = state["question"]
    documents = state.get("documents", [])
    web_documents = state.get("web_documents", [])
    threshold_met = state.get("threshold_met", False)
    relevancy_percentage = state.get("relevancy_percentage", 0.0)
    
    try:
        print(f" Generating answer for: {question}")
        print(f" Using: {len(documents)} RAG docs + {len(web_documents)} web docs")
        
        # Combine all available documents
        all_documents = documents + web_documents
        
        if not all_documents:
            # No documents available
            generation = "I apologize, but I couldn't find relevant information to answer your question. Please try rephrasing your question or provide more context."
        else:
            # Create context from all documents
            rag_context = ""
            web_context = ""
            
            # Separate RAG and web content for better organization
            if documents:
                rag_context = "\n\n".join([
                    f"RAG Source {i+1}: {doc.page_content}" 
                    for i, doc in enumerate(documents[:3])  # Limit RAG sources
                ])
            
            if web_documents:
                web_context = "\n\n".join([
                    f"Web Source {i+1}: {doc.page_content}" 
                    for i, doc in enumerate(web_documents[:3])  # Limit web sources
                ])
            
            # Combine contexts
            combined_context = ""
            if rag_context:
                combined_context += f"INTERNAL KNOWLEDGE:\n{rag_context}\n\n"
            if web_context:
                combined_context += f"EXTERNAL KNOWLEDGE:\n{web_context}"
            
            # Create generation prompt with threshold information
            generation_prompt = ChatPromptTemplate.from_template("""
            You are an AI assistant that provides accurate and helpful answers based on the given context.
            
            Context information:
            {context}
            
            Question: {question}
            
            Analysis Summary:
            - Relevancy Score: {relevancy_percentage:.1f}%
            - Threshold Status: {threshold_status}
            - Information Sources: {source_info}
            
            Instructions:
            1. Use both internal knowledge (RAG) and external knowledge (web search) to provide a comprehensive answer
            2. Prioritize information from internal knowledge if it's highly relevant
            3. Use external knowledge to supplement or provide additional context
            4. If combining multiple sources, clearly synthesize the information
            5. Be specific and cite information from the context when possible
            6. Provide a clear, well-structured response
            
            Answer:
            """)
            
            generation_chain = generation_prompt | model | StrOutputParser()
            
            # Determine source information
            source_info = []
            if documents:
                source_info.append(f"{len(documents)} internal documents")
            if web_documents:
                source_info.append(f"{len(web_documents)} web sources")
            
            generation = generation_chain.invoke({
                "context": combined_context,
                "question": question,
                "relevancy_percentage": relevancy_percentage,
                "threshold_status": "Met (≥70%)" if threshold_met else "Not Met (<70%)",
                "source_info": " + ".join(source_info) if source_info else "No sources"
            }).strip()
        
        print(f" Generated answer ({len(generation)} characters)")
        
        return {
            **state,
            "generation": generation,
            "current_step": "generation_complete",
            "messages": state["messages"] + [AIMessage(content=generation)]
        }
        
    except Exception as e:
        print(f" Error in generate node: {e}")
        return {
            **state,
            "generation": f"Error generating response: {e}",
            "current_step": "generation_failed",
            "messages": state["messages"] + [AIMessage(content=f"Generation error: {e}")]
        }

print(" Generate Node implemented!")

 Generate Node implemented!


In [25]:
# 6. ROUTING FUNCTIONS WITH THRESHOLD LOGIC
def decide_to_grade_or_rewrite(state: CorrectiveRAGState) -> str:
    """
    Determines whether to grade documents or rewrite query after retrieval
    """
    documents = state.get("documents", [])
    
    if len(documents) == 0:
        print(" No documents retrieved, rewriting query")
        return "rewrite_query"
    else:
        print(" Documents found, proceeding to grade")
        return "grade_documents"

def decide_to_generate_or_search(state: CorrectiveRAGState) -> str:
    """
    Determines next step based on 70% threshold:
    - If threshold >= 70%: Go directly to generate
    - If threshold < 70%: Go to rewrite query, then web search, then generate
    """
    threshold_met = state.get("threshold_met", False)
    relevancy_percentage = state.get("relevancy_percentage", 0.0)
    retry_count = state.get("retry_count", 0)
    
    if threshold_met:
        print(f" Threshold met ({relevancy_percentage:.1f}% >= 70%), generating answer directly")
        return "generate"
    else:
        print(f"  Threshold not met ({relevancy_percentage:.1f}% < 70%), proceeding to query rewrite and web search")
        return "rewrite_query"

def decide_after_rewrite(state: CorrectiveRAGState) -> str:
    """
    After rewriting query, always go to web search to gather additional information
    """
    print(" Query rewritten, performing web search for additional information")
    return "web_search"

def decide_after_web_search(state: CorrectiveRAGState) -> str:
    """
    After web search, always go to generate with combined information
    """
    print(" Web search completed, combining with RAG documents for final generation")
    return "generate"

print(" Routing functions with threshold logic implemented!")

 Routing functions with threshold logic implemented!


In [26]:
# 7. CREATE ENHANCED CORRECTIVE RAG STATEGRAPH WITH THRESHOLD
def create_corrective_rag_graph():
    """
    Creates the Enhanced Corrective RAG Agent System with 70% threshold logic
    
    Workflow:
    1. Retrieve documents from RAG
    2. Grade documents for relevancy
    3. If relevancy >= 70%: Go directly to Generate
    4. If relevancy < 70%: Rewrite query → Web search → Generate (with combined info)
    """
    # Initialize the StateGraph
    workflow = StateGraph(CorrectiveRAGState)
    
    # Add nodes to the graph
    workflow.add_node("retrieve", retriever_node)
    workflow.add_node("grade_documents", grader_node)
    workflow.add_node("rewrite_query", query_rewriter_node)
    workflow.add_node("web_search", web_search_node)
    workflow.add_node("generate", generate_node)
    
    # Set entry point
    workflow.set_entry_point("retrieve")
    
    # Add conditional edges for retrieval routing
    workflow.add_conditional_edges(
        "retrieve",
        decide_to_grade_or_rewrite,
        {
            "grade_documents": "grade_documents",
            "rewrite_query": "rewrite_query"
        }
    )
    
    # Add conditional edges for grading routing (threshold-based)
    workflow.add_conditional_edges(
        "grade_documents", 
        decide_to_generate_or_search,
        {
            "generate": "generate",           # If threshold >= 70%
            "rewrite_query": "rewrite_query"  # If threshold < 70%
        }
    )
    
    # Add conditional edges for rewrite routing (always to web search)
    workflow.add_conditional_edges(
        "rewrite_query",
        decide_after_rewrite,
        {
            "web_search": "web_search"
        }
    )
    
    # Add edges for web search and generation
    workflow.add_edge("web_search", "generate")
    workflow.add_edge("generate", END)
    
    # Compile the graph
    app = workflow.compile()
    
    return app

# Create the Enhanced Corrective RAG system
corrective_rag_app = create_corrective_rag_graph()

print(" Enhanced Corrective RAG Agent System with 70% threshold created successfully!")
print(" New Workflow:")
print("    If relevancy >= 70%: Retrieve → Grade → Generate")
print("    If relevancy < 70%: Retrieve → Grade → Rewrite → Web Search → Generate")

 Enhanced Corrective RAG Agent System with 70% threshold created successfully!
 New Workflow:
    If relevancy >= 70%: Retrieve → Grade → Generate
    If relevancy < 70%: Retrieve → Grade → Rewrite → Web Search → Generate


In [27]:
# 8. ENHANCED RETRIEVER WITH RETRY LOGIC (Updated for Threshold System)
def enhanced_retriever_node(state: CorrectiveRAGState) -> CorrectiveRAGState:
    """
    Enhanced retriever that uses rewritten query and handles retries
    """
    # Use rewritten question if available, otherwise use original
    question = state.get("rewritten_question", state["question"])
    retry_count = state.get("retry_count", 0)
    
    try:
        print(f" Enhanced retrieval (attempt {retry_count + 1}) for: {question}")
        
        # Retrieve documents using the (possibly rewritten) question
        relevant_docs = vector_db.similarity_search(question, k=5)  # Increased to 5 for better threshold calculation
        
        print(f" Retrieved {len(relevant_docs)} documents")
        
        return {
            **state,
            "documents": relevant_docs,
            "retry_count": retry_count + 1,
            "current_step": "retrieval_complete",
            "messages": state["messages"] + [AIMessage(content=f"Enhanced retrieval: {len(relevant_docs)} documents")]
        }
        
    except Exception as e:
        print(f" Error in enhanced retriever: {e}")
        return {
            **state,
            "documents": [],
            "retry_count": retry_count + 1,
            "current_step": "retrieval_failed",
            "messages": state["messages"] + [AIMessage(content=f"Enhanced retrieval error: {e}")]
        }

# Update the graph with enhanced retriever and threshold logic
def create_enhanced_corrective_rag_graph():
    """
    Creates enhanced Corrective RAG system with 70% threshold and better document handling
    """
    workflow = StateGraph(CorrectiveRAGState)
    
    # Add nodes
    workflow.add_node("retrieve", enhanced_retriever_node)
    workflow.add_node("grade_documents", grader_node)
    workflow.add_node("rewrite_query", query_rewriter_node)
    workflow.add_node("web_search", web_search_node)
    workflow.add_node("generate", generate_node)
    
    # Set entry point
    workflow.set_entry_point("retrieve")
    
    # Add conditional edges with threshold logic
    workflow.add_conditional_edges(
        "retrieve",
        decide_to_grade_or_rewrite,
        {
            "grade_documents": "grade_documents",
            "rewrite_query": "rewrite_query"
        }
    )
    
    workflow.add_conditional_edges(
        "grade_documents", 
        decide_to_generate_or_search,
        {
            "generate": "generate",           # Threshold >= 70%
            "rewrite_query": "rewrite_query"  # Threshold < 70%
        }
    )
    
    workflow.add_conditional_edges(
        "rewrite_query",
        decide_after_rewrite,
        {
            "web_search": "web_search"
        }
    )
    
    workflow.add_edge("web_search", "generate")
    workflow.add_edge("generate", END)
    
    return workflow.compile()

# Create enhanced system with threshold
enhanced_corrective_rag_app = create_enhanced_corrective_rag_graph()

print(" Enhanced Corrective RAG Agent System with 70% threshold ready!")
print(" Threshold Logic: >= 70% → Direct Generation | < 70% → Query Rewrite + Web Search")

 Enhanced Corrective RAG Agent System with 70% threshold ready!
 Threshold Logic: >= 70% → Direct Generation | < 70% → Query Rewrite + Web Search


In [28]:
# 9. UPDATED TEST FUNCTIONS FOR THRESHOLD-BASED CORRECTIVE RAG
def run_corrective_rag(question: str):
    """
    Run the Enhanced Corrective RAG Agent System with 70% threshold logic
    """
    print(f"\n{'='*80}")
    print(f" ENHANCED CORRECTIVE RAG AGENT SYSTEM (70% Threshold)")
    print(f"Question: {question}")
    print(f"{'='*80}\n")
    
    # Initialize state with threshold support
    initial_state = {
        "question": question,
        "documents": [],
        "documents_relevant": False,
        "grade_scores": [],
        "relevancy_percentage": 0.0,
        "threshold_met": False,
        "rewritten_question": "",
        "web_search_needed": False,
        "web_documents": [],
        "generation": "",
        "current_step": "start",
        "retry_count": 0,
        "messages": [HumanMessage(content=question)]
    }
    
    try:
        # Run the Enhanced Corrective RAG system
        final_state = enhanced_corrective_rag_app.invoke(initial_state)
        
        print(f"\n{'='*80}")
        print(" ENHANCED CORRECTIVE RAG EXECUTION COMPLETED")
        print(f"{'='*80}")
        
        # Display results with threshold information
        print(f"\n FINAL ANSWER:")
        print("-" * 50)
        print(final_state.get("generation", "No answer generated"))
        
        print(f"\n SYSTEM STATISTICS:")
        print(f"   • Documents retrieved: {len(final_state.get('documents', []))}")
        print(f"   • Relevancy percentage: {final_state.get('relevancy_percentage', 0.0):.1f}%")
        print(f"   • Threshold (70%) status: {' MET' if final_state.get('threshold_met', False) else ' NOT MET'}")
        print(f"   • Relevant documents: {sum(1 for score in final_state.get('grade_scores', []) if score == 'relevant')}")
        print(f"   • Web search performed: {'Yes' if final_state.get('web_documents') else 'No'}")
        print(f"   • Web documents found: {len(final_state.get('web_documents', []))}")
        print(f"   • Query rewrites: {final_state.get('retry_count', 0)}")
        print(f"   • Final step: {final_state.get('current_step', 'unknown')}")
        
        # Show workflow path taken
        workflow_path = "Retrieve → Grade → "
        if final_state.get('threshold_met', False):
            workflow_path += "Generate (Direct - Threshold Met)"
        else:
            workflow_path += "Rewrite → Web Search → Generate (Combined)"
        print(f"   • Workflow path: {workflow_path}")
        
        return final_state
        
    except Exception as e:
        print(f"\n Error running Enhanced Corrective RAG: {e}")
        import traceback
        traceback.print_exc()
        return None


print(" Enhanced Corrective RAG test functions with threshold logic ready!")
print(" Use run_corrective_rag('your question') to test individual queries")
print(" Use test_threshold_corrective_rag_system() to run comprehensive threshold tests")

 Enhanced Corrective RAG test functions with threshold logic ready!
 Use run_corrective_rag('your question') to test individual queries
 Use test_threshold_corrective_rag_system() to run comprehensive threshold tests


In [29]:
# 10. QUICK TEST - Demonstrating Threshold-Based Routing
print(" Testing Enhanced Corrective RAG with 70% threshold logic...")
print("\n" + "="*60)

print("\n TEST 1: High relevancy question (should meet 70% threshold)")
print("-" * 60)
# Test with a question about dividends (should find relevant docs and meet threshold)
result1 = run_corrective_rag("What is the dividend amount Natco pharma will pay and i want to know a brief description about natco pharma company?")


 Testing Enhanced Corrective RAG with 70% threshold logic...


 TEST 1: High relevancy question (should meet 70% threshold)
------------------------------------------------------------

 ENHANCED CORRECTIVE RAG AGENT SYSTEM (70% Threshold)
Question: What is the dividend amount Natco pharma will pay and i want to know a brief description about natco pharma company?

 Enhanced retrieval (attempt 1) for: 
 Retrieved 3 documents
 Documents found, proceeding to grade
📊 Grading 3 documents for relevancy
 Grading complete: 1/3 documents are relevant (33.3%)
 Threshold (70%):  NOT MET
  Threshold not met (33.3% < 70%), proceeding to query rewrite and web search
 Rewriting query: What is the dividend amount Natco pharma will pay and i want to know a brief description about natco pharma company?
 Query rewritten to: What is the dividend payment amount announced by Natco Pharma Limited, and can you provide a brief overview or description of Natco Pharma as a pharmaceutical company?
 Query rewritt

  tavily_search = TavilySearchResults(


 Found 3 web search results
 Generating answer for: What is the dividend amount Natco pharma will pay and i want to know a brief description about natco pharma company?
 Using: 3 RAG docs + 3 web docs
 Generated answer (1032 characters)

 ENHANCED CORRECTIVE RAG EXECUTION COMPLETED

 FINAL ANSWER:
--------------------------------------------------
Natco Pharma is scheduled to pay a dividend of ₹15 on February 26, 2024, as per the internal knowledge provided (RAG Source 2). This information is consistent with the internal data, which should be prioritized for accuracy.

Regarding a brief description of Natco Pharma, it is a pharmaceutical company based in India, known for its focus on the development, manufacture, and marketing of finished dosage formulations and active pharmaceutical ingredients. The company has a strong presence in the domestic market and also exports its products to various countries. Natco Pharma is recognized for its work in the oncology segment and has been involv

In [30]:
result1

{'question': 'What is the dividend amount Natco pharma will pay and i want to know a brief description about natco pharma company?',
 'documents': [Document(id='d38b5538-0eb7-4773-94c3-76ee9651d2e7', metadata={'source': '/home/ashok/Agentic_RAG/data/dividend.txt'}, page_content='## Profit/Loss Statement for Portfolio as on  04 Jul 2024\n\n\nFrom Date\n\n01/APR/2023\n\nTo Date\n\n31/MAR/2024\n\nPortfolio\n\nTaxable Portfolio\n\nFor Asset Class\n\nDividend\n\nAccount Subtype\n\nDirect Transaction'),
  Document(id='4b4988d8-a180-4942-a97b-df3c7ed85fc1', metadata={'source': '/home/ashok/Agentic_RAG/data/dividend.txt'}, page_content='1. Natco Pharma will pay a dividend of 15 on February 26, 2024.\n2. Fineotex Chem will pay a dividend of 138 on February 26, 2024.\n3. Hero Motocorp will pay a dividend of 225 on February 21, 2024.\n4. Hero Motocorp will pay another dividend of 675 on February 21, 2024.\n5. Apollo Hospitals will pay a dividend of 54 on February 20, 2024.\n6. Sundaram Finance wi

In [31]:
# Extract only the final generated output from result1
if result1:
    final_answer = result1.get("generation", "No answer generated")
    print("="*60)
    print("📋 FINAL GENERATED OUTPUT ONLY:")
    print("="*60)
    print(final_answer)
    print("="*60)
else:
    print(" No result available to extract from")

📋 FINAL GENERATED OUTPUT ONLY:
Natco Pharma is scheduled to pay a dividend of ₹15 on February 26, 2024, as per the internal knowledge provided (RAG Source 2). This information is consistent with the internal data, which should be prioritized for accuracy.

Regarding a brief description of Natco Pharma, it is a pharmaceutical company based in India, known for its focus on the development, manufacture, and marketing of finished dosage formulations and active pharmaceutical ingredients. The company has a strong presence in the domestic market and also exports its products to various countries. Natco Pharma is recognized for its work in the oncology segment and has been involved in the production of generic versions of complex drugs. The company has declared a total of 37 dividends since November 10, 2005, and in the past 12 months, it has declared an equity dividend amounting to ₹6.00 (Web Source 2).

This synthesis of internal and external knowledge provides a comprehensive view of Natco