In [1]:
!pip install langchain langchain-community langchain-openai
!pip install chromadb sentence-transformers
!pip install pypdf pydantic
!pip install faiss-cpu tiktoken



In [None]:
!pip install

In [5]:
import os
from typing import List, Dict, Any, Optional, Literal
from dataclasses import dataclass
from enum import Enum

# LangChain imports
from langchain_classic.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma, FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_classic.schema import Document
from langchain_openai import ChatOpenAI

# Pydantic for structured outputs
from pydantic import BaseModel, Field

# Standard library
import json
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

print("All imports successful!")

All imports successful!


In [53]:
import os

class RAGConfig:
    OPENAI_API_KEY = "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"

    BASE_URL = "https://openrouter.ai/api/v1"
    LLM_MODEL = "liquid/lfm-2.5-1.2b-thinking:free"
    LLM_TEMPERATURE = 0.2

    EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

    VECTOR_DB_TYPE = "chroma"
    PERSIST_DIRECTORY = "./chroma_db"
    COLLECTION_NAME = "agentic_rag_docs"

    CHUNK_SIZE = 1000
    CHUNK_OVERLAP = 200

    TOP_K_DOCUMENTS = 5
    SIMILARITY_THRESHOLD = 0.7

    MAX_ITERATIONS = 5
    ENABLE_SELF_CORRECTION = True
    ENABLE_QUERY_DECOMPOSITION = True

os.environ["OPENAI_API_KEY"] = RAGConfig.OPENAI_API_KEY

In [54]:
from typing import List, Optional
from enum import Enum
from pydantic import BaseModel, Field


class QueryType(str, Enum):
    SIMPLE_FACTUAL = "simple_factual"
    COMPLEX_REASONING = "complex_reasoning"
    COMPARISON = "comparison"
    MULTI_HOP = "multi_hop"
    SUMMARIZATION = "summarization"


class QueryClassification(BaseModel):
    query_type: QueryType = Field(description="Type of query")
    complexity: int = Field(description="Complexity score from 1-10")
    requires_decomposition: bool = Field(description="Whether query needs to be broken down")
    reasoning: str = Field(description="Explanation of classification")


class SubQuery(BaseModel):
    question: str = Field(description="The sub-question")
    order: int = Field(description="Order of execution")
    dependencies: List[int] = Field(default=[], description="Indices of queries this depends on")


class QueryDecomposition(BaseModel):
    sub_queries: List[SubQuery] = Field(description="List of sub-queries")
    synthesis_instruction: str = Field(description="How to combine answers")


class RetrievalEvaluation(BaseModel):
    is_sufficient: bool = Field(description="Are documents sufficient to answer?")
    relevance_scores: List[float] = Field(description="Relevance score for each document")
    missing_info: Optional[str] = Field(description="What information is missing if insufficient")
    confidence: float = Field(description="Confidence in evaluation (0-1)")


class GenerationEvaluation(BaseModel):
    is_accurate: bool = Field(description="Is the answer accurate?")
    is_complete: bool = Field(description="Is the answer complete?")
    is_grounded: bool = Field(description="Is answer grounded in retrieved docs?")
    needs_refinement: bool = Field(description="Does answer need refinement?")
    issues: List[str] = Field(default=[], description="List of issues if any")
    confidence: float = Field(description="Confidence score (0-1)")

In [55]:
QUERY_CLASSIFICATION_PROMPT = """Analyze the following user query and classify it.

User Query: {query}

Classify based on:
1. Query Type: simple_factual, complex_reasoning, comparison, multi_hop, summarization
2. Complexity: Rate from 1-10
3. Whether it requires decomposition into sub-queries
4. Provide reasoning for your classification

Respond ONLY with valid JSON in this exact format (no markdown, no extra text):
{{
    "query_type": "simple_factual",
    "complexity": 5,
    "requires_decomposition": false,
    "reasoning": "explanation here"
}}"""


QUERY_DECOMPOSITION_PROMPT = """Decompose this complex query into simpler sub-queries.

User Query: {query}

Create sub-queries that:
1. Can be answered independently or with minimal dependencies
2. Are ordered logically
3. Together provide information to answer the original query

Respond ONLY with valid JSON (no markdown, no extra text):
{{
    "sub_queries": [
        {{"question": "sub-question 1", "order": 1, "dependencies": []}},
        {{"question": "sub-question 2", "order": 2, "dependencies": [1]}}
    ],
    "synthesis_instruction": "How to combine the answers"
}}"""


RETRIEVAL_EVALUATION_PROMPT = """Evaluate if these retrieved documents are sufficient to answer the query.

Query: {query}

Retrieved Documents:
{docs_text}

Evaluate:
1. Are documents sufficient? (true/false)
2. Relevance score for each document (0-1)
3. What information is missing if insufficient?
4. Confidence in your evaluation (0-1)

Respond ONLY with valid JSON (no markdown, no extra text):
{{
    "is_sufficient": true,
    "relevance_scores": [0.9, 0.8, 0.7, 0.6, 0.5],
    "missing_info": null,
    "confidence": 0.85
}}"""


ANSWER_GENERATION_PROMPT = """Answer the following question based ONLY on the provided documents.

Question: {query}

Retrieved Documents:
{docs_text}{context_str}

Instructions:
1. Provide a comprehensive answer based on the documents
2. Cite specific documents when making claims
3. If documents don't contain enough information, explicitly state what's missing
4. Be precise and factual
5. Do not add information not present in the documents

Answer:"""


GENERATION_EVALUATION_PROMPT = """Evaluate the quality of this generated answer.

Question: {query}

Generated Answer:
{answer}

Source Documents:
{docs_text}

Evaluate:
1. Is the answer accurate based on documents? (true/false)
2. Is the answer complete? (true/false)
3. Is the answer grounded in the documents (no hallucinations)? (true/false)
4. Does it need refinement? (true/false)
5. List any issues found
6. Confidence score (0-1)

Respond ONLY with valid JSON (no markdown, no extra text):
{{
    "is_accurate": true,
    "is_complete": true,
    "is_grounded": true,
    "needs_refinement": false,
    "issues": [],
    "confidence": 0.9
}}"""


ANSWER_REFINEMENT_PROMPT = """Refine the following answer to address the identified issues.

Question: {query}

Initial Answer:
{initial_answer}

Issues Identified:
{issues_text}

Source Documents:
{docs_text}

Instructions:
1. Address each issue mentioned
2. Ensure answer is grounded in documents
3. Improve completeness and accuracy
4. Maintain clarity and coherence

Refined Answer:"""

In [56]:
from typing import List
from pathlib import Path
from langchain_classic.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma, FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_classic.schema import Document
# from config import RAGConfig


class DocumentProcessor:
    def __init__(self, config: RAGConfig):
        self.config = config
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=config.CHUNK_SIZE,
            chunk_overlap=config.CHUNK_OVERLAP,
            separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
            length_function=len
        )
        self.embeddings = HuggingFaceEmbeddings(
            model_name=config.EMBEDDING_MODEL
        )
        self.vector_store = None

    def load_pdf_documents(self, pdf_paths: List[str]) -> List[Document]:
        all_documents = []

        for pdf_path in pdf_paths:
            print(f"Loading: {pdf_path}")
            loader = PyPDFLoader(pdf_path)
            documents = loader.load()

            for doc in documents:
                doc.metadata['source_file'] = Path(pdf_path).name

            all_documents.extend(documents)

        print(f"Loaded {len(all_documents)} pages from {len(pdf_paths)} PDFs")
        return all_documents

    def chunk_documents(self, documents: List[Document]) -> List[Document]:
        chunks = self.text_splitter.split_documents(documents)

        for i, chunk in enumerate(chunks):
            chunk.metadata['chunk_id'] = i

        print(f"Created {len(chunks)} chunks")
        return chunks

    def create_vector_store(self, chunks: List[Document]) -> None:
        if self.config.VECTOR_DB_TYPE == "chroma":
            self.vector_store = Chroma.from_documents(
                documents=chunks,
                embedding=self.embeddings,
                collection_name=self.config.COLLECTION_NAME,
                persist_directory=self.config.PERSIST_DIRECTORY
            )
            self.vector_store.persist()
        else:
            self.vector_store = FAISS.from_documents(
                documents=chunks,
                embedding=self.embeddings
            )
            self.vector_store.save_local(self.config.PERSIST_DIRECTORY)

        print(f"Vector store created with {len(chunks)} chunks")

    def load_existing_vector_store(self) -> None:
        if self.config.VECTOR_DB_TYPE == "chroma":
            self.vector_store = Chroma(
                collection_name=self.config.COLLECTION_NAME,
                embedding_function=self.embeddings,
                persist_directory=self.config.PERSIST_DIRECTORY
            )
        else:
            self.vector_store = FAISS.load_local(
                self.config.PERSIST_DIRECTORY,
                self.embeddings
            )

        print("Loaded existing vector store")

    def process_and_store(self, pdf_paths: List[str]) -> None:
        documents = self.load_pdf_documents(pdf_paths)
        chunks = self.chunk_documents(documents)
        self.create_vector_store(chunks)

In [57]:
from typing import List, Dict, Any
import json
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_classic.schema import Document
from langchain_openai import ChatOpenAI
# from config import RAGConfig
# from models import (
#     QueryClassification, QueryDecomposition,
#     RetrievalEvaluation, GenerationEvaluation
# )
# from prompts import (
#     QUERY_CLASSIFICATION_PROMPT, QUERY_DECOMPOSITION_PROMPT,
#     RETRIEVAL_EVALUATION_PROMPT, ANSWER_GENERATION_PROMPT,
#     GENERATION_EVALUATION_PROMPT, ANSWER_REFINEMENT_PROMPT
# )


class AgenticRAG:
    def __init__(self, config: RAGConfig, vector_store):
        self.config = config
        self.vector_store = vector_store

        self.llm = ChatOpenAI(
            model=config.LLM_MODEL,
            openai_api_key=config.OPENAI_API_KEY,
            openai_api_base=config.BASE_URL,
            temperature=config.LLM_TEMPERATURE,
            max_tokens=1024,
            default_headers={
                "HTTP-Referer": "https://company-chatbot.local",
                "X-Title": "Multi-Agent System",
            }
        )

        self.embeddings = HuggingFaceEmbeddings(
            model_name=config.EMBEDDING_MODEL
        )

    def _clean_json_response(self, content: str) -> str:
        content = content.strip()
        if content.startswith("```json"):
            content = content[7:]
        if content.startswith("```"):
            content = content[3:]
        if content.endswith("```"):
            content = content[:-3]
        return content.strip()

    def classify_query(self, query: str) -> QueryClassification:
        prompt = QUERY_CLASSIFICATION_PROMPT.format(query=query)
        response = self.llm.invoke(prompt)
        content = self._clean_json_response(response.content)

        try:
            result = json.loads(content)
            return QueryClassification(**result)
        except json.JSONDecodeError:
            return QueryClassification(
                query_type="simple_factual",
                complexity=5,
                requires_decomposition=False,
                reasoning="Default classification due to parsing error"
            )

    def decompose_query(self, query: str) -> QueryDecomposition:
        prompt = QUERY_DECOMPOSITION_PROMPT.format(query=query)
        response = self.llm.invoke(prompt)
        content = self._clean_json_response(response.content)

        try:
            result = json.loads(content)
            return QueryDecomposition(**result)
        except json.JSONDecodeError:
            return QueryDecomposition(
                sub_queries=[],
                synthesis_instruction="Combine answers sequentially"
            )

    def retrieve_documents(self, query: str, k: int = None) -> List[Document]:
        k = k or self.config.TOP_K_DOCUMENTS
        docs = self.vector_store.similarity_search(query, k=k)
        return docs

    def retrieve_with_scores(self, query: str, k: int = None) -> List[tuple]:
        k = k or self.config.TOP_K_DOCUMENTS
        docs_with_scores = self.vector_store.similarity_search_with_score(query, k=k)
        return docs_with_scores

    def evaluate_retrieval(self, query: str, documents: List[Document]) -> RetrievalEvaluation:
        docs_text = "\n\n".join([
            f"Document {i+1}:\n{doc.page_content[:500]}..."
            for i, doc in enumerate(documents)
        ])

        prompt = RETRIEVAL_EVALUATION_PROMPT.format(
            query=query,
            docs_text=docs_text
        )

        response = self.llm.invoke(prompt)
        content = self._clean_json_response(response.content)

        try:
            result = json.loads(content)
            return RetrievalEvaluation(**result)
        except json.JSONDecodeError:
            return RetrievalEvaluation(
                is_sufficient=True,
                relevance_scores=[0.5] * len(documents),
                missing_info=None,
                confidence=0.5
            )

    def generate_answer(self, query: str, documents: List[Document],
                       context: Dict[str, Any] = None) -> str:
        docs_text = "\n\n".join([
            f"Document {i+1} (Source: {doc.metadata.get('source_file', 'unknown')}):\n{doc.page_content}"
            for i, doc in enumerate(documents)
        ])

        context_str = ""
        if context:
            context_str = f"\n\nAdditional Context:\n{json.dumps(context, indent=2)}"

        prompt = ANSWER_GENERATION_PROMPT.format(
            query=query,
            docs_text=docs_text,
            context_str=context_str
        )

        response = self.llm.invoke(prompt)
        return response.content

    def evaluate_generation(self, query: str, answer: str,
                           documents: List[Document]) -> GenerationEvaluation:
        docs_text = "\n\n".join([
            f"Doc {i+1}: {doc.page_content[:300]}..."
            for i, doc in enumerate(documents)
        ])

        prompt = GENERATION_EVALUATION_PROMPT.format(
            query=query,
            answer=answer,
            docs_text=docs_text
        )

        response = self.llm.invoke(prompt)
        content = self._clean_json_response(response.content)

        try:
            result = json.loads(content)
            return GenerationEvaluation(**result)
        except json.JSONDecodeError:
            return GenerationEvaluation(
                is_accurate=True,
                is_complete=True,
                is_grounded=True,
                needs_refinement=False,
                issues=[],
                confidence=0.8
            )

    def refine_answer(self, query: str, initial_answer: str,
                     documents: List[Document], evaluation: GenerationEvaluation) -> str:
        docs_text = "\n\n".join([
            f"Doc {i+1}: {doc.page_content}"
            for i, doc in enumerate(documents)
        ])

        issues_text = "\n".join([f"- {issue}" for issue in evaluation.issues])

        prompt = ANSWER_REFINEMENT_PROMPT.format(
            query=query,
            initial_answer=initial_answer,
            issues_text=issues_text,
            docs_text=docs_text
        )

        response = self.llm.invoke(prompt)
        return response.content

    def process_query(self, query: str, verbose: bool = True) -> Dict[str, Any]:
        result = {
            "query": query,
            "classification": None,
            "decomposition": None,
            "sub_results": [],
            "retrieved_documents": [],
            "retrieval_evaluation": None,
            "initial_answer": None,
            "generation_evaluation": None,
            "final_answer": None,
            "iterations": 0,
            "metadata": {}
        }

        if verbose:
            print("\n" + "="*70)
            print("STEP 1: QUERY CLASSIFICATION")
            print("="*70)

        classification = self.classify_query(query)
        result["classification"] = classification.dict()

        if verbose:
            print(f"Query Type: {classification.query_type}")
            print(f"Complexity: {classification.complexity}/10")
            print(f"Requires Decomposition: {classification.requires_decomposition}")
            print(f"Reasoning: {classification.reasoning}")

        if classification.requires_decomposition and self.config.ENABLE_QUERY_DECOMPOSITION:
            if verbose:
                print("\n" + "="*70)
                print("STEP 2: QUERY DECOMPOSITION")
                print("="*70)

            decomposition = self.decompose_query(query)
            result["decomposition"] = decomposition.dict()

            if verbose:
                print(f"Decomposed into {len(decomposition.sub_queries)} sub-queries:")
                for sq in decomposition.sub_queries:
                    print(f"  {sq.order}. {sq.question}")

            sub_answers = {}
            for sub_query in sorted(decomposition.sub_queries, key=lambda x: x.order):
                if verbose:
                    print(f"\n  Processing sub-query {sub_query.order}: {sub_query.question}")

                sub_docs = self.retrieve_documents(sub_query.question, k=3)
                sub_answer = self.generate_answer(sub_query.question, sub_docs)
                sub_answers[sub_query.order] = sub_answer

                result["sub_results"].append({
                    "sub_query": sub_query.question,
                    "answer": sub_answer
                })

            synthesis_context = {
                "sub_answers": sub_answers,
                "synthesis_instruction": decomposition.synthesis_instruction
            }

            documents = self.retrieve_documents(query)
            result["retrieved_documents"] = [doc.page_content for doc in documents]
            answer = self.generate_answer(query, documents, context=synthesis_context)

        else:
            if verbose:
                print("\n" + "="*70)
                print("STEP 2: DOCUMENT RETRIEVAL")
                print("="*70)

            documents = self.retrieve_documents(query)
            result["retrieved_documents"] = [doc.page_content for doc in documents]

            if verbose:
                print(f"Retrieved {len(documents)} documents")

            if verbose:
                print("\n" + "="*70)
                print("STEP 3: RETRIEVAL EVALUATION")
                print("="*70)

            retrieval_eval = self.evaluate_retrieval(query, documents)
            result["retrieval_evaluation"] = retrieval_eval.dict()

            if verbose:
                print(f"Sufficient: {retrieval_eval.is_sufficient}")
                print(f"Confidence: {retrieval_eval.confidence}")
                if not retrieval_eval.is_sufficient:
                    print(f"Missing Info: {retrieval_eval.missing_info}")

            if verbose:
                print("\n" + "="*70)
                print("STEP 4: ANSWER GENERATION")
                print("="*70)

            answer = self.generate_answer(query, documents)

        result["initial_answer"] = answer

        if self.config.ENABLE_SELF_CORRECTION:
            if verbose:
                print("\n" + "="*70)
                print("STEP 5: ANSWER EVALUATION & REFINEMENT")
                print("="*70)

            for iteration in range(self.config.MAX_ITERATIONS):
                result["iterations"] = iteration + 1

                gen_eval = self.evaluate_generation(query, answer, documents)
                result["generation_evaluation"] = gen_eval.dict()

                if verbose:
                    print(f"\nIteration {iteration + 1}:")
                    print(f"  Accurate: {gen_eval.is_accurate}")
                    print(f"  Complete: {gen_eval.is_complete}")
                    print(f"  Grounded: {gen_eval.is_grounded}")
                    print(f"  Confidence: {gen_eval.confidence}")

                if not gen_eval.needs_refinement or gen_eval.confidence > 0.9:
                    if verbose:
                        print("  ‚úÖ Answer quality satisfactory!")
                    break

                if verbose:
                    print(f"  Issues: {gen_eval.issues}")
                    print("  Refining answer...")

                answer = self.refine_answer(query, answer, documents, gen_eval)

        result["final_answer"] = answer

        if verbose:
            print("\n" + "="*70)
            print("FINAL ANSWER")
            print("="*70)
            print(answer)
            print("\n")

        return result

In [58]:
import warnings
warnings.filterwarnings('ignore')

# from config import RAGConfig
# from document_processor import DocumentProcessor
# from agentic_rag import AgenticRAG


def setup_system(pdf_files: list[str], rebuild_db: bool = False):
    print("="*70)
    print("AGENTIC RAG SYSTEM - INITIALIZATION")
    print("="*70)

    config = RAGConfig()

    print("\nüìÑ Setting up document processor...")
    processor = DocumentProcessor(config)

    if rebuild_db:
        print("\nüî® Building vector database...")
        processor.process_and_store(pdf_files)
    else:
        print("\nüìö Loading existing vector database...")
        processor.load_existing_vector_store()

    print("\nü§ñ Initializing Agentic RAG system...")
    rag = AgenticRAG(config, processor.vector_store)

    print("\n‚úÖ System ready!")
    return rag, processor


def run_example_query(rag: AgenticRAG):
    print("\n" + "="*70)
    print("EXAMPLE QUERY")
    print("="*70)

    query = "What is the main topic discussed in the document?"
    result = rag.process_query(query, verbose=True)

    return result


def interactive_mode(rag: AgenticRAG):
    print("\n" + "="*70)
    print("INTERACTIVE MODE")
    print("="*70)
    print("Enter your questions (type 'exit' to quit, 'help' for options)")
    print("="*70)

    verbose = True

    while True:
        query = input("\nüîç Your question: ").strip()

        if query.lower() == 'exit':
            print("üëã Goodbye!")
            break

        if query.lower() == 'help':
            print("\nCommands:")
            print("  exit - Exit interactive mode")
            print("  help - Show this help message")
            print("  verbose on - Enable verbose output")
            print("  verbose off - Disable verbose output")
            continue

        if query.lower() == 'verbose on':
            verbose = True
            print("‚úÖ Verbose mode enabled")
            continue

        if query.lower() == 'verbose off':
            verbose = False
            print("‚úÖ Verbose mode disabled")
            continue

        if not query:
            continue

        result = rag.process_query(query, verbose=verbose)


def main():
    pdf_files = [
        "./Umair_Ali.pdf"
    ]

    rag_system, doc_processor = setup_system(pdf_files, rebuild_db=False)

    run_example_query(rag_system)

    interactive_mode(rag_system)

    print("\n‚úÖ Agentic RAG system demonstration complete!")


if __name__ == "__main__":
    main()

AGENTIC RAG SYSTEM - INITIALIZATION

üìÑ Setting up document processor...

üìö Loading existing vector database...
Loaded existing vector store

ü§ñ Initializing Agentic RAG system...

‚úÖ System ready!

EXAMPLE QUERY

STEP 1: QUERY CLASSIFICATION
Query Type: QueryType.SIMPLE_FACTUAL
Complexity: 5/10
Requires Decomposition: False
Reasoning: Default classification due to parsing error

STEP 2: DOCUMENT RETRIEVAL
Retrieved 0 documents

STEP 3: RETRIEVAL EVALUATION
Sufficient: True
Confidence: 0.85

STEP 4: ANSWER GENERATION

STEP 5: ANSWER EVALUATION & REFINEMENT

Iteration 1:
  Accurate: True
  Complete: True
  Grounded: True
  Confidence: 0.8
  ‚úÖ Answer quality satisfactory!

FINAL ANSWER




INTERACTIVE MODE
Enter your questions (type 'exit' to quit, 'help' for options)

üîç Your question: what is the experience of applier 

STEP 1: QUERY CLASSIFICATION
Query Type: QueryType.SIMPLE_FACTUAL
Complexity: 4/10
Requires Decomposition: False
Reasoning: The query asks for a direct fact