## **Config (paths, model names, imports)**


In [1]:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Suppress HuggingFace tokenizers parallelism warning in Jupyter notebooks
# This prevents the "forked process" warning when using HuggingFaceEmbeddings
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

# Set LangChain configuration from environment variables
os.environ['LANGSMITH_TRACING'] = os.getenv('LANGSMITH_TRACING')
os.environ['LANGSMITH_ENDPOINT'] = os.getenv('LANGSMITH_ENDPOINT')
os.environ['LANGSMITH_API_KEY'] = os.getenv('LANGSMITH_API_KEY')    
os.environ['LANGSMITH_PROJECT'] = os.getenv('LANGSMITH_PROJECT')

#API KEY
os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')

In [2]:
# Necessary packages
from langchain_community.document_loaders import DirectoryLoader, TextLoader
import yaml
from pathlib import Path
from typing import Dict, List, Any

  from .autonotebook import tqdm as notebook_tqdm


## **Extract metadata from mkdocs.yml**


In [3]:
# Load mkdocs.yml configuration
# Get the project root directory by checking for fastapi directory
current_dir = Path.cwd()
# Check if fastapi directory exists in current dir or parent
if (current_dir / "fastapi").exists():
    project_root = current_dir
elif (current_dir.parent / "fastapi").exists():
    project_root = current_dir.parent
else:
    # Fallback: assume parent directory (notebook is in rag-learning/rag-learning/)
    project_root = current_dir.parent

mkdocs_path = project_root / "fastapi" / "docs" / "en" / "mkdocs.yml"
docs_base_path = project_root / "fastapi" / "docs" / "en" / "docs"

# Convert to string for compatibility
mkdocs_path = str(mkdocs_path)
docs_base_path = str(docs_base_path)

# Pre-process the YAML file to handle the problematic !!python/name: tag
# We'll replace it with a null value since we only need the 'nav' section
import re

with open(mkdocs_path, 'r') as f:
    yaml_content = f.read()
    
# Replace the problematic Python tag with null using regex
# This handles the tag whether it's on its own or part of a value
yaml_content = re.sub(r'!!python/name:[^\s]+', 'null', yaml_content)

# Now parse the cleaned YAML
mkdocs_config = yaml.safe_load(yaml_content)

# Extract navigation structure
nav_structure = mkdocs_config.get('nav', [])

# Build a mapping from file paths to their metadata
def extract_nav_metadata(nav_items: List[Any], parent_path: List[str] = None) -> Dict[str, Dict[str, Any]]:
    """
    Recursively extract navigation metadata from mkdocs nav structure.
    Returns a dictionary mapping file paths to their metadata.
    """
    if parent_path is None:
        parent_path = []
    
    metadata_map = {}
    
    for item in nav_items:
        if isinstance(item, dict):
            # Handle dictionary items (e.g., {"Tutorial - User Guide": [...]})
            for key, value in item.items():
                if isinstance(value, list):
                    # Recursive case: nested navigation
                    new_path = parent_path + [key]
                    metadata_map.update(extract_nav_metadata(value, new_path))
                elif isinstance(value, str):
                    # Leaf case: key is section name, value is file path
                    file_path = value
                    metadata_map[file_path] = {
                        'section': key,
                        'category_path': parent_path + [key],
                        'top_level_category': parent_path[0] if parent_path else key,
                        'subcategory': parent_path[-1] if len(parent_path) > 1 else None,
                    }
        elif isinstance(item, str):
            # Handle simple string items (e.g., "tutorial/first-steps.md")
            file_path = item
            metadata_map[file_path] = {
                'section': None,
                'category_path': parent_path.copy(),
                'top_level_category': parent_path[0] if parent_path else 'Root',
                'subcategory': parent_path[-1] if parent_path else None,
            }
    
    return metadata_map

# Create the metadata mapping
nav_metadata_map = extract_nav_metadata(nav_structure)

# Display some examples
print(f"Total documents in navigation: {len(nav_metadata_map)}")
print("\nExample metadata entries:")
for i, (path, meta) in enumerate(list(nav_metadata_map.items())[:5]):
    print(f"\n{i+1}. {path}")
    print(f"   Category Path: {' > '.join(meta['category_path'])}")
    print(f"   Top Level: {meta['top_level_category']}")
    if meta['subcategory']:
        print(f"   Subcategory: {meta['subcategory']}")


Total documents in navigation: 144

Example metadata entries:

1. index.md
   Category Path: FastAPI
   Top Level: FastAPI

2. features.md
   Category Path: 
   Top Level: Root

3. learn/index.md
   Category Path: Learn
   Top Level: Learn
   Subcategory: Learn

4. python-types.md
   Category Path: Learn
   Top Level: Learn
   Subcategory: Learn

5. async.md
   Category Path: Learn
   Top Level: Learn
   Subcategory: Learn


## **Build dense vs lightweight corpora**


In [4]:
# Helper function to enrich documents with mkdocs metadata
def enrich_document_metadata(doc, metadata_map: Dict[str, Dict[str, Any]], docs_base_path: str):
    """Add mkdocs navigation metadata to a document based on its file path."""
    # Get the relative path from the document's source
    source_path = doc.metadata.get('source', '')
    
    # Convert absolute path to relative path from docs base
    if source_path.startswith(docs_base_path):
        relative_path = source_path[len(docs_base_path):].lstrip('/')
        
        # Normalize paths for comparison (handle both with/without .md extension)
        relative_path_normalized = relative_path.replace('.md', '')
        
        # Try exact match first
        if relative_path in metadata_map:
            nav_meta = metadata_map[relative_path]
        else:
            # Try matching by normalized path (without .md)
            nav_meta = None
            for nav_path, meta in metadata_map.items():
                nav_path_normalized = nav_path.replace('.md', '')
                # Match if paths are the same when normalized
                if nav_path_normalized == relative_path_normalized:
                    nav_meta = meta
                    break
                # Also try if the relative path ends with the nav path
                if relative_path.endswith(nav_path) or nav_path in relative_path:
                    nav_meta = meta
                    break
        
        # Add metadata if found
        if nav_meta:
            doc.metadata['section'] = nav_meta.get('section')
            doc.metadata['category_path'] = ' > '.join(nav_meta.get('category_path', []))
            doc.metadata['top_level_category'] = nav_meta.get('top_level_category')
            doc.metadata['subcategory'] = nav_meta.get('subcategory')
    
    return doc

# Dense corpus: tutorials + advanced guides
dense_loader = DirectoryLoader(
    docs_base_path,
    glob="tutorial/**/*.md",
    loader_cls=TextLoader,
)
dense_docs = dense_loader.load()
for d in dense_docs:
    d.metadata["corpus"] = "dense_docs"
    # Enrich with mkdocs metadata
    enrich_document_metadata(d, nav_metadata_map, docs_base_path)

# Lightweight corpus: e.g. fastapi-best-practices repo
faq_path = project_root / "fastapi-best-practices"
faq_loader = DirectoryLoader(
    str(faq_path),
    glob="README.md",
    loader_cls=TextLoader,
)
faq_docs = faq_loader.load()
for d in faq_docs:
    d.metadata["corpus"] = "faq_docs"

# Display enriched metadata
print("Sample enriched document metadata:")
print(dense_docs[2].metadata)

Sample enriched document metadata:
{'source': '/Users/dimitar/Desktop/Software_Dev/rag-learning/fastapi/docs/en/docs/tutorial/query-params.md', 'corpus': 'dense_docs', 'section': None, 'category_path': 'Learn > Tutorial - User Guide', 'top_level_category': 'Learn', 'subcategory': 'Tutorial - User Guide'}


In [5]:
from typing import Optional
from datetime import date
from pydantic import BaseModel, Field

class FastAPISearch(BaseModel):
    """
    Structured query for searching over FastAPI documentation chunks.
    The LLM will fill this from a natural-language question.
    """

    # What to search for semantically
    text: str = Field(
        ...,
        description=(
            "Main semantic search query over the document content. "
            "Use natural language describing the user's problem or question."
        ),
    )

    # High-level doc classification
    top_level_category: Optional[str] = Field(
        None,
        description=(
            "Top-level documentation category, such as 'Learn', "
            "'Reference', or 'Tutorials'. "
            "Use when the user seems to want conceptual/how-to material "
            "vs pure API reference."
        ),
    )

    subcategory: Optional[str] = Field(
        None,
        description=(
            "More specific documentation subcategory, such as "
            "'Tutorial - User Guide', 'Advanced User Guide', etc. "
            "Use when the user implicitly asks for guides or tutorials."
        ),
    )

    corpus: Optional[str] = Field(
        None,
        description=(
            "Internal corpus label (for example 'dense_docs', 'api_reference', "
            "'examples'). Use when the user implicitly wants a particular type "
            "of docs (e.g., reference vs prose)."
        ),
    )

    # Optional filters you might add later if your metadata has them
    earliest_publish_date: Optional[date] = Field(
        None,
        description=(
            "Earliest publish date for documents, inclusive. "
            "Use only if the user explicitly cares about recent or old docs."
        ),
    )

    latest_publish_date: Optional[date] = Field(
        None,
        description=(
            "Latest publish date for documents, exclusive. "
            "Use only if the user explicitly limits the time range."
        ),
    )

    def pretty_print(self) -> None:
        # Access model_fields from the class, not the instance (Pydantic v2.11+)
        model_fields = self.__class__.model_fields
        for field_name in model_fields.keys():
            field_value = getattr(self, field_name, None)
            field_info = model_fields[field_name]
            # Get default value, handling Pydantic v2 FieldInfo structure
            default_value = getattr(field_info, 'default', None)
            # Skip if value is None or equals the default
            if field_value is not None and field_value != default_value:
                print(f"{field_name}: {field_value}")


In [6]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

system = """You are an assistant that converts natural language questions
into structured search queries for the FastAPI documentation.

Your job is to:
- Extract a semantic search text that captures the user's problem or topic.
- Set metadata fields (top_level_category, subcategory, corpus, etc.)
  only when they are clearly implied by the question.

If you are unsure about a field, leave it empty (null).
Do NOT invent metadata values that are not supported by the schema.
Do NOT try to expand or reinterpret unknown acronyms; keep them as-is."""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)  # or any chat model
structured_llm = llm.with_structured_output(FastAPISearch)
query_analyzer = prompt | structured_llm


## **Chunk + embed + index (two vector stores)**


In [7]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
    chunk_overlap=150,
    length_function=len,)

# dense_docs and faq_docs already loaded and metadata-enriched earlier
dense_chunks = text_splitter.split_documents(dense_docs)
faq_chunks = text_splitter.split_documents(faq_docs)

print(f"Dense chunks: {len(dense_chunks)}")
print(f"FAQ chunks:   {len(faq_chunks)}")


Dense chunks: 388
FAQ chunks:   42


In [8]:
# Embedding
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [9]:
vectordb_dir = Path("chroma_fastapi")
vectordb_dir.mkdir(exist_ok=True)

# Create vector stores with automatic persistence
# When persist_directory is provided, Chroma automatically persists to disk
dense_vs = Chroma.from_documents(
    documents=dense_chunks,
    embedding=embeddings, 
    collection_name="fastapi_dense_docs",
    persist_directory=str(vectordb_dir / "dense"),
)

faq_vs = Chroma.from_documents(
    documents=faq_chunks,
    embedding=embeddings, 
    collection_name="fastapi_faq_docs",
    persist_directory=str(vectordb_dir / "faq"),
)


In [None]:
# Create retrievers with search configuration
# Using the modern LangChain API with search_kwargs
dense_retriever = dense_vs.as_retriever(search_kwargs={"k": 4})
faq_retriever = faq_vs.as_retriever(search_kwargs={"k": 2})


## **Implement routing (which corpus, which strategy)**


In [11]:
def route_query(query: str) -> Dict[str, str]:
    """
    Route a query to the appropriate corpus(es) using rule-based logic.
    
    NOTE: faq_docs corpus is limited (single README.md), so routing is conservative.
    Only routes to faq_docs for very specific error/troubleshooting queries.
    
    Args:
        query: The user's search query
        
    Returns:
        Dictionary with "corpus" key set to "dense_docs", "faq_docs", or "both"
    """
    query_lower = query.lower()
    
    # Step 1: Keyword-based routing
    # FAQ indicators: STRONG error/debugging signals only (faq_docs is limited)
    # Only route to faq_docs for very specific troubleshooting queries
    strong_faq_keywords = [
        "500", "404", "403", "401", "400",  # HTTP error codes
        "stack trace", "traceback",  # Specific error indicators
        "doesn't work", "not working", "broken",  # Clear problem statements
        "troubleshoot", "debug",  # Explicit debugging intent
    ]
    
    # Moderate FAQ indicators: may use "both" for these
    moderate_faq_keywords = [
        "error", "exception", "failed", "failure", "crash", "issue", "bug", "problem"
    ]
    
    # Dense docs indicators: tutorials, guides, concepts, best practices
    # Note: "best practices" queries go to dense_docs since faq_docs is limited
    dense_keywords = [
        "tutorial", "how to", "how do", "guide", "best practices", "best practice",
        "architecture", "design", "deployment", "security", "dependency injection",
        "background tasks", "middleware", "async", "asynchronous", "concurrency",
        "testing", "cors", "authentication", "authorization", "validation",
        "pydantic", "openapi", "swagger", "websocket", "websockets",
        "project structure", "structure", "conventions", "patterns"
    ]
    
    # Count matches
    strong_faq_score = sum(1 for keyword in strong_faq_keywords if keyword in query_lower)
    moderate_faq_score = sum(1 for keyword in moderate_faq_keywords if keyword in query_lower)
    dense_score = sum(1 for keyword in dense_keywords if keyword in query_lower)
    
    # Step 2: Query length analysis
    query_length = len(query.split())
    is_short_query = query_length <= 3
    is_long_query = query_length > 10
    
    # Step 3: Decision logic (conservative - favor dense_docs)
    
    # Very strong FAQ signal (error codes, explicit debugging) → faq_docs
    if strong_faq_score > 0:
        return {"corpus": "faq_docs"}
    
    # Strong dense signal → dense_docs
    if dense_score > 0 and dense_score > moderate_faq_score:
        return {"corpus": "dense_docs"}
    
    # Moderate FAQ + dense signals → use both (faq might have relevant troubleshooting)
    if moderate_faq_score > 0 and dense_score > 0:
        return {"corpus": "both"}
    
    # Moderate FAQ only, but query is long/specific → both (to be safe)
    if moderate_faq_score > 0 and is_long_query:
        return {"corpus": "both"}
    
    # Moderate FAQ only, short query → dense_docs (faq_docs too limited)
    if moderate_faq_score > 0:
        return {"corpus": "dense_docs"}
    
    # Short, generic queries → dense_docs (tutorials/guides)
    if is_short_query:
        return {"corpus": "dense_docs"}
    
    # Default: dense_docs (more comprehensive corpus)
    return {"corpus": "dense_docs"}


def get_retriever_for_query(query: str, dense_retriever, faq_retriever):
    """
    Get the appropriate retriever(s) based on query routing.
    
    Args:
        query: The user's search query
        dense_retriever: Retriever for dense_docs corpus
        faq_retriever: Retriever for faq_docs corpus
        
    Returns:
        Single retriever or list of retrievers based on routing decision
    """
    routing = route_query(query)
    corpus = routing["corpus"]
    
    if corpus == "dense_docs":
        return dense_retriever
    elif corpus == "faq_docs":
        return faq_retriever
    else:  # "both"
        return [dense_retriever, faq_retriever]


## **Implement query translation (multi-query, maybe HyDE later)**


In [16]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate three 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

generate_queries = (
    prompt_perspectives 
    | llm
    | StrOutputParser() 
    # Split on newlines, strip whitespace, drop empties, and cap to 3 variants
    | (lambda x: [q.strip() for q in x.split("\n") if q.strip()][:3])
)

## **Retrieval + fusion + answer generation**


In [17]:
from operator import itemgetter
from langchain_core.load import dumps, loads
from langchain_core.runnables import RunnableLambda

def get_unique_union(documents: list[list]):
    """Unique union of retrieved docs from a list of lists."""
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return Documents reconstructed from their string representation
    return [loads(doc) for doc in unique_docs]


def retrieve_multi_corpus(queries: list[str]):
    """Retrieve documents for multiple query variants across routed corpora.

    - Takes the list of reformulated queries from `generate_queries`.
    - Uses `get_retriever_for_query` to decide which retriever(s) to use.
    - Returns a list-of-lists of Documents, one sublist per query, suitable for
      `get_unique_union`.
    """
    all_results: list[list] = []
    for q in queries:
        retriever_or_list = get_retriever_for_query(q, dense_retriever, faq_retriever)
        docs_for_q = []
        # Handle the case where routing returns multiple retrievers ("both")
        if isinstance(retriever_or_list, list):
            for r in retriever_or_list:
                docs_for_q.extend(r.invoke(q))
        else:
            docs_for_q.extend(retriever_or_list.invoke(q))
        all_results.append(docs_for_q)
    return all_results


# Full retrieval chain: question -> multi-query -> routed retrieval -> dedup
retrieval_chain = (
    generate_queries
    | RunnableLambda(retrieve_multi_corpus)
    | get_unique_union
)

# RAG Chain
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

question = "What is dependency injection in FastAPI and how do I use it?"
final_rag_chain.invoke({"question":question})


'Dependency injection in FastAPI is a system that allows your code to declare things it requires to work and use, called "dependencies." FastAPI will then take care of providing these dependencies to your code. This is useful for shared logic, database connections, security, authentication, and more. \n\nTo use dependency injection in FastAPI, you can define dependencies that can be injected into your path operation functions. You can do this by using the `Depends` class and passing the dependency to it. For example:\n```\nfrom fastapi import FastAPI, Depends\n\napp = FastAPI()\n\ndef dependency_function():\n    # This is a dependency function that returns a value\n    return "Dependency value"\n\n@app.get("/")\ndef read_root(dependency: str = Depends(dependency_function)):\n    # This is a path operation function that depends on the dependency function\n    return {"dependency": dependency}\n```\nIn this example, the `read_root` function depends on the `dependency_function`, which is 

## **Small evaluation loop**


In [None]:
from typing import List, Dict, Any
import textwrap

# Small, cheap evaluation set tailored to FastAPI docs
EVAL_QUESTIONS: List[Dict[str, Any]] = [
    {
        "question": "What is dependency injection in FastAPI and how do I use it?",
        "keywords": ["Depends", "dependency injection"],
    },
    {
        "question": "How do I define a Pydantic model for a request body in FastAPI?",
        "keywords": ["BaseModel", "request body", "pydantic"],
    },
    {
        "question": "How can I run background tasks in FastAPI?",
        "keywords": ["BackgroundTasks", "background task"],
    },
    {
        "question": "How do I enable CORS in FastAPI?",
        "keywords": ["CORSMiddleware", "add_middleware"],
    },
    {
        "question": "How do I declare path and query parameters in FastAPI?",
        "keywords": ["path parameter", "query parameter", "type hints"],
    },
]


def score_answer_keywords(answer: str, keywords: List[str]) -> float:
    """Very small heuristic: fraction of keywords that appear in the answer."""
    answer_lower = answer.lower()
    if not keywords:
        return 0.0
    hits = sum(1 for kw in keywords if kw.lower() in answer_lower)
    return hits / len(keywords)


def run_small_eval(show_sources: bool = False) -> List[Dict[str, Any]]:
    """Run a lightweight eval loop over EVAL_QUESTIONS.

    - Uses `final_rag_chain` to generate answers.
    - Scores answers by keyword coverage (0–1).
    - Optionally prints which source files were retrieved.
    """
    results: List[Dict[str, Any]] = []

    for ex in EVAL_QUESTIONS:
        q = ex["question"]
        keywords = ex.get("keywords", [])

        # Generate answer with current RAG pipeline
        answer = final_rag_chain.invoke({"question": q})
        score = score_answer_keywords(answer, keywords)

        # Inspect retrieval for debugging (optional)
        sources = []
        if show_sources:
            docs = retrieval_chain.invoke({"question": q})
            sources = sorted({d.metadata.get("source", "") for d in docs})

        result = {
            "question": q,
            "keywords": keywords,
            "score": score,
            "answer": answer,
            "sources": sources,
        }
        results.append(result)

        # Compact console view
        print("\n" + "=" * 80)
        print(f"Q: {q}")
        print(f"Score (keyword coverage): {score:.2f}")
        if show_sources and sources:
            print("Sources:")
            for s in sources[:5]:
                print(f"  - {s}")
        print("Answer snippet:")
        print(textwrap.shorten(answer.replace("\n", " "), width=400, placeholder=" ..."))

    avg_score = sum(r["score"] for r in results) / len(results)
    print("\n" + "#" * 80)
    print(f"Average keyword score over {len(results)} questions: {avg_score:.2f}")

    return results


# Example usage (uncomment to run):
# eval_results = run_small_eval(show_sources=True)



Q: What is dependency injection in FastAPI and how do I use it?
Score (keyword coverage): 1.00
Sources:
  - /Users/dimitar/Desktop/Software_Dev/rag-learning/fastapi/docs/en/docs/tutorial/dependencies/index.md
Answer snippet:
Dependency injection in FastAPI is a system that allows you to declare things that your code requires to work and use, and then the system takes care of providing those dependencies. This is useful when you need to have shared logic, share database connections, enforce security, authentication, role requirements, etc. To use dependency injection in FastAPI, you can define dependencies that can ...


KeyboardInterrupt: 