In [1]:
# Import required libraries for advanced RAG
import os
from pathlib import Path
from typing import Dict, List, Optional, Any
from pydantic import BaseModel, Field

# Core LlamaIndex components
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, Settings
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever

# Vector store
from llama_index.vector_stores.lancedb import LanceDBVectorStore

# Embeddings and LLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.openrouter import OpenRouter

# Advanced RAG components (we'll use these in the assignments)
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.response_synthesizers import TreeSummarize, Refine, CompactAndRefine
from llama_index.core.output_parsers import PydanticOutputParser

# Reaching this line means every import above resolved without raising.
print("‚úÖ Advanced RAG libraries imported successfully!")


  from .autonotebook import tqdm as notebook_tqdm


‚úÖ Advanced RAG libraries imported successfully!


In [4]:
# Configure Advanced RAG Settings (Using OpenRouter)
def setup_advanced_rag_settings():
    """
    Configure the global LlamaIndex ``Settings`` object for the RAG exercises.

    The OpenRouter API key is read from the ``OPENROUTER_API_KEY`` environment
    variable. Only when that variable is unset does the function try Streamlit
    secrets, and that fallback is best-effort: a missing ``streamlit`` package
    or a missing secrets file is treated the same as "no key configured"
    instead of aborting the whole setup.

    Side effects:
        - ``Settings.llm`` is assigned only when an API key was found.
        - ``Settings.embed_model``, ``Settings.chunk_size`` and
          ``Settings.chunk_overlap`` are always assigned.
        - Status lines are printed to stdout.

    Returns:
        None
    """
    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        # Optional fallback. Streamlit is imported lazily so the notebook does
        # not require it, and the broad except is deliberate: depending on the
        # Streamlit version, accessing `st.secrets` without a secrets file
        # raises different exception types.
        try:
            import streamlit as st
            api_key = st.secrets.get("OPENROUTER_API_KEY")
        except Exception:
            api_key = None

    llm_configured = bool(api_key)
    if not llm_configured:
        print("‚ö†Ô∏è  OPENROUTER_API_KEY not found - LLM operations will be limited")
        print("   You can still complete postprocessor and retrieval exercises")
    else:
        print("‚úÖ OPENROUTER_API_KEY found - full advanced RAG functionality available")

        # Configure OpenRouter LLM
        Settings.llm = OpenRouter(
            api_key=api_key,
            model="gpt-4o",
            temperature=0.1  # Lower temperature for more consistent responses
        )

    # Configure local embeddings (no API key required)
    # NOTE(review): trust_remote_code=True allows code shipped with the model
    # repository to execute locally; confirm this model actually needs it.
    Settings.embed_model = HuggingFaceEmbedding(
        model_name="BAAI/bge-small-en-v1.5",
        trust_remote_code=True
    )

    # Advanced RAG configuration
    Settings.chunk_size = 512  # Smaller chunks for better precision
    Settings.chunk_overlap = 50

    print("‚úÖ Advanced RAG settings configured")
    print("   - Chunk size: 512 (optimized for precision)")
    print("   - Using local embeddings for cost efficiency")
    # Only claim the LLM is ready when it was actually configured above.
    if llm_configured:
        print("   - OpenRouter LLM ready for response synthesis")

# Setup the configuration.
# This call assigns attributes on the shared `Settings` object imported above
# (embed_model, chunk_size, chunk_overlap, and llm when an API key is found).
setup_advanced_rag_settings()


‚úÖ OPENROUTER_API_KEY found - full advanced RAG functionality available
‚úÖ Advanced RAG settings configured
   - Chunk size: 512 (optimized for precision)
   - Using local embeddings for cost efficiency
   - OpenRouter LLM ready for response synthesis


In [5]:
# Setup: Create index from Assignment 1 (reuse the basic functionality)
def setup_basic_index(data_folder: str = "../data", force_rebuild: bool = False):
    """
    Create (or reopen) the LanceDB-backed vector index used by the exercises.

    Documents under ``data_folder`` are read recursively, embedded and stored
    in the ``documents`` table of the local LanceDB database at
    ``./advanced_rag_vectordb``. When that table is already present on disk
    its stored vectors are reused instead of re-embedding every document,
    unless ``force_rebuild`` is True.

    Args:
        data_folder: Directory containing the source documents.
        force_rebuild: Re-read and re-embed the documents even if a previously
            built table exists. Use it after changing the documents, the
            embedding model or the chunking settings.

    Returns:
        A ``VectorStoreIndex``, or ``None`` when ``data_folder`` does not
        exist or no document could be loaded from it.
    """
    db_uri = "./advanced_rag_vectordb"
    table_name = "documents"

    # Validate the input first so a wrong path never opens or creates the
    # database directory.
    if not Path(data_folder).exists():
        print(f"‚ùå Data folder not found: {data_folder}")
        return None

    # Create vector store
    vector_store = LanceDBVectorStore(
        uri=db_uri,
        table_name=table_name
    )

    # A local LanceDB database keeps each table in a "<name>.lance" directory
    # under the database URI; its presence means an index was built earlier.
    table_path = Path(db_uri) / f"{table_name}.lance"
    if not force_rebuild and table_path.exists():
        index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
        print(f"Reusing existing '{table_name}' table (pass force_rebuild=True to re-index)")
        print("   Ready for advanced RAG techniques!")
        return index

    # Load documents
    reader = SimpleDirectoryReader(input_dir=data_folder, recursive=True)
    documents = reader.load_data()

    # The reader skips files it cannot parse; if nothing survived there is
    # nothing to index, and queries against an empty store would only fail later.
    if not documents:
        print(f"No documents could be loaded from: {data_folder}")
        return None

    # Create storage context and index
    # NOTE(review): a rebuild relies on LanceDBVectorStore's default write mode
    # to replace rows from an earlier build - confirm for the installed version.
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        show_progress=True
    )

    print(f"‚úÖ Basic index created with {len(documents)} documents")
    print("   Ready for advanced RAG techniques!")
    return index

# Create the shared index that every later exercise cell queries
print("üìÅ Setting up basic index for advanced RAG...")
index = setup_basic_index()

# Report the outcome; a falsy `index` means the setup step returned nothing usable
index_status = (
    "üöÄ Ready to implement advanced RAG techniques!"
    if index
    else "‚ùå Failed to create index - check data folder path"
)
print(index_status)


Table documents doesn't exist yet. Please add some data to create it.


üìÅ Setting up basic index for advanced RAG...
Failed to load file c:\Users\NSHAR\OneDrive - paramanands limited\AI_projects\src\20251102\session_2\assignments\..\data\audio\ai_agents.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...
Failed to load file c:\Users\NSHAR\OneDrive - paramanands limited\AI_projects\src\20251102\session_2\assignments\..\data\audio\in_the_end.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...
Failed to load file c:\Users\NSHAR\OneDrive - paramanands limited\AI_projects\src\20251102\session_2\assignments\..\data\audio\rags.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...


Parsing nodes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 39/39 [00:00<00:00, 73.56it/s]
Generating embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 92/92 [00:11<00:00,  7.81it/s]
2025-11-02 08:07:14,277 - INFO - Create new table documents adding data.


‚úÖ Basic index created with 39 documents
   Ready for advanced RAG techniques!
üöÄ Ready to implement advanced RAG techniques!


In [17]:
def create_query_engine_with_similarity_filter(index, similarity_cutoff: float = 0.3, top_k: int = 10):
    """
    Create a query engine that discards low-similarity retrieval results.

    The engine first retrieves ``top_k`` candidate nodes and then drops every
    node whose similarity score is below ``similarity_cutoff`` before the
    response is synthesized.

    Args:
        index: Vector index to query
        similarity_cutoff: Minimum similarity score (0.0 to 1.0)
        top_k: Number of initial results to retrieve before filtering

    Returns:
        Query engine with similarity filtering
    """
    # The postprocessor's field is named `similarity_cutoff`; an unrecognised
    # keyword would leave the threshold unset and nothing would be filtered.
    similarity_processor = SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)

    # `similarity_top_k` must be passed directly: as_query_engine forwards its
    # keyword arguments to the retriever, and a `retriever_kwargs` dict is not
    # one of the arguments it understands.
    query_engine = index.as_query_engine(
        similarity_top_k=top_k,
        node_postprocessors=[similarity_processor]
    )

    return query_engine

# Test the function: build the similarity-filtered engine and run one query
if index:
    filtered_engine = create_query_engine_with_similarity_filter(index, similarity_cutoff=0.3)
    
    if filtered_engine:
        print("‚úÖ Query engine with similarity filtering created")
        
        # Test query
        test_query = "What are the benefits of AI agents?"
        print(f"\nüîç Testing query: '{test_query}'")
        
        # Answer synthesis calls the configured LLM
        response = filtered_engine.query(test_query)
        print(f"üìù Response: {response}")
    else:
        print("‚ùå Failed to create filtered query engine")
else:
    print("‚ùå No index available - run previous cells first")


2025-11-02 08:59:20,978 - INFO - query_type :, vector


‚úÖ Query engine with similarity filtering created

üîç Testing query: 'What are the benefits of AI agents?'


2025-11-02 08:59:21,535 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


üìù Response: AI agents offer several benefits, including the ability to tackle complex multi-step problems that require advanced problem-solving skills. They can be designed with well-defined system prompts, clear leadership, and task division, which enhance their effectiveness. Additionally, AI agents can incorporate dedicated phases for reasoning, planning, execution, and evaluation, as well as dynamic team structures and intelligent message filtering. These features make them more effective across various benchmarks and problem types. However, it's important to note that while promising, there are still challenges to address for their reliable application.
   (Complete the function above to test the response)


In [18]:
def create_query_engine_with_tree_summarize(index, top_k: int = 5):
    """
    Create a query engine that uses TreeSummarize for comprehensive responses.

    Args:
        index: Vector index to query
        top_k: Number of results to retrieve

    Returns:
        Query engine with TreeSummarize synthesis
    """
    tree_synthesizer = TreeSummarize()

    # `similarity_top_k` is the retriever's argument name and has to be passed
    # directly; a `retriever_kwargs` dict (or a bare `top_k` key) is not
    # recognised, which would leave the retriever on its default result count.
    query_engine = index.as_query_engine(
        response_synthesizer=tree_synthesizer,
        similarity_top_k=top_k
    )
    return query_engine

# Test the function: build the TreeSummarize engine and run one analytical query
if index:
    tree_engine = create_query_engine_with_tree_summarize(index)
    
    if tree_engine:
        print("‚úÖ Query engine with TreeSummarize created")
        
        # Test with a complex analytical query
        analytical_query = "Compare the advantages and disadvantages of different AI agent frameworks"
        print(f"\nüîç Testing analytical query: '{analytical_query}'")
        
        # Answer synthesis calls the configured LLM
        response = tree_engine.query(analytical_query)
        print(f"üìù TreeSummarize Response:\n{response}")
    else:
        print("‚ùå Failed to create TreeSummarize query engine")
else:
    print("‚ùå No index available - run previous cells first")


2025-11-02 08:59:33,718 - INFO - query_type :, vector


‚úÖ Query engine with TreeSummarize created

üîç Testing analytical query: 'Compare the advantages and disadvantages of different AI agent frameworks'


2025-11-02 08:59:34,265 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


üìù TreeSummarize Response:
Different AI agent frameworks offer various advantages and disadvantages based on their design and application. 

Advantages:
1. **Modularity and Composability**: Many frameworks, such as Agno and CrewAI, emphasize modularity and composability, allowing for flexible and rapid development. This makes it easier to integrate with existing cloud infrastructure and adapt to different use cases.
2. **Scalability**: Frameworks that incorporate multi-agent scaling laws can efficiently manage the performance of systems as the number of agents increases, which is beneficial for handling complex tasks.
3. **Advanced Problem-Solving**: Architectures that include well-defined system prompts, clear task division, and dynamic team structures are effective in tackling complex, multi-step problems.

Disadvantages:
1. **Complexity in Implementation**: The need for dedicated reasoning, planning, execution, and evaluation phases can increase the complexity of implementation an

In [19]:
# First, define the Pydantic models for structured outputs  
# Target schema for the structured-extraction exercise. No field declares a
# default, so all four values must be present for validation to succeed.
# NOTE(review): the class docstring and the Field descriptions are presumably
# included in the JSON schema that PydanticOutputParser shows to the LLM, so
# treat them as prompt text - confirm before rewording them.
class ResearchPaperInfo(BaseModel):
    """Structured information about a research paper or AI concept."""
    title: str = Field(description="The main title or concept name")
    key_points: List[str] = Field(description="3-5 main points or findings")
    applications: List[str] = Field(description="Practical applications or use cases")
    summary: str = Field(description="Brief 2-3 sentence summary")

# Import the missing component
from llama_index.core.program import LLMTextCompletionProgram

def create_structured_output_program(output_model: BaseModel = ResearchPaperInfo):
    """
    Create a structured output program using Pydantic models.

    The prompt lists the field names of ``output_model`` and has a single
    template variable, ``context``, which the caller supplies when invoking
    the returned program. For the default model the generated prompt is
    identical to the previously hard-coded one.

    Args:
        output_model: Pydantic model class (the class itself, not an instance)
            describing the structured output

    Returns:
        LLMTextCompletionProgram that returns structured data
    """
    output_parser = PydanticOutputParser(output_cls=output_model)

    # Derive the requested fields from the model so the prompt stays correct
    # for any model passed in. Pydantic v2 exposes them as `model_fields`,
    # v1 as `__fields__`; both keep declaration order.
    field_map = getattr(output_model, "model_fields", None)
    if field_map is None:
        field_map = output_model.__fields__
    field_bullets = "".join(f"- {field_name}\n" for field_name in field_map)

    # Only the second literal is an f-string; `{context}` in the third one is
    # left as-is on purpose so it remains a template variable.
    prompt_template_str = (
        "Extract the following information from the context:\n"
        f"{field_bullets}\n"
        "Context:\n{context}\n\n"
        "Provide the output in JSON format."
    )

    program = LLMTextCompletionProgram.from_defaults(
        output_parser=output_parser,
        prompt_template_str=prompt_template_str
    )

    return program

# Test the function: retrieve context from the index, then extract structured data
if index:
    structured_program = create_structured_output_program(ResearchPaperInfo)
    
    if structured_program:
        print("‚úÖ Structured output program created")
        
        # Test with retrieval and structured extraction
        structure_query = "Tell me about AI agents and their capabilities"
        print(f"\nüîç Testing structured query: '{structure_query}'")
        
        # Get context for structured extraction
        retriever = VectorIndexRetriever(index=index, similarity_top_k=3)
        nodes = retriever.retrieve(structure_query)
        context = "\n".join([node.text for node in nodes])
        
        # `context` fills the prompt's template variable; `query` is passed
        # along as well but the prompt has no placeholder for it.
        response = structured_program(context=context, query=structure_query)
        print(f"üìä Structured Response:\n{response}")
        
        print("\nüí° Expected output format:")
        print("   - title: String")
        print("   - key_points: List of strings")
        print("   - applications: List of strings")
        print("   - summary: String")
    else:
        print("‚ùå Failed to create structured output program")
else:
    print("‚ùå No index available - run previous cells first")


2025-11-02 08:59:51,713 - INFO - query_type :, vector


‚úÖ Structured output program created

üîç Testing structured query: 'Tell me about AI agents and their capabilities'


2025-11-02 08:59:52,408 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


üìä Structured Response:
title='The Landscape of Emerging AI Agent Architectures for Reasoning, Planning, and Tool Calling: A Survey' key_points=['The best agent architecture varies based on use case, incorporating techniques like system prompts, task division, and feedback.', 'Single and multi-agent patterns are effective in solving complex tasks requiring reasoning and tool execution.', 'Dynamic team structures and intelligent message filtering enhance agent performance.', 'Current AI-driven agents face limitations in benchmarks, real-world applicability, and language model biases.', 'Future research should focus on improving agent evaluation and reliability.'] applications=['AI-driven agents for complex problem-solving tasks', 'Dynamic team structures in AI systems', 'Enhanced reasoning and planning in AI applications'] summary='This survey paper explores advancements in AI agent architectures, highlighting their capabilities in reasoning, planning, and tool execution. It discusses

In [20]:
def create_advanced_rag_pipeline(index, similarity_cutoff: float = 0.3, top_k: int = 10):
    """
    Create a comprehensive advanced RAG pipeline combining multiple techniques.

    The engine retrieves ``top_k`` candidate nodes, drops those scoring below
    ``similarity_cutoff``, and synthesizes the answer with TreeSummarize.

    Args:
        index: Vector index to query
        similarity_cutoff: Minimum similarity score for filtering
        top_k: Number of initial results to retrieve

    Returns:
        Advanced query engine with filtering and synthesis combined
    """
    # The postprocessor's field is named `similarity_cutoff`; an unrecognised
    # keyword would leave the threshold unset and nothing would be filtered.
    similarity_processor = SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)

    tree_synthesizer = TreeSummarize()

    # `similarity_top_k` must be passed directly: as_query_engine forwards its
    # keyword arguments to the retriever, and a `retriever_kwargs` dict is not
    # one of the arguments it understands.
    advanced_engine = index.as_query_engine(
        similarity_top_k=top_k,
        node_postprocessors=[similarity_processor],
        response_synthesizer=tree_synthesizer
    )

    return advanced_engine

# Test the comprehensive pipeline: filtering plus TreeSummarize in one engine
if index:
    advanced_pipeline = create_advanced_rag_pipeline(index)
    
    if advanced_pipeline:
        print("‚úÖ Advanced RAG pipeline created successfully!")
        print("   üîß Similarity filtering: ‚úÖ")
        print("   üå≥ TreeSummarize synthesis: ‚úÖ")
        
        # Test with complex query
        complex_query = "Analyze the current state and future potential of AI agent technologies"
        print(f"\nüîç Testing complex query: '{complex_query}'")
        
        # Answer synthesis calls the configured LLM
        response = advanced_pipeline.query(complex_query)
        print(f"üöÄ Advanced RAG Response:\n{response}")
        
        print("\nüéØ This should provide:")
        print("   - Filtered relevant results only")
        print("   - Comprehensive analytical response")
        print("   - Combined postprocessing and synthesis")
    else:
        print("‚ùå Failed to create advanced RAG pipeline")
else:
    print("‚ùå No index available - run previous cells first")


2025-11-02 09:03:24,395 - INFO - query_type :, vector


‚úÖ Advanced RAG pipeline created successfully!
   üîß Similarity filtering: ‚úÖ
   üå≥ TreeSummarize synthesis: ‚úÖ

üîç Testing complex query: 'Analyze the current state and future potential of AI agent technologies'


2025-11-02 09:03:25,347 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


üöÄ Advanced RAG Response:
The current state of AI agent technologies is characterized by advancements in reasoning, planning, and tool execution capabilities, enabling them to tackle complex, multi-step problems. Both single-agent and multi-agent architectures are being explored, with the choice of architecture depending on the specific use case. Effective agent systems often incorporate well-defined system prompts, clear leadership and task division, dedicated phases for reasoning, planning, execution, and evaluation, as well as dynamic team structures and feedback mechanisms.

Despite these advancements, there are notable limitations that need addressing, such as the development of comprehensive benchmarks, ensuring real-world applicability, and mitigating biases inherent in language models. The future potential of AI agent technologies lies in overcoming these challenges, which will enable the creation of more reliable and effective autonomous agents. The progression from static l