# Assignment 3b Solution: Advanced Gradio RAG Frontend
## Day 6 Session 2 - Building Configurable RAG Applications

This notebook contains the complete solution for Assignment 3b.

**Solution demonstrates:**
- Advanced Gradio layout with columns and complex controls
- All RAG configuration parameters exposed as UI controls, with OpenRouter (not OpenAI) as the LLM provider
- Professional UI patterns and component organization
- Dynamic parameter handling and real-time configuration display

**Note:** This solution uses OpenRouter for LLM access. Set the `OPENROUTER_API_KEY` environment variable before running the notebook.


In [1]:
# Import all required libraries
import gradio as gr
import os
from pathlib import Path
from typing import Dict, List, Optional, Any

# LlamaIndex core components
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, Settings
from llama_index.vector_stores.lancedb import LanceDBVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.openrouter import OpenRouter  # Using OpenRouter, not OpenAI

# Advanced RAG components
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.response_synthesizers import TreeSummarize, Refine, CompactAndRefine
from llama_index.core.retrievers import VectorIndexRetriever

print("✅ All libraries imported successfully!")


  from .autonotebook import tqdm as notebook_tqdm


✅ All libraries imported successfully!


In [2]:
class AdvancedRAGBackend:
    """Advanced RAG backend with configurable parameters.

    Holds a LlamaIndex ``VectorStoreIndex`` backed by a LanceDB vector store
    and exposes one query entry point whose LLM, retrieval, post-processing
    and synthesis options can be changed on every call.

    Attributes:
        index: The built index, or ``None`` until ``initialize_database``
            succeeds.
        available_models: Model names offered to the UI.
        available_postprocessors: Post-processor names understood by
            ``get_postprocessor``.
        available_synthesizers: Synthesizer names understood by
            ``get_synthesizer`` ("Default" maps to no explicit synthesizer).
    """

    def __init__(self):
        self.index = None
        # Embedding model is created once and reused by update_settings().
        self._embed_model = None
        self.available_models = ["gpt-4o", "gpt-4o-mini"]
        self.available_postprocessors = ["SimilarityPostprocessor"]
        self.available_synthesizers = ["TreeSummarize", "Refine", "CompactAndRefine", "Default"]
        self.update_settings()

    def update_settings(self, model: str = "gpt-4o-mini", temperature: float = 0.1,
                        chunk_size: int = 512, chunk_overlap: int = 50) -> None:
        """Update the global LlamaIndex ``Settings`` from user configuration.

        Args:
            model: Model name passed to ``OpenRouter``.
            temperature: Sampling temperature passed to ``OpenRouter``.
            chunk_size: Chunk size; coerced to ``int`` before being stored.
            chunk_overlap: Chunk overlap; coerced to ``int`` before being stored.

        The LLM is only (re)configured when ``OPENROUTER_API_KEY`` is set;
        otherwise ``Settings.llm`` is left untouched.

        NOTE(review): chunk_size/chunk_overlap are written to the global
        Settings on every query; an index that has already been built
        presumably keeps the chunking it was created with - confirm.
        """
        # Use OpenRouter API key (not OpenAI)
        api_key = os.getenv("OPENROUTER_API_KEY")
        if api_key:
            Settings.llm = OpenRouter(api_key=api_key, model=model, temperature=temperature)

        # Build the embedding model only once: this method runs on every
        # query, and re-creating the model each time reloads it from disk.
        embed_model = getattr(self, "_embed_model", None)
        if embed_model is None:
            embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5", trust_remote_code=True)
            self._embed_model = embed_model
        Settings.embed_model = embed_model

        # UI number widgets may deliver floats (e.g. 512.0); store integers.
        Settings.chunk_size = int(chunk_size)
        Settings.chunk_overlap = int(chunk_overlap)

    def initialize_database(self, data_folder: str = "../data") -> str:
        """Build the vector index from the documents under ``data_folder``.

        Args:
            data_folder: Directory read recursively by ``SimpleDirectoryReader``.

        Returns:
            A human-readable status message (success or error); exceptions
            raised while loading or indexing are reported in the message
            rather than propagated.
        """
        if not Path(data_folder).exists():
            return f"❌ Data folder '{data_folder}' not found!"

        try:
            vector_store = LanceDBVectorStore(uri="./advanced_rag_vectordb", table_name="documents")
            reader = SimpleDirectoryReader(input_dir=data_folder, recursive=True)
            documents = reader.load_data()
            storage_context = StorageContext.from_defaults(vector_store=vector_store)
            self.index = VectorStoreIndex.from_documents(documents, storage_context=storage_context, show_progress=True)
            return f"✅ Database initialized successfully with {len(documents)} documents!"
        except Exception as e:
            return f"❌ Error initializing database: {str(e)}"

    def get_postprocessor(self, postprocessor_name: str, similarity_cutoff: float):
        """Return the post-processor for ``postprocessor_name``, or ``None`` if unknown."""
        if postprocessor_name == "SimilarityPostprocessor":
            return SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)
        return None

    def get_synthesizer(self, synthesizer_name: str):
        """Return a response synthesizer for ``synthesizer_name``.

        Unknown names (including "Default") yield ``None`` so the query
        engine falls back to its own default synthesizer.
        """
        # Map names to classes and instantiate only the requested one,
        # instead of constructing every synthesizer on each call.
        synthesizer_classes = {
            "TreeSummarize": TreeSummarize,
            "Refine": Refine,
            "CompactAndRefine": CompactAndRefine,
        }
        synthesizer_cls = synthesizer_classes.get(synthesizer_name)
        return synthesizer_cls() if synthesizer_cls is not None else None

    def advanced_query(self, question: str, model: str, temperature: float,
                       chunk_size: int, chunk_overlap: int, similarity_top_k: int,
                       postprocessor_names: List[str], similarity_cutoff: float,
                       synthesizer_name: str) -> Dict[str, Any]:
        """Query the RAG system with advanced configuration.

        Args:
            question: User question; blank input is rejected.
            model: Model name forwarded to ``update_settings``.
            temperature: Sampling temperature forwarded to ``update_settings``.
            chunk_size: Chunk size forwarded to ``update_settings``.
            chunk_overlap: Chunk overlap forwarded to ``update_settings``.
            similarity_top_k: Number of nodes to retrieve.
            postprocessor_names: Names resolved through ``get_postprocessor``;
                ``None`` is treated as an empty list.
            similarity_cutoff: Cutoff handed to each post-processor.
            synthesizer_name: Name resolved through ``get_synthesizer``.

        Returns:
            A dict with keys ``"response"`` (answer or error text),
            ``"sources"`` (list of dicts with ``text``/``score``/``source``)
            and ``"config"`` (the settings used, empty on failure). Errors
            are reported in ``"response"`` rather than raised.
        """
        if self.index is None:
            return {"response": "❌ Please initialize the database first!", "sources": [], "config": {}}
        if not question or not question.strip():
            return {"response": "⚠️ Please enter a question first!", "sources": [], "config": {}}

        try:
            # Numeric UI widgets can hand over floats; these are integer
            # parameters. Invalid values (e.g. None) land in the except below.
            chunk_size = int(chunk_size)
            chunk_overlap = int(chunk_overlap)
            similarity_top_k = int(similarity_top_k)
            postprocessor_names = list(postprocessor_names or [])

            self.update_settings(model, temperature, chunk_size, chunk_overlap)

            # Build each post-processor exactly once and keep the ones that resolved.
            postprocessors = []
            for name in postprocessor_names:
                postprocessor = self.get_postprocessor(name, similarity_cutoff)
                if postprocessor is not None:
                    postprocessors.append(postprocessor)
            synthesizer = self.get_synthesizer(synthesizer_name)

            kwargs = {"similarity_top_k": similarity_top_k}
            if postprocessors:
                kwargs["node_postprocessors"] = postprocessors
            if synthesizer is not None:
                kwargs["response_synthesizer"] = synthesizer

            query_engine = self.index.as_query_engine(**kwargs)
            response = query_engine.query(question)

            sources = []
            if hasattr(response, 'source_nodes'):
                sources = [{"text": node.text[:200] + "...", "score": getattr(node, 'score', 0.0),
                            "source": getattr(node.node, 'metadata', {}).get('file_name', 'Unknown')}
                           for node in response.source_nodes]

            return {
                "response": str(response), "sources": sources,
                "config": {"model": model, "temperature": temperature, "chunk_size": chunk_size,
                           "chunk_overlap": chunk_overlap, "similarity_top_k": similarity_top_k,
                           "postprocessors": postprocessor_names, "similarity_cutoff": similarity_cutoff,
                           "synthesizer": synthesizer_name}
            }
        except Exception as e:
            return {"response": f"❌ Error processing query: {str(e)}", "sources": [], "config": {}}

# Module-level backend instance used by the Gradio callbacks defined in the
# interface cell. Constructing it runs update_settings() with default values.
rag_backend = AdvancedRAGBackend()
print("🚀 Advanced RAG Backend initialized and ready!")


🚀 Advanced RAG Backend initialized and ready!


In [3]:
def create_advanced_rag_interface():
    """Build the Gradio Blocks UI for the advanced RAG assistant.

    The left column holds every tunable RAG parameter; the right column
    holds the question box, the answer, and a summary of the configuration
    that produced it. All work is delegated to the module-level
    ``rag_backend``.

    Returns:
        The assembled ``gr.Blocks`` interface (not yet launched).
    """

    def initialize_db():
        """Build the vector database and return the backend's status message."""
        return rag_backend.initialize_database()

    def handle_advanced_query(question, model, temperature, chunk_size, chunk_overlap,
                              similarity_top_k, postprocessors, similarity_cutoff, synthesizer):
        """Run a query and return ``(response_text, configuration_summary)``."""
        result = rag_backend.advanced_query(
            question, model, temperature, chunk_size, chunk_overlap,
            similarity_top_k, postprocessors, similarity_cutoff, synthesizer
        )

        # On failure the backend returns an empty config, so every field
        # falls back to 'N/A' (and the postprocessor list to an empty string).
        config_text = f"""**Current Configuration:**
- Model: {result['config'].get('model', 'N/A')}
- Temperature: {result['config'].get('temperature', 'N/A')}
- Chunk Size: {result['config'].get('chunk_size', 'N/A')}
- Chunk Overlap: {result['config'].get('chunk_overlap', 'N/A')}
- Similarity Top-K: {result['config'].get('similarity_top_k', 'N/A')}
- Postprocessors: {', '.join(result['config'].get('postprocessors', []))}
- Similarity Cutoff: {result['config'].get('similarity_cutoff', 'N/A')}
- Synthesizer: {result['config'].get('synthesizer', 'N/A')}"""

        return result["response"], config_text

    # Take the selectable options from the backend so the UI and the backend
    # cannot drift apart.
    model_choices = list(rag_backend.available_models)
    postprocessor_choices = list(rag_backend.available_postprocessors)
    synthesizer_choices = list(rag_backend.available_synthesizers)

    with gr.Blocks(title="Advanced RAG Assistant") as interface:
        # Title and description
        gr.Markdown("# 🤖 Advanced RAG Assistant")
        gr.Markdown("Configure all RAG parameters for optimal performance and experiment with different settings!")

        # Database initialization
        init_btn = gr.Button("🔄 Initialize Vector Database", variant="primary")
        status_output = gr.Textbox(label="Database Status", lines=2, interactive=False)

        # Main layout with columns
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### ⚙️ RAG Configuration")

                # Model selection
                model_dropdown = gr.Dropdown(
                    choices=model_choices,
                    value="gpt-4o-mini",
                    label="LLM Model"
                )

                # Temperature control
                temperature_slider = gr.Slider(
                    minimum=0.0, maximum=1.0, step=0.1, value=0.1,
                    label="Temperature (0=deterministic, 1=creative)"
                )

                # Chunking parameters; precision=0 makes the component
                # round to the nearest integer and deliver an int.
                chunk_size_input = gr.Number(
                    value=512, minimum=128, maximum=2048, precision=0,
                    label="Chunk Size"
                )

                chunk_overlap_input = gr.Number(
                    value=50, minimum=0, maximum=200, precision=0,
                    label="Chunk Overlap"
                )

                # Retrieval parameters
                similarity_topk_slider = gr.Slider(
                    minimum=1, maximum=20, step=1, value=5,
                    label="Similarity Top-K (documents to retrieve)"
                )

                # Postprocessor selection
                postprocessor_checkbox = gr.CheckboxGroup(
                    choices=postprocessor_choices,
                    value=["SimilarityPostprocessor"],
                    label="Node Postprocessors"
                )

                # Similarity filtering
                similarity_cutoff_slider = gr.Slider(
                    minimum=0.0, maximum=1.0, step=0.1, value=0.3,
                    label="Similarity Cutoff (0=permissive, 1=strict)"
                )

                # Response synthesizer
                synthesizer_dropdown = gr.Dropdown(
                    choices=synthesizer_choices,
                    value="TreeSummarize",
                    label="Response Synthesizer"
                )

            with gr.Column(scale=2):
                gr.Markdown("### 💬 Query Interface")

                # Query input
                query_input = gr.Textbox(
                    label="Ask a question",
                    placeholder="Enter your question about the documents...",
                    lines=3
                )

                # Submit button
                submit_btn = gr.Button("🚀 Ask Question", variant="primary")

                # Response output
                response_output = gr.Textbox(
                    label="AI Response",
                    lines=12,
                    interactive=False
                )

                # Configuration display
                config_display = gr.Textbox(
                    label="Configuration Used",
                    lines=8,
                    interactive=False
                )

        # Connect functions to components
        init_btn.click(initialize_db, outputs=[status_output])

        # Input order must match handle_advanced_query's parameter order.
        submit_btn.click(
            handle_advanced_query,
            inputs=[
                query_input, model_dropdown, temperature_slider,
                chunk_size_input, chunk_overlap_input, similarity_topk_slider,
                postprocessor_checkbox, similarity_cutoff_slider, synthesizer_dropdown
            ],
            outputs=[response_output, config_display]
        )

    return interface

# Create the interface (builds the Blocks layout only; nothing is served
# until launch() is called in the next cell)
advanced_interface = create_advanced_rag_interface()
print("✅ Advanced RAG interface created successfully!")


✅ Advanced RAG interface created successfully!


In [None]:
# Launch the advanced interface
print("🎉 Launching Advanced RAG Assistant...")
print("🔗 Professional interface with full parameter control!")
print("⚠️  Make sure your OPENROUTER_API_KEY environment variable is set!")

# Launch with sharing enabled: besides the local URL, Gradio publishes a
# temporary public *.gradio.live link (see the cell output).
# NOTE(review): anyone with that link can run queries against this backend
# and therefore spend the configured OpenRouter key - confirm public
# sharing is intended, otherwise pass share=False.
advanced_interface.launch(share=True)


🎉 Launching Advanced RAG Assistant...
🔗 Professional interface with full parameter control!
⚠️  Make sure your OPENROUTER_API_KEY environment variable is set!
* Running on local URL:  http://127.0.0.1:7866
* Running on public URL: https://c5d16b73bdfc55272d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Parsing nodes: 100%|██████████| 42/42 [00:00<00:00, 198.07it/s]
Generating embeddings: 100%|██████████| 94/94 [00:03<00:00, 25.57it/s]
2025-09-21 09:38:46,583 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
2025-09-21 09:38:50,950 - INFO - 1 prompt is loaded, with the key: query
2025-09-21 09:38:52,582 - INFO - query_type :, vector
2025-09-21 09:38:54,751 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"
