In [None]:
!pip install -r "/content/drive/MyDrive/session_2/requirements.txt"

Collecting lancedb (from -r /content/drive/MyDrive/session_2/requirements.txt (line 11))
  Downloading lancedb-0.25.3-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting llama-index (from -r /content/drive/MyDrive/session_2/requirements.txt (line 12))
  Downloading llama_index-0.14.10-py3-none-any.whl.metadata (13 kB)
Collecting llama-index-vector-stores-lancedb (from -r /content/drive/MyDrive/session_2/requirements.txt (line 13))
  Downloading llama_index_vector_stores_lancedb-0.4.2-py3-none-any.whl.metadata (460 bytes)
Collecting llama-index-embeddings-huggingface (from -r /content/drive/MyDrive/session_2/requirements.txt (line 14))
  Downloading llama_index_embeddings_huggingface-0.6.1-py3-none-any.whl.metadata (458 bytes)
Collecting llama-index-llms-huggingface-api (from -r /content/drive/MyDrive/session_2/requirements.txt (line 15))
  Downloading llama_index_llms_huggingface_api-0.6.1-py3-none-any.whl.metadata (1.1 kB)
Collecting llama-index-embeddings-openai (from -r

In [None]:
import gradio as gr
import os
from pathlib import Path
from typing import Dict, List, Optional, Any

# LlamaIndex core components
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, Settings
from llama_index.vector_stores.lancedb import LanceDBVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.openrouter import OpenRouter

# Advanced RAG components
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.response_synthesizers import TreeSummarize, Refine, CompactAndRefine
from llama_index.core.retrievers import VectorIndexRetriever

print("✅ All libraries imported successfully!")

✅ All libraries imported successfully!


In [None]:
import gradio as gr
import os
from pathlib import Path
from typing import Dict, List, Optional, Any
from google.colab import userdata

# LlamaIndex core components
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, Settings
from llama_index.vector_stores.lancedb import LanceDBVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.openrouter import OpenRouter

# Advanced RAG components
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.response_synthesizers import TreeSummarize, Refine, CompactAndRefine
from llama_index.core.retrievers import VectorIndexRetriever

print("✅ All libraries imported successfully!")

class AdvancedRAGBackend:
    """Configurable RAG backend built on LlamaIndex, LanceDB and OpenRouter.

    The backend owns one vector index (``self.index``) which is created by
    ``initialize_database`` and queried by ``advanced_query``. LLM, embedding
    and chunking options are applied through the global LlamaIndex ``Settings``
    object.

    NOTE(review): ``advanced_query`` never rebuilds the index, so the chunk
    size / overlap passed at query time are only written to ``Settings``;
    presumably they do not re-chunk documents that were already indexed.
    Re-run ``initialize_database`` after changing them — confirm.
    """

    # Embedding model used for both indexing and querying (kept constant).
    EMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5"
    # Maximum number of characters of each source node kept in the preview.
    SOURCE_PREVIEW_CHARS = 200

    def __init__(self):
        """Create an empty backend and apply the default settings."""
        self.index = None
        # Cached embedding model instance; created lazily by update_settings().
        self._embed_model = None
        # NOTE(review): OpenRouter model ids are usually provider-prefixed
        # (e.g. "openai/gpt-4o-mini") — confirm these bare names resolve.
        self.available_models = ["gpt-4o", "gpt-4o-mini"]
        self.available_postprocessors = ["SimilarityPostprocessor"]
        self.available_synthesizers = ["TreeSummarize", "Refine", "CompactAndRefine", "Default"]
        self.update_settings()

    def update_settings(self, model: str = "gpt-4o-mini", temperature: float = 0.1, chunk_size: int = 512, chunk_overlap: int = 50):
        """Apply LLM, embedding and chunking configuration to ``Settings``.

        Args:
            model: Model name passed to the OpenRouter LLM.
            temperature: Sampling temperature passed to the OpenRouter LLM.
            chunk_size: Chunk size written to ``Settings.chunk_size``.
            chunk_overlap: Chunk overlap written to ``Settings.chunk_overlap``.
        """
        # The LLM is only (re)configured when the Colab secret yields a key.
        api_key = userdata.get("OPENROUTER_API_KEY")
        if api_key:
            Settings.llm = OpenRouter(
                api_key=api_key,
                model=model,
                temperature=temperature
            )

        # Build the embedding model once and reuse it: this method runs on every
        # query, and re-instantiating the model each time reloads it needlessly.
        if getattr(self, "_embed_model", None) is None:
            self._embed_model = HuggingFaceEmbedding(
                model_name=self.EMBED_MODEL_NAME,
                trust_remote_code=True
            )
        Settings.embed_model = self._embed_model

        # UI number widgets may deliver floats (e.g. 512.0); store integers.
        Settings.chunk_size = int(chunk_size)
        Settings.chunk_overlap = int(chunk_overlap)

    def initialize_database(self, data_folder="/content/drive/MyDrive/session_2/data"):
        """Load every document under ``data_folder`` and build the vector index.

        Args:
            data_folder: Directory that is read recursively for documents.

        Returns:
            A human-readable status string (success or error); errors are
            reported in the string rather than raised.
        """
        if not Path(data_folder).exists():
            return f"❌ Data folder '{data_folder}' not found!"

        try:
            # Create vector store
            vector_store = LanceDBVectorStore(
                uri="./advanced_rag_vectordb",
                table_name="documents"
            )

            # Load documents
            reader = SimpleDirectoryReader(input_dir=data_folder, recursive=True)
            documents = reader.load_data()

            # Create storage context and index
            storage_context = StorageContext.from_defaults(vector_store=vector_store)
            self.index = VectorStoreIndex.from_documents(
                documents,
                storage_context=storage_context,
                show_progress=True
            )

            return f"✅ Database initialized successfully with {len(documents)} documents!"

        except Exception as e:
            return f"❌ Error initializing database: {str(e)}"

    def get_postprocessor(self, postprocessor_name: str, similarity_cutoff: float):
        """Return the postprocessor for ``postprocessor_name``.

        Only "SimilarityPostprocessor" is recognised; any other name
        (including "None") yields ``None``.
        """
        if postprocessor_name == "SimilarityPostprocessor":
            return SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)
        return None

    def get_synthesizer(self, synthesizer_name: str):
        """Return the response synthesizer for ``synthesizer_name``.

        "Default" and unknown names yield ``None``, in which case the caller
        does not pass a synthesizer to the query engine.
        """
        if synthesizer_name == "TreeSummarize":
            return TreeSummarize()
        if synthesizer_name == "Refine":
            return Refine()
        if synthesizer_name == "CompactAndRefine":
            return CompactAndRefine()
        return None

    @classmethod
    def _collect_sources(cls, response) -> List[Dict[str, Any]]:
        """Summarise the source nodes of a query response as plain dicts."""
        sources = []
        for node in getattr(response, "source_nodes", None) or []:
            text = node.text
            preview = text[:cls.SOURCE_PREVIEW_CHARS]
            # Only mark the preview as truncated when something was cut off.
            if len(text) > cls.SOURCE_PREVIEW_CHARS:
                preview += "..."
            sources.append({
                "text": preview,
                "score": getattr(node, 'score', 0.0),
                "source": getattr(node.node, 'metadata', {}).get('file_name', 'Unknown')
            })
        return sources

    def advanced_query(self, question: str, model: str, temperature: float,
                      chunk_size: int, chunk_overlap: int, similarity_top_k: int,
                      postprocessor_names: List[str], similarity_cutoff: float,
                      synthesizer_name: str) -> Dict[str, Any]:
        """Query the index using the supplied configuration.

        Args:
            question: The user's question; blank input is rejected.
            model: LLM model name forwarded to ``update_settings``.
            temperature: LLM temperature forwarded to ``update_settings``.
            chunk_size: Chunk size forwarded to ``update_settings``.
            chunk_overlap: Chunk overlap forwarded to ``update_settings``.
            similarity_top_k: Number of nodes to retrieve.
            postprocessor_names: Names resolved through ``get_postprocessor``;
                ``None`` is treated as an empty list.
            similarity_cutoff: Cutoff handed to the similarity postprocessor.
            synthesizer_name: Name resolved through ``get_synthesizer``.

        Returns:
            A dict with keys ``"response"`` (str), ``"sources"`` (list of dicts
            with ``text``/``score``/``source``) and ``"config"`` (the settings
            used, or ``{}`` when the query was rejected or failed). Failures
            are reported in ``"response"`` rather than raised.
        """
        if self.index is None:
            return {"response": "❌ Please initialize the database first!", "sources": [], "config": {}}

        if not question or not question.strip():
            return {"response": "⚠️ Please enter a question first!", "sources": [], "config": {}}

        try:
            # Normalise values coming from UI widgets, which may be floats/None.
            chunk_size = int(chunk_size)
            chunk_overlap = int(chunk_overlap)
            similarity_top_k = int(similarity_top_k)
            postprocessor_names = list(postprocessor_names or [])

            self.update_settings(model, temperature, chunk_size, chunk_overlap)

            # Resolve postprocessors, dropping names that map to nothing.
            candidates = (
                self.get_postprocessor(name, similarity_cutoff)
                for name in postprocessor_names
            )
            postprocessors = [p for p in candidates if p is not None]

            synthesizer = self.get_synthesizer(synthesizer_name)

            # Optional components are only passed when actually selected.
            query_engine_kwargs = {"similarity_top_k": similarity_top_k}
            if postprocessors:
                query_engine_kwargs["node_postprocessors"] = postprocessors
            if synthesizer is not None:
                query_engine_kwargs["response_synthesizer"] = synthesizer

            query_engine = self.index.as_query_engine(**query_engine_kwargs)
            response = query_engine.query(question)

            return {
                "response": str(response),
                "sources": self._collect_sources(response),
                "config": {
                    "model": model,
                    "temperature": temperature,
                    "chunk_size": chunk_size,
                    "chunk_overlap": chunk_overlap,
                    "similarity_top_k": similarity_top_k,
                    "postprocessors": postprocessor_names,
                    "similarity_cutoff": similarity_cutoff,
                    "synthesizer": synthesizer_name
                }
            }

        except Exception as e:
            return {"response": f"❌ Error processing query: {str(e)}", "sources": [], "config": {}}

# Create the single backend instance shared by the UI handlers below.
# Constructing it calls update_settings(), which reads the OPENROUTER_API_KEY
# Colab secret and loads the embedding model.
rag_backend = AdvancedRAGBackend()
print("🚀 Advanced RAG Backend initialized and ready!")

✅ All libraries imported successfully!


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

🚀 Advanced RAG Backend initialized and ready!


In [None]:
import gradio as gr

def create_advanced_rag_interface():
    """Build the Gradio Blocks UI for configuring and querying the RAG backend.

    The selectable models, postprocessors and synthesizers are read from the
    module-level ``rag_backend`` so the UI never offers an option the backend
    does not list.

    Returns:
        The ``gr.Blocks`` interface (not yet launched).
    """

    def initialize_db():
        """Handle database initialization and return the status message."""
        return rag_backend.initialize_database()

    def handle_advanced_query(question, model, temperature, chunk_size, chunk_overlap,
                             similarity_top_k, postprocessors, similarity_cutoff, synthesizer):
        """Run a query and return ``(response_text, configuration_summary)``."""
        result = rag_backend.advanced_query(
            question, model, temperature, chunk_size, chunk_overlap,
            similarity_top_k, postprocessors, similarity_cutoff, synthesizer
        )

        # The config dict is empty when the query was rejected or failed, in
        # which case every entry falls back to its placeholder.
        config = result["config"]
        selected = config.get("postprocessors")
        config_lines = [
            "**Current Configuration:**",
            f"- Model: {config.get('model', 'N/A')}",
            f"- Temperature: {config.get('temperature', 'N/A')}",
            f"- Chunk Size: {config.get('chunk_size', 'N/A')}",
            f"- Chunk Overlap: {config.get('chunk_overlap', 'N/A')}",
            f"- Similarity Top-K: {config.get('similarity_top_k', 'N/A')}",
            f"- Postprocessors: {', '.join(selected) if selected else 'None'}",
            f"- Similarity Cutoff: {config.get('similarity_cutoff', 'N/A')}",
            f"- Synthesizer: {config.get('synthesizer', 'N/A')}",
        ]

        return result["response"], "\n".join(config_lines)

    # Option lists come from the backend; "Default" is moved to the front of
    # the synthesizer list (stable sort keeps the remaining order).
    model_choices = list(rag_backend.available_models)
    postprocessor_choices = list(rag_backend.available_postprocessors)
    synthesizer_choices = sorted(
        rag_backend.available_synthesizers, key=lambda name: name != "Default"
    )

    with gr.Blocks(title="Advanced RAG Assistant") as interface:
        # Title and short description
        gr.Markdown(
            """
            # Advanced RAG Assistant
            An advanced interface to configure and run retrieval-augmented generation pipelines.
            Use the controls on the left to set model, retrieval and post-processing options.
            Enter a question on the right, then submit to run the pipeline and view the response + configuration used.
            """
        )

        # Database initialization row
        with gr.Row():
            init_btn = gr.Button("Initialize Database", elem_id="adv-init-db-btn")
            status_output = gr.Textbox(
                label="Initialization Status",
                value="Not initialized",
                interactive=False,
                lines=1
            )

        gr.Markdown("---")

        # Main layout: config controls (left) and query/response (right)
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### ⚙️ RAG Configuration")

                # Model selection
                model_dropdown = gr.Dropdown(
                    choices=model_choices,
                    value="gpt-4o-mini",
                    label="Model"
                )

                # Temperature control
                temperature_slider = gr.Slider(
                    minimum=0.0, maximum=1.0, step=0.1, value=0.1,
                    label="Temperature"
                )

                # Chunking parameters; precision=0 makes the widgets yield
                # whole numbers instead of floats such as 512.0.
                chunk_size_input = gr.Number(value=512, label="Chunk Size (tokens)", precision=0)
                chunk_overlap_input = gr.Number(value=50, label="Chunk Overlap (tokens)", precision=0)

                gr.Markdown("#### Retrieval")
                # Retrieval parameters
                similarity_topk_slider = gr.Slider(
                    minimum=1, maximum=20, step=1, value=5,
                    label="Similarity Top-K"
                )

                # Postprocessor selection
                postprocessor_checkbox = gr.CheckboxGroup(
                    choices=postprocessor_choices,
                    value=[],
                    label="Postprocessors (select zero or more)"
                )

                # Similarity filtering cutoff
                similarity_cutoff_slider = gr.Slider(
                    minimum=0.0, maximum=1.0, step=0.1, value=0.3,
                    label="Similarity Cutoff"
                )

                # Response synthesizer
                synthesizer_dropdown = gr.Dropdown(
                    choices=synthesizer_choices,
                    value="Default",
                    label="Response Synthesizer"
                )

            with gr.Column(scale=2):
                gr.Markdown("### 💬 Query Interface")

                # Query input
                query_input = gr.Textbox(
                    label="Ask a question",
                    placeholder="e.g. What are the benefits of AI agents?",
                    lines=3
                )

                # Submit button
                submit_btn = gr.Button("Run Advanced Query", variant="primary")

                # Response output
                response_output = gr.Textbox(
                    label="Response",
                    value="",
                    interactive=False,
                    lines=12
                )

                # Configuration display
                config_display = gr.Textbox(
                    label="Used Configuration",
                    value="",
                    interactive=False,
                    lines=8
                )

        # Wire up the buttons to their handlers
        init_btn.click(fn=initialize_db, inputs=None, outputs=[status_output])

        submit_btn.click(
            fn=handle_advanced_query,
            inputs=[
                query_input, model_dropdown, temperature_slider,
                chunk_size_input, chunk_overlap_input, similarity_topk_slider,
                postprocessor_checkbox, similarity_cutoff_slider, synthesizer_dropdown
            ],
            outputs=[response_output, config_display]
        )

    return interface

# Build the Gradio Blocks app once; it is only constructed here, not launched.
advanced_interface = create_advanced_rag_interface()
print("✅ Advanced RAG interface created successfully!")



✅ Advanced RAG interface created successfully!


In [None]:
# Usage notes printed above the app. Button names match the labels defined in
# the interface ("Initialize Database", "Run Advanced Query"), and the API key
# is described as a Colab secret because that is where the backend reads it.
launch_notes = [
    "🎉 Launching your Advanced RAG Assistant...",
    "🔗 Your application will open in a new browser tab!",
    "",
    "⚠️  Make sure your OPENROUTER_API_KEY Colab secret is set!",
    "",
    "📋 Testing Instructions:",
    "1. Click 'Initialize Database' button first",
    "2. Wait for success message",
    "3. Configure your RAG parameters:",
    "   - Choose model (gpt-4o, gpt-4o-mini)",
    "   - Adjust temperature (0.0 = deterministic, 1.0 = creative)",
    "   - Set chunk size and overlap",
    "   - Choose similarity top-k",
    "   - Select postprocessors and synthesizer",
    "4. Enter a question and click 'Run Advanced Query'",
    "5. Review both the response and configuration used",
    "",
    "🧪 Experiments to try:",
    "- Compare different models with the same question",
    "- Test temperature effects (0.1 vs 0.9)",
    "- Try different chunk sizes (256 vs 1024)",
    "- Compare synthesizers (TreeSummarize vs Refine)",
    "- Adjust similarity cutoff to filter results",
]
print("\n".join(launch_notes))

# Start the Gradio server for the interface built earlier.
advanced_interface.launch()

🎉 Launching your Advanced RAG Assistant...
🔗 Your application will open in a new browser tab!

⚠️  Make sure your OPENROUTER_API_KEY environment variable is set!

📋 Testing Instructions:
1. Click 'Initialize Vector Database' button first
2. Wait for success message
3. Configure your RAG parameters:
   - Choose model (gpt-4o, gpt-4o-mini)
   - Adjust temperature (0.0 = deterministic, 1.0 = creative)
   - Set chunk size and overlap
   - Choose similarity top-k
   - Select postprocessors and synthesizer
4. Enter a question and click 'Ask Question'
5. Review both the response and configuration used

🧪 Experiments to try:
- Compare different models with the same question
- Test temperature effects (0.1 vs 0.9)
- Try different chunk sizes (256 vs 1024)
- Compare synthesizers (TreeSummarize vs Refine)
- Adjust similarity cutoff to filter results
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this

