<a href="https://colab.research.google.com/github/arthurvalenzuela/Outamation-Externship-Final/blob/main/RAG_Pipeline_%22Lucky%22.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Cell 1: Install Required Libraries

This cell installs all the necessary software packages for the project using pip. It includes libraries for handling PDFs (PyMuPDF, Surya), running AI models (torch), and building the main question-answering system (LlamaIndex and its specific integrations for embeddings, vector stores, LLMs, and utilities). It also checks the installed version of the core llama-index library.



In [None]:
# Cell 1: Install Required Libraries
# Installs the PDF/OCR stack (PyMuPDF, Surya) and torch into the runtime.
# NOTE(review): none of the packages below carry a version specifier, so every
# run installs whatever release is current - consider pinning for reproducibility.
!pip install PyMuPDF surya-ocr torch --quiet
# Install core LlamaIndex, integrations, and supporting libraries
!pip install llama-index llama-index-embeddings-huggingface sentence-transformers --quiet
# Install Qdrant and Groq integrations separately
!pip install llama-index-vector-stores-qdrant qdrant-client llama-index-llms-groq --quiet

# Status banners so the (quiet) installs leave a visible trace in the output.
print("--- Library Installations Complete ---")
print("\n--- Checking llama-index version ---")
!pip show llama-index # Displays the installed version for reference
print("--- ---")

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.5/48.5 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.0/20.0 MB[0m [31m78.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.1/154.1 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/2.8 MB[0m [31m69.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m92.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m79.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m47.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Cell 2: Import Libraries into Notebook

 This cell imports the specific functions, classes, and modules from the previously installed libraries that will be used throughout the notebook. Grouping imports here helps keep track of dependencies.

In [None]:
# Cell 2: Import Libraries into Notebook
# Standard libraries
import os
import time # For timing queries and pauses
import numpy as np
from PIL import Image # For handling images during PDF processing

# PDF processing libraries
import fitz  # PyMuPDF for opening PDFs
from surya.detection import DetectionPredictor # Surya for finding text blocks
from surya.recognition import RecognitionPredictor # Surya for reading text (OCR)

# Google Colab specific imports
from google.colab import files # For uploading files
from google.colab import userdata # For accessing API keys securely
from IPython.display import display, Markdown # For formatted output

# LlamaIndex and RAG component imports
from llama_index.core import VectorStoreIndex, StorageContext, Document as LlamaDocument
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.settings import Settings
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.llms.groq import Groq

# Qdrant client library and models
import qdrant_client
from qdrant_client import models as qdrant_models # Import the models submodule

print("Required libraries imported.")

Cell 3: Upload PDF Files

This cell uses Google Colab's file upload feature to allow you to select and upload the PDF documents you want the system to process. It supports uploading multiple files at once and stores their names for later use.

In [None]:
# Cell 3: Upload PDF Files
import os # Ensure os is imported
from google.colab import files

print("Please upload ALL your PDF files for the RAG pipeline:")
# Opens Colab's upload dialog; several files can be selected in one go.
uploaded_files = files.upload()

# Names of the uploaded files (the keys of the upload mapping), or an empty
# list when nothing was uploaded. Cell 5 iterates over this list.
pdf_filenames = list(uploaded_files) if uploaded_files else []

if not pdf_filenames:
    print("No files uploaded. Please run this cell again and upload files.")
else:
    print(f"\nUploaded {len(pdf_filenames)} files:")
    for uploaded_name in pdf_filenames:
        print(f"- {uploaded_name}")

Cell 4: Configure PDF Processing Settings

This cell sets up important configuration options for processing the PDFs in the next step. It defines the language for the OCR model (Surya) and the image resolution (DPI) used when converting PDF pages to images for analysis. It also initializes the Surya AI models needed for reading the text.

In [None]:
# Cell 4: Configure PDF Processing Settings
import os # Ensure os is imported
# Surya predictor classes (also imported in Cell 2)
from surya.detection import DetectionPredictor
from surya.recognition import RecognitionPredictor

# Optional experiment, left disabled: Surya compilation may add overhead.
# os.environ['COMPILE_ALL'] = 'true'
# print(f"Attempting to enable Surya compilation...")

# OCR language hint(s) handed to Surya in Cell 5.
LANGUAGES = ["en"] # Set to the primary language of your documents

# Resolution used when rasterising PDF pages for OCR. Lower values are faster
# and lighter on memory, at the risk of missing small text.
RENDERING_DPI = 96

# Predictors stay None (and surya_ready stays False) unless both load.
det_predictor = None
rec_predictor = None
surya_ready = False

print("Initializing Surya text detection and recognition models...")
try:
    det_predictor = DetectionPredictor()
    rec_predictor = RecognitionPredictor()
except Exception as init_error:
    print(f"Error initializing Surya models: {init_error}")
else:
    print("Surya models initialized.")
    surya_ready = True

print(
    "Configuration set and Surya models ready."
    if surya_ready
    else "Surya models failed to initialize. PDF processing cell (Cell 5) may fail."
)

Cell 5: Process PDFs and Create Text Chunks

This crucial cell performs the core data processing. It loops through each PDF uploaded in Cell 3, uses the initialized Surya models (from Cell 4) to extract text page-by-page, combines the text for each document, and then splits this text into smaller, overlapping chunks (called "Nodes"). These nodes, along with their source filename, are stored for indexing in the next steps.



In [None]:
# Cell 5: Process PDFs and Create Text Chunks
# Ensure necessary imports are available
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import Document as LlamaDocument
from PIL import Image
import numpy as np
import fitz

# Number of pages rendered and handed to Surya per call.
# BATCH_SIZE = 1 was found to work best for memory constraints in previous tests
BATCH_SIZE = 1

# --- RAG Settings (passed to SentenceSplitter below) ---
CHUNK_SIZE = 512
CHUNK_OVERLAP = 50

# Every node (text chunk) produced from every PDF; Cell 8 indexes this list.
# Re-initialised on each run so re-running the cell does not duplicate nodes.
all_nodes = []

# Check if files were uploaded (Cell 3) and Surya is ready (Cell 4)
if ('pdf_filenames' in locals() and pdf_filenames and
    'surya_ready' in locals() and surya_ready and
    det_predictor and rec_predictor):

    print(f"\nStarting PDF processing and chunking...")
    node_parser = SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

    for pdf_filename in pdf_filenames:
        print(f"\n===== Processing PDF: {pdf_filename} =====")
        doc_fitz = None
        try:
            doc_fitz = fitz.open(pdf_filename)
            num_pages = doc_fitz.page_count
            # Recognised lines are collected in a list and joined once at the
            # end instead of growing one string with repeated "+=".
            doc_text_lines = []

            for i in range(0, num_pages, BATCH_SIZE):
                batch_indices = range(i, min(i + BATCH_SIZE, num_pages))
                print(f"--- Processing Batch: Pages {batch_indices.start + 1} to {batch_indices.stop} ---")

                # 1. Render each page of the batch to a PIL image. A page that
                #    fails to render is reported and skipped.
                batch_pil_images = []
                for page_num_idx in batch_indices:
                    try:
                        page = doc_fitz[page_num_idx]
                        pix = page.get_pixmap(dpi=RENDERING_DPI)
                        batch_pil_images.append(
                            Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                        )
                    except Exception as e:
                        print(f"  Error rendering page {page_num_idx + 1} in {pdf_filename}: {e}")

                if not batch_pil_images:
                    continue

                # 2. Run Surya OCR on the rendered images. A failing batch is
                #    reported and skipped; the remaining batches still run.
                try:
                    batch_langs = [LANGUAGES] * len(batch_pil_images)
                    batch_predictions = rec_predictor(batch_pil_images, batch_langs, det_predictor)
                except Exception as e:
                    print(f"  Error during Surya prediction for batch starting at page {i+1} in {pdf_filename}: {e}")
                    continue

                # 3. Collect the recognised text of every line on every page.
                for page_prediction in batch_predictions:
                    for line in getattr(page_prediction, 'text_lines', None) or []:
                        line_text = getattr(line, 'text', None)
                        if line_text is not None:
                            doc_text_lines.append(line_text)

            # One recognised line per text line, each terminated by "\n".
            doc_text_content = "".join(f"{line_text}\n" for line_text in doc_text_lines)

            # --- Chunking the extracted text for the document ---
            # Whitespace-only output is treated the same as no output.
            if doc_text_content.strip():
                print(f"  Chunking text for {pdf_filename}...")
                llama_doc = LlamaDocument(
                    text=doc_text_content,
                    metadata={"file_name": pdf_filename}
                )
                nodes = node_parser.get_nodes_from_documents([llama_doc])
                print(f"  Generated {len(nodes)} nodes (chunks) for {pdf_filename}.")
                all_nodes.extend(nodes)
            else:
                print(f"  No text content extracted or processed for {pdf_filename}.")

        except Exception as e:
            print(f"Failed to process PDF {pdf_filename}: {e}")
        finally:
            # Runs on success, on error, and when the user interrupts the cell,
            # so the file handle is always released. "is not None" is used
            # rather than truthiness so the check never depends on the length
            # of the document object.
            if doc_fitz is not None and not doc_fitz.is_closed:
                doc_fitz.close()

    print(f"\nFinished processing all PDFs. Total nodes created: {len(all_nodes)}")

else:
    print("\nSkipping PDF processing - Check if files were uploaded in Cell 3 and Surya models initialized in Cell 4.")

Cell 6: Configure API Keys

This cell retrieves the necessary API keys you stored securely in Colab's Secrets Manager (accessible via the key icon 🔑 in the left sidebar). These keys are essential for connecting to external services used by the pipeline. Make sure you have added the required secrets before running this cell.

In [None]:
# Cell 6: Configure API Keys
import os
from google.colab import userdata

# --- Secrets Needed (Add via Key Icon 🔑) ---
#   - HUGGINGFACE_API_KEY (Your Hugging Face token)
#   - QDRANT_URL         (Your Qdrant instance URL)
#   - QDRANT_API_KEY      (Your Qdrant API key)
#   - GROQ_API_KEY        (Your Groq API key)
# --- Make sure notebook access is enabled for each secret. ---


def _read_colab_secret(secret_name):
    """Return the value of a Colab secret, or None when it cannot be read.

    `userdata.get` raises when the secret does not exist or the notebook has
    not been granted access to it. Those errors are reported and turned into
    None so the verification summary below can list every missing secret
    instead of the cell stopping at the first one.
    """
    try:
        return userdata.get(secret_name)
    except Exception as secret_error:
        print(f"  Could not read secret '{secret_name}': {type(secret_error).__name__}")
        return None


print("Retrieving API Keys from Colab Secrets...")
# Load keys into environment variables for libraries to use
for env_secret_name in ("HUGGINGFACE_API_KEY", "QDRANT_API_KEY", "GROQ_API_KEY"):
    env_secret_value = _read_colab_secret(env_secret_name)
    if env_secret_value:
        os.environ[env_secret_name] = env_secret_value
    else:
        # os.environ only accepts strings, and a value left over from an
        # earlier run must not make a now-missing secret look present.
        os.environ.pop(env_secret_name, None)
QDRANT_URL = _read_colab_secret('QDRANT_URL') # Store Qdrant URL separately

# Verification checks
secret_found = {
    "HUGGINGFACE_API_KEY": bool(os.environ.get("HUGGINGFACE_API_KEY")),
    "QDRANT_API_KEY": bool(os.environ.get("QDRANT_API_KEY")),
    "GROQ_API_KEY": bool(os.environ.get("GROQ_API_KEY")),
    "QDRANT_URL": bool(QDRANT_URL),
}
for checked_name, is_found in secret_found.items():
    print(f"- Checking {checked_name}: {'Found' if is_found else '!!! MISSING !!!'}")
key_check_passed = all(secret_found.values())

if key_check_passed:
    print("\nRequired API Keys and Qdrant URL retrieved successfully.")
else:
    print("\nOne or more required API keys/URLs missing. Please check Colab Secrets (🔑). Downstream cells might fail.")

Cell 7: Setup Embedding Model and Vector Database Connection

This cell initializes two key components:

- The Embedding Model (`BAAI/bge-base-en-v1.5` from Hugging Face), which converts text chunks into numerical representations (vectors).
- The Vector Database Client (Qdrant), which connects to the service where these vectors and the associated text will be stored and searched. It also prepares the `StorageContext` needed by LlamaIndex.

In [None]:
# Cell 7: Setup Embedding Model and Vector Database Connection
import os
import qdrant_client
# LlamaIndex pieces used in this cell
from llama_index.core import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.settings import Settings
# Qdrant model classes (same alias as in Cell 2)
from qdrant_client import models as qdrant_models

# --- Embedding model ---
# embed_model stays None and embed_model_ready stays False unless the model
# loads and has been registered as the LlamaIndex default.
print("Initializing embedding model (BAAI/bge-base-en-v1.5)...")
embed_model = None
embed_model_ready = False
if not os.environ.get("HUGGINGFACE_API_KEY"):
    print("HUGGINGFACE_API_KEY not found. Cannot initialize embedding model.")
else:
    try:
        embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
        print("Embedding model initialized.")
        Settings.embed_model = embed_model
        print("Global Settings.embed_model set.")
        embed_model_ready = True
    except Exception as embed_error:
        print(f"Error initializing embedding model: {embed_error}")

# --- Qdrant client and vector store ---
print("Initializing Qdrant client...")
# Name of the Qdrant collection this notebook reads and writes; a distinct
# name helps avoid conflicts when re-running.
qdrant_collection_name = "rag_pipeline_final_v1"
vector_store = None
storage_context = None
qdrant_ready = False

if 'QDRANT_URL' not in locals() or not QDRANT_URL or not os.environ.get("QDRANT_API_KEY"):
    print("QDRANT_URL or QDRANT_API_KEY not found. Cannot initialize Qdrant.")
else:
    try:
        qdrant_client_instance = qdrant_client.QdrantClient(
            url=QDRANT_URL,
            api_key=os.environ.get("QDRANT_API_KEY"),
            timeout=60
        )
        # Listing collections is used purely as a connectivity check.
        qdrant_client_instance.get_collections()
        print("Qdrant client initialized and connection verified.")

        # Vector store bound to the collection, wrapped in the storage
        # context that Cell 8 passes to the index.
        vector_store = QdrantVectorStore(
            client=qdrant_client_instance,
            collection_name=qdrant_collection_name,
        )
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        print(f"Qdrant vector store ('{qdrant_collection_name}') and storage context ready.")
        qdrant_ready = True
    except Exception as qdrant_error:
        print(f"Error initializing Qdrant client or vector store: {qdrant_error}")
        print("Check QDRANT_URL, QDRANT_API_KEY, and ensure the Qdrant instance is running/accessible.")

# Final status message
print(
    "\nEmbedding model and Qdrant setup complete."
    if embed_model_ready and qdrant_ready
    else "\nSetup failed for Embedding model and/or Qdrant. Subsequent cells will likely fail."
)

Cell 8: Create/Reset the Index in Vector Database

This cell builds the core knowledge base for the RAG system. It first attempts to reset (delete and recreate) the specified collection in the Qdrant database to ensure only the latest documents are included. Then, it takes the text chunks (Nodes) prepared in Cell 5, uses the embedding model (from Cell 7) to convert them into vectors, and stores these vectors along with the original text chunks in the Qdrant collection.

In [None]:
# Cell 8: Create/Reset the Index in Vector Database
import time
# Ensure necessary imports are available
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core.settings import Settings
import qdrant_client
# Use the alias defined in Cell 2 or import here
from qdrant_client import models as qdrant_models

index = None # Stays None unless the index is built successfully below
collection_reset = False

print("\n--- Entering Cell 8: Index Creation/Reset ---")
# At cell level locals() is the notebook namespace. Looking names up through
# it means a skipped earlier cell produces the messages below rather than a
# NameError.
notebook_vars = locals()
print(f"Checking prerequisites: qdrant_ready={notebook_vars.get('qdrant_ready')}, embed_model_ready={notebook_vars.get('embed_model_ready')}")
print(f"Is storage_context valid? {'Yes' if notebook_vars.get('storage_context') else 'No'}")
print(f"Is embed_model valid? {'Yes' if notebook_vars.get('embed_model') else 'No'}")

prerequisites_met = bool(
    notebook_vars.get('qdrant_ready')
    and notebook_vars.get('embed_model_ready')
    and notebook_vars.get('storage_context')
    and notebook_vars.get('embed_model')
)

if not prerequisites_met:
    print("Skipping index creation - Prerequisites not met.")

elif not notebook_vars.get('all_nodes'):
    # Checked BEFORE the reset: with nothing to index, deleting the existing
    # collection would only destroy data.
    print("Variable 'all_nodes' is empty or missing. Cannot build index.")
    print("Ensure Cell 5 ran correctly and produced nodes.")
    print("Existing Qdrant collection was left untouched.")

elif not (notebook_vars.get('qdrant_client_instance') and notebook_vars.get('qdrant_collection_name')):
    print("  Qdrant client or collection name missing. Cannot reset collection.")
    print("Collection reset failed or was skipped. Index building skipped.")

else:
    # Assign rather than test for None: reading Settings.embed_model while it
    # is unset makes LlamaIndex try to resolve a default embedding model.
    Settings.embed_model = embed_model

    # --- Reset Qdrant Collection ---
    print(f"Attempting to reset Qdrant collection: '{qdrant_collection_name}'...")
    try:
        # Explicit existence check instead of matching error-message text.
        if qdrant_client_instance.collection_exists(collection_name=qdrant_collection_name):
            print(f"  Collection '{qdrant_collection_name}' found. Deleting...")
            qdrant_client_instance.delete_collection(collection_name=qdrant_collection_name)
            time.sleep(2)
            print(f"  Collection '{qdrant_collection_name}' deleted.")
        else:
            print(f"  Collection '{qdrant_collection_name}' does not exist yet. Skipping deletion.")

        vector_size = 768 # Dimension for BAAI/bge-base-en-v1.5
        print(f"  Re-creating collection '{qdrant_collection_name}' with vector size: {vector_size}...")
        # create_collection is used because the collection was removed above;
        # recreate_collection is deprecated in qdrant-client.
        qdrant_client_instance.create_collection(
            collection_name=qdrant_collection_name,
            vectors_config=qdrant_models.VectorParams(size=vector_size, distance=qdrant_models.Distance.COSINE)
        )
        time.sleep(1)
        print(f"  Collection '{qdrant_collection_name}' recreated successfully.")
        collection_reset = True
    except Exception as e:
        print(f"  Error resetting Qdrant collection: {e}")
    # --- End Reset ---

    if collection_reset:
        print(f"Building index from {len(all_nodes)} nodes into fresh Qdrant collection...")
        try:
            # Embeds every node with embed_model and stores it through the
            # Qdrant-backed storage context from Cell 7.
            index = VectorStoreIndex(
                nodes=all_nodes,
                storage_context=storage_context,
                embed_model=embed_model,
                show_progress=True
            )
            print(f"Index built successfully in Qdrant collection: '{qdrant_collection_name}'.")
        except Exception as e:
            print(f"Error building index after collection reset: {e}")
    else:
        print("Collection reset failed or was skipped. Index building skipped.")

# Final status check
if index:
    print("\nIndex object is ready.")
else:
    print("\nIndex object could not be created. Querying will fail.")

Cell 9: Setup AI Language Model (Groq)

This cell initializes the Large Language Model (LLM) that will generate the final answers. We are using the Groq service, which provides very fast access to powerful open-source models like Llama 3. It uses the Groq API key configured in Cell 6.

In [None]:
# Cell 9: Setup AI Language Model (Groq)
import os
# LlamaIndex pieces used in this cell
from llama_index.llms.groq import Groq
from llama_index.core.settings import Settings

# --- Initialize Groq LLM ---
# Model id requested from Groq.
# NOTE(review): confirm this id is still offered by Groq before relying on it.
groq_model_name = "llama3-8b-8192"
llm = None
groq_ready = False

# The key is expected in the environment (Cell 6 puts it there).
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    print("GROQ_API_KEY not found in environment variables. Cannot initialize Groq LLM.")
else:
    print(f"Initializing Groq LLM with model: {groq_model_name}...")
    try:
        llm = Groq(model=groq_model_name, api_key=groq_api_key)
        # Register the LLM as the LlamaIndex-wide default.
        Settings.llm = llm
        print("Groq LLM initialized and set globally in Settings.")
        groq_ready = True
    except Exception as groq_error:
        print(f"Error initializing Groq LLM: {groq_error}")
        print("Please ensure your GROQ_API_KEY secret is correct and valid.")

print(
    "LLM configuration successful."
    if groq_ready
    else "LLM configuration failed. Querying will likely fail."
)

Cell 10: Setup Reranker (Optimization)

This cell sets up the reranker, which is an optimization technique. After the initial search retrieves relevant text chunks, the reranker analyzes these chunks specifically against the user's query and re-orders them by relevance. This helps ensure the most important information is passed to the LLM, improving answer quality.



In [None]:
# Cell 10: Setup Reranker (Optimization)
# LlamaIndex postprocessor used in this cell
from llama_index.core.postprocessor import SentenceTransformerRerank

# --- Initialize Reranker ---
# Cross-encoder model name handed to the reranker.
reranker_model_name = "cross-encoder/ms-marco-MiniLM-L-6-v2"
# Number of chunks kept after reranking (usually 2-5).
rerank_top_n = 3
reranker, reranker_ready = None, False

print(f"Initializing SentenceTransformerRerank with model: {reranker_model_name}...")
try:
    reranker = SentenceTransformerRerank(
        model=reranker_model_name,
        top_n=rerank_top_n,
    )
except Exception as rerank_error:
    print(f"Error initializing reranker: {rerank_error}")
    print("Check if model name is correct and 'sentence-transformers' library is installed.")
    print("Reranker setup failed.")
else:
    print(f"Reranker initialized successfully. Will return top {rerank_top_n} nodes.")
    reranker_ready = True

Cell 11: Setup Query Engine

This cell assembles the final question-answering engine. It combines the indexed data (Cell 8), the mechanism for retrieving relevant chunks (retriever), the reranker for optimization (Cell 10), and the AI language model for generating answers (Cell 9).

In [None]:
# Cell 11: Setup Query Engine
# Ensure necessary LlamaIndex imports are available
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.settings import Settings

query_engine = None # Stays None unless every prerequisite is available

# Prerequisites are looked up by name in the notebook namespace (locals() at
# cell level), so a skipped earlier cell leads to the diagnostics below
# instead of a NameError.
#
# The LLM is taken from the `llm` variable set in Cell 9 rather than read from
# Settings.llm: reading Settings.llm while no LLM has been configured makes
# LlamaIndex try to construct its default LLM, which can raise before the
# "prerequisites not met" messages are printed.
notebook_vars = locals()
active_index = notebook_vars.get('index')
active_llm = notebook_vars.get('llm')
active_reranker = notebook_vars.get('reranker') if notebook_vars.get('reranker_ready') else None

if active_index is not None and active_llm is not None and active_reranker is not None:

    print("Setting up the RAG query engine...")
    # How many chunks to retrieve initially before reranking
    initial_retrieval_k = 10
    print(f"- Retriever will fetch top {initial_retrieval_k} initial results.")

    # Retriever: similarity search over the index built in Cell 8.
    retriever = VectorIndexRetriever(
        index=active_index,
        similarity_top_k=initial_retrieval_k,
    )

    # Response synthesizer: turns the retrieved chunks into an answer with the LLM.
    response_synthesizer = get_response_synthesizer(llm=active_llm)
    print(f"- Response synthesizer configured with LLM: {active_llm.model}")

    # The reranker runs as a postprocessor on the retrieved chunks.
    node_postprocessors = [active_reranker]
    print(f"- Reranker configured: {active_reranker.model} (will return top {active_reranker.top_n})")

    # Assemble the final query engine
    query_engine = RetrieverQueryEngine(
        retriever=retriever,
        response_synthesizer=response_synthesizer,
        node_postprocessors=node_postprocessors,
    )
    print("\nQuery engine setup complete and ready for questions.")

else:
    print("\nSkipping query engine setup - Prerequisites not met.")
    if active_index is None: print("- Index missing or not loaded (Check Cell 8).")
    if active_llm is None: print("- LLM not configured in Settings (Check Cell 9).")
    if active_reranker is None: print("- Reranker not ready (Check Cell 10).")

Cell 12: Ask Questions ("Lucky" Interface)

This is the main interaction cell where you can ask questions about the documents you uploaded. The system ("Lucky") will use the indexed information and the AI model to provide answers. Type your questions in the input box that appears. Type `quit` to exit. Including the word `source` anywhere in your query will also show the specific text chunks Lucky used to generate the answer.

In [None]:
# Cell 12: Ask Questions ("Lucky" Interface)
import time

# Maximum number of characters of each source chunk shown in the sources list.
SNIPPET_MAX_CHARS = 300


def format_lucky_response(user_query, response, elapsed_seconds, context_files_str, show_sources):
    """Build the Markdown report shown for one answered query.

    Args:
        user_query: The question text, echoed back in the report.
        response: The object returned by the query engine. Its `response`
            attribute is shown as the answer; its `source_nodes` attribute is
            listed when `show_sources` is true. May be None.
        elapsed_seconds: Time the query took, printed with two decimals.
        context_files_str: Pre-formatted list of the files being queried.
        show_sources: Whether to append the "sources" section.

    Returns:
        The complete report as one Markdown string.
    """
    sections = [
        f"🫵🏼 **You Asked:**\n> {user_query}\n\n",
        f"📂 **Querying Based On File(s):** {context_files_str}\n\n",
        f"⏳ **Answered in:** {elapsed_seconds:.2f} seconds\n\n",
        "## 🧠 Lucky's Thoughts (Response)\n",
    ]

    if response and hasattr(response, 'response'):
        if isinstance(response.response, str):
            llm_response_text = response.response
        else:
            llm_response_text = f"(Response format issue: {str(response.response)} 🙇🏽‍♂️)"
    else:
        llm_response_text = "(No response object found)"
    sections.append(f"```text\n{llm_response_text}\n```\n\n")

    if show_sources:
        sections.append("## 👨🏽‍🦯 Where Lucky Looked (Sources)\n")
        sections.append("Lucky searched through the indexed documents:\n\n")
        source_nodes = getattr(response, 'source_nodes', None) if response else None
        if source_nodes:
            for position, source_node in enumerate(source_nodes, start=1):
                score = getattr(source_node, 'score', None)
                score_str = f"{score:.4f}" if score is not None else "N/A"
                if hasattr(source_node, 'get_content'):
                    content = source_node.get_content()
                else:
                    content = "(Error retrieving content)"
                snippet = content[:SNIPPET_MAX_CHARS]
                if len(content) > SNIPPET_MAX_CHARS:
                    snippet += "..."  # only mark snippets that were actually cut
                # Every snippet line gets the indentation of the fence it sits
                # in; otherwise a multi-line snippet ends the nested code block
                # after its first line.
                snippet = snippet.replace("\n", "\n        ")
                sections.append(f"* **📄 Source {position}** (Score: {score_str})\n")
                sections.append(f"    * **File:** `{source_node.metadata.get('file_name', 'N/A')}`\n")
                sections.append(f"    * **Content Snippet:**\n        ```text\n        {snippet}\n        ```\n")
        else:
            sections.append("* No specific source documents were retrieved or strongly used for this response.\n")

    sections.append("\n---\n")
    sections.append("🗣️ **What would you like to ask Lucky next?** (Type `quit` to exit)\n")
    return "".join(sections)


# Check if the query engine is ready
if 'query_engine' in locals() and query_engine:
    # Only needed once there is something to display.
    from IPython.display import display, Markdown

    # --- Prepare Context Files String ---
    context_files_str = "N/A (Cell 3 or 5 may not have run correctly)"
    if 'pdf_filenames' in locals() and pdf_filenames:
        context_files_str = ", ".join(f"`{name}`" for name in pdf_filenames)
    # --- End Prepare Context ---

    # --- Optional: Predefined Sample Queries ---
    # Add 3-5 diverse example questions here for submission/demonstration
    test_queries = [
        "What is the property address mentioned in the appraisal report?",
        "What is the 'Net Pay' on the sample payslip?",
        "Summarize the 'Term and Termination' section of the sample contract.",
    ]
    # Set to True if you want to run the predefined queries first
    run_predefined_queries = False
    if run_predefined_queries and test_queries:
        print("\n--- Running Predefined Sample Queries ---")
        print(f"(Using context from files: {context_files_str})")
        for query in test_queries:
            print(f"\n❓ Query: {query}")
            try:
                response = query_engine.query(query)
                print("\n✅ Response:")
                print(response.response)
                print("-" * 30)
            except Exception as e:
                print(f"\n❌ Error processing query: {e}")
        print("\n--- Predefined Queries Finished ---")

    # --- Interactive Query Loop ---
    display(Markdown("## ✨ Welcome to Lucky 🍀, Your Query Assistant! ✨"))
    display(Markdown("*(Powered by 🍀)*"))
    display(Markdown("---"))

    print("\n--- Starting Interactive Query Session ---")
    print("Type your question and press Enter. Type 'quit' to exit.")
    print("(Hint: Include the word 'source' in your query to see where Lucky looked!)")

    while True:
        try:
            # Stripped up front so blank and whitespace-only input are handled
            # the same way and never reach the query engine.
            user_query = input("\nEnter your query (or type 'quit' to exit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Interrupting the cell while it waits for input ends the session
            # cleanly instead of leaving a traceback.
            print("\nInput interrupted. Exiting interactive session.")
            break

        if not user_query:
            continue

        if user_query.lower() == 'quit':
            print("\n🍀 Farewell from Lucky! 🐈‍⬛")
            break

        show_sources = "source" in user_query.lower()

        try:
            start_time = time.perf_counter()
            response = query_engine.query(user_query)
            elapsed_seconds = time.perf_counter() - start_time

            display(Markdown(format_lucky_response(
                user_query, response, elapsed_seconds, context_files_str, show_sources
            )))

        except Exception as e:
            error_md = f"## Query:\n> {user_query}\n\n**❌ Sorry, Lucky encountered an error 🙇🏽‍♂️:**\n```\n{e}\n```"
            display(Markdown(error_md))

else:
    print("\nQuery engine not available. Please run Cell 11 successfully first.")