In [None]:
# SETUP ENVIRONMENT: install the libraries this notebook imports further down
# (LangChain, FAISS, Transformers and friends). -q keeps pip's output short.
# NOTE(review): no versions are pinned, so a fresh run may resolve different releases.
!pip install -q langchain faiss-cpu transformers accelerate bitsandbytes huggingface_hub sentence-transformers langchain-community

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m84.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 MB[0m [31m44.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m112.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires requests==2.32.4, but you have requests 2.32.5 which is incompatible.[0m[31m
[0m

In [None]:
# Install the speech packages imported later as `assemblyai` and `gtts`
!pip install assemblyai gtts

In [None]:
# Import required libraries
import os
from pathlib import Path

# Hugging Face access token; read back through os.environ["HUGGINGFACE_TOKEN"] when the LLM is loaded.
# NOTE(review): "Huggingface_Token" looks like a placeholder — replace it locally and never commit a real token.
os.environ["HUGGINGFACE_TOKEN"] = "Huggingface_Token"

# Set cache directory for Transformers models
# Speeds up model loading by storing downloaded models locally
os.environ["TRANSFORMERS_CACHE"] = "/content/hf_cache"

# Create the cache folder; exist_ok=True makes re-running this cell harmless
Path("/content/hf_cache").mkdir(exist_ok=True)

In [None]:
# Configure the AssemblyAI client used later for speech-to-text.
# The import lives in this cell because it runs before the notebook's main import
# cell; without it a fresh kernel stops with NameError: name 'aai' is not defined.
import assemblyai as aai

# NOTE(review): "AssemblyAI_Token" looks like a placeholder — replace it locally and never commit a real key.
aai.settings.api_key = "AssemblyAI_Token"

In [None]:
# Standard libraries
import re
import numpy as np
import pandas as pd
import shutil
import time
from datetime import datetime
import ast
import csv
from pathlib import Path
import os
import io
from base64 import b64decode

# Scikit-learn
from sklearn.metrics.pairwise import cosine_similarity

# LangChain libraries
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline

# Hugging Face Transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Visualization and display
import matplotlib.pyplot as plt
from IPython.display import display, Javascript, Audio

# Google Colab file utilities
from google.colab import files
from google.colab import output

# Speech and audio processing
import assemblyai as aai
from gtts import gTTS

In [None]:
# Set up paths and directories

# Folder where one FAISS index per category is saved (relative to the working directory)
VECTORSTORE_DIR = Path("./vectorstores_local")
VECTORSTORE_DIR.mkdir(exist_ok=True)  # Create directory if it doesn't exist

# Model configuration

# Hugging Face id of the embedding model used for indexing and query routing
EMBEDDING_MODEL = "intfloat/multilingual-e5-large"

# Hugging Face id of the primary LLM (default for load_local_llm)
LLM_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"

# Smaller model that load_local_llm falls back to when the primary one fails to load
FALLBACK_MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Absolute paths to the preprocessed Excel files; a missing file is reported by load_excel_safe
PROJECTS_XLSX = "/content/English_Projects_Cleaned.xlsx"
CITIES_XLSX = "/content/English_Cities_Cleaned.xlsx"
PEOPLE_XLSX = "/content/English_Characters_Cleaned.xlsx"



In [None]:
def load_excel_safe(path: str):
    """Load an Excel sheet and guarantee lowercase 'title' and 'content' columns.

    Parameters:
    - path: location of the .xlsx file

    Returns:
    - DataFrame whose column names are stripped and lowercased and which always
      contains 'title' and 'content'; None when the file does not exist
      (a message is printed in that case).
    """
    # Check if file exists
    if not Path(path).exists():
        print(f"File not found: {path}")
        return None

    # Load Excel into DataFrame
    df = pd.read_excel(path)

    # Apply the normalised names to the frame itself. Previously they were only
    # used for the membership checks, so a sheet headed 'Title'/'Content' passed
    # the checks and then failed later on r["title"] / r["content"].
    df.columns = [str(c).strip().lower() for c in df.columns]

    # Ensure 'title' exists: promote the first column that is not already 'content'
    if "title" not in df.columns:
        candidates = [c for c in df.columns if c != "content"]
        if candidates:
            df = df.rename(columns={candidates[0]: "title"})
        else:
            # Only 'content' (or nothing at all) is available
            df["title"] = df["content"] if "content" in df.columns else ""

    # Ensure 'content' exists: promote the first column that is not 'title'
    if "content" not in df.columns:
        candidates = [c for c in df.columns if c != "title"]
        if candidates:
            df = df.rename(columns={candidates[0]: "content"})
        else:
            # If only one column exists, duplicate 'title' as 'content'
            df["content"] = df["title"]
    return df

# Read the three category workbooks; a missing file leaves None under its key
datasets = dict(
    people=load_excel_safe(PEOPLE_XLSX),
    projects=load_excel_safe(PROJECTS_XLSX),
    Cities=load_excel_safe(CITIES_XLSX),
)

# Report the (rows, columns) shape of every workbook that was found
for k, v in datasets.items():
    if v is None:
        continue
    print(f"[Loaded] {k}: {v.shape}")

[✅ Loaded] people: (101, 3)
[✅ Loaded] projects: (47, 3)
[✅ Loaded] Cities: (189, 6)


In [None]:
# Define the local directory for FAISS vectorstores
VECTORSTORE_DIR = Path("./vectorstores_local")

# Wipe any indexes left by a previous run. Because the folder is emptied here,
# build_or_load_vs finds no saved index afterwards when the cells run top to
# bottom, so every index is rebuilt from the current data.
if VECTORSTORE_DIR.exists():
    # Remove old vectorstore files to start fresh
    shutil.rmtree(VECTORSTORE_DIR)
    print("Deleted old FAISS vectorstores.")

# Recreate the (now empty) directory
VECTORSTORE_DIR.mkdir(exist_ok=True)

🧹 Deleted old FAISS vectorstores.


In [None]:
def df_to_docs(df, category):
    """Split every row's 'content' into chunks and wrap each chunk in a Document.

    Parameters:
    - df: DataFrame with 'title' and 'content' columns
    - category: category name; selects the chunking settings and is stored in
      each Document's metadata

    Returns:
    - list of Document objects whose metadata holds the row title and the category
    """
    # (chunk size, overlap) per category; unknown categories use (500, 80)
    chunk_size, chunk_overlap = {
        "people": (700, 120),
        "Cities": (300, 50),
        "projects": (500, 80),
    }.get(category, (500, 80))

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

    documents = []
    for _, row in df.iterrows():
        row_title = str(row["title"])
        row_text = str(row["content"])
        # One Document per chunk, all sharing the row's title
        documents.extend(
            Document(page_content=chunk, metadata={"title": row_title, "category": category})
            for chunk in text_splitter.split_text(row_text)
        )
    return documents

# Chunk every dataset that was actually loaded; categories whose file was
# missing (value None in `datasets`) are left out of the mapping
docs_by_cat = {cat: df_to_docs(df, cat) for cat, df in datasets.items() if df is not None}

In [None]:
# Initialize the HuggingFace embeddings model named by EMBEDDING_MODEL;
# the same object is reused for building the indexes and for routing queries
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

def build_or_load_vs(cat, docs):
    """
    Return the FAISS vectorstore for one category, reusing a saved index when present.

    Parameters:
    - cat: category name (also the sub-folder of VECTORSTORE_DIR holding the index)
    - docs: list of LangChain Document objects to index when no saved index exists

    Returns:
    - FAISS vectorstore for the given category
    """
    index_dir = VECTORSTORE_DIR / cat

    if not index_dir.exists():
        # Nothing saved yet: embed the documents and persist the new index
        print(f"[Build FAISS] {cat} ({len(docs)} docs)")
        store = FAISS.from_documents(docs, embeddings)
        store.save_local(str(index_dir))
        print(f"[Saved] {index_dir}")
        return store

    # A saved index exists: load it instead of re-embedding
    print(f"[Load FAISS] {cat}")
    return FAISS.load_local(str(index_dir), embeddings, allow_dangerous_deserialization=True)

# One FAISS vectorstore per category, keyed by the same names as `docs_by_cat`
vectorstores = {cat: build_or_load_vs(cat, docs) for cat, docs in docs_by_cat.items()}

  embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/387 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/690 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/418 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/201 [00:00<?, ?B/s]

[⚙️ Build FAISS] people (1010 docs)
[💾 Saved] vectorstores_local/people
[⚙️ Build FAISS] projects (174 docs)
[💾 Saved] vectorstores_local/projects
[⚙️ Build FAISS] Cities (189 docs)
[💾 Saved] vectorstores_local/Cities


In [None]:
def load_local_llm(model_id=LLM_MODEL_ID, fallback_id=FALLBACK_MODEL_ID):
    """Load a local causal LLM and wrap it for LangChain.

    Parameters:
    - model_id: Hugging Face id of the primary model, loaded 4-bit quantized with
      the token stored in os.environ["HUGGINGFACE_TOKEN"]
    - fallback_id: model loaded (unquantized, without a token) when the primary
      load raises any exception

    Returns:
    - HuggingFacePipeline around a text-generation pipeline
      (max_new_tokens=256, temperature=0.2)
    """
    # Imported here so the function works without changing the notebook's import cell
    from transformers import BitsAndBytesConfig

    try:
        # Attempt to load primary model
        print(f"Loading LLM: {model_id}")
        hf_token = os.environ["HUGGINGFACE_TOKEN"]
        tok = AutoTokenizer.from_pretrained(model_id, token=hf_token)
        mdl = AutoModelForCausalLM.from_pretrained(
            model_id,
            token=hf_token,
            device_map="auto",
            torch_dtype="auto",
            # Passing load_in_4bit=True directly is deprecated; the quantization
            # settings now travel in a BitsAndBytesConfig object.
            quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        )
    except Exception as e:
        # Fallback to smaller/lightweight model if primary fails
        print(f"[Fallback to {fallback_id}] Reason:", e)
        tok = AutoTokenizer.from_pretrained(fallback_id)
        mdl = AutoModelForCausalLM.from_pretrained(fallback_id, device_map="auto", torch_dtype="auto")

    # Wrap the model in a HuggingFace text-generation pipeline
    # NOTE(review): temperature normally only matters when sampling is enabled —
    # confirm whether do_sample=True was intended here.
    gen = pipeline(
        "text-generation",
        model=mdl,
        tokenizer=tok,
        max_new_tokens=256,
        temperature=0.2
    )

    return HuggingFacePipeline(pipeline=gen)

# Load the LLM with the default primary/fallback model ids; `llm` is shared by every RAG chain below
llm = load_local_llm()

🚀 Loading LLM: mistralai/Mistral-7B-Instruct-v0.2


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Device set to use cuda:0
  return HuggingFacePipeline(pipeline=gen)


In [None]:
# Prompt shared by every category: the model is told to answer from the retrieved
# context only and to reply 'Not enough information.' when the context lacks the answer.
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        "Answer the following question based ONLY on the provided context.\n"
        "If the context does not contain the answer, say 'Not enough information.'\n\n"
        "Context:\n{context}\n\n"
        "Question: {question}\n\n"
        "Answer clearly and concisely:"
    ),
)

# One RetrievalQA chain per category, keyed by the same names as `vectorstores`
rag_systems = {}

for cat, vs in vectorstores.items():
    # Retriever that returns the 4 most similar chunks for a query (k=4)
    retriever = vs.as_retriever(search_kwargs={"k": 4})

    # Wrap the retriever with the LLM using RetrievalQA
    rag = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",  # Simple chain that concatenates retrieved docs
        chain_type_kwargs={
            "prompt": prompt_template,           # Use our custom prompt
            "document_variable_name": "context"  # Variable in prompt representing docs
        },
    )

    # Store the RAG system for the current category
    rag_systems[cat] = rag
    print(f"[RAG ready] {cat}")

[🤖 RAG ready] people
[🤖 RAG ready] projects
[🤖 RAG ready] Cities


In [None]:
# One-sentence description per category; the keys match the keys of `rag_systems`
CATEGORY_METADATA = {
    "people": "Biographies and achievements of Saudi figures like King Abdulaziz, King Salman, Crown Prince Mohammed bin Salman, and others.",
    "projects": "Vision 2030 projects such as NEOM, Qiddiya, ROSHN, Riyadh Art, The Line.",
    "Cities": "Information about Saudi cities and landmarks such as Riyadh, Jeddah, Abha, AlUla, Taif."
}

# Embed each description once up front so routing only has to embed the incoming query
category_embeddings = {k: embeddings.embed_query(v) for k, v in CATEGORY_METADATA.items()}

def route_query_semantic_local(query: str, threshold=0.18):
    """Pick the category whose description is most similar to the query.

    Parameters:
    - query: user question
    - threshold: when the best similarity is below this value, keyword rules are
      tried before falling back to the best-scoring category

    Returns:
    - category name: a key of `category_embeddings` or one of the keyword categories
    """
    query_vector = embeddings.embed_query(query)

    # Cosine similarity between the query and every category description
    sims = {}
    for name, category_vector in category_embeddings.items():
        sims[name] = float(cosine_similarity([query_vector], [category_vector])[0][0])
    print("Similarities:", sims)

    best = max(sims, key=sims.get)

    # Similar enough: trust the embedding match
    if not (sims[best] < threshold):
        return best

    # Weak match: the first keyword group found in the lowercased query wins
    lowered = query.lower()
    keyword_rules = (
        ("people", ("who", "king", "prince", "mohammed", "bin salman")),
        ("projects", ("project", "vision", "initiative")),
        ("Cities", ("city", "visit", "where", "travel")),
    )
    for name, keywords in keyword_rules:
        if any(word in lowered for word in keywords):
            return name

    # No keyword matched either: keep the best-scoring category
    return best

def clean_echo(raw: str):
    """Drop the echoed prompt from an LLM output and collapse its whitespace.

    Everything up to and including the colon that follows the last
    'Answer clearly' / 'Answer concisely' / 'Answer directly' marker is removed;
    text without such a marker is only whitespace-normalised.
    """
    echo_pattern = re.compile(r"(?s).*Answer (clearly|concisely|directly).*?:")
    without_prompt = echo_pattern.sub("", raw).strip()
    single_spaced = re.sub(r"\s+", " ", without_prompt)
    return single_spaced.strip()

#### RAG Evaluation

In [None]:
# Load a second, open-source model that acts as the judge for answers generated by RAG
EVAL_MODEL_ID = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(EVAL_MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(EVAL_MODEL_ID, device_map="auto", torch_dtype="auto")

# Text-generation pipeline around the judge model; called by evaluate_answer_multi_metric
# NOTE(review): temperature normally only matters when sampling is enabled — confirm
# whether do_sample=True was intended.
eval_pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    temperature=0.2  # Low randomness for stable evaluation outputs
)

# Function: evaluate_answer_multi_metric
_METRIC_NAMES = ("Accuracy", "Relevance", "Clarity", "Consistency")


def _extract_metrics_dict(text):
    """Return the first brace-delimited span of `text` that parses as a dict, else {}."""
    for candidate in re.finditer(r"\{[^{}]*\}", text):
        try:
            parsed = ast.literal_eval(candidate.group(0))
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            continue  # not a Python literal; try the next candidate
        if isinstance(parsed, dict):
            return parsed
    return {}


def evaluate_answer_multi_metric(question, answer):
    """
    Evaluate an answer on four metrics:
    Accuracy, Relevance, Clarity, Consistency.
    Returns a dictionary with values between 0 and 1 for each metric.

    Parameters:
    - question: the question that was asked
    - answer: the RAG answer to be judged

    A metric that is missing, unparsable or non-numeric is reported as 0.0;
    values outside [0, 1] are clamped into that range.
    """
    # Create a prompt to instruct the evaluation model
    prompt = f"""
You are an intelligent evaluator. Evaluate the following RAG model answer
on four metrics: Accuracy, Relevance, Clarity, Consistency.
Score each metric from 0 to 1 (1=excellent, 0=poor).

Question: {question}
Answer: {answer}

Return the results ONLY as a Python dict, for example:
{{'Accuracy': 0.9, 'Relevance': 0.8, 'Clarity': 1.0, 'Consistency': 0.95}}
"""

    # Ask only for the continuation. The prompt itself contains an example dict,
    # and parsing the echoed prompt would either return those example scores or
    # make the extracted span unparsable.
    generated = eval_pipe(prompt, return_full_text=False)[0]["generated_text"]
    if generated.startswith(prompt):
        # Defensive: strip the echo if the pipeline returned the full text anyway
        generated = generated[len(prompt):]

    metrics = _extract_metrics_dict(generated)

    # Ensure all metric values are floats between 0 and 1
    clean_metrics = {}
    for key in _METRIC_NAMES:
        try:
            score = float(metrics.get(key, 0))
        except (TypeError, ValueError):
            score = 0.0
        # max() before min() also maps NaN to 0.0
        clean_metrics[key] = min(1.0, max(0.0, score))

    return clean_metrics

# Function: evaluate_prompts_csv
def evaluate_prompts_csv(prompts_csv="prompts.csv", results_csv="rag_evaluation_results.csv"):
    """Answer and score every question in a CSV, then save and summarise the results.

    Parameters:
    - prompts_csv: CSV file with a 'Question' column
    - results_csv: destination CSV; overwritten with one row per evaluated question
      (timestamp, question, answer, the four metric scores, elapsed seconds)

    Rows whose question is blank or missing are skipped, as are questions routed
    to a category that has no RAG system. Prints per-question progress and, when
    at least one question was evaluated, the average of each metric.
    """
    # Load the CSV file containing evaluation questions
    df_prompts = pd.read_csv(prompts_csv)
    results = []

    for idx, row in df_prompts.iterrows():
        raw_question = row['Question']
        # An empty cell is read as NaN (a float), which has no .strip()
        if pd.isna(raw_question):
            continue
        q = str(raw_question).strip()
        if not q:
            continue  # Skip empty questions

        print(f"\nEvaluating Question {idx+1}: {q}")

        start_time = time.time()  # Track elapsed time

        # Determine the RAG category for this question
        cat = route_query_semantic_local(q)
        rag = rag_systems.get(cat)
        if not rag:
            print("No RAG for this category. Skipping...")
            continue

        # Retrieve top relevant documents for the question
        docs = rag.retriever.invoke(q)
        top_docs = docs[:3]  # Take top 3 documents
        context = "\n\n".join([f"Source {i+1}:\n{d.page_content}" for i, d in enumerate(top_docs)])

        # Generate answer using the RAG model
        # NOTE(review): the chain is built with its own retriever, so the "context"
        # entry passed here may be ignored in favour of its own retrieval — confirm.
        raw = rag.invoke({"context": context, "query": q})
        answer = clean_echo(raw["result"] if isinstance(raw, dict) and "result" in raw else str(raw))

        elapsed = time.time() - start_time  # Compute elapsed time

        # Evaluate the generated answer using the evaluation model
        metrics = evaluate_answer_multi_metric(q, answer)

        # Save results for this question
        results.append({
            "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "Question": q,
            "RAG_Answer": answer,
            "Accuracy": metrics["Accuracy"],
            "Relevance": metrics["Relevance"],
            "Clarity": metrics["Clarity"],
            "Consistency": metrics["Consistency"],
            "Time_sec": round(elapsed, 2)
        })

        # Print a quick summary for this question
        print(f"Done: Accuracy={metrics['Accuracy']}, Relevance={metrics['Relevance']}, "
              f"Clarity={metrics['Clarity']}, Consistency={metrics['Consistency']}, Time={round(elapsed,2)}s")

    # Save all evaluation results to CSV. Naming the columns explicitly keeps the
    # header (and the column selection below) valid when nothing was evaluated.
    result_columns = ["Timestamp", "Question", "RAG_Answer", "Accuracy",
                      "Relevance", "Clarity", "Consistency", "Time_sec"]
    df_results = pd.DataFrame(results, columns=result_columns)
    df_results.to_csv(results_csv, index=False)
    print(f"\nAll results saved to {results_csv}")

    if df_results.empty:
        print("\nNo questions were evaluated; nothing to summarise.")
        return

    # Compute and print average metrics across all questions
    avg_metrics = df_results[["Accuracy", "Relevance", "Clarity", "Consistency", "Time_sec"]].mean()
    print("\nTotal Evaluation Summary:")
    print(f"Accuracy: {avg_metrics['Accuracy']:.2f}")
    print(f"Relevance: {avg_metrics['Relevance']:.2f}")
    print(f"Clarity: {avg_metrics['Clarity']:.2f}")
    print(f"Consistency: {avg_metrics['Consistency']:.2f}")
    print(f"Average Time per Question: {avg_metrics['Time_sec']:.2f} sec")

In [None]:
# Run the batch evaluation with its defaults: reads prompts.csv, writes rag_evaluation_results.csv
evaluate_prompts_csv()

#### User Prompts

In [None]:
# CSV log that ask_loop appends one row to per answered question
csv_file = "rag_evaluation_results.csv"

# First use only: create the file with its header row; an existing log is left untouched
if not os.path.exists(csv_file):
    with open(csv_file, mode='w', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow(
            ["Timestamp", "Question", "RAG_Answer", "Accuracy", "Relevance", "Clarity", "Consistency"]
        )

# Function: ask_loop
def ask_loop():
    """Interactive question loop: route, answer, score and log each question.

    Reads questions with input() until the user types 'exit' or 'quit'
    (case-insensitive). Blank input is ignored. Every answered question is
    printed with its four metric scores and appended to `csv_file`.
    """
    print("Smart local RAG ready. Type 'exit' to quit.\n")

    while True:
        # Get user input
        q = input("Ask: ").strip()
        if q.lower() in ("exit", "quit"):
            break  # Exit the loop if user types 'exit' or 'quit'
        if not q:
            # Just Enter was pressed: nothing to route, answer or log
            continue

        # Route the question to the correct RAG category based on semantic similarity
        cat = route_query_semantic_local(q)
        print(f"Routed to: {cat}")

        # Get the RAG system for this category
        rag = rag_systems.get(cat)
        if not rag:
            print("No RAG for this category.")
            continue  # Skip if no RAG system exists for this category

        # Retrieve top documents relevant to the question
        retriever = rag.retriever
        docs = retriever.invoke(q)
        top_docs = docs[:3]  # Use top 3 chunks for context
        context = "\n\n".join([f"Source {i+1}:\n{d.page_content}" for i, d in enumerate(top_docs)])
        print(f" Used {len(top_docs)} chunks for context.")

        # Generate the answer using RAG
        # NOTE(review): the chain is built with its own retriever, so the "context"
        # entry passed here may be ignored in favour of its own retrieval — confirm.
        raw = rag.invoke({"context": context, "query": q})
        answer = clean_echo(raw["result"] if isinstance(raw, dict) and "result" in raw else str(raw))

        # Evaluate the RAG answer using multiple metrics
        evaluation = evaluate_answer_multi_metric(q, answer)

        # Print the question, RAG answer, and evaluation metrics
        print(f"\nQuestion: {q}")
        print(f"RAG Answer: {answer}")
        print("Multi-metric Evaluation:")
        for k, v in evaluation.items():
            print(f"  {k}: {v}")
        print("=" * 60)

        # Append the question, answer, and metrics to the CSV file
        with open(csv_file, mode='a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow([
                datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                q,
                answer,
                evaluation["Accuracy"],
                evaluation["Relevance"],
                evaluation["Clarity"],
                evaluation["Consistency"]
            ])

In [None]:
# Start the interactive session; it blocks on input() until 'exit' or 'quit' is typed
ask_loop()

✅ Smart local RAG ready. Type 'exit' to quit.

Ask: what do you know about Alsubaie palace


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


📊 Similarities: {'people': 0.796885505660319, 'projects': 0.7508827942084983, 'Cities': 0.8040225791469391}
🧭 Routed to: Cities
🧩 Used 3 chunks for context.

💡 Question: what do you know about Alsubaie palace
💬 Answer: Not enough information. Alsubaie palace is not mentioned in the context.

Ask: is there any place i can go in shaqra


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


📊 Similarities: {'people': 0.7394240612783167, 'projects': 0.7554180490836482, 'Cities': 0.7713743332819035}
🧭 Routed to: Cities
🧩 Used 3 chunks for context.

💡 Question: is there any place i can go in shaqra
💬 Answer: Not enough information. The context mentions Jabal Shada Reserve, Narcissus Resort Obhur, Kapsarc Mosque, and Khairah Forest Park, but it does not mention Shaqra as a location.

Ask: new murabba


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


📊 Similarities: {'people': 0.7555939697534112, 'projects': 0.759676115567629, 'Cities': 0.7609905613857422}
🧭 Routed to: Cities
🧩 Used 3 chunks for context.

💡 Question: new murabba
💬 Answer: Not enough information. The context mentions Murabba Palace, Boulevard Al Baha, Anaab Farm, and Al Aaqaba View, but it does not provide any information about a new Murabba.

Ask: who is the king of saudi arabia


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


📊 Similarities: {'people': 0.8223720548847431, 'projects': 0.7457642511153758, 'Cities': 0.7841969844760309}
🧭 Routed to: people
🧩 Used 3 chunks for context.

💡 Question: who is the king of saudi arabia
💬 Answer: King Salman Bin Abdulaziz Bin Abdulrahman Bin Faisal Bin Turki Bin Abdullah Bin Mohammed Bin Saud is the King of Saudi Arabia.

Ask: where can i go in jeddah


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


📊 Similarities: {'people': 0.7836238274281048, 'projects': 0.8023020152219991, 'Cities': 0.8277190984525676}
🧭 Routed to: Cities
🧩 Used 3 chunks for context.

💡 Question: where can i go in jeddah
💬 Answer: You can go for a walk in Jeddah, drive on Jeddah Circuit, enjoy the Jeddah Sea Breacher, or go scuba diving with Jeddah TEC Divers.

Ask: how many beaches in jeddah


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


📊 Similarities: {'people': 0.7500118470338937, 'projects': 0.7675152125373956, 'Cities': 0.8009758080976365}
🧭 Routed to: Cities
🧩 Used 3 chunks for context.

💡 Question: how many beaches in jeddah
💬 Answer: Not enough information. The context mentions Oia Beach Resort, but it does not indicate how many beaches there are in Jeddah as a whole.

Ask: exit


In [None]:
# Read back the evaluation log written by the cells above
csv_file = "rag_evaluation_results.csv"
df = pd.read_csv(csv_file)

# Force the four score columns to numbers; anything unparsable becomes NaN
metric_cols = ["Accuracy", "Relevance", "Clarity", "Consistency"]
df[metric_cols] = df[metric_cols].apply(pd.to_numeric, errors="coerce")

# Per-metric averages, record count, and a single 0-100 headline score
avg_scores = df[metric_cols].mean()
total_records = len(df)
overall_score = avg_scores.mean() * 100

# Text summary
print("RAG Evaluation Dashboard")
print(f"Total Evaluations: {total_records}")
print("\nAverage Scores:")
for k, v in avg_scores.items():
    print(f"   {k}: {v:.2f}")
print(f"\nOverall Performance Score: {overall_score:.1f}/100")

# Timing statistics, only when the log carries a Time_sec column
if "Time_sec" in df.columns:
    timings = df["Time_sec"]
    avg_time, fastest, slowest = timings.mean(), timings.min(), timings.max()
    print(f" Average Response Time: {avg_time:.2f} seconds")
    print(f"   Fastest: {fastest:.2f}s | Slowest: {slowest:.2f}s")

# Bar chart of the average score per metric
fig, ax = plt.subplots(figsize=(6, 4))
avg_scores.plot(
    kind="bar",
    color=["skyblue", "lightgreen", "gold", "salmon"],  # one colour per metric
    ax=ax,
)
ax.set_title("Average RAG Evaluation Metrics")
ax.set_ylabel("Score (0-1)")
plt.setp(ax.get_xticklabels(), rotation=45)
ax.set_ylim(0, 1)
ax.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()

In [None]:
# Imported in this cell because the notebook's import cell does not bring in pickle;
# without it a fresh kernel stops here with NameError: name 'pickle' is not defined.
import pickle

# Save the important components of the RAG system to a file
# NOTE(review): pickle stores plain functions by reference (module + name), so the two
# helpers must already be defined in whatever session loads this file — confirm the
# loading side re-creates them. Only unpickle files that come from a trusted source.
with open("final_rag.pkl", "wb") as f:
    pickle.dump({
        "rag_systems": rag_systems,
        "route_query_semantic_local": route_query_semantic_local,
        "clean_echo": clean_echo
        }, f)

# Notify user that the RAG system has been saved
print("RAG model saved as final_rag.pkl")

✅ RAG model saved as final_rag.pkl


In [None]:
# Where the pickle is expected on the Colab filesystem
file_path = "/content/final_rag.pkl"

# Hand the file to the browser for download, or fail loudly when it is missing
if os.path.exists(file_path):
    files.download(file_path)
else:
    raise FileNotFoundError(f"File not found: {file_path}")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
def record_audio_js(duration_ms=5000):
    """Record from the browser microphone and send the audio to the kernel.

    Parameters:
    - duration_ms: recording length in milliseconds (default 5000, i.e. 5 seconds)

    Displays a JavaScript snippet that records for the given time, releases the
    microphone, base64-encodes the recording and invokes the
    'notebook.get_recording' callback with it. Returns nothing; the audio
    arrives through that callback.
    """
    js_source = '''
    const sleep  = time => new Promise(resolve => setTimeout(resolve, time))
    // Encode through FileReader: pushing every byte through one spread call
    // can exceed the engine's argument limit on longer recordings.
    const toBase64 = blob => new Promise((resolve, reject) => {
      const reader = new FileReader()
      reader.onload = () => resolve(reader.result.slice(reader.result.lastIndexOf(',') + 1))
      reader.onerror = () => reject(reader.error)
      reader.readAsDataURL(blob)
    })
    const record = async time => {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
      const recorder = new MediaRecorder(stream)
      const data = []
      recorder.ondataavailable = event => data.push(event.data)
      const stopped = new Promise(resolve => recorder.onstop = resolve)
      recorder.start()
      await sleep(time)
      recorder.stop()
      await stopped
      // Release the microphone so the browser stops capturing
      stream.getTracks().forEach(track => track.stop())
      const blob = new Blob(data, { type: 'audio/wav' })
      const base64 = await toBase64(blob)
      google.colab.kernel.invokeFunction('notebook.get_recording', [base64], {})
    }
    record(__DURATION_MS__)   // Recording duration in milliseconds
    '''
    # Plain replace instead of str.format/f-string: the script is full of braces
    display(Javascript(js_source.replace("__DURATION_MS__", str(int(duration_ms)))))

# Most recent recording as an io.BytesIO, set by get_recording; None until a recording arrives
recorded_audio = None

def get_recording(b64_audio):
    """Callback for the browser recorder: decode the base64 audio into `recorded_audio`.

    Parameters:
    - b64_audio: base64-encoded audio sent from JavaScript

    Stores the decoded bytes in the module-level `recorded_audio` as an
    io.BytesIO and prints a confirmation.
    """
    global recorded_audio
    audio_bytes = b64decode(b64_audio)
    recorded_audio = io.BytesIO(audio_bytes)
    print("Recording captured successfully")

# Register the callback in Colab under the name the JavaScript recorder invokes
# ('notebook.get_recording')
output.register_callback('notebook.get_recording', get_recording)

In [None]:
# Start a microphone recording; the audio arrives asynchronously in `recorded_audio`
record_audio_js()

<IPython.core.display.Javascript object>

🎙️ Recording captured successfully!


In [None]:
# Function to convert recorded audio to text using AssemblyAI (aai) transcriber
def speech_to_text():
    """Transcribe the latest recording and return the recognised text.

    Returns:
    - the stripped transcript text, or "" when nothing has been recorded or the
      transcript carries no text

    Prints either the transcript or a notice that no recording exists.
    """
    # Check if a recording exists
    if recorded_audio is None:
        print("No recording detected.")
        return ""

    # Rewind first: an earlier transcription leaves the buffer at its end, and
    # reading from there would upload nothing.
    recorded_audio.seek(0)

    # Initialize the transcriber
    transcriber = aai.Transcriber()

    # Transcribe the recorded audio
    transcript = transcriber.transcribe(recorded_audio)

    # Extract text and remove leading/trailing spaces
    text = transcript.text.strip() if transcript.text else ""

    # Print the transcription
    print(f"You said: {text}")
    return text

# Example usage: transcribe the recording captured above; q is "" when nothing was recorded
q = speech_to_text()

🗣️ You said: Who is the king of Saudi Arabia?


In [None]:
def rag_answer(q: str):
    """Answer a question with the RAG system of its best-matching category.

    Parameters:
    - q: the question text

    Returns:
    - the cleaned answer string, or an apology message when the routed
      category has no RAG system
    """
    category = route_query_semantic_local(q)

    chain = rag_systems.get(category)
    if not chain:
        return "Sorry, I don’t have data for that category."

    # Number the three best chunks as "Source 1..3" and join them into one context
    retrieved = chain.retriever.invoke(q)
    sections = []
    for number, doc in enumerate(retrieved[:3], start=1):
        sections.append(f"Source {number}:\n{doc.page_content}")
    context = "\n\n".join(sections)

    raw = chain.invoke({"context": context, "query": q})

    # The chain normally returns a dict with a "result" entry; anything else is stringified
    if isinstance(raw, dict) and "result" in raw:
        text = raw["result"]
    else:
        text = str(raw)
    return clean_echo(text)

In [None]:
def rag_answer_voice(query):
    """Answer a transcribed voice query, printing the routing decision and the answer.

    Parameters:
    - query: transcribed question text

    Returns:
    - the cleaned answer string, or None when the query is empty or the routed
      category has no RAG system
    """
    if not query:
        return  # nothing was transcribed

    category = route_query_semantic_local(query)
    print(f"Routed to: {category}")

    chain = rag_systems.get(category)
    if not chain:
        print("No RAG for this category.")
        return

    # Number the three best chunks as "Source 1..3" and join them into one context
    top_chunks = chain.retriever.invoke(query)[:3]
    context = "\n\n".join(
        f"Source {position}:\n{chunk.page_content}"
        for position, chunk in enumerate(top_chunks, start=1)
    )

    raw = chain.invoke({"context": context, "query": query})

    # The chain normally returns a dict with a "result" entry; anything else is stringified
    if isinstance(raw, dict) and "result" in raw:
        answer = clean_echo(raw["result"])
    else:
        answer = clean_echo(str(raw))

    print(f"{answer}")
    return answer


# Function to convert text into speech and play it
def text_to_speech(text):
    """Speak `text` in English: save it as response.mp3 and play it inline.

    Parameters:
    - text: the sentence(s) to speak; None or blank text is skipped with a
      notice, because there is nothing to synthesise (rag_answer_voice returns
      None when it has no answer)
    """
    if not text or not str(text).strip():
        print("No text to speak.")
        return

    tts = gTTS(text=text, lang="en")
    tts.save("response.mp3")
    display(Audio("response.mp3", autoplay=True))

In [None]:
# Record a new spoken question; wait for the capture message before running the next cell
record_audio_js()

<IPython.core.display.Javascript object>

🎙️ Recording captured successfully!


In [None]:
# Convert the user's recorded speech into text ("" when nothing was recorded)
q = speech_to_text()

# Get an answer from the RAG system based on the transcribed text.
# rag_answer_voice returns None for an empty query or an unknown category.
answer = rag_answer_voice(q)

# Convert the generated answer to speech and play it
text_to_speech(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


🗣️ You said: Who is the king of Saudi Arabia?
📊 Similarities: {'people': 0.8329609534975975, 'projects': 0.7529228596279827, 'Cities': 0.8050122410591485}
🧭 Routed to: people
💬 King Salman Bin Abdulaziz Bin Abdulrahman Bin Faisal Bin Turki Bin Abdullah Bin Mohammed Bin Saud is the current king of Saudi Arabia.


In [None]:
# Text-only example: route and answer a typed question, then print the cleaned answer
print(rag_answer("Tell me about NEOM"))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


📊 Similarities: {'people': 0.7583109141727007, 'projects': 0.8135794030792883, 'Cities': 0.7643217067028774}
NEOM is a futuristic city project in northwest Saudi Arabia, launched in 2017 by Crown Prince Mohammed bin Salman, aimed at transforming the desert into a smart, sustainable city powered entirely by renewable energy. The project includes major developments like The Line, Oxagon, Trojena, and Sindalah, and envisions a new model for sustainable living, work, and prosperity. Oxagon, a floating coastal city, connects residents and businesses to global markets and features an integrated port and logistics hub. Trojena will host global events and festivals, and NEOM represents Saudi Arabia's commitment to a sustainable, technologically advanced future.


In [None]:
# Record another spoken question for the round trip in the next cell
record_audio_js()

<IPython.core.display.Javascript object>

🎙️ Recording captured successfully!


In [None]:
# Full voice round trip: transcribe the recording, answer it, then speak the answer.
# rag_answer_voice returns None for an empty query or an unknown category.
q = speech_to_text()
answer = rag_answer_voice(q)
text_to_speech(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


🗣️ You said: Where can I go in jeddah?
📊 Similarities: {'people': 0.7769664048176991, 'projects': 0.7906762612081156, 'Cities': 0.8167826588621483}
🧭 Routed to: Cities
💬 You can go for a walk in Jeddah, drive around Jeddah Circuit, enjoy the Jeddah Sea Breacher, or go scuba diving with Jeddah TEC Divers.
