<a href="https://colab.research.google.com/github/ahmedsaalman/low-resource-rag-comparison/blob/main/Urdu_RAG_Final_Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cell 1: Environment Setup & Configuration
# ==========================================
# This cell installs all required libraries and mounts Google Drive.
# It uses %%capture to keep the output clean for presentation.

%%capture
print("🚀 Initializing the Urdu RAG Environment...")

# 1. Install Libraries
# - transformers/bitsandbytes/accelerate: For running the Qwen LLM
# - sentence-transformers/faiss-cpu: For the Retrieval system
# - ipywidgets: For the interactive demo interface
!pip install -q --upgrade transformers bitsandbytes accelerate sentence-transformers faiss-cpu ipywidgets

# 2. Mount Google Drive
# We need this to load your fine-tuned Dense Retriever and your Corpus file.
from google.colab import drive
drive.mount('/content/drive')

# 3. Import Libraries
import torch
import time
import json
import os
import faiss
import ipywidgets as widgets
from IPython.display import display, Markdown
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from sentence_transformers import SentenceTransformer

# 4. Define File Paths (Config)
# These point to the files you created in your training notebook.
DENSE_MODEL_PATH = "/content/drive/MyDrive/models/urdu_dense_retriever_best"
CORPUS_PATH = "/content/drive/MyDrive/data/urdu_covid_passages_min.jsonl"
GENERATOR_ID = "Qwen/Qwen2.5-7B-Instruct"  # We use the Base model as it performed best (85+ BLEU)

print("✅ Environment Ready! Proceed to Cell 2.")

In [None]:
# Turn on Colab's custom widget manager so third-party widgets can render
# in this session.
from google.colab import output
output.enable_custom_widget_manager()

In [None]:
# Cell 2: Load Models, Build Index & Define Logic
# ===============================================
print("⏳ Loading RAG System... This may take 2-3 minutes.")

# --- PART A: Load the Retriever ---
# Prefer the fine-tuned retriever saved at DENSE_MODEL_PATH; if that path does
# not exist, fall back to the public multilingual MiniLM checkpoint.
if os.path.exists(DENSE_MODEL_PATH):
    print(f"   📂 Loading Fine-Tuned Retriever from: {DENSE_MODEL_PATH}")
    embedder = SentenceTransformer(DENSE_MODEL_PATH).to("cuda")
else:
    print("   ⚠️ Fine-tuned model not found. Loading Base Retriever (Fallback).")
    embedder = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2").to("cuda")

# --- PART B: Load Data & Build Index ---
print("   🏗️ Building Search Index (FAISS)...")
corpus_texts = []
corpus_ids = []

# Fail fast with a clear message when the corpus is missing.
if not os.path.exists(CORPUS_PATH):
    raise FileNotFoundError(f"❌ Critical Error: Corpus file not found at {CORPUS_PATH}")

# The corpus is JSON Lines: one object per line with "text" and "id" keys.
with open(CORPUS_PATH, "r", encoding="utf-8") as f:
    for line in f:
        # Tolerate blank lines (e.g. an extra newline between records);
        # json.loads would raise on them.
        if not line.strip():
            continue
        item = json.loads(line)
        corpus_texts.append(item["text"])
        corpus_ids.append(item["id"])

# An empty corpus would otherwise fail obscurely inside the FAISS calls below.
if not corpus_texts:
    raise ValueError(f"❌ Critical Error: Corpus file contains no passages: {CORPUS_PATH}")

# Embed every passage once; these vectors are what the index searches over.
passage_embeddings = embedder.encode(corpus_texts, convert_to_numpy=True, show_progress_bar=False)

# L2-normalise in place so that inner product (IndexFlatIP) equals cosine similarity.
faiss.normalize_L2(passage_embeddings)
index = faiss.IndexFlatIP(passage_embeddings.shape[1])
index.add(passage_embeddings)
print(f"      - Indexed {len(corpus_texts)} documents.")

# --- PART C: Load the Generator (Qwen 2.5) ---
print(f"   🧠 Loading Generator: {GENERATOR_ID} (4-bit)...")
# 4-bit NF4 quantisation with double quantisation; compute runs in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
tokenizer = AutoTokenizer.from_pretrained(GENERATOR_ID)
model = AutoModelForCausalLM.from_pretrained(
    GENERATOR_ID, quantization_config=bnb_config, device_map="auto"
)

# --- PART D: Define the RAG Function ---
def ask_ur_rag(query, top_k=3):
    """Answer an Urdu question with retrieval-augmented generation.

    Steps:
      1. Embed ``query`` and retrieve the ``top_k`` most similar passages
         from the FAISS index.
      2. Build a chat prompt (system + user) containing those passages.
      3. Sample an answer from the generator model.

    Relies on the module-level ``embedder``, ``index``, ``corpus_ids``,
    ``corpus_texts``, ``tokenizer`` and ``model`` created earlier in this cell.

    Args:
        query: The user's question as a string.
        top_k: Maximum number of passages to retrieve (default 3).

    Returns:
        A tuple ``(response, docs)`` where ``response`` is the generated answer
        text and ``docs`` is a list of ``(passage_id, passage_text)`` tuples in
        the order returned by the index search.
    """
    # 1. Retrieval
    q_emb = embedder.encode([query], convert_to_numpy=True)
    faiss.normalize_L2(q_emb)  # match the normalisation applied to the passages

    # Never ask for more neighbours than the index holds; FAISS pads missing
    # results with label -1, which would silently index the LAST corpus entry.
    k = max(1, min(top_k, index.ntotal))
    _scores, hits = index.search(q_emb, k)

    # Keep only valid labels (defensive: -1 marks "no result").
    docs = [(corpus_ids[i], corpus_texts[i]) for i in hits[0] if i >= 0]

    # One bullet per retrieved passage.
    context = "\n".join(f"- {text}" for _doc_id, text in docs)

    # 2. Prompt Engineering
    sys_prompt = "آپ ایک ماہر ڈاکٹر ہیں۔ نیچے دی گئی معلومات کی بنیاد پر سوال کا اردو میں جواب دیں۔"
    user_prompt = f"معلومات:\n{context}\n\nسوال: {query}"

    messages = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": user_prompt}
    ]

    # 3. Generation
    text_input = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # The model was loaded with device_map="auto"; follow wherever it was placed
    # instead of hard-coding "cuda".
    model_inputs = tokenizer([text_input], return_tensors="pt").to(model.device)

    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=256,
            temperature=0.3, # Low temperature for factual consistency
            do_sample=True
        )

    # generate() returns prompt + continuation. Slice off the prompt tokens so
    # only the model's reply is decoded; splitting the decoded text on the word
    # "assistant" would truncate any answer that contains that word.
    prompt_len = model_inputs["input_ids"].shape[1]
    reply_ids = generated_ids[:, prompt_len:]
    response = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0].strip()

    return response, docs

# Reached only if every preceding statement in this cell ran without raising.
print("✅ System Fully Operational! Ready for Demo.")

⏳ Loading RAG System... This may take 2-3 minutes.
   📂 Loading Fine-Tuned Retriever from: /content/drive/MyDrive/models/urdu_dense_retriever_best
   🏗️ Building Search Index (FAISS)...
      - Indexed 60 documents.
   🧠 Loading Generator: Qwen/Qwen2.5-7B-Instruct (4-bit)...


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.56G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/243 [00:00<?, ?B/s]

✅ System Fully Operational! Ready for Demo.


In [None]:
# Cell 3 (Backup): Simple Text Loop
# ================================
# Plain input()/print() question loop that calls ask_ur_rag for each question.
print("🩺 Urdu COVID-19 AI Assistant (Simple Mode)")
print("Type 'exit' to stop.")
print("-" * 40)

while True:
    print("\n👇 نیچے سوال لکھیں (Enter Question):")
    try:
        # Strip first so that " exit " or a whitespace-only line is handled
        # the same way as the bare input.
        query = input().strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel or closing stdin ends the demo cleanly
        # instead of leaving a traceback on screen.
        print("👋 Allah Hafiz!")
        break

    if not query:
        continue

    if query.lower() in ('exit', 'quit', 'x'):
        print("👋 Allah Hafiz!")
        break

    print("\n🤔 Thinking...")
    try:
        start = time.time()
        ans, docs = ask_ur_rag(query)
        elapsed = time.time() - start  # time the RAG call only, not the printing

        print("\n" + "="*40)
        print(f"📢 جواب: {ans}")
        print("="*40)
        # Show the first 100 characters of the top-ranked passage as the
        # reference; skip it when retrieval returned nothing.
        if docs:
            print(f"📚 حوالہ: {docs[0][1][:100]}...")
        print(f"⏱️ Time: {elapsed:.2f}s")

    except Exception as e:
        # Deliberate best-effort: report the failure and keep the demo loop alive.
        print(f"Error: {e}")

🩺 Urdu COVID-19 AI Assistant (Simple Mode)
Type 'exit' to stop.
----------------------------------------

👇 نیچے سوال لکھیں (Enter Question):
کورونا وائرس کی عام علامات کیا ہیں؟

🤔 Thinking...

📢 جواب: کورona وائرس کی عام علامات میں بخار، کھانسی اور سانس لینے میں دشواری شامل ہیں۔
📚 حوالہ: کورونا وائرس مرض 2019 (COVID-19) ایک متعدی بیماری ہے جس کی عام علامات میں بخار، کھانسی اور سانس لینے...
⏱️ Time: 6.06s

👇 نیچے سوال لکھیں (Enter Question):
ماسک کی کون سی اقسام ہیں؟

🤔 Thinking...

📢 جواب: مسک کی کئی اقسام ہیں، جن میں سے نومریں ہیں:

1. N95 ماسک
2. سرجیکل ماسک 
3. کپڑے کے ماسک

یہ اقسام مختلف طرز کے تحفظ کے لیے استعمال ہوتے ہیں۔
📚 حوالہ: ماسک پہننا، خاص طور پر جب عوامی جگہوں پر بھیڑ ہو، سانس کے ذریعے پھیلنے والے ذرات کے خطرے کو کم کرتا ...
⏱️ Time: 8.08s

👇 نیچے سوال لکھیں (Enter Question):
exit
👋 Allah Hafiz!


In [None]:
# Turn on Colab's custom widget manager (support for third-party widgets).
# NOTE(review): this repeats the identical enable call made in an earlier
# cell of this notebook — one of the two is probably redundant; confirm.
from google.colab import output
output.enable_custom_widget_manager()

Support for third party widgets will remain active for the duration of the session. To disable support:

In [None]:
# Turns Colab's custom widget manager off again.
# NOTE(review): this snippet directly follows the "To disable support:" message,
# so it may be help text echoed from the previous cell's output rather than a
# cell meant to be executed — confirm before running it ahead of a widget demo.
from google.colab import output
output.disable_custom_widget_manager()

Support for third party widgets will remain active for the duration of the session. To disable support:

In [None]:
# Turns Colab's custom widget manager off.
# NOTE(review): identical to the snippet shown just above and likewise preceded
# by the "To disable support:" message; presumably echoed help text or a
# duplicate cell — confirm whether it should be kept.
from google.colab import output
output.disable_custom_widget_manager()