## Run in Google Colab notebook: 08_rag_end_to_end.ipynb (Environment: CPU)


In [None]:
from google.colab import drive
drive.mount('/content/drive')
import os
project_root = "/content/drive/MyDrive/RAG Research"
os.chdir(project_root)
print(os.getcwd())

!pip install faiss-cpu transformers datasets sentence-transformers accelerate -q

In [None]:
import json
import sys

# Smoke test of the full RAG pipeline: retrieve contexts for one sample query
# with the project's Retriever, then let the project's Generator answer it.

# Single definition of the Drive project location used by every path below.
PROJECT_ROOT = "/content/drive/MyDrive/RAG Research"

# `from src.<pkg> import ...` resolves against the *parent* directory of `src`,
# so the project root itself must be importable; otherwise the imports only
# work when the current working directory happens to be the project root.
# The `src` directory is kept on the path as before (presumably some modules
# import each other without the `src.` prefix -- TODO confirm).
for extra_path in (PROJECT_ROOT, f"{PROJECT_ROOT}/src"):
    if extra_path not in sys.path:  # avoid duplicate entries when the cell is re-run
        sys.path.append(extra_path)

# 1. Import Retriever and Generator components
from src.retrieval.retriever import Retriever
from src.generation.generator import Generator

# 2. Initialize Retriever
retriever = Retriever(
    model_name="intfloat/e5-large",
    index_path=f"{PROJECT_ROOT}/data/processed/hnsw_index.faiss",
    k=20,
)

# 3. Initialize Generator
generator = Generator(model_name="google/flan-t5-base")

# 4. Run retrieval for a sample query to test the full pipeline
query = "What is the capital of France?"
indices, distances = retriever.search(query)

# 5. Load contexts.json, the texts that the retrieved indices point into.
# The encoding is stated explicitly so decoding does not depend on the
# runtime's locale default.
with open(f"{PROJECT_ROOT}/data/processed/contexts.json", "r", encoding="utf-8") as f:
    contexts = json.load(f)

# 6. Combine top-k contexts
# NOTE(review): assumes `indices` is a flat sequence of integer positions into
# `contexts`, best match first -- confirm against Retriever.search.
top_contexts = [contexts[i] for i in indices]
combined_context = " ".join(top_contexts[:3])  # only the three best hits are passed on

# 7. Generate final answer
answer = generator.generate_answer(query, combined_context)

print("\n============================")
print(f"Q: {query}")
print(f"A: {answer}")
print("============================")

## End-to-End Answer Accuracy

In [None]:
!pip install transformers datasets faiss-cpu sentence-transformers accelerate -q

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset
import torch, json
from tqdm import tqdm
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# 1️⃣ Load models
print("[INFO] Loading models...")
embedder = SentenceTransformer("intfloat/e5-large")
generator_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
generator_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base").to("cpu")
print("[INFO] Models ready")

# 2️⃣ Load dataset (use train split to align question and retrieval domain)
dataset = load_dataset("squad_v2", split="train[:200]")
contexts = list(set(dataset["context"]))
questions = dataset["question"]
answers = [a["text"] for a in dataset["answers"]]

# 3️⃣ Generate embeddings (for contexts only)
context_embeddings = embedder.encode(contexts, convert_to_numpy=True)
print(f"[INFO] Context embeddings shape: {context_embeddings.shape}")

# 4️⃣ Build HNSW index
d = context_embeddings.shape[1]
index = faiss.IndexHNSWFlat(d, 32)
index.hnsw.efConstruction = 200
index.add(context_embeddings)
index.hnsw.efSearch = 64
print(f"[INFO] HNSW built with {index.ntotal} items")

# 5️⃣ End-to-End evaluation
k = 20
correct = 0
total = len(questions)

for i in tqdm(range(total)):
    q = questions[i]
    q_emb = embedder.encode([q], convert_to_numpy=True)
    D, I = index.search(q_emb, k)
    top_ctx = " ".join([contexts[j] for j in I[0][:3]])

    # Prompt for generator
    prompt = f"Question: {q}\nContext: {top_ctx}\nAnswer:"
    inputs = generator_tokenizer(prompt, return_tensors="pt", truncation=True).to(generator_model.device)
    output = generator_model.generate(**inputs, max_new_tokens=80)
    pred = generator_tokenizer.decode(output[0], skip_special_tokens=True).lower()

    if any(a.lower() in pred for a in answers[i]):
        correct += 1

accuracy = correct / total
print(f"\n✅ Final Accuracy (E5-Large + HNSW + FLAN-T5-Base): {accuracy:.3f}")