In [None]:
# ✅ 1. Setup
!pip install -q faiss-cpu datasets
from google.colab import drive
drive.mount('/content/drive')

import faiss
import numpy as np
import time
from datasets import load_dataset

# Load precomputed embeddings (same as 06)
context_embeddings = np.load("/content/drive/MyDrive/RAG Research/embeddings/context_embeddings.npy")
question_embeddings = np.load("/content/drive/MyDrive/RAG Research/embeddings/question_embeddings.npy")


d = context_embeddings.shape[1]
print(f"Embedding dim: {d}, Contexts: {len(context_embeddings)}, Questions: {len(question_embeddings)}")

# Ground truth
dataset = load_dataset("squad_v2", split="train")
ground_truth = np.arange(len(dataset))

In [None]:
# ✅ 2. Evaluation function (accuracy + latency)
def evaluate_index(index, questions, ground_truth, k=10, n_trials=100):
    """Score an index on retrieval quality and per-query search latency.

    Issues one batched ``index.search(questions, k)`` call, times it, and
    compares the returned ids with ``ground_truth``.

    Args:
        index: Object exposing ``search(queries, k)`` that returns a
            ``(distances, ids)`` pair, with ``ids`` holding ``k`` candidate
            ids per query row.
        questions: Query matrix, one row per query.
        ground_truth: 1-D sequence where entry ``i`` is the id expected for
            query ``i``. Only the first ``len(questions)`` entries are used.
        k: Number of neighbours requested per query.
        n_trials: Unused. Kept so existing call sites stay valid.

    Returns:
        Tuple ``(recall_at_k, mrr, latency)``:
        fraction of queries whose expected id appears in the top ``k``,
        mean reciprocal rank of the expected id (0 for a miss), and the
        average wall time per query in seconds. All three are ``0.0`` when
        ``questions`` is empty.

    Raises:
        IndexError: If ``ground_truth`` has fewer entries than ``questions``.
    """
    n_queries = len(questions)
    if n_queries == 0:
        # Nothing to search; avoids dividing by zero in the latency average.
        return 0.0, 0.0, 0.0

    expected = np.asarray(ground_truth)[:n_queries]
    if len(expected) < n_queries:
        raise IndexError(
            f"ground_truth has {len(expected)} entries but {n_queries} questions were given"
        )

    # perf_counter is monotonic and high-resolution, unlike time.time().
    start = time.perf_counter()
    _, I = index.search(questions, k)
    latency = (time.perf_counter() - start) / n_queries  # avg per query

    # hits[i, j] is True when the j-th result of query i is the expected id.
    hits = np.asarray(I) == expected[:, None]
    found = hits.any(axis=1)

    # argmax returns the first True column; rows without a hit are masked out.
    first_rank = hits.argmax(axis=1) + 1
    reciprocal_rank = np.where(found, 1.0 / first_rank, 0.0)

    return float(found.mean()), float(reciprocal_rank.mean()), latency

In [None]:
# ✅ 3. Different k values (e.g., 5, 10, 20)
# Cut-offs at which retrieval quality is reported.
k_values = [5, 10, 20]

# Flat Index (baseline): plain L2 index holding every context vector.
index_flat = faiss.IndexFlatL2(d)
index_flat.add(context_embeddings)

# One evaluation pass per cut-off, each printing a single summary line.
for k in k_values:
    recall, mrr, latency = evaluate_index(
        index=index_flat,
        questions=question_embeddings,
        ground_truth=ground_truth,
        k=k,
    )
    print(f"[Flat][k={k}] Recall@{k}: {recall:.4f}, MRR: {mrr:.4f}, Latency: {latency*1000:.2f} ms/query")

In [None]:
# ✅ 4. IVFFlat (tuning nlist, nprobe)
# Number of clusters the IVF index is built with.
nlist = 100
quantizer = faiss.IndexFlatL2(d)
index_ivf = faiss.IndexIVFFlat(quantizer, d, nlist)

# The index is trained on the context vectors before they are added to it.
index_ivf.train(context_embeddings)
index_ivf.add(context_embeddings)

# Sweep nprobe; for each setting, report metrics at every cut-off in k_values.
for nprobe in (1, 5, 10, 20):
    index_ivf.nprobe = nprobe
    for k in k_values:
        recall, mrr, latency = evaluate_index(
            index=index_ivf,
            questions=question_embeddings,
            ground_truth=ground_truth,
            k=k,
        )
        print(f"[IVFFlat][nprobe={nprobe}, k={k}] Recall@{k}: {recall:.4f}, MRR: {mrr:.4f}, Latency: {latency*1000:.2f} ms/query")

In [None]:
# ✅ 5. HNSW (tuning efSearch)
# HNSW index built with M = 32 and efConstruction = 200; efConstruction is
# set before any vectors are added.
M = 32
index_hnsw = faiss.IndexHNSWFlat(d, M)
index_hnsw.hnsw.efConstruction = 200
index_hnsw.add(context_embeddings)

# Sweep efSearch; for each setting, report metrics at every cut-off in k_values.
for efSearch in (16, 32, 64, 128):
    index_hnsw.hnsw.efSearch = efSearch
    for k in k_values:
        recall, mrr, latency = evaluate_index(
            index=index_hnsw,
            questions=question_embeddings,
            ground_truth=ground_truth,
            k=k,
        )
        print(f"[HNSW][efSearch={efSearch}, k={k}] Recall@{k}: {recall:.4f}, MRR: {mrr:.4f}, Latency: {latency*1000:.2f} ms/query")