## Running on Google Colab | Runtime type: GPU (T4)

In [None]:
!pip install -q datasets sentence-transformers


In [None]:
from datasets import load_dataset

# Fetch the SQuAD v2 training split
squad = load_dataset("squad_v2", split="train")

# Shuffle with a fixed seed and keep the first 200 rows of the shuffled order
sample = squad.shuffle(seed=42).select(range(200))

# Walk the sampled rows once, collecting each question and its paragraph side by side
queries, contexts = [], []
for row in sample:
    queries.append(row["question"])
    contexts.append(row["context"])


## e5-large - Speed Test & Embedding Shape

In [None]:
from sentence_transformers import SentenceTransformer
import time

model = SentenceTransformer("intfloat/e5-large")

# The E5 model card asks for a role prefix on every input; the SQuAD paragraphs play
# the "passage" role, so prefix them before encoding.
passages = ["passage: " + text for text in contexts]

# Warm-up on a couple of inputs so that whatever one-time setup the first forward pass
# performs is not charged to the timed run below.
model.encode(passages[:2], batch_size=32, show_progress_bar=False)

# perf_counter is a monotonic, high-resolution clock intended for measuring intervals;
# time.time() follows the wall clock and can jump if the system time is adjusted.
start = time.perf_counter()
embeddings = model.encode(passages, batch_size=32, show_progress_bar=True)
end = time.perf_counter()

print("e5-large shape:", embeddings.shape)
print("Time taken:", round(end-start, 2), "seconds")

## bge-m3 - Speed Test & Embedding Shape

In [None]:
model = SentenceTransformer("BAAI/bge-m3")

# Warm-up on a couple of inputs so that whatever one-time setup the first forward pass
# performs is not charged to the timed run below.
model.encode(contexts[:2], batch_size=32, show_progress_bar=False)

# perf_counter is a monotonic, high-resolution clock intended for measuring intervals;
# time.time() follows the wall clock and can jump if the system time is adjusted.
start = time.perf_counter()
embeddings = model.encode(contexts, batch_size=32, show_progress_bar=True)
end = time.perf_counter()

print("bge-m3 shape:", embeddings.shape)
print("Time taken:", round(end-start, 2), "seconds")

## all-mpnet-base-v2 - Speed Test & Embedding Shape

In [None]:
model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Warm-up on a couple of inputs so that whatever one-time setup the first forward pass
# performs is not charged to the timed run below.
model.encode(contexts[:2], batch_size=32, show_progress_bar=False)

# perf_counter is a monotonic, high-resolution clock intended for measuring intervals;
# time.time() follows the wall clock and can jump if the system time is adjusted.
start = time.perf_counter()
embeddings = model.encode(contexts, batch_size=32, show_progress_bar=True)
end = time.perf_counter()

print("mpnet shape:", embeddings.shape)
print("Time taken:", round(end-start, 2), "seconds")

## e5-large - Precision@k, Recall@k

In [None]:
import numpy as np
from sentence_transformers import SentenceTransformer, util
from datasets import load_dataset

# 1. Load a small sample of SQuAD v2
# Draw the sample with the same seed-42 shuffle that the speed-test sample uses instead of
# slicing `train[:200]`. A head slice returns rows in file order; in SQuAD neighbouring
# questions are presumably asked about the same paragraph (TODO confirm on the loaded split),
# which would leave only a handful of distinct contexts to retrieve from.
squad = load_dataset("squad_v2", split="train").shuffle(seed=42).select(range(200))  # 200 samples

# Materialise both columns as plain Python lists so later cells can index and iterate them
# without depending on the container type returned by the datasets library.
queries = list(squad["question"])
contexts = list(squad["context"])

# 2. Pick model
model = SentenceTransformer("intfloat/e5-large")

# The E5 model card asks for a role prefix on every input text: questions are encoded
# as queries and SQuAD paragraphs as passages.
E5_QUERY_PREFIX = "query: "
E5_PASSAGE_PREFIX = "passage: "

# 3. Embed contexts
context_embeddings = model.encode(
    [E5_PASSAGE_PREFIX + text for text in contexts],
    batch_size=32,
    convert_to_tensor=True,
    show_progress_bar=True,
)

# 4. Function: evaluate retrieval with Precision@k, Recall@k
def evaluate(model, queries, contexts, context_embeddings, k=5, query_prefix=""):
    """Score top-k retrieval when every query has exactly one gold passage.

    ``contexts[i]`` is treated as the gold passage of ``queries[i]``. Passages with identical
    text are collapsed into a single corpus entry before searching, so a paragraph shared by
    several questions is neither counted as several hits nor able to fill the top-k with
    copies of itself.

    Args:
        model: Encoder exposing ``encode(list_of_str, ..., convert_to_tensor=True)``.
        queries: Sequence of question strings.
        contexts: Sequence of passage strings, index-aligned with ``queries`` and with
            ``context_embeddings``.
        context_embeddings: One embedding per entry of ``contexts``; must support indexing
            with a list of row numbers.
        k: Cut-off rank.
        query_prefix: Text prepended to every query before encoding (for models that expect
            a role prefix). The default keeps queries unchanged.

    Returns:
        ``(mean precision@k, mean recall@k)``. With a single gold passage, precision@k of a
        query is ``1/k`` on a hit and ``0`` otherwise, and recall@k is ``1`` or ``0``.
        Both are NaN when ``queries`` is empty.

    Raises:
        ValueError: If ``k`` is not positive or the inputs are not index-aligned.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")
    if len(contexts) != len(context_embeddings):
        raise ValueError("contexts and context_embeddings must have the same length")
    if len(queries) > len(contexts):
        raise ValueError("every query needs a gold context at the same index")
    if len(queries) == 0:
        return float("nan"), float("nan")

    # Map every distinct passage text to one corpus id and remember which embedding row
    # represents it (the first occurrence).
    corpus_id = {}
    unique_rows = []
    for row, text in enumerate(contexts):
        if text not in corpus_id:
            corpus_id[text] = len(unique_rows)
            unique_rows.append(row)
    corpus_embeddings = context_embeddings[unique_rows]

    # Encode all queries in batches instead of one encode() call per query.
    query_embeddings = model.encode(
        [query_prefix + query for query in queries],
        batch_size=32,
        convert_to_tensor=True,
    )

    # Cosine similarity search; one ranked hit list per query.
    ranked = util.semantic_search(query_embeddings, corpus_embeddings, top_k=k)

    # A query is a hit when the corpus id of its gold passage appears in its hit list.
    # zip() stops at the shorter input, so only the contexts paired with a query are used.
    found = np.array([
        float(any(hit["corpus_id"] == corpus_id[gold_text] for hit in hits))
        for hits, gold_text in zip(ranked, contexts)
    ])

    return np.mean(found / k), np.mean(found)

# 5. Run evaluation
precision, recall = evaluate(
    model, queries, contexts, context_embeddings, k=5, query_prefix=E5_QUERY_PREFIX
)
print(f"Precision@5: {precision:.3f}")
print(f"Recall@5: {recall:.3f}")

## bge-m3 - Precision@k, Recall@k

In [None]:
from sentence_transformers import SentenceTransformer, util
import numpy as np

# Instantiate the BGE-M3 encoder
model = SentenceTransformer("BAAI/bge-m3")

# Embed the passages first and then the questions, using identical encoding settings
context_embeds, query_embeds = (
    model.encode(texts, batch_size=32, convert_to_tensor=True, show_progress_bar=True)
    for texts in (contexts, queries)
)

# Cosine similarity of every question embedding against every passage embedding
scores = util.cos_sim(query_embeds, context_embeds)

# Precision@k and Recall@k calculation
def precision_recall_at_k(scores, ground_truths, k=5):
    """Mean Precision@k and Recall@k when every query has exactly one gold passage.

    Args:
        scores: 2-D similarity tensor with one row per query and one column per candidate
            passage.
        ground_truths: Sequence giving, for each row of ``scores``, the column index of its
            gold passage. An entry that is not an integer (for example when a caller passes
            the query strings) falls back to assuming the gold passage of row ``i`` is
            column ``i``.
        k: Cut-off rank.

    Returns:
        ``(mean precision@k, mean recall@k)``. Precision@k of a query is ``1/k`` on a hit and
        ``0`` otherwise; recall@k is ``1`` or ``0``. Both are NaN when ``scores`` has no rows.

    Raises:
        ValueError: If ``k`` is not positive.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    precisions, recalls = [], []
    for i, row in enumerate(scores):
        gold = ground_truths[i]
        if not isinstance(gold, (int, np.integer)):
            gold = i  # no usable index supplied: assume the i-th column is the ground truth
        # topk() raises if asked for more entries than the row holds, so cap the request.
        top_k = row.topk(min(k, row.shape[0])).indices.tolist()
        hit = 1.0 if int(gold) in top_k else 0.0
        precisions.append(hit / k)
        recalls.append(hit)

    if not precisions:
        return float("nan"), float("nan")
    return np.mean(precisions), np.mean(recalls)

# Several questions can share one paragraph, so `contexts` may hold the same text more than
# once. Keep a single score column per distinct passage (its first occurrence) and record,
# for every question, which of the kept columns holds its gold passage. Without this, another
# copy of the gold passage can outrank the copy at index i and be scored as a miss.
first_col = {}
for col, text in enumerate(contexts):
    first_col.setdefault(text, col)
unique_cols = list(first_col.values())
# dicts preserve insertion order, so enumerating the keys yields each passage's kept position
position = {text: pos for pos, text in enumerate(first_col)}
gold_columns = [position[text] for text in contexts]

precision, recall = precision_recall_at_k(scores[:, unique_cols], gold_columns, k=5)
print("bge-m3 Precision@5:", round(precision, 3))
print("bge-m3 Recall@5:", round(recall, 3))

## all-mpnet-base-v2 - Precision@k, Recall@k

In [None]:
from sentence_transformers import SentenceTransformer, util
import numpy as np

# Instantiate the all-mpnet-base-v2 encoder
model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Embed the passages first and then the questions, using identical encoding settings
context_embeds, query_embeds = (
    model.encode(texts, batch_size=32, convert_to_tensor=True, show_progress_bar=True)
    for texts in (contexts, queries)
)

# Cosine similarity of every question embedding against every passage embedding
scores = util.cos_sim(query_embeds, context_embeds)

# Precision@k and Recall@k calculation
def precision_recall_at_k(scores, ground_truths, k=5):
    """Mean Precision@k and Recall@k when every query has exactly one gold passage.

    Args:
        scores: 2-D similarity tensor with one row per query and one column per candidate
            passage.
        ground_truths: Sequence giving, for each row of ``scores``, the column index of its
            gold passage. An entry that is not an integer (for example when a caller passes
            the query strings) falls back to assuming the gold passage of row ``i`` is
            column ``i``.
        k: Cut-off rank.

    Returns:
        ``(mean precision@k, mean recall@k)``. Precision@k of a query is ``1/k`` on a hit and
        ``0`` otherwise; recall@k is ``1`` or ``0``. Both are NaN when ``scores`` has no rows.

    Raises:
        ValueError: If ``k`` is not positive.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    precisions, recalls = [], []
    for i, row in enumerate(scores):
        gold = ground_truths[i]
        if not isinstance(gold, (int, np.integer)):
            gold = i  # no usable index supplied: assume the i-th column is the ground truth
        # topk() raises if asked for more entries than the row holds, so cap the request.
        top_k = row.topk(min(k, row.shape[0])).indices.tolist()
        hit = 1.0 if int(gold) in top_k else 0.0
        precisions.append(hit / k)
        recalls.append(hit)

    if not precisions:
        return float("nan"), float("nan")
    return np.mean(precisions), np.mean(recalls)

# Several questions can share one paragraph, so `contexts` may hold the same text more than
# once. Keep a single score column per distinct passage (its first occurrence) and record,
# for every question, which of the kept columns holds its gold passage. Without this, another
# copy of the gold passage can outrank the copy at index i and be scored as a miss.
first_col = {}
for col, text in enumerate(contexts):
    first_col.setdefault(text, col)
unique_cols = list(first_col.values())
# dicts preserve insertion order, so enumerating the keys yields each passage's kept position
position = {text: pos for pos, text in enumerate(first_col)}
gold_columns = [position[text] for text in contexts]

precision, recall = precision_recall_at_k(scores[:, unique_cols], gold_columns, k=5)
print("mpnet Precision@5:", round(precision, 3))
print("mpnet Recall@5:", round(recall, 3))