In [None]:
pip install sentence_transformers

Collecting sentence_transformers
  Downloading sentence_transformers-3.0.0-py3-none-any.whl (224 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/224.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.7/224.7 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.11.0->sentence_transformers)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-no

In [None]:
import pandas as pd
import numpy as np
import torch  # Import torch for device handling
from sentence_transformers import SentenceTransformer, util
import pickle
import os

  from tqdm.autonotebook import tqdm, trange


In [None]:
# Read the two parquet splits and stack them into a single evaluation frame.
df1 = pd.read_parquet("/content/arcd_train.parquet")
df2 = pd.read_parquet("/content/arcd_validation.parquet")
# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it,
# each split's own row labels are carried over, and label-based lookups
# (df.loc[i]) become ambiguous wherever those labels overlap.
df = pd.concat([df1, df2], ignore_index=True)

In [None]:
# Checkpoints to evaluate in the benchmark loop. Only the uncommented entry is
# active; remove the leading "#" from any other line to include that model.
models = [
    "intfloat/multilingual-e5-large",
    # --- currently disabled ---
    # "paraphrase-multilingual-mpnet-base-v2",
    # "all-mpnet-base-v2",
    # "all-distilroberta-v1",
    # "multi-qa-distilbert-cos-v1",
    # "all-MiniLM-L12-v2",
    # "all-MiniLM-L6-v2",
    # "multi-qa-MiniLM-L6-cos-v1",
    # "paraphrase-albert-small-v2",
    # "paraphrase-multilingual-MiniLM-L12-v2",
    # "paraphrase-MiniLM-L3-v2",
    # "distiluse-base-multilingual-cased-v1",
    # "distiluse-base-multilingual-cased-v2",
    # "OmarAlsaabi/e5-base-mlqa-finetuned-arabic-for-rag",
]

In [None]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")  # report which device was selected

Using device: cuda


In [None]:
# Benchmark every checkpoint in `models` as a question -> context retriever over
# `df`, appending one metrics dict per model to `results`.
results = []
HIT_KS = (1, 3, 5, 10, 20, 50)  # cut-offs reported as "Hit@k"

questions = df['question'].tolist()
contexts = df['context'].tolist()

# Rank each question against the *unique* contexts rather than against every row.
# If the same context text occurs on several rows, all of its copies score
# exactly the same as the gold one, so a `>=` rank over the raw rows can never be
# better than the number of copies. The recorded run shows this pattern
# (Hit@1 ~ 0.004 next to Hit@3 ~ 0.68). When no context repeats, this mapping is
# the identity and the ranking is unchanged.
# Assumes the 'context' values are hashable (plain strings) - TODO confirm.
unique_contexts = list(dict.fromkeys(contexts))  # order-preserving de-duplication
context_position = {text: pos for pos, text in enumerate(unique_contexts)}
gold_indices = torch.tensor([context_position[text] for text in contexts], dtype=torch.long)

for model_name in models:
    # Load Model
    model = SentenceTransformer(model_name).to(device)

    # Encode Questions and (unique) Contexts, then move the embeddings to the CPU
    # so the similarity matrix does not occupy GPU memory.
    # NOTE(review): the intfloat/multilingual-e5-* model cards ask for "query: " /
    # "passage: " input prefixes; none are added here - confirm whether intended.
    question_embeddings = model.encode(
        questions, convert_to_tensor=True, batch_size=32, device=device
    ).cpu()
    context_embeddings = model.encode(
        unique_contexts, convert_to_tensor=True, batch_size=32, device=device
    ).cpu()

    # cosine_scores[i, j] = similarity between question i and unique context j
    cosine_scores = util.cos_sim(question_embeddings, context_embeddings)

    # Rank of the gold context = number of candidates scoring at least as high
    # as it does (the gold context itself included, so a valid rank is >= 1).
    gold_scores = cosine_scores.gather(1, gold_indices.unsqueeze(1))
    ranks = (cosine_scores >= gold_scores).sum(dim=1).numpy().astype(np.float64)

    # A rank of 0 is only possible if the gold score is not comparable (NaN);
    # such rows contribute 0 to MRR and never count as a hit.
    valid = ranks > 0
    reciprocal_ranks = np.divide(1.0, ranks, out=np.zeros_like(ranks), where=valid)

    # Calculate Metrics (plain Python floats, not tensors)
    result = {
        "model": model_name,
        "MR": float(ranks.mean()),
        "MRR": float(reciprocal_ranks.mean()),
    }
    for k in HIT_KS:
        result[f"Hit@{k}"] = float((valid & (ranks <= k)).mean())

    print(result)
    results.append(result)

    # Release this model and its tensors before loading the next checkpoint so
    # memory use does not grow with the number of models benchmarked.
    del model, question_embeddings, context_embeddings, cosine_scores, gold_scores
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

{'model': 'intfloat/multilingual-e5-large', 'MR': 10.329749103942651, 'MRR': 0.2670758, 'Hit@1': 0.0035842293906810036, 'Hit@3': 0.6774193548387096, 'Hit@5': 0.6817204301075269, 'Hit@10': 0.8982078853046594, 'Hit@20': 0.9448028673835125, 'Hit@50': 0.9670250896057347}


KeyboardInterrupt: 

In [None]:
# Display Results
results_df = pd.DataFrame(results).sort_values(by="MRR", ascending=False)
print(results_df)

                                   model          MR       MRR     Hit@1  \
0  paraphrase-multilingual-mpnet-base-v2   51.061649  0.173011  0.002151   
3             multi-qa-distilbert-cos-v1  573.710394  0.016607  0.000000   
4                      all-MiniLM-L12-v2  539.965591  0.015943  0.000000   
6              multi-qa-MiniLM-L6-cos-v1  645.935484  0.011622  0.000000   
1                      all-mpnet-base-v2  607.413620  0.011329  0.000000   
5                       all-MiniLM-L6-v2  530.260215  0.010086  0.000000   
2                   all-distilroberta-v1  664.854480  0.006730  0.000000   

      Hit@3     Hit@5  
0  0.375627  0.377061  
3  0.020789  0.021505  
4  0.017921  0.017921  
6  0.014337  0.014337  
1  0.012903  0.012903  
5  0.007885  0.007885  
2  0.003584  0.003584  


In [None]:
# Display Results
results_df = pd.DataFrame(results).sort_values(by="MRR", ascending=False)
print(results_df)

                                   model          MR       MRR     Hit@1  \
3   distiluse-base-multilingual-cased-v1   54.482437  0.178481  0.001434   
4   distiluse-base-multilingual-cased-v2   59.876703  0.169888  0.001434   
1  paraphrase-multilingual-MiniLM-L12-v2   82.306093  0.154329  0.000717   
2                paraphrase-MiniLM-L3-v2  446.967742  0.024096  0.000000   
0             paraphrase-albert-small-v2  684.392115  0.007455  0.000000   

      Hit@3     Hit@5  
3  0.380645  0.382079  
4  0.362007  0.364875  
1  0.326882  0.326882  
2  0.020789  0.020789  
0  0.007168  0.007168  


In [None]:
# Display Results
results_df = pd.DataFrame(results).sort_values(by="MRR", ascending=False)
print(results_df)