In [1]:
%load_ext autoreload
%autoreload 2

import os
import gc
import torch
import faiss
import numpy as np
import pandas as pd
from datasets import load_dataset

from rag.embeddings import LocalEmbedder
from rag.utils import embed_biorag_datasets, precision_at_k, recall_at_k, mrr_at_k, ndcg_at_k, get_hit_flags, \
    get_metrics

doc_ds = load_dataset("rag-datasets/rag-mini-bioasq", "text-corpus")['passages']
query_ds = load_dataset("rag-datasets/rag-mini-bioasq", "question-answer-passages")['test']

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Precompute
doc_id_to_text = doc_ds.select_columns(['id', 'passage']).to_pandas().set_index('id')['passage'].to_dict()
index_to_doc_id = np.array(doc_ds['id'])
queries = np.array(query_ds['question'])

qrels = [np.array(eval(gold)) for gold in query_ds['relevant_passage_ids']]
qrels_counts = [len(s) for s in qrels]

In [3]:
from rag.utils import embed_dataset
# Check Different models
import os
import torch
from time import time

embedder_models = [
        "all-MiniLM-L6-v2",
        "all-MiniLM-L12-v2",
        "all-mpnet-base-v2",
        "nomic-ai/nomic-embed-text-v1.5",
        "BAAI/bge-small-en-v1.5",
        "BAAI/bge-base-en-v1.5",
        "BAAI/bge-large-en-v1.5",
        "Alibaba-NLP/gte-multilingual-base",
        "Snowflake/snowflake-arctic-embed-l-v2.0",
        "jinaai/jina-embeddings-v3",
        "intfloat/e5-base-v2",
        "BAAI/bge-m3",
        "Lajavaness/bilingual-embedding-base",
        "Qwen/Qwen3-Embedding-0.6B",
]

for i, model_name in enumerate(embedder_models):
        print("=" * 20, f"[{i + 1}/{len(embedder_models)}]", "=" * 20)

        try:
            embedder = LocalEmbedder(model_name, device="cuda")
            start_time = time()
            doc_ds = embed_dataset(doc_ds, embedder, column="passage")
            query_ds = embed_dataset(query_ds, embedder, column="question")
            elapsed_time = time() - start_time
        except Exception as e:
            print(f"Failed to embed {model_name}: {e}")
            del embedder
            gc.collect()
            torch.cuda.empty_cache()b
            continue

        for faiss_metric in ["IP", "L2"]:
            doc_ds.add_faiss_index(
                column='embedding',
                string_factory='Flat',
                metric_type=faiss.METRIC_L2 if faiss_metric == 'L2' else faiss.METRIC_INNER_PRODUCT,
                batch_size=128,
            )

            metrics = {}

            for k in [1, 3, 5, 10]:
                res = doc_ds.get_index('embedding').search_batch(np.array(query_ds['embedding']), k=k)
                retrieved_ids = index_to_doc_id[res.total_indices]

                metrics = {
                    **metrics,
                    **get_metrics(retrieved_ids, query_ds, k),
                }

            res_dict = {
                'model': model_name,
                'faiss_metric': faiss_metric,
                'chunked': False,
                'chunk_size': None,
                'chunk_overlap': None,
                'rerank_model': None,
                **{k: round(v,3) for k,v in metrics.items()},
                "elapsed_time": round(elapsed_time, 1),
            }

            res_df = pd.DataFrame([res_dict])
            csv_path = "results.csv"
            append = os.path.exists(csv_path) and os.path.getsize(csv_path) > 0
            res_df.to_csv(csv_path, mode='a', header=not append, index=False)

        print(model_name)
        print(f"P@10    {metrics['P@10']:.3f}")
        print(f"R@10    {metrics['R@10']:.3f}")
        print(f"MRR@10  {metrics['MRR@10']:.3f}")
        print(f"nDCG@10 {metrics['nDCG@10']:.3f}")


gc.collect()
torch.cuda.empty_cache()



Map: 100%|██████████| 40221/40221 [00:22<00:00, 1796.59 examples/s]
Map: 100%|██████████| 4719/4719 [00:01<00:00, 3173.14 examples/s]
100%|██████████| 315/315 [00:00<00:00, 4474.16it/s]
100%|██████████| 315/315 [00:00<00:00, 4833.86it/s]


all-MiniLM-L6-v2
P@10    0.284
R@10    0.373
MRR@10  0.631
nDCG@10 0.461


Map: 100%|██████████| 40221/40221 [00:27<00:00, 1463.20 examples/s]
Map: 100%|██████████| 4719/4719 [00:02<00:00, 2091.09 examples/s]
100%|██████████| 315/315 [00:00<00:00, 4397.14it/s]
100%|██████████| 315/315 [00:00<00:00, 4910.89it/s]


all-MiniLM-L12-v2
P@10    0.270
R@10    0.351
MRR@10  0.605
nDCG@10 0.436


Map: 100%|██████████| 40221/40221 [01:31<00:00, 437.44 examples/s]
Map: 100%|██████████| 4719/4719 [00:03<00:00, 1452.23 examples/s]
100%|██████████| 315/315 [00:00<00:00, 2995.51it/s]
100%|██████████| 315/315 [00:00<00:00, 3078.08it/s]


all-mpnet-base-v2
P@10    0.271
R@10    0.352
MRR@10  0.599
nDCG@10 0.437
Failed to embed nomic-ai/nomic-embed-text-v1.5: nomic-ai/nomic-bert-2048 You can inspect the repository content at https://hf.co/nomic-ai/nomic-embed-text-v1.5.
Please pass the argument `trust_remote_code=True` to allow custom code to be run.


Map: 100%|██████████| 40221/40221 [00:43<00:00, 920.26 examples/s] 
Map: 100%|██████████| 4719/4719 [00:02<00:00, 2105.11 examples/s]
100%|██████████| 315/315 [00:00<00:00, 4960.52it/s]
100%|██████████| 315/315 [00:00<00:00, 4973.30it/s]


BAAI/bge-small-en-v1.5
P@10    0.340
R@10    0.450
MRR@10  0.738
nDCG@10 0.563


Map: 100%|██████████| 40221/40221 [01:34<00:00, 426.93 examples/s]
Map: 100%|██████████| 4719/4719 [00:03<00:00, 1562.18 examples/s]
100%|██████████| 315/315 [00:00<00:00, 3217.30it/s]
100%|██████████| 315/315 [00:00<00:00, 3325.91it/s]


BAAI/bge-base-en-v1.5
P@10    0.350
R@10    0.463
MRR@10  0.749
nDCG@10 0.577


Map: 100%|██████████| 40221/40221 [03:48<00:00, 176.33 examples/s]
Map: 100%|██████████| 4719/4719 [00:06<00:00, 748.99 examples/s]
100%|██████████| 315/315 [00:00<00:00, 2743.48it/s]
100%|██████████| 315/315 [00:00<00:00, 2700.99it/s]


BAAI/bge-large-en-v1.5
P@10    0.356
R@10    0.473
MRR@10  0.755
nDCG@10 0.589
Failed to embed Alibaba-NLP/gte-multilingual-base: Alibaba-NLP/new-impl You can inspect the repository content at https://hf.co/Alibaba-NLP/gte-multilingual-base.
Please pass the argument `trust_remote_code=True` to allow custom code to be run.


Map: 100%|██████████| 40221/40221 [04:57<00:00, 135.20 examples/s]
Map: 100%|██████████| 4719/4719 [00:06<00:00, 708.34 examples/s]
100%|██████████| 315/315 [00:00<00:00, 2351.13it/s]
100%|██████████| 315/315 [00:00<00:00, 2477.59it/s]


Snowflake/snowflake-arctic-embed-l-v2.0
P@10    0.320
R@10    0.430
MRR@10  0.713
nDCG@10 0.535
Failed to embed jinaai/jina-embeddings-v3: No module named 'custom_st'


Map: 100%|██████████| 40221/40221 [01:34<00:00, 425.15 examples/s]
Map: 100%|██████████| 4719/4719 [00:03<00:00, 1508.28 examples/s]
100%|██████████| 315/315 [00:00<00:00, 2938.92it/s]
100%|██████████| 315/315 [00:00<00:00, 3057.24it/s]


intfloat/e5-base-v2
P@10    0.341
R@10    0.453
MRR@10  0.739
nDCG@10 0.565


Map: 100%|██████████| 40221/40221 [04:54<00:00, 136.44 examples/s]
Map: 100%|██████████| 4719/4719 [00:06<00:00, 711.69 examples/s]
100%|██████████| 315/315 [00:00<00:00, 2322.52it/s]
100%|██████████| 315/315 [00:00<00:00, 2376.91it/s]


BAAI/bge-m3
P@10    0.335
R@10    0.448
MRR@10  0.741
nDCG@10 0.561
Failed to embed Lajavaness/bilingual-embedding-base: dangvantuan/bilingual_impl You can inspect the repository content at https://hf.co/Lajavaness/bilingual-embedding-base.
Please pass the argument `trust_remote_code=True` to allow custom code to be run.


Map:  74%|███████▍  | 29808/40221 [05:43<01:59, 86.79 examples/s] 

Failed to embed Qwen/Qwen3-Embedding-0.6B: CUDA out of memory. Tried to allocate 2.74 GiB. GPU 0 has a total capacity of 31.37 GiB of which 1.51 GiB is free. Including non-PyTorch memory, this process has 29.85 GiB memory in use. Of the allocated memory 21.15 GiB is allocated by PyTorch, and 8.11 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)





NameError: name 'embedder' is not defined