# Метрики на Baseline моделях

### Проверка CUDA

In [1]:
import torch

# Environment sanity check: show the installed PyTorch build and whether a
# CUDA device is visible to it (one value per line).
print(torch.__version__, torch.cuda.is_available(), sep="\n")

2.5.1+cu124
True


### Скачивание датасета

In [2]:
from datasets import load_dataset

# Fetch the labor-codex RAG dataset from the Hugging Face Hub.
dataset = load_dataset("fitlemon/rag-labor-codex-dataset")

# Unpack the two splits used by this notebook.
train_data, test_data = dataset["train"], dataset["test"]

### Подготовка данных

In [3]:
# Raw columns of the test split: row i pairs question i with the chunk that
# is expected to answer it.
chunks = test_data['chunk']
questions = test_data['question']
assert len(chunks) == len(questions), "each question needs exactly one source chunk"

# Corpus (cid => document) built from *unique* chunk texts. If several
# questions share the same chunk, indexing every row separately would put
# identical documents into the corpus under different ids while marking only
# one of them as relevant, so retrieving a duplicate would be scored as a miss.
# Ids are assigned in order of first appearance, so when every chunk is already
# unique this yields exactly the same ids as plain row indexes.
chunk_to_cid = {}
for chunk in chunks:
    chunk_to_cid.setdefault(chunk, str(len(chunk_to_cid)))
corpus = {cid: chunk for chunk, cid in chunk_to_cid.items()}

# Queries (qid => question); qid is the row index as a string.
queries = {str(row): question for row, question in enumerate(questions)}

# Relevant docs (qid => {cid}): each question points to the id of its chunk.
relevant_docs = {str(row): {chunk_to_cid[chunk]} for row, chunk in enumerate(chunks)}

### Получение метрик

In [4]:
from sentence_transformers.evaluation import InformationRetrievalEvaluator

# A single evaluator instance is reused for every baseline model below, so all
# models are scored on the same queries, corpus and relevance labels.
evaluator_inputs = {
    "queries": queries,
    "corpus": corpus,
    "relevant_docs": relevant_docs,
}
# `name` becomes the prefix of every metric key in the returned dict.
ir_evaluator = InformationRetrievalEvaluator(name="Codex-RAG-test", **evaluator_inputs)

## Инициализация моделей

### Модель intfloat/multilingual-e5-large-instruct

In [5]:
from sentence_transformers import SentenceTransformer

# Run on the GPU when one is visible, otherwise fall back to the CPU.
# `device` is shared by all model cells below.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# NOTE(review): the model card for this *-instruct checkpoint describes an
# "Instruct: ...\nQuery: ..." prefix for queries; the evaluator used here passes
# no query prompt, so this baseline may be under-estimated — confirm intent.
model_mle5 = SentenceTransformer(
    'intfloat/multilingual-e5-large-instruct',
    device=device,
)

In [6]:
# Score multilingual-e5 on the shared evaluator; the result is a dict of
# metric name => value (shown in the next cell).
results_mle5 = ir_evaluator(model_mle5)

In [7]:
# Display the multilingual-e5 metrics.
results_mle5

{'Codex-RAG-test_cosine_accuracy@1': 0.21124620060790272,
 'Codex-RAG-test_cosine_accuracy@3': 0.5106382978723404,
 'Codex-RAG-test_cosine_accuracy@5': 0.6223404255319149,
 'Codex-RAG-test_cosine_accuracy@10': 0.7484802431610942,
 'Codex-RAG-test_cosine_precision@1': 0.21124620060790272,
 'Codex-RAG-test_cosine_precision@3': 0.1702127659574468,
 'Codex-RAG-test_cosine_precision@5': 0.12446808510638299,
 'Codex-RAG-test_cosine_precision@10': 0.07484802431610943,
 'Codex-RAG-test_cosine_recall@1': 0.21124620060790272,
 'Codex-RAG-test_cosine_recall@3': 0.5106382978723404,
 'Codex-RAG-test_cosine_recall@5': 0.6223404255319149,
 'Codex-RAG-test_cosine_recall@10': 0.7484802431610942,
 'Codex-RAG-test_cosine_ndcg@10': 0.47220043881314827,
 'Codex-RAG-test_cosine_mrr@10': 0.38442279394027046,
 'Codex-RAG-test_cosine_map@100': 0.3934289076926531}

### Модель BAAI/bge-m3

In [8]:
# Load the BAAI/bge-m3 baseline on the same device as the other models.
model_bge = SentenceTransformer('BAAI/bge-m3', device=device)

In [9]:
# Score bge-m3 on the shared evaluator; the result is a dict of
# metric name => value (shown in the next cell).
results_bge = ir_evaluator(model_bge)

In [10]:
# Display the bge-m3 metrics.
results_bge

{'Codex-RAG-test_cosine_accuracy@1': 0.2393617021276596,
 'Codex-RAG-test_cosine_accuracy@3': 0.5524316109422492,
 'Codex-RAG-test_cosine_accuracy@5': 0.6664133738601824,
 'Codex-RAG-test_cosine_accuracy@10': 0.791033434650456,
 'Codex-RAG-test_cosine_precision@1': 0.2393617021276596,
 'Codex-RAG-test_cosine_precision@3': 0.18414387031408305,
 'Codex-RAG-test_cosine_precision@5': 0.13328267477203645,
 'Codex-RAG-test_cosine_precision@10': 0.07910334346504558,
 'Codex-RAG-test_cosine_recall@1': 0.2393617021276596,
 'Codex-RAG-test_cosine_recall@3': 0.5524316109422492,
 'Codex-RAG-test_cosine_recall@5': 0.6664133738601824,
 'Codex-RAG-test_cosine_recall@10': 0.791033434650456,
 'Codex-RAG-test_cosine_ndcg@10': 0.5091873544391511,
 'Codex-RAG-test_cosine_mrr@10': 0.4194450475225551,
 'Codex-RAG-test_cosine_map@100': 0.4273286505466429}

### Модель jinaai/jina-embeddings-v3

In [14]:
# Load the jina-embeddings-v3 baseline.
# SECURITY: trust_remote_code=True runs Python code shipped in the model's Hub
# repository; consider pinning a reviewed `revision` before sharing this notebook.
# NOTE(review): this model reportedly offers task-specific adapters for
# retrieval queries/passages; none is selected here — confirm this is intended.
model_jina = SentenceTransformer("jinaai/jina-embeddings-v3", trust_remote_code=True, device=device)

flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn i

In [15]:
# Score jina-embeddings-v3 on the shared evaluator; the result is a dict of
# metric name => value (shown in the next cell).
results_jina = ir_evaluator(model_jina)

In [16]:
# Display the jina-embeddings-v3 metrics.
results_jina

{'Codex-RAG-test_cosine_accuracy@1': 0.20896656534954408,
 'Codex-RAG-test_cosine_accuracy@3': 0.49544072948328266,
 'Codex-RAG-test_cosine_accuracy@5': 0.6155015197568389,
 'Codex-RAG-test_cosine_accuracy@10': 0.7401215805471124,
 'Codex-RAG-test_cosine_precision@1': 0.20896656534954408,
 'Codex-RAG-test_cosine_precision@3': 0.16514690982776087,
 'Codex-RAG-test_cosine_precision@5': 0.12310030395136776,
 'Codex-RAG-test_cosine_precision@10': 0.07401215805471124,
 'Codex-RAG-test_cosine_recall@1': 0.20896656534954408,
 'Codex-RAG-test_cosine_recall@3': 0.49544072948328266,
 'Codex-RAG-test_cosine_recall@5': 0.6155015197568389,
 'Codex-RAG-test_cosine_recall@10': 0.7401215805471124,
 'Codex-RAG-test_cosine_ndcg@10': 0.46458197494773545,
 'Codex-RAG-test_cosine_mrr@10': 0.3772968229845124,
 'Codex-RAG-test_cosine_map@100': 0.38637642392182375}

### Итоговые метрики

In [18]:
# Gather the per-model metric dicts into one table:
# one column per model, one row per metric name.
import pandas as pd

results_by_model = {
    "mle5": results_mle5,
    "bge": results_bge,
    "jina": results_jina,
}
results = pd.DataFrame(results_by_model)

In [19]:
# Display the side-by-side comparison table of all baseline models.
results

Unnamed: 0,mle5,bge,jina
Codex-RAG-test_cosine_accuracy@1,0.211246,0.239362,0.208967
Codex-RAG-test_cosine_accuracy@3,0.510638,0.552432,0.495441
Codex-RAG-test_cosine_accuracy@5,0.62234,0.666413,0.615502
Codex-RAG-test_cosine_accuracy@10,0.74848,0.791033,0.740122
Codex-RAG-test_cosine_precision@1,0.211246,0.239362,0.208967
Codex-RAG-test_cosine_precision@3,0.170213,0.184144,0.165147
Codex-RAG-test_cosine_precision@5,0.124468,0.133283,0.1231
Codex-RAG-test_cosine_precision@10,0.074848,0.079103,0.074012
Codex-RAG-test_cosine_recall@1,0.211246,0.239362,0.208967
Codex-RAG-test_cosine_recall@3,0.510638,0.552432,0.495441
