In [1]:
# !pip install datasets
# !pip install urllib3
# !pip uninstall urllib3
# !pip install --upgrade botocore
# !pip install --upgrade datasets
# !pip install -U datasets
# !pip install python-terrier

In [2]:
!python --version

Python 3.11.5


In [3]:
# !python -m venv rag_eval

# IMPORTS

In [4]:
from datasets import load_dataset
from tqdm.auto import tqdm
from utils.metrics_utils import *
import numpy as np

In [5]:
tqdm.pandas()

# Dowload dataset

In [6]:
# Загрузка конфигурации question-answer-passages
qa_dataset = load_dataset("enelpol/rag-mini-bioasq", "question-answer-passages")

# Загрузка конфигурации text-corpus
text_dataset = load_dataset("enelpol/rag-mini-bioasq", "text-corpus")

In [7]:
df_rag = qa_dataset["train"].to_pandas()

In [8]:
indexed_passages = text_dataset["test"].to_pandas()

In [9]:
display(df_rag.head(2), indexed_passages.head(2))

Unnamed: 0,question,answer,id,relevant_passage_ids
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]"
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361..."


Unnamed: 0,passage,id
0,New data on viruses isolated from patients wit...,9797
1,We describe an improved method for detecting d...,11906


In [10]:
def generate_from_retriever(row, indexed_passages, prob=0.85, top_k=20):
    """
    Создает столбец 'from_retriever', имитируя поиск и ранжирование.
    
    Параметры:
    row - строка из DataFrame с колонками 'question', 'answer', 'id', 'relevant_passage_ids'.
    indexed_passages - DataFrame с проиндексированными пассажами.
    prob - вероятность добавления релевантного пассажа.
    top_k - количество возвращаемых пассажей (топ-K).
    
    Возвращает:
    Список из top_k документов (ID), который содержит релевантные документы с вероятностью 'prob' и остальные документы.
    """
    relevant_passage_ids = row['relevant_passage_ids']
    from_retriever = []

    for passage_id in relevant_passage_ids:
        if np.random.rand() <= prob:
            from_retriever.append(passage_id)

    if len(from_retriever) < top_k:
        available_ids = set(indexed_passages['id']) - set(from_retriever)
        additional_passages = list(np.random.choice(list(available_ids), size=(top_k - len(from_retriever)), replace=False))
        from_retriever.extend(additional_passages)
    
    np.random.shuffle(from_retriever)

    return from_retriever

In [11]:
df_rag['from_retriever'] = df_rag.progress_apply(generate_from_retriever, args=(indexed_passages,), axis=1)

  0%|          | 0/4012 [00:00<?, ?it/s]

In [12]:
df_rag.head(2)

Unnamed: 0,question,answer,id,relevant_passage_ids,from_retriever
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]","[24495750, 23179372, 25736164, 10848505, 16428..."
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361...","[24055171, 1574115, 11424143, 27865001, 318147..."


In [13]:
def generate_from_ranker(row, prob=0.8):
    """
    Создает столбец 'from_ranker', имитируя ранжирование результатов поиска.
    
    Параметры:
    row - строка из DataFrame с колонками 'from_retriever' и 'relevant_passage_ids'.
    prob - базовая вероятность поднятия релевантного документа в топ списка.
    
    Возвращает:
    Список документов (ID) с ранжированием, где релевантные документы поднимаются выше с увеличенной вероятностью.
    """
    from_retriever = row['from_retriever']
    relevant_passage_ids = set(row['relevant_passage_ids'])
    from_ranker = from_retriever.copy()

    for i in range(len(from_ranker) - 1, -1, -1):
        if from_ranker[i] in relevant_passage_ids:
            if np.random.rand() <= prob:
                weights = np.array([1 / (j + 1) for j in range(i + 1)])  # Инвертированное линейное распределение
                weights = weights / weights.sum()  # Нормализация весов для получения вероятностей
                new_position = np.random.choice(range(i + 1), p=weights)
                from_ranker.insert(new_position, from_ranker.pop(i))
    
    return from_ranker

In [14]:
df_rag['from_ranker'] = df_rag.progress_apply(generate_from_ranker, axis=1)

  0%|          | 0/4012 [00:00<?, ?it/s]

In [15]:
df_rag.head(2)

Unnamed: 0,question,answer,id,relevant_passage_ids,from_retriever,from_ranker
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]","[24495750, 23179372, 25736164, 10848505, 16428...","[23179372, 24495750, 25736164, 10848505, 16428..."
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361...","[24055171, 1574115, 11424143, 27865001, 318147...","[18276799, 26997282, 27298259, 15361841, 24055..."


=============================================================================================================

# МЕТРИКИ БЕЗ УЧЕТА РАНЖИРОВАНИЯ

## PRECISION@K

In [16]:
df_rag = calculate_precision(df_rag, "from_retriever", "relevant_passage_ids", output_col='Precision', k=None)
df_rag.head(2)

  0%|          | 0/4012 [00:00<?, ?it/s]

Unnamed: 0,question,answer,id,relevant_passage_ids,from_retriever,from_ranker,Precision
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]","[24495750, 23179372, 25736164, 10848505, 16428...","[23179372, 24495750, 25736164, 10848505, 16428...",0.15
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361...","[24055171, 1574115, 11424143, 27865001, 318147...","[18276799, 26997282, 27298259, 15361841, 24055...",0.4


In [17]:
df_rag = precision_sklearn(df_rag, "from_retriever", "relevant_passage_ids", output_col='Precision_sk', k=None)
df_rag.head(2)

  0%|          | 0/4012 [00:00<?, ?it/s]

Unnamed: 0,question,answer,id,relevant_passage_ids,from_retriever,from_ranker,Precision,Precision_sk
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]","[24495750, 23179372, 25736164, 10848505, 16428...","[23179372, 24495750, 25736164, 10848505, 16428...",0.15,0.15
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361...","[24055171, 1574115, 11424143, 27865001, 318147...","[18276799, 26997282, 27298259, 15361841, 24055...",0.4,0.4


In [18]:
for k in [20, 10, 5, 3]:
    df_rag = precision_sklearn(df_rag, "from_retriever", "relevant_passage_ids", output_col=f'Precision_sk_{k}', k=k)
    print(f"mean_precision_sklearn_{k} = {round(df_rag[f'Precision_sk_{k}'].mean(), 3)}")
df_rag.head(2)

  0%|          | 0/4012 [00:00<?, ?it/s]

mean_precision_sklearn_20 = 0.355


  0%|          | 0/4012 [00:00<?, ?it/s]

mean_precision_sklearn_10 = 0.355


  0%|          | 0/4012 [00:00<?, ?it/s]

mean_precision_sklearn_5 = 0.359


  0%|          | 0/4012 [00:00<?, ?it/s]

mean_precision_sklearn_3 = 0.357


Unnamed: 0,question,answer,id,relevant_passage_ids,from_retriever,from_ranker,Precision,Precision_sk,Precision_sk_20,Precision_sk_10,Precision_sk_5,Precision_sk_3
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]","[24495750, 23179372, 25736164, 10848505, 16428...","[23179372, 24495750, 25736164, 10848505, 16428...",0.15,0.15,0.15,0.3,0.2,0.333333
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361...","[24055171, 1574115, 11424143, 27865001, 318147...","[18276799, 26997282, 27298259, 15361841, 24055...",0.4,0.4,0.4,0.2,0.0,0.0


In [19]:
for k in [20, 10, 5, 3]:
    df_rag = calculate_precision(df_rag, "from_retriever", "relevant_passage_ids", output_col=f'Precision_{k}', k=k)
    print(f"mean_precision_{k} = {round(df_rag[f'Precision_{k}'].mean(), 3)}")
df_rag.head(2)

  0%|          | 0/4012 [00:00<?, ?it/s]

mean_precision_20 = 0.355


  0%|          | 0/4012 [00:00<?, ?it/s]

mean_precision_10 = 0.355


  0%|          | 0/4012 [00:00<?, ?it/s]

mean_precision_5 = 0.359


  0%|          | 0/4012 [00:00<?, ?it/s]

mean_precision_3 = 0.357


Unnamed: 0,question,answer,id,relevant_passage_ids,from_retriever,from_ranker,Precision,Precision_sk,Precision_sk_20,Precision_sk_10,Precision_sk_5,Precision_sk_3,Precision_20,Precision_10,Precision_5,Precision_3
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]","[24495750, 23179372, 25736164, 10848505, 16428...","[23179372, 24495750, 25736164, 10848505, 16428...",0.15,0.15,0.15,0.3,0.2,0.333333,0.15,0.3,0.2,0.333333
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361...","[24055171, 1574115, 11424143, 27865001, 318147...","[18276799, 26997282, 27298259, 15361841, 24055...",0.4,0.4,0.4,0.2,0.0,0.0,0.4,0.2,0.0,0.0


In [20]:
for k in [20, 10, 5, 3]:
    df_rag = precision_sklearn(df_rag, "from_ranker", "relevant_passage_ids", output_col=f'Precision_rank{k}', k=k)
    print(f"precision_sk_rank{k} = {round(df_rag[f'Precision_rank{k}'].mean(), 3)}")
df_rag.head(2)

  0%|          | 0/4012 [00:00<?, ?it/s]

precision_sk_rank20 = 0.355


  0%|          | 0/4012 [00:00<?, ?it/s]

precision_sk_rank10 = 0.458


  0%|          | 0/4012 [00:00<?, ?it/s]

precision_sk_rank5 = 0.595


  0%|          | 0/4012 [00:00<?, ?it/s]

precision_sk_rank3 = 0.694


Unnamed: 0,question,answer,id,relevant_passage_ids,from_retriever,from_ranker,Precision,Precision_sk,Precision_sk_20,Precision_sk_10,Precision_sk_5,Precision_sk_3,Precision_20,Precision_10,Precision_5,Precision_3,Precision_rank20,Precision_rank10,Precision_rank5,Precision_rank3
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]","[24495750, 23179372, 25736164, 10848505, 16428...","[23179372, 24495750, 25736164, 10848505, 16428...",0.15,0.15,0.15,0.3,0.2,0.333333,0.15,0.3,0.2,0.333333,0.15,0.3,0.2,0.333333
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361...","[24055171, 1574115, 11424143, 27865001, 318147...","[18276799, 26997282, 27298259, 15361841, 24055...",0.4,0.4,0.4,0.2,0.0,0.0,0.4,0.2,0.0,0.0,0.4,0.5,0.8,1.0


In [21]:
for k in [20, 10, 5, 3]:
    df_rag = calculate_precision(df_rag, "from_ranker", "relevant_passage_ids", output_col=f'Precision_rank_{k}', k=k)
    print(f"precision_rank_{k} = {round(df_rag[f'Precision_rank_{k}'].mean(), 3)}")
df_rag.head(2)

  0%|          | 0/4012 [00:00<?, ?it/s]

precision_rank_20 = 0.355


  0%|          | 0/4012 [00:00<?, ?it/s]

precision_rank_10 = 0.458


  0%|          | 0/4012 [00:00<?, ?it/s]

precision_rank_5 = 0.595


  0%|          | 0/4012 [00:00<?, ?it/s]

precision_rank_3 = 0.694


Unnamed: 0,question,answer,id,relevant_passage_ids,from_retriever,from_ranker,Precision,Precision_sk,Precision_sk_20,Precision_sk_10,...,Precision_5,Precision_3,Precision_rank20,Precision_rank10,Precision_rank5,Precision_rank3,Precision_rank_20,Precision_rank_10,Precision_rank_5,Precision_rank_3
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]","[24495750, 23179372, 25736164, 10848505, 16428...","[23179372, 24495750, 25736164, 10848505, 16428...",0.15,0.15,0.15,0.3,...,0.2,0.333333,0.15,0.3,0.2,0.333333,0.15,0.3,0.2,0.333333
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361...","[24055171, 1574115, 11424143, 27865001, 318147...","[18276799, 26997282, 27298259, 15361841, 24055...",0.4,0.4,0.4,0.2,...,0.0,0.0,0.4,0.5,0.8,1.0,0.4,0.5,0.8,1.0


## RECALL@K

In [22]:
for k in [20, 10, 5, 3]:
    df_rag = recall_sklearn(df_rag, "from_ranker", "relevant_passage_ids", output_col=f'recall_rank{k}', k=k)
    print(f"recall_rank{k} = {round(df_rag[f'recall_rank{k}'].mean(), 3)}")

  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank20 = 0.834


  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank10 = 0.629


  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank5 = 0.486


  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank3 = 0.39


In [23]:
for k in [20, 10, 5, 3]:
    df_rag = calculate_recall(df_rag, "from_ranker", "relevant_passage_ids", output_col=f'recall_rank_{k}', k=k)
    print(f"recall_rank{k} = {round(df_rag[f'recall_rank_{k}'].mean(), 3)}")

  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank20 = 0.834


  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank10 = 0.629


  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank5 = 0.486


  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank3 = 0.39


## F1@K

In [24]:
for k in [20, 10, 5, 3]:
    df_rag = fbeta_sklearn(df_rag, "from_retriever", "relevant_passage_ids", output_col=f'f1@{k}', k=k)
    print(f"f1@{k} = {round(df_rag[f'f1@{k}'].mean(), 3)}")

  0%|          | 0/4012 [00:00<?, ?it/s]

f1@20 = 0.888


  0%|          | 0/4012 [00:00<?, ?it/s]

f1@10 = 0.531


  0%|          | 0/4012 [00:00<?, ?it/s]

f1@5 = 0.296


  0%|          | 0/4012 [00:00<?, ?it/s]

f1@3 = 0.187


=======================================================================================================================

# МЕТРИКИ, УЧИТЫВАЮЩИЕ РАНЖИРОВАНИЕ

## MRR

In [25]:
df_rag, mrr = calculate_mrr(df_rag, "from_retriever", "relevant_passage_ids", output_col="RR_ret")

  0%|          | 0/4012 [00:00<?, ?it/s]

MRR = 0.51


In [26]:
df_rag, mrr = calculate_mrr(df_rag, "from_ranker", "relevant_passage_ids", output_col="RR_ran")

  0%|          | 0/4012 [00:00<?, ?it/s]

MRR = 0.88


## AP/MAP

In [27]:
for k in [20, 10, 5, 3]:
    df_rag, _map_1 = calculate_map(df_rag, "from_retriever", "relevant_passage_ids", output_col=f'AP@{k}', k=k)

  0%|          | 0/4012 [00:00<?, ?it/s]

MAP@20 = 0.366


  0%|          | 0/4012 [00:00<?, ?it/s]

MAP@10 = 0.215


  0%|          | 0/4012 [00:00<?, ?it/s]

MAP@5 = 0.131


  0%|          | 0/4012 [00:00<?, ?it/s]

MAP@3 = 0.091


In [28]:
for k in [20, 10, 5, 3]:
    df_rag, _map_2 = calculate_map(df_rag, "from_ranker", "relevant_passage_ids", output_col=f'_AP@{k}', k=k)

  0%|          | 0/4012 [00:00<?, ?it/s]

MAP@20 = 0.645


  0%|          | 0/4012 [00:00<?, ?it/s]

MAP@10 = 0.533


  0%|          | 0/4012 [00:00<?, ?it/s]

MAP@5 = 0.437


  0%|          | 0/4012 [00:00<?, ?it/s]

MAP@3 = 0.364


## NDCG

In [29]:
# base

In [30]:

# for k in [20, 10, 5, 3]:
#     df_rag, ndcg_1 = calculate_ndcg_at_k(df_rag, "from_retriever", "relevant_passage_ids", output_col=f'NDCG@{k}', k=k)

In [31]:
for k in [10, 5, 3]:
    df_rag, _map = calculate_ndcg_at_k(df_rag, "from_ranker", "relevant_passage_ids", output_col=f'_NDCG@{k}', k=k)

  0%|          | 0/4012 [00:00<?, ?it/s]

MEAN NDCG@10 = 0.744


  0%|          | 0/4012 [00:00<?, ?it/s]

MEAN NDCG@5 = 0.767


  0%|          | 0/4012 [00:00<?, ?it/s]

MEAN NDCG@3 = 0.798


In [32]:
# sklearn

In [33]:
# for k in [10, 5, 3]:
#     df_rag, _map = calculate_ndcg_sklearn(df_rag, "from_ranker", "relevant_passage_ids", output_col=f'sk_NDCG@{k}', k=k)