In [1]:
# !pip install datasets
# !pip install urllib3
# !pip uninstall urllib3
# !pip install --upgrade botocore
# !pip install --upgrade datasets
# !pip install -U datasets
# !pip install python-terrier

In [2]:
# !python -m venv rag_eval

## IMPORTS

In [3]:
from datasets import load_dataset
from tqdm.auto import tqdm
from utils.metrics_utils import *
import numpy as np

In [4]:
tqdm.pandas()

## dowload dataset

In [5]:
# Загрузка конфигурации question-answer-passages
qa_dataset = load_dataset("enelpol/rag-mini-bioasq", "question-answer-passages")

# Загрузка конфигурации text-corpus
text_dataset = load_dataset("enelpol/rag-mini-bioasq", "text-corpus")

In [6]:
df_rag = qa_dataset["train"].to_pandas()

In [7]:
indexed_passages = text_dataset["test"].to_pandas()

In [8]:
display(df_rag.head(2), indexed_passages.head(2))

Unnamed: 0,question,answer,id,relevant_passage_ids
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]"
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361..."


Unnamed: 0,passage,id
0,New data on viruses isolated from patients wit...,9797
1,We describe an improved method for detecting d...,11906


In [9]:
def generate_from_retriever(row, indexed_passages, prob=0.85, top_k=20):
    """
    Создает столбец 'from_retriever', имитируя поиск и ранжирование.
    
    Параметры:
    row - строка из DataFrame с колонками 'question', 'answer', 'id', 'relevant_passage_ids'.
    indexed_passages - DataFrame с проиндексированными пассажами.
    prob - вероятность добавления релевантного пассажа.
    top_k - количество возвращаемых пассажей (топ-K).
    
    Возвращает:
    Список из top_k документов (ID), который содержит релевантные документы с вероятностью 'prob' и остальные документы.
    """
    relevant_passage_ids = row['relevant_passage_ids']
    from_retriever = []

    # Добавляем релевантные документы с заданной вероятностью
    for passage_id in relevant_passage_ids:
        if np.random.rand() <= prob:
            from_retriever.append(passage_id)

    # Добираем список до top_k случайными документами
    if len(from_retriever) < top_k:
        # Находим доступные документы, которые не в from_retriever
        available_ids = set(indexed_passages['id']) - set(from_retriever)
        additional_passages = list(np.random.choice(list(available_ids), size=(top_k - len(from_retriever)), replace=False))
        from_retriever.extend(additional_passages)
    
    # Перемешиваем список для имитации реального поиска
    np.random.shuffle(from_retriever)

    return from_retriever

In [10]:
df_rag['from_retriever'] = df_rag.progress_apply(generate_from_retriever, args=(indexed_passages,), axis=1)

  0%|          | 0/4012 [00:00<?, ?it/s]

In [11]:
df_rag.head(2)

Unnamed: 0,question,answer,id,relevant_passage_ids,from_retriever
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]","[11973307, 21562564, 22959745, 24462468, 10037..."
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361...","[19822671, 16005652, 10881785, 23651670, 28430..."


In [12]:
def generate_from_ranker(row, prob=0.8):
    """
    Создает столбец 'from_ranker', имитируя ранжирование результатов поиска.
    
    Параметры:
    row - строка из DataFrame с колонками 'from_retriever' и 'relevant_passage_ids'.
    prob - базовая вероятность поднятия релевантного документа в топ списка.
    
    Возвращает:
    Список документов (ID) с ранжированием, где релевантные документы поднимаются выше с увеличенной вероятностью.
    """
    from_retriever = row['from_retriever']
    relevant_passage_ids = set(row['relevant_passage_ids'])
    
    # Копируем список для ранжирования
    from_ranker = from_retriever.copy()

    # Проходимся по списку снизу вверх
    for i in range(len(from_ranker) - 1, -1, -1):
        if from_ranker[i] in relevant_passage_ids:
            if np.random.rand() <= prob:
                # Определяем вес для каждой позиции: чем выше позиция, тем выше вероятность
                weights = np.array([1 / (j + 1) for j in range(i + 1)])  # Инвертированное линейное распределение
                weights = weights / weights.sum()  # Нормализация весов для получения вероятностей

                # Выбираем новую позицию на основе весов
                new_position = np.random.choice(range(i + 1), p=weights)
                
                # Перемещаем документ в новый индекс
                from_ranker.insert(new_position, from_ranker.pop(i))
    
    return from_ranker

In [13]:
df_rag['from_ranker'] = df_rag.progress_apply(generate_from_ranker, axis=1)

  0%|          | 0/4012 [00:00<?, ?it/s]

In [14]:
df_rag.head(2)

Unnamed: 0,question,answer,id,relevant_passage_ids,from_retriever,from_ranker
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]","[11973307, 21562564, 22959745, 24462468, 10037...","[23184418, 11973307, 21562564, 22959745, 24462..."
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361...","[19822671, 16005652, 10881785, 23651670, 28430...","[27298259, 21589869, 19822671, 26997282, 16005..."


=============================================================================================================

# МЕТРИКИ БЕЗ УЧЕТА РАНЖИРОВАНИЯ

## PRECISION@K

In [15]:
df_rag = calculate_precision(df_rag, "from_retriever", "relevant_passage_ids", output_col='Precision', k=None)
df_rag.head(2)

  0%|          | 0/4012 [00:00<?, ?it/s]

Unnamed: 0,question,answer,id,relevant_passage_ids,from_retriever,from_ranker,Precision
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]","[11973307, 21562564, 22959745, 24462468, 10037...","[23184418, 11973307, 21562564, 22959745, 24462...",0.15
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361...","[19822671, 16005652, 10881785, 23651670, 28430...","[27298259, 21589869, 19822671, 26997282, 16005...",0.4


In [16]:
df_rag = precision_sklearn(df_rag, "from_retriever", "relevant_passage_ids", output_col='Precision_sk', k=None)
df_rag.head(2)

  0%|          | 0/4012 [00:00<?, ?it/s]

Unnamed: 0,question,answer,id,relevant_passage_ids,from_retriever,from_ranker,Precision,Precision_sk
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]","[11973307, 21562564, 22959745, 24462468, 10037...","[23184418, 11973307, 21562564, 22959745, 24462...",0.15,0.15
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361...","[19822671, 16005652, 10881785, 23651670, 28430...","[27298259, 21589869, 19822671, 26997282, 16005...",0.4,0.4


In [17]:
df_rag['Precision'].mean().round(3)

0.356

In [18]:
df_rag['Precision_sk'].mean().round(3)

0.356

In [19]:
for k in [20, 10, 5, 3]:
    df_rag = precision_sklearn(df_rag, "from_retriever", "relevant_passage_ids", output_col=f'Precision_sk_{k}', k=k)
df_rag.head(2)

  0%|          | 0/4012 [00:00<?, ?it/s]

  0%|          | 0/4012 [00:00<?, ?it/s]

  0%|          | 0/4012 [00:00<?, ?it/s]

  0%|          | 0/4012 [00:00<?, ?it/s]

Unnamed: 0,question,answer,id,relevant_passage_ids,from_retriever,from_ranker,Precision,Precision_sk,Precision_sk_20,Precision_sk_10,Precision_sk_5,Precision_sk_3
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]","[11973307, 21562564, 22959745, 24462468, 10037...","[23184418, 11973307, 21562564, 22959745, 24462...",0.15,0.15,0.15,0.1,0.0,0.0
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361...","[19822671, 16005652, 10881785, 23651670, 28430...","[27298259, 21589869, 19822671, 26997282, 16005...",0.4,0.4,0.4,0.3,0.4,0.333333


In [20]:
for k in [20, 10, 5, 3]:
    print(f"precision_sk_{k} = {df_rag[f'Precision_sk_{k}'].mean().round(3)}")

precision_sk_20 = 0.356
precision_sk_10 = 0.356
precision_sk_5 = 0.354
precision_sk_3 = 0.353


In [21]:
for k in [20, 10, 5, 3]:
    df_rag = precision_sklearn(df_rag, "from_ranker", "relevant_passage_ids", output_col=f'Precision_rank{k}', k=k)
df_rag.head(2)

  0%|          | 0/4012 [00:00<?, ?it/s]

  0%|          | 0/4012 [00:00<?, ?it/s]

  0%|          | 0/4012 [00:00<?, ?it/s]

  0%|          | 0/4012 [00:00<?, ?it/s]

Unnamed: 0,question,answer,id,relevant_passage_ids,from_retriever,from_ranker,Precision,Precision_sk,Precision_sk_20,Precision_sk_10,Precision_sk_5,Precision_sk_3,Precision_rank20,Precision_rank10,Precision_rank5,Precision_rank3
0,What is the implication of histone lysine meth...,"Aberrant patterns of H3K4, H3K9, and H3K27 his...",1682,"[23179372, 19270706, 23184418]","[11973307, 21562564, 22959745, 24462468, 10037...","[23184418, 11973307, 21562564, 22959745, 24462...",0.15,0.15,0.15,0.1,0.0,0.0,0.15,0.2,0.2,0.333333
1,What is the role of STAG1/STAG2 proteins in di...,STAG1/STAG2 proteins are tumour suppressor pro...,3722,"[26997282, 21589869, 19822671, 29867216, 15361...","[19822671, 16005652, 10881785, 23651670, 28430...","[27298259, 21589869, 19822671, 26997282, 16005...",0.4,0.4,0.4,0.3,0.4,0.333333,0.4,0.5,0.8,1.0


In [22]:
for k in [20, 10, 5, 3]:
    print(f"precision_{k} = {df_rag[f'Precision_rank{k}'].mean().round(3)}")

precision_20 = 0.356
precision_10 = 0.457
precision_5 = 0.595
precision_3 = 0.695


## RECALL@K

In [23]:
for k in [20, 10, 5, 3]:
    df_rag = recall_sklearn(df_rag, "from_ranker", "relevant_passage_ids", output_col=f'recall_rank{k}', k=k)
    print(f"recall_rank{k} = {df_rag[f'recall_rank{k}'].mean().round(3)}")

  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank20 = 0.837


  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank10 = 0.634


  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank5 = 0.488


  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank3 = 0.393


In [24]:
for k in [20, 10, 5, 3]:
    df_rag =calculate_recall(df_rag, "from_ranker", "relevant_passage_ids", output_col=f'recall_rank{k}', k=k)
    print(f"recall_rank{k} = {df_rag[f'recall_rank{k}'].mean().round(3)}")

  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank20 = 0.837


  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank10 = 0.634


  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank5 = 0.488


  0%|          | 0/4012 [00:00<?, ?it/s]

recall_rank3 = 0.393


## F1@K

In [25]:
for k in [20, 10, 5, 3]:
    df_rag =fbeta_sklearn(df_rag, "from_retriever", "relevant_passage_ids", output_col=f'f1@{k}', k=k)
    print(f"f1@{k} = {df_rag[f'f1@{k}'].mean().round(3)}")

  0%|          | 0/4012 [00:00<?, ?it/s]

f1@20 = 0.89


  0%|          | 0/4012 [00:00<?, ?it/s]

f1@10 = 0.539


  0%|          | 0/4012 [00:00<?, ?it/s]

f1@5 = 0.295


  0%|          | 0/4012 [00:00<?, ?it/s]

f1@3 = 0.186


=======================================================================================================================

# МЕТРИКИ, УЧИТЫВАЮЩИЕ РАНЖИРОВАНИЕ

## MRR

In [26]:
df_rag, mrr = calculate_mrr(df_rag, "from_retriever", "relevant_passage_ids", output_col="RR_ret")

MRR = 0.505


In [27]:
df_rag, mrr = calculate_mrr(df_rag, "from_ranker", "relevant_passage_ids", output_col="RR_ran")

MRR = 0.881


## AP/MAP

In [28]:
for k in [20, 10, 5, 3]:
    df_rag, _map =calculate_map(df_rag, "from_retriever", "relevant_passage_ids", output_col=f'AP@{k}', k=k)

MAP@20 = 0.366
MAP@10 = 0.216
MAP@5 = 0.13
MAP@3 = 0.091


In [30]:
for k in [20, 10, 5, 3]:
    df_rag, _map =calculate_map(df_rag, "from_ranker", "relevant_passage_ids", output_col=f'_AP@{k}', k=k)

MAP@20 = 0.647
MAP@10 = 0.534
MAP@5 = 0.439
MAP@3 = 0.366
