In [72]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix

$\Rightarrow$ Desconsideramos os cálculos de f1_score e recall porque, da maneira como estamos considerando os casos de "predicted" (a saída dos 5 mais relevantes de cada modelo), nunca haverá predições iguais a 0. Como o recall depende da quantidade de falsos negativos (o que pressupõe a predição de negativos), ele seria trivialmente igual a 1; e, como o f1_score é a média harmônica entre precisão e recall, não faz sentido considerar essas métricas por ora.

In [73]:
def compute_binary_metrics(y_true, y_pred):
    """
    Print the precision of binary predictions against the true labels.

    :param y_true: ground-truth binary labels
    :param y_pred: predicted binary labels
    :return: None; the precision is written to stdout
    """
    precision = precision_score(y_true, y_pred)
    print("Precision: ", precision)

In [74]:
def discounted_cumulative_gain(df, k=5, max_score=5):
    """
    Compute the mean Discounted Cumulative Gain (DCG) over all queries.

    Every row adds ``(2 ** Score - 1) / log2(Resultado + 1)`` to the DCG of
    its query. The per-query DCGs are averaged, and the mean is also reported
    relative to the DCG of an ideal ranking in which ``k`` results all
    receive ``max_score``.

    :param df: DataFrame with one row per returned result and the columns
        "Query" (query identifier), "Resultado" (rank of the result) and
        "Score" (graded relevance). Ranks are expected to start at 1: a rank
        of 0 would divide by ``log2(1) == 0``.
    :param k: number of results in the ideal ranking used for normalization.
        It does not truncate ``df``; every row of a query is summed.
    :param max_score: highest relevance grade a single result can receive.
    :return: tuple ``(mean_dcg, normalized_mean_dcg)``.
    :raises ZeroDivisionError: if ``df`` has no rows (no queries to average).
    """
    # Accumulate per query in row order. Reading rank and score from the same
    # row means a repeated rank inside a query can never pick up another
    # row's score, and avoids re-filtering the frame for every result.
    dcg_by_query = {}
    for query, rank, score in zip(
        df["Query"].to_numpy(),
        df["Resultado"].to_numpy(),
        df["Score"].to_numpy(),
    ):
        gain = (2 ** score - 1) / np.log2(rank + 1)
        dcg_by_query[query] = dcg_by_query.get(query, 0) + gain

    mean_dcg = sum(dcg_by_query.values()) / len(dcg_by_query)

    # Upper bound: k results, each with the top grade, at ranks 1..k.
    ideal_gain = 2 ** max_score - 1
    max_dcg = sum(ideal_gain / np.log2(position + 2) for position in range(k))

    normalized_mean_dcg = mean_dcg / max_dcg
    return mean_dcg, normalized_mean_dcg

## **Performance of BM25**

In [75]:
# Every row is one of the top-5 results returned by the model, so each row
# counts as a positive prediction.
bm25 = pd.read_csv("results/results_bm25.csv", sep=";").assign(
    Predicted=lambda frame: [1] * len(frame)
)

In [76]:
# Preview the first rows to check the loaded columns.
bm25.head()

Unnamed: 0,Query,Resultado,Score,Binario,Predicted
0,1,1,4,1,1
1,1,2,5,1,1
2,1,3,4,1,1
3,1,4,4,1,1
4,1,5,1,0,1


In [77]:
# With every prediction set to 1, precision is the fraction of returned
# results whose "Binario" label is 1.
compute_binary_metrics(bm25["Binario"], bm25["Predicted"])

Precision:  0.6727272727272727


In [78]:
# Displays (mean DCG per query, mean DCG divided by the DCG of five results
# that all have score 5).
discounted_cumulative_gain(bm25)

(54.342082838367624, 0.5945378054986006)

## **Performance of the Semantic Model**

In [79]:
# Every row is one of the top-5 results returned by the model, so each row
# counts as a positive prediction.
semantic = pd.read_csv("results/results_semantic.csv", sep=";").assign(
    Predicted=lambda frame: [1] * len(frame)
)

In [80]:
# With every prediction set to 1, precision is the fraction of returned
# results whose "Binario" label is 1.
compute_binary_metrics(semantic["Binario"], semantic["Predicted"])

Precision:  0.6


In [81]:
# Displays (mean DCG per query, mean DCG divided by the DCG of five results
# that all have score 5).
discounted_cumulative_gain(semantic)

(46.800780362808695, 0.5120310411230845)

## **Performance of Fusion**

In [82]:
# Every row is one of the top-5 results returned by the model, so each row
# counts as a positive prediction.
fusion = pd.read_csv("results/fusion.csv", sep=";").assign(
    Predicted=lambda frame: [1] * len(frame)
)

In [83]:
# With every prediction set to 1, precision is the fraction of returned
# results whose "Binario" label is 1.
compute_binary_metrics(fusion["Binario"], fusion["Predicted"])

Precision:  0.8363636363636363


In [84]:
# Displays (mean DCG per query, mean DCG divided by the DCG of five results
# that all have score 5).
discounted_cumulative_gain(fusion)

(60.613734953439355, 0.6631537673938405)