# Caderno 10 - Compila todas as métricas.

As tabelas são as combinações dos conjuntos de queries (3 conjuntos) e k = [5, 10, 20].

In [1]:
import os

import pandas as pd

# Models whose vector-search results are compiled in this notebook.
# 'neuralmind/bert-large-portuguese-cased' appeared in an earlier version of
# this list and is currently left out.
MODELOS = [
    'rufimelo/Legal-BERTimbau-sts-large-ma-v3',
    'sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
]

# Hidden-layer types to process for every model.
TIPOS_CAMADA_OCULTA = [
    'mean_hidden_state',
    'cls_hidden_state',
]

# Base data folder, this notebook's output folder and the BM25 metrics file.
PASTA_DADOS = './dados/'
PASTA_RESULTADO_CADERNO = f'{PASTA_DADOS}outputs/10_compila_todas_metricas/'
ARQUIVO_METRICAS_BM25 = f'{PASTA_DADOS}outputs/4_metricas_bm25_padrao/metricas_bm25.pickle'

## 1. Carrega dados de jurisprudência

In [2]:
import pandas as pd
from metricas import metricas

# The JURIS folder used here is not the original dataset folder but the output
# of notebook 1 (the documents are already filtered).
PASTA_JURIS_TCU = f'{PASTA_DADOS}outputs/1_tratamento_juris_tcu/'

def carrega_juris_tcu(pasta_juris=None):
    """Load the treated documents, queries and qrels tables.

    The documents are stored in four pipe-separated CSV parts
    (``doc_tratado_parte_1.csv`` .. ``doc_tratado_parte_4.csv``) that are
    concatenated, in order, into a single frame with a fresh 0..n-1 index.

    Args:
        pasta_juris: Folder holding the CSV files. When omitted, the
            notebook-level ``PASTA_JURIS_TCU`` is used, as before.

    Returns:
        Tuple ``(doc, query, qrel)`` of pandas DataFrames.

    Raises:
        FileNotFoundError: If any of the expected CSV files is missing.
    """
    if pasta_juris is None:
        # Resolved at call time so the notebook constant can be redefined.
        pasta_juris = PASTA_JURIS_TCU

    def _le_csv(nome_arquivo):
        # Every file of this dataset uses '|' as the field separator.
        return pd.read_csv(os.path.join(pasta_juris, nome_arquivo), sep='|')

    partes_doc = [_le_csv(f'doc_tratado_parte_{parte}.csv') for parte in range(1, 5)]
    doc = pd.concat(partes_doc, ignore_index=True)
    query = _le_csv('query_tratado.csv')
    qrel = _le_csv('qrel_tratado.csv')

    return doc, query, qrel

# Load the documents, queries and qrels tables once; later cells read them.
docs, queries, qrels = carrega_juris_tcu()

## 2. Extrai as métricas

Carrega a lista dos resultados das queries realizadas no banco vetorial.

In [3]:
import pickle

# Load the bm25_padrao metrics.
# mapa_metricas maps a run name to its metrics object; the BM25 entry is added
# first and the embedding-model entries are added by a later cell.
mapa_metricas = {}

# NOTE(review): pickle.load executes arbitrary code from the file; only load
# files produced by this project's own notebooks.
with open(ARQUIVO_METRICAS_BM25, 'rb') as arquivo:
    metricas_bm25_padrao = pickle.load(arquivo)
    
mapa_metricas['bm25_padrao'] = metricas_bm25_padrao

In [4]:
def find_key(doc_key, docs):
    """Return the first ``docs['KEY']`` value that contains ``doc_key``.

    The comparison is a literal, case-sensitive substring test; rows whose
    KEY is missing never match.

    Args:
        doc_key: Text to look for inside the KEY column.
        docs: DataFrame with a string column named ``KEY``.

    Returns:
        The KEY of the first matching row (in ``docs`` row order), or
        ``doc_key`` itself when no row matches.
    """
    # regex=False: the key is data, not a pattern. With the default regex
    # matching, characters such as '.' or '(' would match unintended rows or
    # raise re.error.
    # NOTE(review): this is a substring match, so '12' also matches a KEY such
    # as 'X-112' when it comes first in docs; confirm that is intended.
    contem_chave = docs['KEY'].str.contains(doc_key, na=False, regex=False)
    if contem_chave.any():
        return docs.loc[contem_chave, 'KEY'].iloc[0]
    return doc_key

def processa_resultado(I, docs):
    """Build the long-format results table expected by the metrics function.

    Args:
        I: Search results indexable as ``I[query, rank]`` with ``len(I)`` rows,
            e.g. a 2-D array holding one row of document ids per query. The
            number of ranks is taken from the first row.
        docs: DataFrame with a ``KEY`` column, used to translate each id into
            a document key through ``find_key``.

    Returns:
        DataFrame with columns ``QUERY_KEY`` (1-based row position in ``I``),
        ``DOC_KEY`` (string key resolved by ``find_key``) and ``RANK``
        (1-based position within the row). Empty, with the same columns, when
        ``I`` has no rows.
    """
    num_queries = len(I)
    # Guard the empty case: I[0] does not exist when there are no queries.
    num_ranks = len(I[0]) if num_queries else 0

    posicoes = [(query_idx, rank_idx)
                for query_idx in range(num_queries)
                for rank_idx in range(num_ranks)]

    df_resultados = pd.DataFrame({
        "QUERY_KEY": [query_idx + 1 for query_idx, _ in posicoes],
        "DOC_KEY": [I[query_idx, rank_idx] for query_idx, rank_idx in posicoes],
        "RANK": [rank_idx + 1 for _, rank_idx in posicoes],
    })

    df_resultados['DOC_KEY'] = df_resultados['DOC_KEY'].astype(str)

    # find_key scans the whole docs table, so resolve each distinct id only
    # once instead of once per result row; the resulting values are the same.
    chaves_resolvidas = {chave: find_key(chave, docs)
                         for chave in df_resultados['DOC_KEY'].unique()}
    df_resultados['DOC_KEY'] = [chaves_resolvidas[chave]
                                for chave in df_resultados['DOC_KEY']]

    return df_resultados

In [5]:
import pickle
from metricas import metricas
from tqdm import tqdm

# For every model and hidden-layer type: load the pickled search result,
# convert it to the results table and store its metrics in mapa_metricas.
for modelo in tqdm(MODELOS, desc='Processando MODELOS'):
    
    # Last component of the model id (text after the final '/'); it names both
    # the model's folder and the prefix of its result files.
    caminho_modelo = modelo.split("/")[-1]
    pasta_resultado_busca = f'{PASTA_DADOS}outputs/8_armazena_dados_em_banco_vetorial/{caminho_modelo}/'
    
    for tipo_camada_oculta in tqdm(TIPOS_CAMADA_OCULTA, desc=f'Processando {modelo}', leave=False):
        # Text before the first '_' ('mean' / 'cls'), used in the map key below.
        tipo = tipo_camada_oculta.split('_')[0]
        arquivo_resultado_busca = f'{pasta_resultado_busca}{caminho_modelo}_{tipo_camada_oculta}_resultado_query.pickle'
        
        # NOTE(review): pickle.load executes arbitrary code from the file; only
        # load files produced by this project's own notebooks.
        with open(arquivo_resultado_busca, 'rb') as arquivo:
            # processa_resultado indexes this object as I[query, rank];
            # presumably a 2-D array of document ids -- TODO confirm.
            I = pickle.load(arquivo)
            
        df_resultados = processa_resultado(I, docs)
        # metricas is a project helper; its exact output is not visible here.
        metrica_calculada = metricas(df_resultados, qrels, aproximacao_trec_eval=True)
        mapa_metricas[f'{caminho_modelo}_{tipo}'] = metrica_calculada

Processando MODELOS:   0%|          | 0/2 [00:00<?, ?it/s]
Processando rufimelo/Legal-BERTimbau-sts-large-ma-v3:   0%|          | 0/2 [00:00<?, ?it/s][A
Processando rufimelo/Legal-BERTimbau-sts-large-ma-v3:  50%|█████     | 1/2 [01:12<01:12, 72.83s/it][A
Processando rufimelo/Legal-BERTimbau-sts-large-ma-v3: 100%|██████████| 2/2 [02:26<00:00, 73.36s/it][A
Processando MODELOS:  50%|█████     | 1/2 [02:26<02:26, 146.56s/it]                                [A
Processando sentence-transformers/paraphrase-multilingual-mpnet-base-v2:   0%|          | 0/2 [00:00<?, ?it/s][A
Processando sentence-transformers/paraphrase-multilingual-mpnet-base-v2:  50%|█████     | 1/2 [01:08<01:08, 68.32s/it][A
Processando sentence-transformers/paraphrase-multilingual-mpnet-base-v2: 100%|██████████| 2/2 [02:21<00:00, 70.97s/it][A
Processando MODELOS: 100%|██████████| 2/2 [04:47<00:00, 143.87s/it]                                                   [A


## 3. Exibe as métricas

In [6]:
def _media_metricas(metricas_por_modelo, k, linhas=None):
    """Build one row per entry with the mean P, R, MRR and nDCG at cutoff k.

    Args:
        metricas_por_modelo: Mapping name -> per-query metrics DataFrame, or
            None to use the notebook-level ``mapa_metricas``.
        k: Cutoff used to pick the columns ``P@k``, ``R@k``, ``MRR@k`` and
            ``nDCG@k``.
        linhas: Optional positional row slice; None means every row.

    Returns:
        DataFrame with columns ``Modelo``, ``P@k``, ``R@k``, ``MRR@k`` and
        ``nDCG@k``, in the mapping's iteration order.

    Raises:
        KeyError: If a metrics frame lacks one of the four columns.
    """
    if metricas_por_modelo is None:
        # Looked up at call time so the map filled by earlier cells is used.
        metricas_por_modelo = mapa_metricas

    colunas = [f'P@{k}', f'R@{k}', f'MRR@{k}', f'nDCG@{k}']
    registros = []
    for nome, metricas_modelo in metricas_por_modelo.items():
        recorte = metricas_modelo if linhas is None else metricas_modelo.iloc[linhas]
        registro = {'Modelo': nome}
        for coluna in colunas:
            # Same value as describe().loc['mean', coluna], without computing
            # the other summary statistics.
            registro[coluna] = recorte[coluna].mean()
        registros.append(registro)

    return pd.DataFrame(registros, columns=['Modelo'] + colunas)


def compara_metricas(con_query, k, metricas_por_modelo=None, tamanho_conjunto=50):
    """Mean metrics at cutoff k for one query set.

    Query sets are consecutive row ranges of ``tamanho_conjunto`` rows: with
    the default of 50, set 1 is rows 0:50, set 2 is rows 50:100 and set 3 is
    rows 100:150. The original note in this notebook says the metrics were
    generated for k = 5, 10, 20 and 50.

    Args:
        con_query: 1-based number of the query set.
        k: Metric cutoff.
        metricas_por_modelo: Optional mapping name -> metrics DataFrame;
            defaults to the notebook-level ``mapa_metricas``.
        tamanho_conjunto: Number of rows in each query set.

    Returns:
        DataFrame with one row per entry and columns ``Modelo``, ``P@k``,
        ``R@k``, ``MRR@k`` and ``nDCG@k``.
    """
    inicio = tamanho_conjunto * (con_query - 1)
    fim = tamanho_conjunto * con_query
    return _media_metricas(metricas_por_modelo, k, slice(inicio, fim))


def compara_metricas_todas_queries(k, metricas_por_modelo=None):
    """Mean metrics at cutoff k over every query (no split into sets).

    Args:
        k: Metric cutoff.
        metricas_por_modelo: Optional mapping name -> metrics DataFrame;
            defaults to the notebook-level ``mapa_metricas``.

    Returns:
        DataFrame with one row per entry and columns ``Modelo``, ``P@k``,
        ``R@k``, ``MRR@k`` and ``nDCG@k``.
    """
    return _media_metricas(metricas_por_modelo, k)

# Show floating-point values with 4 decimal places in displayed tables.
pd.set_option('display.precision', 4)

In [7]:
# Display one comparison table per query set. Only k=10 is enabled; the
# commented-out line below is the variant that covers k = 5, 10 and 20.
for con_query in [1, 2, 3]:
#    for k in [5, 10, 20]:
    for k in [10]:
        print(f'Resultados para conjunto de query {con_query} e k={k}')
        display(compara_metricas(con_query, k))

Resultados para conjunto de query 1 e k=10


Unnamed: 0,Modelo,P@10,R@10,MRR@10,nDCG@10
0,bm25_padrao,0.244,0.2024,0.5407,0.28
1,Legal-BERTimbau-sts-large-ma-v3_mean,0.108,0.088,0.2877,0.1214
2,Legal-BERTimbau-sts-large-ma-v3_cls,0.024,0.0179,0.132,0.0385
3,paraphrase-multilingual-mpnet-base-v2_mean,0.146,0.1164,0.3512,0.1608
4,paraphrase-multilingual-mpnet-base-v2_cls,0.098,0.0784,0.3193,0.113


Resultados para conjunto de query 2 e k=10


Unnamed: 0,Modelo,P@10,R@10,MRR@10,nDCG@10
0,bm25_padrao,0.376,0.3165,0.8876,0.5176
1,Legal-BERTimbau-sts-large-ma-v3_mean,0.166,0.1372,0.4637,0.2099
2,Legal-BERTimbau-sts-large-ma-v3_cls,0.094,0.0782,0.2995,0.1205
3,paraphrase-multilingual-mpnet-base-v2_mean,0.246,0.2028,0.6051,0.3146
4,paraphrase-multilingual-mpnet-base-v2_cls,0.126,0.1031,0.465,0.1819


Resultados para conjunto de query 3 e k=10


Unnamed: 0,Modelo,P@10,R@10,MRR@10,nDCG@10
0,bm25_padrao,0.37,0.3294,0.9139,0.5193
1,Legal-BERTimbau-sts-large-ma-v3_mean,0.182,0.1611,0.4921,0.2338
2,Legal-BERTimbau-sts-large-ma-v3_cls,0.124,0.1096,0.3684,0.1565
3,paraphrase-multilingual-mpnet-base-v2_mean,0.344,0.3047,0.7919,0.4519
4,paraphrase-multilingual-mpnet-base-v2_cls,0.234,0.206,0.7361,0.3381


In [8]:
# Display the averages over every query (no split into sets) for each cutoff k.
for k in [5, 10, 20]:
    display(compara_metricas_todas_queries(k))

Unnamed: 0,Modelo,P@5,R@5,MRR@5,nDCG@5
0,bm25_padrao,0.4347,0.1861,0.7734,0.4928
1,Legal-BERTimbau-sts-large-ma-v3_mean,0.196,0.0836,0.397,0.2088
2,Legal-BERTimbau-sts-large-ma-v3_cls,0.1027,0.0439,0.2561,0.1182
3,paraphrase-multilingual-mpnet-base-v2_mean,0.3387,0.143,0.5703,0.3541
4,paraphrase-multilingual-mpnet-base-v2_cls,0.2293,0.097,0.4984,0.2535


Unnamed: 0,Modelo,P@10,R@10,MRR@10,nDCG@10
0,bm25_padrao,0.33,0.2827,0.7807,0.439
1,Legal-BERTimbau-sts-large-ma-v3_mean,0.152,0.1288,0.4145,0.1884
2,Legal-BERTimbau-sts-large-ma-v3_cls,0.0807,0.0686,0.2666,0.1052
3,paraphrase-multilingual-mpnet-base-v2_mean,0.2453,0.208,0.5827,0.3091
4,paraphrase-multilingual-mpnet-base-v2_cls,0.1527,0.1292,0.5068,0.211


Unnamed: 0,Modelo,P@20,R@20,MRR@20,nDCG@20
0,bm25_padrao,0.2413,0.4116,0.7822,0.4936
1,Legal-BERTimbau-sts-large-ma-v3_mean,0.1117,0.1884,0.4224,0.2154
2,Legal-BERTimbau-sts-large-ma-v3_cls,0.0577,0.0984,0.2726,0.1186
3,paraphrase-multilingual-mpnet-base-v2_mean,0.1643,0.2749,0.5851,0.3356
4,paraphrase-multilingual-mpnet-base-v2_cls,0.1007,0.1686,0.5102,0.2266


In [9]:
# Write mapa_metricas to a pickle file in this notebook's output folder.
# No earlier cell in this notebook creates the folder, so create it first;
# exist_ok=True makes re-running the cell harmless.
os.makedirs(PASTA_RESULTADO_CADERNO, exist_ok=True)
caminho_arquivo = f'{PASTA_RESULTADO_CADERNO}mapa_metricas.pickle'
with open(caminho_arquivo, 'wb') as arquivo_pickle:
    pickle.dump(mapa_metricas, arquivo_pickle)