# Caderno 10 - Compila todas as métricas.

As tabelas são geradas para cada combinação de conjunto de queries (3 conjuntos) e valor de k (5, 10 e 20).

In [1]:
import pandas as pd

# Models whose metrics are compiled in this notebook
MODELOS = [
    'rufimelo/Legal-BERTimbau-sts-large-ma-v3',
    'sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
    'neuralmind/bert-large-portuguese-cased',
    'stjiris/bert-large-portuguese-cased-legal-mlm-sts-v1.0',
    'stjiris/bert-large-portuguese-cased-legal-mlm-nli-sts-v1',
    'Luciano/bert-base-portuguese-cased-finetuned-tcu-acordaos',
]

# Hidden-layer types to evaluate for each model
TIPOS_CAMADA_OCULTA = ['mean_hidden_state', 'cls_hidden_state']

# Data root, this notebook's output folder, and the pickle files it reads/writes
PASTA_DADOS = './dados/'
PASTA_RESULTADO_CADERNO = PASTA_DADOS + 'outputs/10_compila_todas_metricas/'
ARQUIVO_METRICAS_BM25 = PASTA_DADOS + 'outputs/4_metricas_bm25_padrao/metricas_bm25.pickle'
ARQUIVO_MAPA_METRICAS = PASTA_RESULTADO_CADERNO + 'mapa_metricas.pickle'

## 1. Carrega dados de jurisprudência

In [2]:
import pandas as pd
from metricas import metricas

# The JURIS folder used here is not the original one: it is the output of notebook 1 (documents already filtered)
PASTA_JURIS_TCU = f'{PASTA_DADOS}outputs/1_tratamento_juris_tcu/'

# Loads the treated document, query and qrel files
def carrega_juris_tcu():
    """Read the pipe-separated CSV files stored under ``PASTA_JURIS_TCU``.

    Returns:
        A ``(doc, query, qrel)`` tuple of DataFrames. ``doc`` is the four
        ``doc_tratado_parte_N.csv`` files concatenated in order (N = 1..4)
        with a fresh index; ``query`` and ``qrel`` come from
        ``query_tratado.csv`` and ``qrel_tratado.csv``.
    """
    def _le_csv(nome_arquivo):
        return pd.read_csv(f'{PASTA_JURIS_TCU}{nome_arquivo}', sep='|')

    partes = [_le_csv(f'doc_tratado_parte_{numero}.csv') for numero in range(1, 5)]
    doc = pd.concat(partes, ignore_index=True)
    query = _le_csv('query_tratado.csv')
    qrel = _le_csv('qrel_tratado.csv')

    return doc, query, qrel

# Load the frames once; docs and qrels are consumed by the metric-computation loop further down
docs, queries, qrels = carrega_juris_tcu()

## 2. Extrai as métricas

Carrega a lista dos resultados das queries realizadas no banco vetorial.

In [3]:
import pickle

# Load mapa_metricas, the cache of already-computed metrics keyed by model name.
# NOTE: pickle executes arbitrary code on load; only open files produced by
# this project's own notebooks.
try:
    with open(ARQUIVO_MAPA_METRICAS, 'rb') as arquivo:
        mapa_metricas = pickle.load(arquivo)
except FileNotFoundError:
    # The cache is only written by the last cell of this notebook, so it does
    # not exist on a first run: start empty and let the loop below fill it.
    mapa_metricas = {}

# Load the default BM25 metrics
with open(ARQUIVO_METRICAS_BM25, 'rb') as arquivo:
    metricas_bm25_padrao = pickle.load(arquivo)

mapa_metricas['bm25_padrao'] = metricas_bm25_padrao

In [4]:
# Finds the document KEY that corresponds to a (partial) key
def find_key(doc_key, docs):
    """Return the first ``docs['KEY']`` value that contains ``doc_key``.

    Args:
        doc_key: Text to look for inside the ``KEY`` column.
        docs: DataFrame with a string ``KEY`` column; missing values are
            treated as non-matching.

    Returns:
        The ``KEY`` of the first matching row (in ``docs`` order), or
        ``doc_key`` itself when no row matches.

    NOTE(review): matching is by substring, so ``'12'`` also matches a KEY
    containing ``'123'``; confirm the KEY format makes this unambiguous.
    """
    # regex=False: the key is data, not a pattern, so characters such as
    # '.' or '(' must be matched literally instead of being parsed as regex.
    matches = docs[docs['KEY'].str.contains(doc_key, na=False, regex=False)]
    if matches.empty:
        return doc_key
    return matches.iloc[0]['KEY']

# Builds the results DataFrame in the format expected by the metrics function
def processa_resultado(I, docs):
    """Flatten a per-query ranking matrix into one row per (query, rank).

    Args:
        I: Rankings with one row per query and one entry per rank position
            (e.g. a 2-D array of document ids). May be empty.
        docs: DataFrame with a ``KEY`` column, forwarded to ``find_key``.

    Returns:
        DataFrame with columns ``QUERY_KEY`` (1-based row position in ``I``),
        ``DOC_KEY`` (the entry as a string, resolved through ``find_key``)
        and ``RANK`` (1-based position inside the row). An empty ``I`` yields
        an empty DataFrame with the same columns.
    """
    col_resultado_query_key = []
    col_resultado_doc_key = []
    col_resultado_rank = []

    # Iterating the rows directly (instead of len(I[0])) keeps an empty
    # result set from raising IndexError.
    for query_key, ranking in enumerate(I, start=1):
        for rank, doc_key in enumerate(ranking, start=1):
            col_resultado_query_key.append(query_key)
            col_resultado_doc_key.append(doc_key)
            col_resultado_rank.append(rank)

    df_resultados = pd.DataFrame({
        "QUERY_KEY": col_resultado_query_key,
        "DOC_KEY": col_resultado_doc_key,
        "RANK": col_resultado_rank,
    })

    df_resultados['DOC_KEY'] = df_resultados['DOC_KEY'].astype(str)

    # find_key scans the whole docs frame, so resolve each distinct key once
    # and reuse the answer for every row that repeats it.
    chaves_resolvidas = {
        chave: find_key(chave, docs)
        for chave in df_resultados['DOC_KEY'].unique()
    }
    df_resultados['DOC_KEY'] = df_resultados['DOC_KEY'].map(chaves_resolvidas)

    return df_resultados

# Checks whether a key is already present in a map
def verifica_chave(mapa_metricas, chave):
    """Return ``True`` when ``chave`` is in ``mapa_metricas``, ``False`` otherwise."""
    if chave in mapa_metricas:
        return True
    return False

In [5]:
import pickle
from metricas import metricas
from tqdm import tqdm

# For every (model, hidden-layer type) pair not yet in mapa_metricas, load the
# stored search result, compute its metrics and cache them under
# '<model folder>_<type prefix>'.
for modelo in tqdm(MODELOS, desc='Processando MODELOS'):

    # Only the last path segment of the model id names folders and map keys
    caminho_modelo = modelo.rsplit("/", 1)[-1]
    pasta_resultado_busca = f'{PASTA_DADOS}outputs/8_armazena_dados_em_banco_vetorial/{caminho_modelo}/'

    for tipo_camada_oculta in tqdm(TIPOS_CAMADA_OCULTA, desc=f'Processando {modelo}', leave=False):

        # 'mean_hidden_state' -> 'mean', 'cls_hidden_state' -> 'cls'
        tipo = tipo_camada_oculta.partition('_')[0]
        chave_metricas = f'{caminho_modelo}_{tipo}'

        # Metrics already cached for this pair are not recomputed
        if not verifica_chave(mapa_metricas, chave_metricas):
            arquivo_resultado_busca = (
                f'{pasta_resultado_busca}{caminho_modelo}_{tipo_camada_oculta}_resultado_query.pickle'
            )

            with open(arquivo_resultado_busca, 'rb') as arquivo:
                I = pickle.load(arquivo)

            df_resultados = processa_resultado(I, docs)
            metrica_calculada = metricas(df_resultados, qrels, aproximacao_trec_eval=True)
            mapa_metricas[chave_metricas] = metrica_calculada

Processando MODELOS:   0%|          | 0/6 [00:00<?, ?it/s]
Processando rufimelo/Legal-BERTimbau-sts-large-ma-v3:   0%|          | 0/2 [00:00<?, ?it/s][A
                                                                                           [A
Processando sentence-transformers/paraphrase-multilingual-mpnet-base-v2:   0%|          | 0/2 [00:00<?, ?it/s][A
                                                                                                              [A
Processando neuralmind/bert-large-portuguese-cased:   0%|          | 0/2 [00:00<?, ?it/s][A
                                                                                         [A
Processando stjiris/bert-large-portuguese-cased-legal-mlm-sts-v1.0:   0%|          | 0/2 [00:00<?, ?it/s][A
                                                                                                         [A
Processando stjiris/bert-large-portuguese-cased-legal-mlm-nli-sts-v1:   0%|          | 0/2 [00:00<?, ?it/s][A
Processa

In [6]:
# Sanity check: the key built from the variables left by the loop's last iteration should be in the map
verifica_chave(mapa_metricas,f'{caminho_modelo}_{tipo}')

True

## 3. Exibe as métricas

In [7]:
# Builds the metrics table for query set 1 (rows 0:50), 2 (rows 50:100) or
# 3 (rows 100:150) and for a given k (metrics were generated for k = 5, 10, 20 and 50).

def compara_metricas(con_query, k, tam_conjunto=50):
    """Average P, R, MRR and nDCG at ``k`` over one query set, per model.

    Args:
        con_query: 1-based number of the query set.
        k: Cutoff; selects the columns ``P@k``, ``R@k``, ``MRR@k``, ``nDCG@k``.
        tam_conjunto: Number of consecutive rows that form one query set.

    Returns:
        DataFrame with one row per entry of ``mapa_metricas`` and columns
        ``Modelo``, ``P@k``, ``R@k``, ``MRR@k`` and ``nDCG@k`` (mean values).

    Raises:
        ValueError: If ``con_query`` is smaller than 1.
        KeyError: If a metrics frame lacks one of the ``@k`` columns.
    """
    if con_query < 1:
        # A non-positive set number would produce an empty slice and NaN
        # means instead of an error.
        raise ValueError(f'con_query must be >= 1, got {con_query}')

    inicio = tam_conjunto * (con_query - 1)
    fim = inicio + tam_conjunto
    colunas = [f'P@{k}', f'R@{k}', f'MRR@{k}', f'nDCG@{k}']

    # Rows are selected by position: set n occupies rows [inicio, fim)
    medias = [
        df_metricas.iloc[inicio:fim][colunas].mean()
        for df_metricas in mapa_metricas.values()
    ]

    tabela = {"Modelo": list(mapa_metricas.keys())}
    for coluna in colunas:
        tabela[coluna] = [media[coluna] for media in medias]

    df = pd.DataFrame(tabela)
    return df

def compara_metricas_todas_queries(k):
    """Average P, R, MRR and nDCG at ``k`` over all queries, per model.

    Args:
        k: Cutoff; selects the columns ``P@k``, ``R@k``, ``MRR@k``, ``nDCG@k``.

    Returns:
        DataFrame with one row per entry of ``mapa_metricas`` and columns
        ``Modelo``, ``P@k``, ``R@k``, ``MRR@k`` and ``nDCG@k`` (mean values).
    """
    colunas = [f'P@{k}', f'R@{k}', f'MRR@{k}', f'nDCG@{k}']

    # One 'mean' row (restricted to the four metric columns) per model
    linhas_media = [
        df_metricas.describe().loc['mean', colunas]
        for df_metricas in mapa_metricas.values()
    ]

    tabela = {"Modelo": mapa_metricas.keys()}
    for coluna in colunas:
        tabela[coluna] = [linha[coluna] for linha in linhas_media]

    return pd.DataFrame(tabela)

# Show floating-point values with 4 decimal places when DataFrames are displayed
pd.set_option('display.precision', 4)

In [8]:
# One table per query set. k is pinned to 10 here; use [5, 10, 20] in the
# inner loop to produce the tables for every cutoff.
for con_query in [1, 2, 3]:
    for k in [10]:
        print(f'Resultados para conjunto de query {con_query} e k={k}')
        display(compara_metricas(con_query, k))

Resultados para conjunto de query 1 e k=10


Unnamed: 0,Modelo,P@10,R@10,MRR@10,nDCG@10
0,bm25_padrao,0.238,0.1966,0.5386,0.2753
1,Legal-BERTimbau-sts-large-ma-v3_mean,0.108,0.088,0.2877,0.1214
2,Legal-BERTimbau-sts-large-ma-v3_cls,0.024,0.0179,0.132,0.0385
3,paraphrase-multilingual-mpnet-base-v2_mean,0.146,0.1164,0.3512,0.1608
4,paraphrase-multilingual-mpnet-base-v2_cls,0.098,0.0784,0.3193,0.113
5,bert-large-portuguese-cased_mean,0.074,0.0625,0.1988,0.0833
6,bert-large-portuguese-cased_cls,0.04,0.0328,0.153,0.0473
7,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.202,0.1632,0.424,0.2261
8,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.124,0.1022,0.2569,0.1358
9,bert-large-portuguese-cased-legal-mlm-nli-sts-...,0.176,0.1439,0.3903,0.2028


Resultados para conjunto de query 2 e k=10


Unnamed: 0,Modelo,P@10,R@10,MRR@10,nDCG@10
0,bm25_padrao,0.378,0.3176,0.8665,0.5106
1,Legal-BERTimbau-sts-large-ma-v3_mean,0.166,0.1372,0.4637,0.2099
2,Legal-BERTimbau-sts-large-ma-v3_cls,0.094,0.0782,0.2995,0.1205
3,paraphrase-multilingual-mpnet-base-v2_mean,0.246,0.2028,0.6051,0.3146
4,paraphrase-multilingual-mpnet-base-v2_cls,0.126,0.1031,0.465,0.1819
5,bert-large-portuguese-cased_mean,0.156,0.1317,0.4178,0.187
6,bert-large-portuguese-cased_cls,0.02,0.0176,0.0572,0.0234
7,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.302,0.2475,0.7134,0.3851
8,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.228,0.1889,0.5871,0.2856
9,bert-large-portuguese-cased-legal-mlm-nli-sts-...,0.262,0.2161,0.635,0.334


Resultados para conjunto de query 3 e k=10


Unnamed: 0,Modelo,P@10,R@10,MRR@10,nDCG@10
0,bm25_padrao,0.388,0.3451,0.9175,0.5328
1,Legal-BERTimbau-sts-large-ma-v3_mean,0.182,0.1611,0.4921,0.2338
2,Legal-BERTimbau-sts-large-ma-v3_cls,0.124,0.1096,0.3684,0.1565
3,paraphrase-multilingual-mpnet-base-v2_mean,0.344,0.3047,0.7919,0.4519
4,paraphrase-multilingual-mpnet-base-v2_cls,0.234,0.206,0.7361,0.3381
5,bert-large-portuguese-cased_mean,0.222,0.1964,0.607,0.2893
6,bert-large-portuguese-cased_cls,0.038,0.0354,0.136,0.0479
7,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.348,0.3065,0.8675,0.4603
8,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.308,0.2734,0.7752,0.4008
9,bert-large-portuguese-cased-legal-mlm-nli-sts-...,0.314,0.2755,0.7973,0.421


In [9]:
# One table per cutoff k, averaged over all queries
for k in [5, 10, 20]:
    display(compara_metricas_todas_queries(k))

Unnamed: 0,Modelo,P@5,R@5,MRR@5,nDCG@5
0,bm25_padrao,0.4307,0.1841,0.7674,0.4856
1,Legal-BERTimbau-sts-large-ma-v3_mean,0.196,0.0836,0.397,0.2088
2,Legal-BERTimbau-sts-large-ma-v3_cls,0.1027,0.0439,0.2561,0.1182
3,paraphrase-multilingual-mpnet-base-v2_mean,0.3387,0.143,0.5703,0.3541
4,paraphrase-multilingual-mpnet-base-v2_cls,0.2293,0.097,0.4984,0.2535
5,bert-large-portuguese-cased_mean,0.192,0.0841,0.392,0.2074
6,bert-large-portuguese-cased_cls,0.044,0.0191,0.1044,0.0469
7,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.3773,0.1598,0.657,0.4033
8,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.2827,0.1197,0.5277,0.3035
9,bert-large-portuguese-cased-legal-mlm-nli-sts-...,0.3387,0.145,0.5972,0.3627


Unnamed: 0,Modelo,P@10,R@10,MRR@10,nDCG@10
0,bm25_padrao,0.3347,0.2864,0.7742,0.4396
1,Legal-BERTimbau-sts-large-ma-v3_mean,0.152,0.1288,0.4145,0.1884
2,Legal-BERTimbau-sts-large-ma-v3_cls,0.0807,0.0686,0.2666,0.1052
3,paraphrase-multilingual-mpnet-base-v2_mean,0.2453,0.208,0.5827,0.3091
4,paraphrase-multilingual-mpnet-base-v2_cls,0.1527,0.1292,0.5068,0.211
5,bert-large-portuguese-cased_mean,0.1507,0.1302,0.4079,0.1865
6,bert-large-portuguese-cased_cls,0.0327,0.0286,0.1154,0.0395
7,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.284,0.239,0.6683,0.3572
8,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.22,0.1882,0.5398,0.2741
9,bert-large-portuguese-cased-legal-mlm-nli-sts-...,0.2507,0.2118,0.6076,0.3193


Unnamed: 0,Modelo,P@20,R@20,MRR@20,nDCG@20
0,bm25_padrao,0.2497,0.4258,0.7762,0.5004
1,Legal-BERTimbau-sts-large-ma-v3_mean,0.1117,0.1884,0.4224,0.2154
2,Legal-BERTimbau-sts-large-ma-v3_cls,0.0577,0.0984,0.2726,0.1186
3,paraphrase-multilingual-mpnet-base-v2_mean,0.1643,0.2749,0.5851,0.3356
4,paraphrase-multilingual-mpnet-base-v2_cls,0.1007,0.1686,0.5102,0.2266
5,bert-large-portuguese-cased_mean,0.1063,0.1825,0.4146,0.2099
6,bert-large-portuguese-cased_cls,0.025,0.0433,0.1232,0.0471
7,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.2033,0.3415,0.6717,0.4011
8,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.16,0.2716,0.5457,0.3124
9,bert-large-portuguese-cased-legal-mlm-nli-sts-...,0.1727,0.2902,0.612,0.3494


In [10]:
import os

# Persist mapa_metricas so that a later run can reload it and skip the models
# that are already computed. The path is the same file read at the start of
# the notebook (ARQUIVO_MAPA_METRICAS).
caminho_arquivo = ARQUIVO_MAPA_METRICAS

# The output folder may not exist yet on a first run
os.makedirs(os.path.dirname(caminho_arquivo), exist_ok=True)

with open(caminho_arquivo, 'wb') as arquivo_pickle:
    pickle.dump(mapa_metricas, arquivo_pickle)