# Caderno 10 - Compila todas as métricas.

As tabelas são as combinações dos conjuntos de queries (3 conjuntos) e k = [5, 10, 20].

In [2]:
import os

import pandas as pd

# Models whose stored search results are compiled, as 'owner/name' identifiers.
# Disabled entries: 'text-embedding-ada-002', 'text-embedding-3-small',
# 'text-embedding-3-large'.
MODELOS = [
    'Luciano/bert-base-portuguese-cased-finetuned-tcu-acordaos',
    'neuralmind/bert-base-portuguese-cased',
    'neuralmind/bert-large-portuguese-cased',
    'rufimelo/Legal-BERTimbau-sts-large-ma-v3',
    'stjiris/bert-large-portuguese-cased-legal-mlm-sts-v1.0',
    'stjiris/bert-large-portuguese-cased-legal-mlm-nli-sts-v1',
    'sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
]

# Hidden-layer variants to process (the single-variant alternative is
# ['mean_hidden_state']).
TIPOS_CAMADA_OCULTA = ['mean_hidden_state', 'cls_hidden_state']

# Root data folder and the output folder of this notebook.
PASTA_DADOS = './dados/'
PASTA_RESULTADO_CADERNO = f'{PASTA_DADOS}outputs/10_compila_todas_metricas/'

# Metric files stored under the output folders of notebooks 4 and 3.
ARQUIVO_METRICAS_BM25 = f'{PASTA_DADOS}outputs/4_metricas_bm25_padrao/metricas_bm25.pickle'
ARQUIVO_METRICAS_ATUAIS = f'{PASTA_DADOS}outputs/3_calcula_metricas_atuais/'
ARQUIVO_METRICAS_ATUAIS_SELECT = f'{ARQUIVO_METRICAS_ATUAIS}metricas_atuais_select.pickle'
ARQUIVO_METRICAS_ATUAIS_SELECT_SWAN = f'{ARQUIVO_METRICAS_ATUAIS}metricas_atuais_select_swan.pickle'
ARQUIVO_METRICAS_ATUAIS_SELECT_SWAN_SYNONYM = f'{ARQUIVO_METRICAS_ATUAIS}metricas_atuais_select_swan_synonym.pickle'

# Cache of the metrics compiled by this notebook (loaded at start, saved at the end).
ARQUIVO_MAPA_METRICAS = f'{PASTA_RESULTADO_CADERNO}mapa_metricas.pickle'

## 1. Carrega dados de jurisprudência

In [4]:
import pandas as pd
from metricas import metricas

# This is not the original JURIS folder: it is the output of notebook 1,
# where the documents have already been filtered.
PASTA_JURIS_TCU = f'{PASTA_DADOS}outputs/1_tratamento_juris_tcu/'


def carrega_juris_tcu():
    """Read the treated documents, queries and qrels from PASTA_JURIS_TCU.

    All files are '|'-separated CSVs. The documents are split into four part
    files, which are concatenated in order with a fresh 0..n-1 index.

    Returns:
        tuple: (doc, query, qrel) DataFrames.
    """
    arquivos_doc = (
        f'{PASTA_JURIS_TCU}doc_tratado_parte_1.csv',
        f'{PASTA_JURIS_TCU}doc_tratado_parte_2.csv',
        f'{PASTA_JURIS_TCU}doc_tratado_parte_3.csv',
        f'{PASTA_JURIS_TCU}doc_tratado_parte_4.csv',
    )
    partes = [pd.read_csv(caminho, sep='|') for caminho in arquivos_doc]
    doc = pd.concat(partes, ignore_index=True)

    query = pd.read_csv(f'{PASTA_JURIS_TCU}query_tratado.csv', sep='|')
    qrel = pd.read_csv(f'{PASTA_JURIS_TCU}qrel_tratado.csv', sep='|')

    return doc, query, qrel


docs, queries, qrels = carrega_juris_tcu()

## 2. Extrai as métricas

Carrega a lista dos resultados das queries realizadas no banco vetorial.

In [6]:
import pickle

def _le_pickle(caminho):
    """Open `caminho` in binary mode and return the unpickled object."""
    # NOTE(review): pickle.load can execute arbitrary code; only use it on
    # files produced by this project.
    with open(caminho, 'rb') as arquivo:
        return pickle.load(arquivo)


# Metrics already compiled by a previous run of this notebook; start with an
# empty map when the cache file does not exist yet.
try:
    mapa_metricas = _le_pickle(ARQUIVO_MAPA_METRICAS)
except FileNotFoundError:
    mapa_metricas = {}

# Metrics of the standard BM25 baseline.
metricas_bm25_padrao = _le_pickle(ARQUIVO_METRICAS_BM25)

# Metrics of the current search variants.
metricas_atuais_select = _le_pickle(ARQUIVO_METRICAS_ATUAIS_SELECT)
metricas_atuais_select_swan = _le_pickle(ARQUIVO_METRICAS_ATUAIS_SELECT_SWAN)
metricas_atuais_select_swan_synonym = _le_pickle(ARQUIVO_METRICAS_ATUAIS_SELECT_SWAN_SYNONYM)

# The loaded baselines are currently NOT inserted into mapa_metricas; the
# assignments under the keys 'bm25_padrao', 'select', 'select_swan' and
# 'select_swan_synonym' are disabled.


In [7]:
def find_key(doc_key, docs):
    """Return the first value of docs['KEY'] that contains `doc_key`.

    The match is a literal substring match: `doc_key` is an identifier, not a
    pattern, so regular-expression metacharacters in it ('.', '(', '+', ...)
    are compared verbatim instead of being interpreted.

    Args:
        doc_key (str): text to look for inside the 'KEY' column.
        docs (pandas.DataFrame): frame with a string column 'KEY'; missing
            values in that column never match.

    Returns:
        The first matching 'KEY' value in row order, or `doc_key` itself when
        no row matches.
    """
    # NOTE(review): substring matching means '12' also matches a key that
    # contains '112' -- confirm the KEY format makes this unambiguous.
    matches = docs[docs['KEY'].str.contains(doc_key, regex=False, na=False)]
    if not matches.empty:
        return matches.iloc[0]['KEY']
    return doc_key

def processa_resultado(I, docs):
    """Build the results DataFrame in the format expected by the metrics function.

    Args:
        I: 2-D result matrix indexed as I[query_idx, rank_idx]; each cell holds
            the id of the document retrieved for that query at that rank.
        docs (pandas.DataFrame): documents frame, passed on to `find_key` to
            translate each id into its full 'KEY' value.

    Returns:
        pandas.DataFrame: one row per (query, rank) pair with the columns
        'QUERY_KEY' (1-based row of `I`), 'DOC_KEY' (resolved key, as string)
        and 'RANK' (1-based column of `I`). Empty, with the same columns, when
        `I` has no rows.
    """
    n_queries = len(I)
    # Guard the empty case: I[0] does not exist when there are no queries.
    n_ranks = len(I[0]) if n_queries else 0

    posicoes = [(q, r) for q in range(n_queries) for r in range(n_ranks)]
    df_resultados = pd.DataFrame({
        "QUERY_KEY": [q + 1 for q, _ in posicoes],
        "DOC_KEY": [I[q, r] for q, r in posicoes],
        "RANK": [r + 1 for _, r in posicoes],
    })

    df_resultados['DOC_KEY'] = df_resultados['DOC_KEY'].astype(str)

    # find_key scans the whole `docs` frame, so resolve each distinct id only
    # once and map the rows through the resulting lookup table.
    chaves_resolvidas = {
        doc_key: find_key(doc_key, docs)
        for doc_key in df_resultados['DOC_KEY'].unique()
    }
    df_resultados['DOC_KEY'] = df_resultados['DOC_KEY'].map(chaves_resolvidas)

    return df_resultados

def verifica_chave(mapa_metricas, chave):
    """Tell whether `chave` is already present in `mapa_metricas`."""
    ja_existe = chave in mapa_metricas
    return ja_existe

In [8]:
import pickle
from metricas import metricas
from tqdm import tqdm

# For every (model, hidden-layer type) pair that is not yet in mapa_metricas,
# load the stored search result and compute its metrics.
for modelo in tqdm(MODELOS, desc='Processando MODELOS'):

    # Result files live in a folder named after the model without its owner prefix.
    caminho_modelo = modelo.rsplit("/", 1)[-1]
    pasta_resultado_busca = f'{PASTA_DADOS}outputs/8_armazena_dados_em_banco_vetorial/{caminho_modelo}/'

    for tipo_camada_oculta in tqdm(TIPOS_CAMADA_OCULTA, desc=f'Processando {modelo}', leave=False):

        # Short type name: the text before the first '_' ('mean' or 'cls').
        tipo = tipo_camada_oculta.split('_')[0]
        chave_metricas = f'{caminho_modelo}_{tipo}'

        # Only compute the metrics that are not cached in mapa_metricas yet.
        if not verifica_chave(mapa_metricas, chave_metricas):
            arquivo_resultado_busca = f'{pasta_resultado_busca}{caminho_modelo}_{tipo_camada_oculta}_resultado_query.pickle'

            with open(arquivo_resultado_busca, 'rb') as arquivo:
                I = pickle.load(arquivo)

            df_resultados = processa_resultado(I, docs)
            metrica_calculada = metricas(df_resultados, qrels, aproximacao_trec_eval=True)
            mapa_metricas[chave_metricas] = metrica_calculada

Processando MODELOS:   0%|          | 0/7 [00:00<?, ?it/s]
Processando Luciano/bert-base-portuguese-cased-finetuned-tcu-acordaos:   0%|          | 0/2 [00:00<?, ?it/s][A
Processando Luciano/bert-base-portuguese-cased-finetuned-tcu-acordaos:  50%|█████     | 1/2 [01:01<01:01, 61.96s/it][A
Processando Luciano/bert-base-portuguese-cased-finetuned-tcu-acordaos: 100%|██████████| 2/2 [02:03<00:00, 61.74s/it][A
Processando MODELOS:  14%|█▍        | 1/7 [02:03<12:21, 123.57s/it]                                                 [A
Processando neuralmind/bert-base-portuguese-cased:   0%|          | 0/2 [00:00<?, ?it/s][A
Processando neuralmind/bert-base-portuguese-cased:  50%|█████     | 1/2 [01:05<01:05, 65.10s/it][A
Processando neuralmind/bert-base-portuguese-cased: 100%|██████████| 2/2 [02:08<00:00, 63.93s/it][A
Processando MODELOS:  29%|██▊       | 2/7 [04:11<10:31, 126.32s/it]                             [A
Processando neuralmind/bert-large-portuguese-cased:   0%|          | 0/2 [00:

In [9]:
# Sanity check: is the key of the last (model, type) pair handled by the loop
# above present in mapa_metricas? Relies on `caminho_modelo` and `tipo` being
# left over from that loop.
verifica_chave(mapa_metricas,f'{caminho_modelo}_{tipo}')

True

## 3. Exibe as métricas

In [11]:
def compara_metricas(con_query, k, tamanho_conjunto=50, mapa=None):
    """Build the per-model mean metrics table for one query set and one cutoff k.

    Query sets are consecutive blocks of `tamanho_conjunto` rows of each
    model's metrics table: set 1 is rows 0:50, set 2 is rows 50:100 and set 3
    is rows 100:150 with the default size. Rows are selected by position, so
    this assumes every table is ordered by query -- TODO confirm for new
    metric sources.

    Args:
        con_query (int): 1-based number of the query set.
        k (int): cutoff; the columns 'P@k', 'R@k', 'MRR@k' and 'nDCG@k' must
            exist in every metrics table (the metrics were generated for
            k = 5, 10, 20 and 50).
        tamanho_conjunto (int): number of queries per set. Defaults to 50.
        mapa (dict | None): maps a model name to its per-query metrics
            DataFrame. Defaults to the notebook-level `mapa_metricas`.

    Returns:
        pandas.DataFrame: one row per model with the columns 'Modelo', 'P@k',
        'R@k', 'MRR@k' and 'nDCG@k' holding the means over the selected rows.

    Raises:
        ValueError: if `con_query` is smaller than 1 (a negative slice start
            would otherwise silently select the wrong rows).
        KeyError: if a metrics table lacks one of the requested columns.
    """
    if con_query < 1:
        raise ValueError(f'con_query must be >= 1, got {con_query}')
    if mapa is None:
        mapa = mapa_metricas

    colunas = [f'P@{k}', f'R@{k}', f'MRR@{k}', f'nDCG@{k}']
    inicio = tamanho_conjunto * (con_query - 1)
    fim = inicio + tamanho_conjunto

    linhas = []
    for nome, df_modelo in mapa.items():
        # Only the means are needed, so skip the full describe() statistics.
        medias = df_modelo.iloc[inicio:fim][colunas].mean()
        linhas.append({"Modelo": nome, **medias.to_dict()})

    return pd.DataFrame(linhas, columns=["Modelo", *colunas])

def compara_metricas_todas_queries(k, mapa=None):
    """Build the per-model mean metrics table over all queries for a cutoff k.

    Args:
        k (int): cutoff; the columns 'P@k', 'R@k', 'MRR@k' and 'nDCG@k' must
            exist in every metrics table.
        mapa (dict | None): maps a model name to its per-query metrics
            DataFrame. Defaults to the notebook-level `mapa_metricas`.

    Returns:
        pandas.DataFrame: one row per model with the columns 'Modelo', 'P@k',
        'R@k', 'MRR@k' and 'nDCG@k' holding the means over every row of the
        model's table.

    Raises:
        KeyError: if a metrics table lacks one of the requested columns.
    """
    if mapa is None:
        mapa = mapa_metricas

    colunas = [f'P@{k}', f'R@{k}', f'MRR@{k}', f'nDCG@{k}']

    linhas = []
    for nome, df_modelo in mapa.items():
        # Only the means are needed, so skip the full describe() statistics.
        medias = df_modelo[colunas].mean()
        linhas.append({"Modelo": nome, **medias.to_dict()})

    return pd.DataFrame(linhas, columns=["Modelo", *colunas])


# Show floats with four decimal places in the tables displayed below.
pd.set_option('display.precision', 4)

In [12]:
# On a first run the output folder may not exist yet (the cached map is also
# allowed to be missing), and to_csv does not create folders.
os.makedirs(PASTA_RESULTADO_CADERNO, exist_ok=True)

# Export and display the per-model means for every query set and cutoff k.
for con_query in [1, 2, 3]:
    for k in [20]:
        print(f'Resultados para conjunto de query {con_query} e k={k}')
        df_metricas = compara_metricas(con_query, k)
        df_metricas.to_csv(f'{PASTA_RESULTADO_CADERNO}metricas-conj-{con_query}-k-{k}.csv', index=False)
        display(df_metricas)

Resultados para conjunto de query 1 e k=20


Unnamed: 0,Modelo,P@20,R@20,MRR@20,nDCG@20
0,bert-base-portuguese-cased-finetuned-tcu-acord...,0.035,0.0567,0.1824,0.0649
1,bert-base-portuguese-cased-finetuned-tcu-acord...,0.052,0.0811,0.2163,0.0861
2,bert-base-portuguese-cased_mean,0.032,0.0499,0.1849,0.0648
3,bert-base-portuguese-cased_cls,0.037,0.0571,0.118,0.0536
4,bert-large-portuguese-cased_mean,0.07,0.1169,0.211,0.1112
5,bert-large-portuguese-cased_cls,0.027,0.0451,0.1598,0.0515
6,Legal-BERTimbau-sts-large-ma-v3_mean,0.08,0.129,0.2972,0.1376
7,Legal-BERTimbau-sts-large-ma-v3_cls,0.022,0.0341,0.1357,0.0459
8,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.139,0.2261,0.4271,0.2514
9,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.097,0.1587,0.2664,0.1634


Resultados para conjunto de query 2 e k=20


Unnamed: 0,Modelo,P@20,R@20,MRR@20,nDCG@20
0,bert-base-portuguese-cased-finetuned-tcu-acord...,0.077,0.1291,0.3644,0.1487
1,bert-base-portuguese-cased-finetuned-tcu-acord...,0.054,0.0874,0.2519,0.1022
2,bert-base-portuguese-cased_mean,0.076,0.1275,0.328,0.139
3,bert-base-portuguese-cased_cls,0.038,0.0638,0.1932,0.0669
4,bert-large-portuguese-cased_mean,0.104,0.1744,0.42,0.2041
5,bert-large-portuguese-cased_cls,0.017,0.0288,0.0648,0.0283
6,Legal-BERTimbau-sts-large-ma-v3_mean,0.119,0.1956,0.4689,0.2344
7,Legal-BERTimbau-sts-large-ma-v3_cls,0.064,0.1078,0.3048,0.1314
8,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.216,0.353,0.7162,0.4293
9,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.161,0.2642,0.594,0.3197


Resultados para conjunto de query 3 e k=20


Unnamed: 0,Modelo,P@20,R@20,MRR@20,nDCG@20
0,bert-base-portuguese-cased-finetuned-tcu-acord...,0.152,0.2695,0.6134,0.3266
1,bert-base-portuguese-cased-finetuned-tcu-acord...,0.085,0.1514,0.3652,0.1829
2,bert-base-portuguese-cased_mean,0.134,0.2396,0.6066,0.3045
3,bert-base-portuguese-cased_cls,0.044,0.0789,0.1809,0.0888
4,bert-large-portuguese-cased_mean,0.145,0.2562,0.6126,0.3145
5,bert-large-portuguese-cased_cls,0.031,0.056,0.145,0.0614
6,Legal-BERTimbau-sts-large-ma-v3_mean,0.136,0.2406,0.5012,0.2742
7,Legal-BERTimbau-sts-large-ma-v3_cls,0.087,0.1532,0.3774,0.1785
8,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.255,0.4454,0.8718,0.5224
9,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.222,0.3917,0.7767,0.4539


In [13]:
# Export and display the per-model means over all queries for each cutoff k.
for k in [20]:
    arquivo_csv = f'{PASTA_RESULTADO_CADERNO}metricas-k-{k}.csv'
    df_metricas = compara_metricas_todas_queries(k)
    df_metricas.to_csv(arquivo_csv, index=False)
    display(df_metricas)

Unnamed: 0,Modelo,P@20,R@20,MRR@20,nDCG@20
0,bert-base-portuguese-cased-finetuned-tcu-acord...,0.088,0.1518,0.3867,0.1801
1,bert-base-portuguese-cased-finetuned-tcu-acord...,0.0637,0.1066,0.2778,0.1237
2,bert-base-portuguese-cased_mean,0.0807,0.139,0.3732,0.1695
3,bert-base-portuguese-cased_cls,0.0397,0.0666,0.164,0.0698
4,bert-large-portuguese-cased_mean,0.1063,0.1825,0.4146,0.2099
5,bert-large-portuguese-cased_cls,0.025,0.0433,0.1232,0.0471
6,Legal-BERTimbau-sts-large-ma-v3_mean,0.1117,0.1884,0.4224,0.2154
7,Legal-BERTimbau-sts-large-ma-v3_cls,0.0577,0.0984,0.2726,0.1186
8,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.2033,0.3415,0.6717,0.4011
9,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.16,0.2716,0.5457,0.3124


In [14]:
# Persist mapa_metricas so the next run can skip the keys already computed.
# Reuse ARQUIVO_MAPA_METRICAS (the path the map is loaded from) instead of
# re-deriving it, so the load and save locations cannot drift apart.
caminho_arquivo = ARQUIVO_MAPA_METRICAS
os.makedirs(PASTA_RESULTADO_CADERNO, exist_ok=True)
with open(caminho_arquivo, 'wb') as arquivo_pickle:
    pickle.dump(mapa_metricas, arquivo_pickle)

In [15]:
# Display the full metrics map: one per-query metrics table for each model key.
mapa_metricas

{'bert-base-portuguese-cased-finetuned-tcu-acordaos_mean':      QUERY_KEY  P@5  P@10  P@20  P@50     R@5    R@10    R@20    R@50   MRR@5  \
 0            1  0.0   0.0  0.05  0.02  0.0000  0.0000  0.0667  0.0667  0.0000   
 1            2  0.0   0.0  0.05  0.02  0.0000  0.0000  0.1429  0.1429  0.0000   
 2            3  0.0   0.0  0.00  0.00  0.0000  0.0000  0.0000  0.0000  0.0000   
 3            4  0.2   0.2  0.10  0.06  0.0769  0.1538  0.1538  0.2308  0.3333   
 4            5  0.2   0.1  0.10  0.04  0.0769  0.0769  0.1538  0.1538  0.5000   
 ..         ...  ...   ...   ...   ...     ...     ...     ...     ...     ...   
 145        146  0.2   0.1  0.10  0.06  0.0833  0.0833  0.1667  0.2500  1.0000   
 146        147  0.0   0.0  0.05  0.04  0.0000  0.0000  0.0714  0.1429  0.0000   
 147        148  0.0   0.1  0.15  0.10  0.0000  0.0769  0.2308  0.3846  0.0000   
 148        149  0.2   0.1  0.05  0.06  0.0833  0.0833  0.0833  0.2500  1.0000   
 149        150  0.2   0.1  0.10  0.08  