# Caderno 11 - Compila todas as métricas para o artigo.

As tabelas são as combinações dos conjuntos de queries (3 conjuntos) e k = [5, 10].

In [13]:
import os

import pandas as pd

# Models whose search results are compiled in this notebook
MODELOS = [
    'Luciano/bert-base-portuguese-cased-finetuned-tcu-acordaos',
    'neuralmind/bert-large-portuguese-cased',
    'rufimelo/Legal-BERTimbau-sts-large-ma-v3',
    'stjiris/bert-large-portuguese-cased-legal-mlm-sts-v1.0',
    'sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
    'text-embedding-3-small',
    'text-embedding-3-large',
]

# Hidden-layer type(s) to process
TIPOS_CAMADA_OCULTA = ['mean_hidden_state']

# Input and output locations, all derived from the data folder
PASTA_DADOS = './dados/'
PASTA_RESULTADO_CADERNO = f'{PASTA_DADOS}outputs/11_compila_todas_metricas_artigo/'
ARQUIVO_METRICAS_BM25 = f'{PASTA_DADOS}outputs/4_metricas_bm25_padrao/metricas_bm25.pickle'
# NOTE(review): despite the ARQUIVO_ prefix this value is a folder path — confirm intended use
ARQUIVO_METRICAS_ATUAIS = f'{PASTA_DADOS}outputs/3_calcula_metricas_atuais/'
ARQUIVO_MAPA_METRICAS = f'{PASTA_RESULTADO_CADERNO}mapa_metricas.pickle'

## 1. Carrega dados de jurisprudência

In [3]:
import pandas as pd
from metricas import metricas

# The JURIS folder used here is not the original one: it holds the output of notebook 1 (documents already filtered)
PASTA_JURIS_TCU = f'{PASTA_DADOS}outputs/1_tratamento_juris_tcu/'

def carrega_juris_tcu():
    """Read the treated documents, queries and qrels from ``PASTA_JURIS_TCU``.

    The documents are stored in four pipe-separated CSV parts, which are
    concatenated (with a fresh index) into a single DataFrame.

    Returns:
        tuple: ``(doc, query, qrel)`` DataFrames, in that order.
    """
    nomes_partes = (
        'doc_tratado_parte_1.csv',
        'doc_tratado_parte_2.csv',
        'doc_tratado_parte_3.csv',
        'doc_tratado_parte_4.csv',
    )
    partes = [pd.read_csv(f'{PASTA_JURIS_TCU}{nome}', sep='|') for nome in nomes_partes]
    doc = pd.concat(partes, ignore_index=True)

    query = pd.read_csv(f'{PASTA_JURIS_TCU}query_tratado.csv', sep='|')
    qrel = pd.read_csv(f'{PASTA_JURIS_TCU}qrel_tratado.csv', sep='|')

    return doc, query, qrel

# Load everything once; `docs` and `qrels` are used by the metric-extraction cell further down
docs, queries, qrels = carrega_juris_tcu()

## 2. Extrai as métricas

Carrega a lista dos resultados das queries realizadas no banco vetorial.

In [7]:
import pickle

# Load mapa_metricas from ARQUIVO_MAPA_METRICAS (written by the last cell of this notebook);
# when the file does not exist yet, start from an empty map
try:
    with open(ARQUIVO_MAPA_METRICAS, 'rb') as arquivo:
        mapa_metricas = pickle.load(arquivo)
except FileNotFoundError:
    mapa_metricas = {} 

# Load the bm25_padrao metrics
with open(ARQUIVO_METRICAS_BM25, 'rb') as arquivo:
    metricas_bm25_padrao = pickle.load(arquivo)
    
# Store (or overwrite) the BM25 metrics under the 'bm25_padrao' key
mapa_metricas['bm25_padrao'] = metricas_bm25_padrao

In [9]:
def find_key(doc_key, docs):
    """Find the value of ``docs['KEY']`` that corresponds to ``doc_key``.

    Args:
        doc_key (str): identifier to look up.
        docs (pandas.DataFrame): frame with a string column named ``'KEY'``.

    Returns:
        ``doc_key`` itself when it is exactly equal to one of the keys, or when
        no key contains it; otherwise the first key (in ``docs`` order) that
        contains ``doc_key`` as a substring.
    """
    # regex=False: doc_key is literal text. With the default regex matching,
    # "1.5" would also match "105" and an identifier such as "(" would raise.
    contem = docs['KEY'].str.contains(doc_key, na=False, regex=False)
    candidatos = docs.loc[contem, 'KEY']

    if candidatos.empty:
        return doc_key

    # An exact match wins over an earlier key that merely contains doc_key.
    if (candidatos == doc_key).any():
        return doc_key

    # NOTE(review): substring matching is still ambiguous (e.g. "12" is contained
    # in "...-112"); confirm the identifier format produced by the search step.
    return candidatos.iloc[0]

def processa_resultado(I, docs):
    """Build the results DataFrame in the layout passed to the metrics function.

    Args:
        I: 2-D search result indexed as ``I[query_idx, rank_idx]``; it must
            support ``len(I)``, ``len(I[0])`` and two-index access
            (presumably a NumPy array — confirm against the search step).
        docs (pandas.DataFrame): documents frame with a ``'KEY'`` column, used
            by ``find_key`` to resolve each identifier.

    Returns:
        pandas.DataFrame: one row per (query, rank) pair with columns
        ``QUERY_KEY`` (1-based row position in ``I``), ``DOC_KEY`` (identifier
        resolved through ``find_key``) and ``RANK`` (1-based). An empty ``I``
        produces an empty frame instead of raising ``IndexError``.
    """
    n_consultas = len(I)
    # Only look at I[0] when there is at least one query row.
    n_posicoes = len(I[0]) if n_consultas else 0

    posicoes = [(q, r) for q in range(n_consultas) for r in range(n_posicoes)]

    df_resultados = pd.DataFrame({
        "QUERY_KEY": [q + 1 for q, _ in posicoes],
        "DOC_KEY": [I[q, r] for q, r in posicoes],
        "RANK": [r + 1 for _, r in posicoes],
    })

    df_resultados['DOC_KEY'] = df_resultados['DOC_KEY'].astype(str)

    # find_key scans the whole docs frame, so resolve each distinct identifier
    # once and map the result back, instead of rescanning for every row.
    chaves_resolvidas = {
        doc_key: find_key(doc_key, docs)
        for doc_key in df_resultados['DOC_KEY'].unique()
    }
    df_resultados['DOC_KEY'] = df_resultados['DOC_KEY'].map(chaves_resolvidas)

    return df_resultados

def verifica_chave(mapa_metricas, chave):
    """Tell whether ``chave`` is already present in ``mapa_metricas``.

    Args:
        mapa_metricas: container tested with the ``in`` operator (a dict in this notebook).
        chave: key to look for.

    Returns:
        bool: result of ``chave in mapa_metricas``.
    """
    ja_existe = chave in mapa_metricas
    return ja_existe

In [15]:
import pickle
from metricas import metricas
from tqdm import tqdm

for modelo in tqdm(MODELOS, desc='Processando MODELOS'):

    # The text after the last "/" of the model identifier names its results folder
    caminho_modelo = modelo.split("/")[-1]
    pasta_resultado_busca = f'{PASTA_DADOS}outputs/8_armazena_dados_em_banco_vetorial/{caminho_modelo}/'

    for tipo_camada_oculta in tqdm(TIPOS_CAMADA_OCULTA, desc=f'Processando {modelo}', leave=False):

        # Short layer tag, e.g. 'mean' for 'mean_hidden_state'
        tipo = tipo_camada_oculta.split('_')[0]
        chave_metricas = f'{caminho_modelo}_{tipo}'

        # Only compute metrics for models that are not in mapa_metricas yet
        if not verifica_chave(mapa_metricas, chave_metricas):
            arquivo_resultado_busca = f'{pasta_resultado_busca}{caminho_modelo}_{tipo_camada_oculta}_resultado_query.pickle'

            # Search result saved for this model and layer type
            with open(arquivo_resultado_busca, 'rb') as arquivo:
                I = pickle.load(arquivo)

            df_resultados = processa_resultado(I, docs)
            metrica_calculada = metricas(df_resultados, qrels, aproximacao_trec_eval=True)
            mapa_metricas[chave_metricas] = metrica_calculada

Processando MODELOS:   0%|          | 0/7 [00:00<?, ?it/s]
Processando Luciano/bert-base-portuguese-cased-finetuned-tcu-acordaos:   0%|          | 0/1 [00:00<?, ?it/s][A
                                                                                                            [A
Processando neuralmind/bert-large-portuguese-cased:   0%|          | 0/1 [00:00<?, ?it/s][A
                                                                                         [A
Processando rufimelo/Legal-BERTimbau-sts-large-ma-v3:   0%|          | 0/1 [00:00<?, ?it/s][A
                                                                                           [A
Processando stjiris/bert-large-portuguese-cased-legal-mlm-sts-v1.0:   0%|          | 0/1 [00:00<?, ?it/s][A
                                                                                                         [A
Processando sentence-transformers/paraphrase-multilingual-mpnet-base-v2:   0%|          | 0/1 [00:00<?, ?it/s][A
Processan

In [17]:
# Sanity check: uses `caminho_modelo` and `tipo` as left by the last iteration of the loop in the previous cell
verifica_chave(mapa_metricas,f'{caminho_modelo}_{tipo}')

True

## 3. Exibe as métricas

In [19]:
def compara_metricas(con_query, k, mapa=None, tamanho_conjunto=50):
    """Build a per-model table of mean metrics for one query set and one cutoff k.

    Query sets are consecutive row ranges of each metrics DataFrame: with the
    default size of 50, set 1 is rows 0:50, set 2 is rows 50:100 and set 3 is
    rows 100:150. The original note on this cell states that metrics were
    generated for k = 5, 10, 20 and 50.

    Args:
        con_query (int): 1-based number of the query set.
        k (int): cutoff; columns ``P@k``, ``R@k``, ``MRR@k`` and ``nDCG@k`` are read.
        mapa (dict, optional): model name -> per-query metrics DataFrame.
            Defaults to the notebook-level ``mapa_metricas``.
        tamanho_conjunto (int, optional): number of queries per set. Defaults to 50.

    Returns:
        pandas.DataFrame: one row per model with columns ``Modelo``, ``P@k``,
        ``R@k``, ``MRR@k`` and ``nDCG@k`` holding the mean over the query set.

    Raises:
        ValueError: if ``con_query`` is smaller than 1.
        KeyError: if a metrics DataFrame lacks one of the requested columns.
    """
    if mapa is None:
        mapa = mapa_metricas
    if con_query < 1:
        raise ValueError(f'con_query must be >= 1, got {con_query}')

    inicio = tamanho_conjunto * (con_query - 1)
    fim = inicio + tamanho_conjunto
    colunas = [f"P@{k}", f"R@{k}", f"MRR@{k}", f"nDCG@{k}"]

    linhas = []
    for nome, df_modelo in mapa.items():
        # Positional row slice; only the mean is needed, so skip describe().
        medias = df_modelo.iloc[inicio:fim][colunas].mean()
        linhas.append({"Modelo": nome, **medias.to_dict()})

    return pd.DataFrame(linhas, columns=["Modelo"] + colunas)

def compara_metricas_todas_queries(k, mapa=None):
    """Build a per-model table of mean metrics over all queries for one cutoff k.

    Args:
        k (int): cutoff; columns ``P@k``, ``R@k``, ``MRR@k`` and ``nDCG@k`` are read.
        mapa (dict, optional): model name -> per-query metrics DataFrame.
            Defaults to the notebook-level ``mapa_metricas``.

    Returns:
        pandas.DataFrame: one row per model with columns ``Modelo``, ``P@k``,
        ``R@k``, ``MRR@k`` and ``nDCG@k`` holding the mean over every row.

    Raises:
        KeyError: if a metrics DataFrame lacks one of the requested columns.
    """
    if mapa is None:
        mapa = mapa_metricas

    colunas = [f"P@{k}", f"R@{k}", f"MRR@{k}", f"nDCG@{k}"]

    linhas = []
    for nome, df_modelo in mapa.items():
        # Only the mean is needed, so compute it directly instead of describe().
        medias = df_modelo[colunas].mean()
        linhas.append({"Modelo": nome, **medias.to_dict()})

    return pd.DataFrame(linhas, columns=["Modelo"] + colunas)

# Display floats with 4 decimal places in the tables below
pd.set_option('display.precision', 4)

In [21]:
# DataFrame.to_csv does not create missing folders, so make sure the output
# folder exists before the first file of this notebook is written.
os.makedirs(PASTA_RESULTADO_CADERNO, exist_ok=True)

# One table (and one CSV file) per combination of query set and cutoff k
for con_query in [1, 2, 3]:
    for k in [5, 10]:
        print(f'Resultados para conjunto de query {con_query} e k={k}')
        df_metricas = compara_metricas(con_query, k)
        df_metricas.to_csv(f'{PASTA_RESULTADO_CADERNO}metricas-conj-{con_query}-k-{k}.csv', index=False)
        display(df_metricas)

Resultados para conjunto de query 1 e k=5


Unnamed: 0,Modelo,P@5,R@5,MRR@5,nDCG@5
0,bm25_padrao,0.272,0.1106,0.5253,0.2824
1,bert-base-portuguese-cased-finetuned-tcu-acord...,0.064,0.0253,0.1673,0.0707
2,bert-large-portuguese-cased_mean,0.084,0.0359,0.178,0.0885
3,Legal-BERTimbau-sts-large-ma-v3_mean,0.128,0.0523,0.269,0.1316
4,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.244,0.0983,0.4047,0.2419
5,paraphrase-multilingual-mpnet-base-v2_mean,0.188,0.0753,0.3337,0.1817
6,text-embedding-3-small_mean,0.52,0.2091,0.7433,0.5228
7,text-embedding-3-large_mean,0.532,0.2164,0.7523,0.5351


Resultados para conjunto de query 1 e k=10


Unnamed: 0,Modelo,P@10,R@10,MRR@10,nDCG@10
0,bm25_padrao,0.238,0.1966,0.5386,0.2753
1,bert-base-portuguese-cased-finetuned-tcu-acord...,0.044,0.0354,0.1735,0.056
2,bert-large-portuguese-cased_mean,0.074,0.0625,0.1988,0.0833
3,Legal-BERTimbau-sts-large-ma-v3_mean,0.108,0.088,0.2877,0.1214
4,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.202,0.1632,0.424,0.2261
5,paraphrase-multilingual-mpnet-base-v2_mean,0.146,0.1164,0.3512,0.1608
6,text-embedding-3-small_mean,0.378,0.3069,0.7489,0.4454
7,text-embedding-3-large_mean,0.408,0.3319,0.7543,0.4733


Resultados para conjunto de query 2 e k=5


Unnamed: 0,Modelo,P@5,R@5,MRR@5,nDCG@5
0,bm25_padrao,0.5,0.2077,0.862,0.5713
1,bert-base-portuguese-cased-finetuned-tcu-acord...,0.144,0.0609,0.3513,0.1522
2,bert-large-portuguese-cased_mean,0.196,0.0838,0.3997,0.2051
3,Legal-BERTimbau-sts-large-ma-v3_mean,0.224,0.0925,0.4497,0.2347
4,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.42,0.1726,0.7017,0.4397
5,paraphrase-multilingual-mpnet-base-v2_mean,0.332,0.1345,0.595,0.3455
6,text-embedding-3-small_mean,0.604,0.2508,0.8907,0.6484
7,text-embedding-3-large_mean,0.648,0.2692,0.889,0.6803


Resultados para conjunto de query 2 e k=10


Unnamed: 0,Modelo,P@10,R@10,MRR@10,nDCG@10
0,bm25_padrao,0.378,0.3176,0.8665,0.5106
1,bert-base-portuguese-cased-finetuned-tcu-acord...,0.11,0.0921,0.3587,0.1344
2,bert-large-portuguese-cased_mean,0.156,0.1317,0.4178,0.187
3,Legal-BERTimbau-sts-large-ma-v3_mean,0.166,0.1372,0.4637,0.2099
4,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.302,0.2475,0.7134,0.3851
5,paraphrase-multilingual-mpnet-base-v2_mean,0.246,0.2028,0.6051,0.3146
6,text-embedding-3-small_mean,0.468,0.3892,0.8951,0.5883
7,text-embedding-3-large_mean,0.492,0.4077,0.8923,0.6177


Resultados para conjunto de query 3 e k=5


Unnamed: 0,Modelo,P@5,R@5,MRR@5,nDCG@5
0,bm25_padrao,0.52,0.234,0.915,0.603
1,bert-base-portuguese-cased-finetuned-tcu-acord...,0.296,0.1333,0.5997,0.3392
2,bert-large-portuguese-cased_mean,0.296,0.1326,0.5983,0.3287
3,Legal-BERTimbau-sts-large-ma-v3_mean,0.236,0.106,0.4723,0.2599
4,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.468,0.2085,0.8647,0.5283
5,paraphrase-multilingual-mpnet-base-v2_mean,0.496,0.2192,0.7823,0.5351
6,text-embedding-3-small_mean,0.624,0.2775,0.9133,0.6752
7,text-embedding-3-large_mean,0.632,0.2774,0.9147,0.6882


Resultados para conjunto de query 3 e k=10


Unnamed: 0,Modelo,P@10,R@10,MRR@10,nDCG@10
0,bm25_padrao,0.388,0.3451,0.9175,0.5328
1,bert-base-portuguese-cased-finetuned-tcu-acord...,0.202,0.1795,0.6083,0.2881
2,bert-large-portuguese-cased_mean,0.222,0.1964,0.607,0.2893
3,Legal-BERTimbau-sts-large-ma-v3_mean,0.182,0.1611,0.4921,0.2338
4,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.348,0.3065,0.8675,0.4603
5,paraphrase-multilingual-mpnet-base-v2_mean,0.344,0.3047,0.7919,0.4519
6,text-embedding-3-small_mean,0.482,0.4247,0.9167,0.6089
7,text-embedding-3-large_mean,0.472,0.415,0.9147,0.6084


In [13]:
# Mean metrics over all queries (no split by query set), one CSV file per cutoff k
for k in (20,):
    caminho_csv = f'{PASTA_RESULTADO_CADERNO}metricas-k-{k}.csv'
    df_metricas = compara_metricas_todas_queries(k)
    df_metricas.to_csv(caminho_csv, index=False)
    display(df_metricas)

Unnamed: 0,Modelo,P@20,R@20,MRR@20,nDCG@20
0,bert-base-portuguese-cased-finetuned-tcu-acord...,0.088,0.1518,0.3867,0.1801
1,bert-base-portuguese-cased-finetuned-tcu-acord...,0.0637,0.1066,0.2778,0.1237
2,bert-base-portuguese-cased_mean,0.0807,0.139,0.3732,0.1695
3,bert-base-portuguese-cased_cls,0.0397,0.0666,0.164,0.0698
4,bert-large-portuguese-cased_mean,0.1063,0.1825,0.4146,0.2099
5,bert-large-portuguese-cased_cls,0.025,0.0433,0.1232,0.0471
6,Legal-BERTimbau-sts-large-ma-v3_mean,0.1117,0.1884,0.4224,0.2154
7,Legal-BERTimbau-sts-large-ma-v3_cls,0.0577,0.0984,0.2726,0.1186
8,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.2033,0.3415,0.6717,0.4011
9,bert-large-portuguese-cased-legal-mlm-sts-v1.0...,0.16,0.2716,0.5457,0.3124


In [14]:
# Save mapa_metricas. Reuse ARQUIVO_MAPA_METRICAS (the path the loading cell
# reads) instead of rebuilding the same string, so load and save cannot drift apart.
caminho_arquivo = ARQUIVO_MAPA_METRICAS
with open(caminho_arquivo, 'wb') as arquivo_pickle:
    pickle.dump(mapa_metricas, arquivo_pickle)

In [15]:
# Display the whole metrics map (model key -> per-query metrics table)
mapa_metricas

{'bert-base-portuguese-cased-finetuned-tcu-acordaos_mean':      QUERY_KEY  P@5  P@10  P@20  P@50     R@5    R@10    R@20    R@50   MRR@5  \
 0            1  0.0   0.0  0.05  0.02  0.0000  0.0000  0.0667  0.0667  0.0000   
 1            2  0.0   0.0  0.05  0.02  0.0000  0.0000  0.1429  0.1429  0.0000   
 2            3  0.0   0.0  0.00  0.00  0.0000  0.0000  0.0000  0.0000  0.0000   
 3            4  0.2   0.2  0.10  0.06  0.0769  0.1538  0.1538  0.2308  0.3333   
 4            5  0.2   0.1  0.10  0.04  0.0769  0.0769  0.1538  0.1538  0.5000   
 ..         ...  ...   ...   ...   ...     ...     ...     ...     ...     ...   
 145        146  0.2   0.1  0.10  0.06  0.0833  0.0833  0.1667  0.2500  1.0000   
 146        147  0.0   0.0  0.05  0.04  0.0000  0.0000  0.0714  0.1429  0.0000   
 147        148  0.0   0.1  0.15  0.10  0.0000  0.0769  0.2308  0.3846  0.0000   
 148        149  0.2   0.1  0.05  0.06  0.0833  0.0833  0.0833  0.2500  1.0000   
 149        150  0.2   0.1  0.10  0.08  