# Caderno 10 - Compila todas as métricas.

As tabelas cobrem as combinações dos 3 conjuntos de queries com k = 5, 10 e 20.

In [2]:
import pandas as pd

# Identifiers of the models whose search results are evaluated
MODELOS = [
    'Luciano/bert-base-portuguese-cased-finetuned-tcu-acordaos',
    'neuralmind/bert-base-portuguese-cased',
    'neuralmind/bert-large-portuguese-cased',
    'rufimelo/Legal-BERTimbau-sts-large-ma-v3',
    'stjiris/bert-large-portuguese-cased-legal-mlm-sts-v1.0',
    'stjiris/bert-large-portuguese-cased-legal-mlm-nli-sts-v1',
    'sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
    'text-embedding-ada-002',
    'text-embedding-3-small',
    'text-embedding-3-large',
]

# Hidden-layer types to process; append 'cls_hidden_state' to the list
# to process that variant as well.
TIPOS_CAMADA_OCULTA = ['mean_hidden_state']

# Input/output locations, all built from the data folder
PASTA_DADOS = './dados/'
PASTA_RESULTADO_CADERNO = PASTA_DADOS + 'outputs/10_compila_todas_metricas/'
ARQUIVO_METRICAS_BM25 = PASTA_DADOS + 'outputs/4_metricas_bm25_padrao/metricas_bm25.pickle'
ARQUIVO_METRICAS_ATUAIS = PASTA_DADOS + 'outputs/3_calcula_metricas_atuais/'
ARQUIVO_METRICAS_ATUAIS_SELECT = ARQUIVO_METRICAS_ATUAIS + 'metricas_atuais_select.pickle'
ARQUIVO_METRICAS_ATUAIS_SELECT_SWAN = ARQUIVO_METRICAS_ATUAIS + 'metricas_atuais_select_swan.pickle'
ARQUIVO_METRICAS_ATUAIS_SELECT_SWAN_SYNONYM = ARQUIVO_METRICAS_ATUAIS + 'metricas_atuais_select_swan_synonym.pickle'
# Cache of every metric table compiled by this notebook
ARQUIVO_MAPA_METRICAS = PASTA_RESULTADO_CADERNO + 'mapa_metricas.pickle'

## 1. Carrega dados de jurisprudência

In [4]:
import pandas as pd
from metricas import metricas

# The JURIS folder used here is not the original one: it is the output of
# notebook 1 (the documents are already filtered).
PASTA_JURIS_TCU = f'{PASTA_DADOS}outputs/1_tratamento_juris_tcu/'


def carrega_juris_tcu():
    """Read the treated TCU jurisprudence CSV files (pipe-separated).

    Returns:
        tuple: ``(doc, query, qrel)`` DataFrames. ``doc`` is the
        concatenation of the four ``doc_tratado_parte_N.csv`` files,
        re-indexed from zero.
    """
    def _le_csv(nome_arquivo):
        # Every file lives in PASTA_JURIS_TCU and uses '|' as separator
        return pd.read_csv(f'{PASTA_JURIS_TCU}{nome_arquivo}', sep='|')

    partes = [_le_csv(f'doc_tratado_parte_{numero}.csv') for numero in range(1, 5)]
    doc = pd.concat(partes, ignore_index=True)
    query = _le_csv('query_tratado.csv')
    qrel = _le_csv('qrel_tratado.csv')

    return doc, query, qrel


docs, queries, qrels = carrega_juris_tcu()

## 2. Extrai as métricas

Carrega a lista dos resultados das queries realizadas no banco vetorial.

In [6]:
import pickle

def _carrega_pickle(caminho):
    """Return the object unpickled from the file at ``caminho``."""
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(caminho, 'rb') as arquivo:
        return pickle.load(arquivo)


# Start from the metrics map saved earlier, or from an empty map when
# the file does not exist
try:
    mapa_metricas = _carrega_pickle(ARQUIVO_MAPA_METRICAS)
except FileNotFoundError:
    mapa_metricas = {}

# Metrics of the standard BM25 run
metricas_bm25_padrao = _carrega_pickle(ARQUIVO_METRICAS_BM25)
mapa_metricas['bm25_padrao'] = metricas_bm25_padrao

# Metrics of the three "atuais" variants
metricas_atuais_select = _carrega_pickle(ARQUIVO_METRICAS_ATUAIS_SELECT)
metricas_atuais_select_swan = _carrega_pickle(ARQUIVO_METRICAS_ATUAIS_SELECT_SWAN)
metricas_atuais_select_swan_synonym = _carrega_pickle(ARQUIVO_METRICAS_ATUAIS_SELECT_SWAN_SYNONYM)

mapa_metricas.update({
    'select': metricas_atuais_select,
    'select_swan': metricas_atuais_select_swan,
    'select_swan_synonym': metricas_atuais_select_swan_synonym,
})


In [7]:
# Resolves a (possibly partial) document key to the full key stored in docs
def find_key(doc_key, docs):
    """Return the entry of ``docs['KEY']`` that corresponds to ``doc_key``.

    Candidates are tried from the most to the least specific, and the
    first row that satisfies a rule wins:

    1. a KEY equal to ``doc_key``;
    2. a KEY containing ``doc_key`` as a whole token, i.e. not glued to
       another letter or digit (so ``'12'`` resolves to ``'X-12'`` and
       not to ``'X-123'``);
    3. a KEY containing ``doc_key`` as a plain substring.

    ``doc_key`` is always matched literally, never as a regular
    expression. Missing (NaN/None) keys never match.

    Args:
        doc_key (str): key, or key fragment, to look up.
        docs (pandas.DataFrame): frame with a string column ``'KEY'``.

    Returns:
        The matching value of ``docs['KEY']``, or ``doc_key`` itself when
        no row matches.
    """
    import re

    chaves = docs['KEY']

    exato = chaves == doc_key
    if exato.any():
        return chaves[exato].iloc[0]

    # Lookarounds reject matches that continue into another letter/digit
    padrao = rf'(?<![0-9A-Za-z]){re.escape(doc_key)}(?![0-9A-Za-z])'
    delimitado = chaves.str.contains(padrao, na=False)
    if delimitado.any():
        return chaves[delimitado].iloc[0]

    # Last resort: first key that merely contains the text
    contido = chaves.str.contains(doc_key, regex=False, na=False)
    if contido.any():
        return chaves[contido].iloc[0]

    return doc_key

# Builds the results DataFrame in the format expected by the metrics function
def processa_resultado(I, docs):
    """Turn a matrix of retrieved document ids into a long results table.

    Row ``i`` of ``I`` holds the documents returned for query ``i + 1``
    and column ``j`` holds the document at rank ``j + 1``.

    Args:
        I: 2-D array (or sequence of sequences) of document identifiers,
            one row per query.
        docs (pandas.DataFrame): documents frame passed on to ``find_key``
            to resolve each identifier to a full document key.

    Returns:
        pandas.DataFrame: columns ``QUERY_KEY`` (1-based row number),
        ``DOC_KEY`` (identifier converted to str and resolved through
        ``find_key``) and ``RANK`` (1-based column number).
    """
    registros = [
        (query_key, doc_id, rank)
        for query_key, linha in enumerate(I, start=1)
        for rank, doc_id in enumerate(linha, start=1)
    ]
    df_resultados = pd.DataFrame(registros, columns=["QUERY_KEY", "DOC_KEY", "RANK"])

    doc_keys = df_resultados['DOC_KEY'].astype(str)

    # find_key scans the whole docs frame, so resolve each distinct key
    # once instead of once per result row.
    chave_resolvida = {chave: find_key(chave, docs) for chave in doc_keys.unique()}
    df_resultados['DOC_KEY'] = doc_keys.map(lambda chave: chave_resolvida[chave])

    return df_resultados

# Checks whether a key already exists in a map
def verifica_chave(mapa_metricas, chave):
    """Tell whether ``chave`` is already present in ``mapa_metricas``.

    Args:
        mapa_metricas: container supporting the ``in`` operator.
        chave: key to look for.

    Returns:
        bool: result of ``chave in mapa_metricas``.
    """
    ja_existe = chave in mapa_metricas
    return ja_existe

In [8]:
import pickle
from metricas import metricas
from tqdm import tqdm

for modelo in tqdm(MODELOS, desc='Processando MODELOS'):

    # Only the last path component of the model id is used in folder,
    # file and key names
    caminho_modelo = modelo.split("/")[-1]
    pasta_resultado_busca = f'{PASTA_DADOS}outputs/8_armazena_dados_em_banco_vetorial/{caminho_modelo}/'

    for tipo_camada_oculta in tqdm(TIPOS_CAMADA_OCULTA, desc=f'Processando {modelo}', leave=False):

        # Text before the first underscore, e.g. 'mean_hidden_state' -> 'mean'
        tipo = tipo_camada_oculta.split('_')[0]
        chave_metricas = f'{caminho_modelo}_{tipo}'

        # Metrics already present in mapa_metricas are not recomputed
        if not verifica_chave(mapa_metricas, chave_metricas):
            arquivo_resultado_busca = f'{pasta_resultado_busca}{caminho_modelo}_{tipo_camada_oculta}_resultado_query.pickle'

            # Search results saved for this model / hidden-layer type
            with open(arquivo_resultado_busca, 'rb') as arquivo:
                I = pickle.load(arquivo)

            df_resultados = processa_resultado(I, docs)
            metrica_calculada = metricas(df_resultados, qrels, aproximacao_trec_eval=True)
            mapa_metricas[chave_metricas] = metrica_calculada

Processando MODELOS:   0%|          | 0/10 [00:00<?, ?it/s]
Processando Luciano/bert-base-portuguese-cased-finetuned-tcu-acordaos:   0%|          | 0/1 [00:00<?, ?it/s][A
                                                                                                            [A
Processando neuralmind/bert-base-portuguese-cased:   0%|          | 0/1 [00:00<?, ?it/s][A
                                                                                        [A
Processando neuralmind/bert-large-portuguese-cased:   0%|          | 0/1 [00:00<?, ?it/s][A
                                                                                         [A
Processando rufimelo/Legal-BERTimbau-sts-large-ma-v3:   0%|          | 0/1 [00:00<?, ?it/s][A
                                                                                           [A
Processando stjiris/bert-large-portuguese-cased-legal-mlm-sts-v1.0:   0%|          | 0/1 [00:00<?, ?it/s][A
                                               

In [9]:
# Sanity check: relies on `caminho_modelo` and `tipo` left over from the last
# iteration of the model loop; shows whether that key is now in mapa_metricas.
verifica_chave(mapa_metricas,f'{caminho_modelo}_{tipo}')

True

## 3. Exibe as métricas

In [11]:
# Mean-metric tables for one query set or for all queries, at a cutoff k.
# Query sets are consecutive blocks of 50 rows of each metrics table:
# set 1 = rows 0:50, set 2 = rows 50:100, set 3 = rows 100:150.
# (The metrics were generated for k = 5, 10, 20 and 50.)

def _tabela_metricas(mapa, k, recorte=None):
    """Build one row of mean P/R/MRR/nDCG@k per entry of ``mapa``.

    Args:
        mapa (dict): name -> per-query metrics DataFrame.
        k (int): cutoff; selects the ``P@k``, ``R@k``, ``MRR@k`` and
            ``nDCG@k`` columns.
        recorte (slice | None): positional row slice applied to every
            metrics DataFrame before averaging; ``None`` keeps all rows.

    Returns:
        pandas.DataFrame: columns ``Modelo`` plus the four metric columns.
    """
    colunas = [f'P@{k}', f'R@{k}', f'MRR@{k}', f'nDCG@{k}']
    linhas = []
    for nome, df_metricas in mapa.items():
        if recorte is not None:
            df_metricas = df_metricas.iloc[recorte]
        medias = df_metricas[colunas].mean()
        linhas.append([nome, *medias])
    return pd.DataFrame(linhas, columns=['Modelo', *colunas])


def compara_metricas(con_query, k, mapa=None, tamanho_conjunto=50):
    """Compare the mean metrics of every entry for a single query set.

    Args:
        con_query (int): 1-based query-set number.
        k (int): metric cutoff.
        mapa (dict | None): name -> metrics DataFrame; defaults to the
            notebook-level ``mapa_metricas``.
        tamanho_conjunto (int): number of consecutive rows in each query
            set (50 by default).

    Returns:
        pandas.DataFrame: one row per entry with ``Modelo``, ``P@k``,
        ``R@k``, ``MRR@k`` and ``nDCG@k`` averaged over the set's rows.

    Raises:
        ValueError: if ``con_query`` is smaller than 1.
    """
    if con_query < 1:
        raise ValueError(f'con_query must be >= 1, got {con_query}')
    if mapa is None:
        mapa = mapa_metricas
    inicio = tamanho_conjunto * (con_query - 1)
    return _tabela_metricas(mapa, k, slice(inicio, inicio + tamanho_conjunto))


def compara_metricas_todas_queries(k, mapa=None):
    """Compare the mean metrics of every entry over all queries.

    Args:
        k (int): metric cutoff.
        mapa (dict | None): name -> metrics DataFrame; defaults to the
            notebook-level ``mapa_metricas``.

    Returns:
        pandas.DataFrame: one row per entry with ``Modelo``, ``P@k``,
        ``R@k``, ``MRR@k`` and ``nDCG@k`` averaged over every row.
    """
    if mapa is None:
        mapa = mapa_metricas
    return _tabela_metricas(mapa, k)


pd.set_option('display.precision', 4)

In [12]:
# Per-query-set tables. Only k=10 is shown; use [5, 10, 20] to show every cutoff.
valores_k = [10]
for con_query in range(1, 4):
    for k in valores_k:
        print(f'Resultados para conjunto de query {con_query} e k={k}')
        display(compara_metricas(con_query, k))

Resultados para conjunto de query 1 e k=10


Unnamed: 0,Modelo,P@10,R@10,MRR@10,nDCG@10
0,bm25_padrao,0.238,0.1966,0.5386,0.2753
1,select,0.224,0.182,0.3511,0.224
2,select_swan,0.242,0.1968,0.3848,0.2481
3,select_swan_synonym,0.26,0.212,0.3963,0.2656
4,bert-base-portuguese-cased-finetuned-tcu-acord...,0.044,0.0354,0.1735,0.056
5,bert-base-portuguese-cased-finetuned-tcu-acord...,0.07,0.0541,0.2062,0.077
6,bert-base-portuguese-cased_mean,0.048,0.038,0.1805,0.0598
7,bert-base-portuguese-cased_cls,0.034,0.0251,0.1021,0.0371
8,bert-large-portuguese-cased_mean,0.074,0.0625,0.1988,0.0833
9,bert-large-portuguese-cased_cls,0.04,0.0328,0.153,0.0473


Resultados para conjunto de query 2 e k=10


Unnamed: 0,Modelo,P@10,R@10,MRR@10,nDCG@10
0,bm25_padrao,0.378,0.3176,0.8665,0.5106
1,select,0.248,0.2066,0.7948,0.409
2,select_swan,0.282,0.2348,0.8683,0.4545
3,select_swan_synonym,0.288,0.2396,0.8667,0.4579
4,bert-base-portuguese-cased-finetuned-tcu-acord...,0.11,0.0921,0.3587,0.1344
5,bert-base-portuguese-cased-finetuned-tcu-acord...,0.066,0.0538,0.2414,0.0876
6,bert-base-portuguese-cased_mean,0.094,0.0772,0.324,0.1155
7,bert-base-portuguese-cased_cls,0.048,0.0406,0.183,0.0565
8,bert-large-portuguese-cased_mean,0.156,0.1317,0.4178,0.187
9,bert-large-portuguese-cased_cls,0.02,0.0176,0.0572,0.0234


Resultados para conjunto de query 3 e k=10


Unnamed: 0,Modelo,P@10,R@10,MRR@10,nDCG@10
0,bm25_padrao,0.388,0.3451,0.9175,0.5328
1,select,0.004,0.0036,0.04,0.01
2,select_swan,0.018,0.0164,0.11,0.0372
3,select_swan_synonym,0.018,0.0164,0.11,0.0372
4,bert-base-portuguese-cased-finetuned-tcu-acord...,0.202,0.1795,0.6083,0.2881
5,bert-base-portuguese-cased-finetuned-tcu-acord...,0.128,0.1142,0.3565,0.1648
6,bert-base-portuguese-cased_mean,0.192,0.1714,0.6002,0.2714
7,bert-base-portuguese-cased_cls,0.066,0.0592,0.1758,0.0796
8,bert-large-portuguese-cased_mean,0.222,0.1964,0.607,0.2893
9,bert-large-portuguese-cased_cls,0.038,0.0354,0.136,0.0479


In [13]:
# One table over all queries for each cutoff k
for k in (5, 10, 20):
    tabela_todas_queries = compara_metricas_todas_queries(k)
    display(tabela_todas_queries)

Unnamed: 0,Modelo,P@5,R@5,MRR@5,nDCG@5
0,bm25_padrao,0.4307,0.1841,0.7674,0.4856
1,select,0.212,0.0872,0.3848,0.2428
2,select_swan,0.252,0.1037,0.4462,0.2865
3,select_swan_synonym,0.26,0.107,0.4496,0.2934
4,bert-base-portuguese-cased-finetuned-tcu-acord...,0.168,0.0732,0.3728,0.1874
5,bert-base-portuguese-cased-finetuned-tcu-acord...,0.1147,0.0479,0.2541,0.1235
6,bert-base-portuguese-cased_mean,0.152,0.0654,0.3524,0.1728
7,bert-base-portuguese-cased_cls,0.0613,0.026,0.1377,0.0642
8,bert-large-portuguese-cased_mean,0.192,0.0841,0.392,0.2074
9,bert-large-portuguese-cased_cls,0.044,0.0191,0.1044,0.0469


Unnamed: 0,Modelo,P@10,R@10,MRR@10,nDCG@10
0,bm25_padrao,0.3347,0.2864,0.7742,0.4396
1,select,0.1587,0.1308,0.3953,0.2143
2,select_swan,0.1807,0.1493,0.4544,0.2466
3,select_swan_synonym,0.1887,0.156,0.4576,0.2536
4,bert-base-portuguese-cased-finetuned-tcu-acord...,0.1187,0.1024,0.3802,0.1595
5,bert-base-portuguese-cased-finetuned-tcu-acord...,0.088,0.074,0.2681,0.1098
6,bert-base-portuguese-cased_mean,0.1113,0.0956,0.3682,0.1489
7,bert-base-portuguese-cased_cls,0.0493,0.0416,0.1536,0.0577
8,bert-large-portuguese-cased_mean,0.1507,0.1302,0.4079,0.1865
9,bert-large-portuguese-cased_cls,0.0327,0.0286,0.1154,0.0395


Unnamed: 0,Modelo,P@20,R@20,MRR@20,nDCG@20
0,bm25_padrao,0.2497,0.4258,0.7762,0.5004
1,select,0.112,0.1865,0.3994,0.2366
2,select_swan,0.1243,0.2074,0.457,0.2683
3,select_swan_synonym,0.128,0.213,0.4603,0.2746
4,bert-base-portuguese-cased-finetuned-tcu-acord...,0.088,0.1518,0.3867,0.1801
5,bert-base-portuguese-cased-finetuned-tcu-acord...,0.0637,0.1066,0.2778,0.1237
6,bert-base-portuguese-cased_mean,0.0807,0.139,0.3732,0.1695
7,bert-base-portuguese-cased_cls,0.0397,0.0666,0.164,0.0698
8,bert-large-portuguese-cased_mean,0.1063,0.1825,0.4146,0.2099
9,bert-large-portuguese-cased_cls,0.025,0.0433,0.1232,0.0471


In [14]:
import os

# Save mapa_metricas to the same file the loading cell reads it back from
caminho_arquivo = ARQUIVO_MAPA_METRICAS
# The output folder may not exist yet on a first run
os.makedirs(PASTA_RESULTADO_CADERNO, exist_ok=True)
with open(caminho_arquivo, 'wb') as arquivo_pickle:
    pickle.dump(mapa_metricas, arquivo_pickle)

In [15]:
# Displays the whole metrics map (one per-query DataFrame per entry).
# NOTE(review): this is a large output; consider showing only the keys.
mapa_metricas

{'bm25_padrao':      QUERY_KEY  P@5  P@10  P@20  P@50     R@5    R@10    R@20    R@50  MRR@5  \
 0            1  0.6   0.3  0.25  0.16  0.2000  0.2000  0.3333  0.5333    1.0   
 1            2  0.4   0.4  0.25  0.12  0.2857  0.5714  0.7143  0.8571    1.0   
 2            3  0.0   0.0  0.00  0.06  0.0000  0.0000  0.0000  0.2000    0.0   
 3            4  0.2   0.3  0.25  0.20  0.0769  0.2308  0.3846  0.7692    0.5   
 4            5  0.0   0.1  0.15  0.08  0.0000  0.0769  0.2308  0.3077    0.0   
 ..         ...  ...   ...   ...   ...     ...     ...     ...     ...    ...   
 145        146  0.4   0.2  0.15  0.08  0.1667  0.1667  0.2500  0.3333    1.0   
 146        147  0.2   0.1  0.15  0.06  0.0714  0.0714  0.2143  0.2143    1.0   
 147        148  0.4   0.3  0.25  0.14  0.1538  0.2308  0.3846  0.5385    1.0   
 148        149  0.2   0.4  0.30  0.18  0.0833  0.3333  0.5000  0.7500    1.0   
 149        150  0.8   0.5  0.35  0.16  0.3636  0.4545  0.6364  0.7273    1.0   
 
      MRR@1