In [1]:
import numpy as np

import pandas as pd
pd.options.plotting.backend = "plotly"

import os
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

import warnings
warnings.filterwarnings('once')

# Processamento e seleção da melhor coluna

In [3]:
# --- Configuration -----------------------------------------------------------
# Metric column used to pick the best row for every (Dataset, Recommender) pair.
best_column = 'NDCG@10'
metric_type = ["Prec", "Rec", "F1_Score", "Hit_Rate", "NDCG"]
top_k = [3, 5, 10, 20]

# Expected layout: <main_path>/<dataset>/<recommender>/metrics.csv
main_path = "../results/metrics"
main_file = os.listdir(main_path)

# --- Load every metrics.csv into one long frame ------------------------------
dataframes = []

for dataset_name in main_file:

    dataset_path = os.path.join(main_path, dataset_name)
    # Stray files next to the dataset folders (.DS_Store, checkpoints, ...)
    # would make os.listdir raise NotADirectoryError.
    if not os.path.isdir(dataset_path):
        continue

    recommender_files = os.listdir(dataset_path)
    for recommender_name in recommender_files:

        # Built from main_path so the location is configured in a single place.
        metrics_path = os.path.join(dataset_path, recommender_name, "metrics.csv")
        if not os.path.isfile(metrics_path):
            # Warn instead of skipping silently so missing results stay visible.
            warnings.warn("metrics.csv not found, skipping: " + metrics_path)
            continue

        metrics_aux = pd.read_csv(metrics_path, sep=';')

        # Tag every row with its origin; final column order is
        # Dataset, Recommender, <columns from the CSV>.
        metrics_aux.insert(0, 'Recommender', recommender_name)
        metrics_aux.insert(0, 'Dataset', dataset_name)

        dataframes.append(metrics_aux)

if not dataframes:
    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    raise FileNotFoundError("No metrics.csv file found under '" + main_path + "'")

metrics_df = pd.concat(dataframes, ignore_index=True)

#if (best_column == 'Mean'):
#    metrics_df.insert(3,'Mean', metrics_df.mean(axis=1))

# --- Keep only the best row per (Dataset, Recommender) ------------------------
if best_column not in metrics_df.columns:
    raise Exception("Coluna '" + best_column + "' não encontrada")

# Rows without a value for best_column cannot win; dropping them first keeps
# idxmax from yielding NaN labels for groups that are entirely missing.
# dropna preserves the index labels, so they remain valid for metrics_df.loc.
best_rows = (
    metrics_df.dropna(subset=[best_column])
    .groupby(['Dataset', 'Recommender'])[best_column]
    .idxmax()
)
saida = metrics_df.loc[best_rows].reset_index(drop=True)

# Geração de gráficos

In [4]:
def function_1(x):
    """Turn a metric column name such as 'NDCG@10' into the label 'Top 10'.

    Names that contain no '@' are returned unchanged. Only the piece between
    the first and the second '@' is used for the label.
    """
    pieces = x.split('@')
    if len(pieces) <= 1:
        return x
    return "Top " + pieces[1]

# One line chart per (dataset, metric): x axis = Top-k cut-offs, one trace per
# recommender, using the best-row table `saida`.
DATASETS = metrics_df['Dataset'].unique()
METRICS = ['Hit_Rate', 'F1_Score', 'NDCG']

# Constant across figures, so it is defined once instead of once per metric.
marker_symbols = ['circle', 'square', 'diamond', 'cross', 'triangle-up', 'triangle-down', 'star', 'hexagram']

for dataset_name in DATASETS:

    print('=' * 100)
    print(dataset_name)

    for curr_metric in METRICS:

        # Keep the 'Recommender' label plus every column whose name contains the metric.
        my_regex = "Recommender|" +  curr_metric + ".*"
        df_aux = saida[saida['Dataset'] == dataset_name].filter(regex=(my_regex))

        df_aux = df_aux.set_index('Recommender')
        # function_1 must be the "Top k" variant defined right above in this
        # cell; a later cell rebinds the same name to a different function.
        df_aux = df_aux.rename(columns=function_1)

        fig = go.Figure()

        for i, recomendador in enumerate(df_aux.index):
            fig.add_trace(
                go.Scatter(
                    x=df_aux.columns,  # metric columns (Top-k labels)
                    y=df_aux.loc[recomendador],  # scores of this recommender
                    mode='lines+markers',
                    # Cycle through the symbols: plain marker_symbols[i] raised
                    # IndexError as soon as there were more than 8 recommenders.
                    marker=dict(symbol=marker_symbols[i % len(marker_symbols)], size=7, line=dict(color='black', width=0.5)),
                    name=recomendador
                )
            )

        fig.update_layout(
            title=curr_metric,
            height=500,
            width=800,
            yaxis_title="Taxa de acerto",
            xaxis_title="",
            legend_title="Recomendadores",
            font=dict(
                family="Helvetica",
                size=12,
                color="black"
            )
        )

        fig.show()

RetailRocket-Transactions


In [4]:
#for k in metric_k:
#    name = curr_metric + "@" + str(k)
#    fig.add_trace(go.Bar(x=df_aux["Recommender"], y=df_aux[name], name = name, hoverinfo='y'), x_cord, y_cord)

# Geração de tabelas - Divisão por Top-N

In [5]:
# Gera uma tabela para todos os datasets em todos os Top-N diferentes
# Retorna o melhor parametro de cada dataset

def function_1(x):
    """Return the part of a column name before its first '@'.

    For example 'NDCG@10' becomes 'NDCG'; a name without '@' is returned
    unchanged.
    """
    return x.partition('@')[0]

# Builds, for every dataset, a text report with the best parameters of each
# recommender followed by one LaTeX table per Top-N cut-off, and writes the
# whole report to <results_path>/saida.txt.

# Short metric prefixes -> names shown in the table headers.
names_dict = {"Prec": "Precision", "Rec": "Recall", "F1_Score":"F1-Score", "Hit_Rate": "Hit-Rate"}

results_path = "../latex_results"

# exist_ok replaces the separate exists() check.
os.makedirs(results_path, exist_ok=True)

main_file = os.listdir(main_path)

# Collect the pieces and join once instead of repeatedly re-copying a growing string.
report_parts = []

for dataset_name in main_file:

    saida_aux = saida[saida['Dataset'] == dataset_name]

    report_parts.append("=========== DATASET: " + dataset_name + " ===========\n\n\n")
    report_parts.append("=========== MELHORES PARÂMETROS ===========\n\n")
    # Use the per-dataset slice: the unfiltered `saida` repeated the rows of
    # every dataset under each dataset heading.
    report_parts.append(saida_aux[["Recommender", "Parameters"]].to_string())

    for curr_k in top_k:

        report_parts.append('\n\n------ Top' + str(curr_k) + ' ------\n\n')

        # Column pre-processing. The '@' anchor makes sure k=3 selects only
        # '...@3' columns and never '...@13'.
        my_regex = "Recommender|@" + str(curr_k) + "$"
        df_aux = saida_aux.filter(regex=(my_regex))
        df_aux = df_aux.round(4)
        # function_1 here is the variant that strips the '@k' suffix.
        df_aux = df_aux.rename(columns=function_1)
        df_aux = df_aux.rename(columns=names_dict)

        # Render the table as LaTeX.
        capt = "Top " + str(curr_k) + " recommendation"
        report_parts.append(df_aux.style.hide(axis="index").set_caption(capt).to_latex())

    report_parts.append("\n\n\n\n")

string_final = "".join(report_parts)

# Explicit UTF-8: the report contains accented characters, and the context
# manager closes the file even if the write fails.
with open(os.path.join(results_path, "saida.txt"), "w", encoding="utf-8") as f:
    f.write(string_final)

# Geração de tabelas - Tabela unificada

In [6]:
#comp_metric = "NDCG@10"

# Writes one LaTeX table per metric column to
# <results_path>/saida_tabela_unificada.txt. Each table has one row per dataset
# and one column per (Embedding, Recommender) pair.

# Make sure the output folder exists BEFORE opening the file. results_path is
# deliberately not reassigned here: the former in-loop override pointed to a
# different folder than the one the file had already been opened in.
os.makedirs(results_path, exist_ok=True)

with open(os.path.join(results_path, "saida_tabela_unificada.txt"), "w", encoding="utf-8") as f:

    # Skips the first three columns of `saida`.
    # NOTE(review): presumably Dataset, Recommender, Parameters - confirm against the CSV layout.
    for comp_metric in saida.columns[3:]:

        # Keep only the metric being tabulated.
        df_aux = saida.loc[:, ["Dataset", "Recommender", comp_metric]].copy()

        # Split '<embedding>_<recommender>' on the first underscore.
        # str.partition always yields exactly three columns (head, sep, tail),
        # so names with no underscore or with several of them no longer break
        # the assignment the way str.split(..., expand=True) did.
        name_parts = df_aux['Recommender'].str.partition('_')
        df_aux['Embedding'] = name_parts[0]
        # Names without an underscore use the full name for both levels.
        df_aux['Recommender'] = np.where(name_parts[1] == '', name_parts[0], name_parts[2])

        df_pivot = df_aux.pivot(index='Dataset', columns=['Embedding', 'Recommender'], values=comp_metric)
        df_pivot = df_pivot.reset_index()

        tabela_latex = (df_pivot.style.hide(axis="index")
                        # One 'l' per rendered column instead of a fixed count of nine.
                        .to_latex(column_format='l' * len(df_pivot.columns),
                                  multicol_align='l',
                                  hrules=True,
                                  position_float='centering',
                                  caption="Comparação de diversos métodos sob " + comp_metric))

        # Wrap the tabular in \resizebox so wide tables fit the text width.
        tabela_latex = tabela_latex.replace("\\begin{tabular}", "\\resizebox{\\textwidth} {!}{ \\begin{tabular}")
        tabela_latex = tabela_latex.replace("\\end{tabular}", "\\end{tabular} }")

        f.write("\n\n\n------------------  " + "MÉTRICA: " + comp_metric + " ------------------\n\n\n")
        f.write(tabela_latex)