In [34]:
import os
import pandas as pd
import json
import glob
from scipy import stats

from recsysconfident.utils.files import sort_paths_by_datetime

def find_subfolders_with_prefix(root_folder: str, prefix: str):

  subfolders = []
  for dirpath, dirnames, filenames in os.walk(root_folder):
    for dirname in dirnames:
      if dirname.startswith(prefix):
        subfolders.append(os.path.join(dirpath, dirname))
  return subfolders

def read_json(path: str) -> dict:

    with open(path, 'r') as f:
        return json.load(f)
    
def generate_latex_table_from_dataframe(df: pd.DataFrame, caption: str, label: str, columns: list):

    df = df[~df.index.str.contains("std")]
    std_columns = []
    columns1 = []
    for col in list(df.model_names):
        if "std" in col:
            std_columns.append(col)
        else:
            columns1.append(col)

    if not columns:
        columns = columns1

    for col in columns + std_columns:
        df.loc[:, col] = df[col].astype(float).round(4)

    df_bolded = df.astype(str)
    for idx, row in df[columns].iterrows():

        bold_value = row.max()

        for col in columns:
            if row[col] == bold_value:
                df_bolded.at[idx, col] = "\\textbf{"+str(row[col])+"}"

    for std_col in std_columns:
        col_name = std_col[:-4]
        formatted_col = df_bolded[col_name].astype(str) + " $ \\pm $ " + df_bolded[std_col].astype(str)
        df_bolded[col_name] = formatted_col

    df_bolded = df_bolded[columns]
    df_bolded = df_bolded.reset_index().rename(columns={'index': 'metric'})

    latex_code = df_bolded.to_latex(
        label=label,
        caption=caption,
        index=False,
        escape=False,  # Prevent escaping special characters
        column_format="c" * len(columns)  # Center align columns
    )
    return latex_code

def get_models_metrics(dataset_uris) -> dict:

    models_metrics_dfs = {}
    for path in dataset_uris:
        if "data_splits" in path:
            continue

        setup = read_json(sort_paths_by_datetime(glob.glob(f"{path}/setup-*.json"))[-1])
        model_name = setup['model_name']

        metrics_list = sort_paths_by_datetime(glob.glob(f"{path}/metrics-*.json"))
        metrics_df = pd.DataFrame.from_dict([read_json(metrics_list[-1])[split_name]])

        if model_name in models_metrics_dfs:
            models_metrics_dfs[model_name] = pd.concat([models_metrics_dfs[model_name], metrics_df], axis=0)
        else:
            models_metrics_dfs[model_name] = metrics_df
    return models_metrics_dfs



In [26]:
group_name = "learn-rank"
model_names = ['uagat', 'uamf', 'dgat', 'mf']
metrics = ["mNDCG@10", "mAP@10", "mRecall@10", "MRR@10", "mNDCG@3", "mAP@3", "mRecall@3", "MRR@3"]

split_name = "test"

datasets_uris = {
  "amazon-beauty": find_subfolders_with_prefix(f"../runs/{group_name}/", "amazon-beauty"),
  "jester-joke": find_subfolders_with_prefix(f"../runs/{group_name}/", "jester-joke"),
  "ml-1m": find_subfolders_with_prefix(f"../runs/{group_name}/", "ml-1m"),
    "rotten-tomatoes": find_subfolders_with_prefix(f"../runs/{group_name}/", "rotten-tomatoes")
}

In [45]:
metrics_ds = {}

for dataset_name in datasets_uris.keys():

    models_metrics_dfs_dict = get_models_metrics(datasets_uris[dataset_name])

    metrics_model_rank = {}

    for model_name in model_names:

        mean_metrics_df = models_metrics_dfs_dict[model_name].astype(float).mean()
        mean_metrics_df = mean_metrics_df.to_frame(name=model_name) #index: metrics names, columns: [mean]

        std_metrics_df = models_metrics_dfs_dict[model_name].astype(float).std()
        std_metrics_df = std_metrics_df.to_frame(name=f'{model_name}_std')

        if dataset_name in metrics_ds:
            metrics_ds[dataset_name] = pd.concat([metrics_ds[dataset_name], mean_metrics_df, std_metrics_df], axis=1)
        else:
            metrics_ds[dataset_name] = pd.concat([mean_metrics_df, std_metrics_df], axis=1)

        for metric in metrics:

            if not (metric in metrics_model_rank.keys()):
                metrics_model_rank[metric] = [{"model_name": model_name, "value": float(mean_metrics_df.loc[metric].values[0])}]
            else:
                metrics_model_rank[metric].append({"model_name": model_name, "value": float(mean_metrics_df.loc[metric].values[0])})

    metric_significances = []

    for metric in metrics:
        metrics_model_rank[metric] = sorted(metrics_model_rank[metric], key=lambda x: x["value"], reverse=True)

        model1, model2 = metrics_model_rank[metric][0]['model_name'], metrics_model_rank[metric][1]['model_name']
        m1_df = models_metrics_dfs_dict[model1].astype(float)
        m2_df = models_metrics_dfs_dict[model2].astype(float)

        t_stat, p_val = stats.ttest_ind(m1_df[metric], m2_df[metric], equal_var=False)
        metric_significances.append(f"{p_val < 0.05}")

    metrics_ds[dataset_name] = pd.concat([metrics_ds[dataset_name].reset_index(drop=True), pd.DataFrame({"p < 0.05": metric_significances})], axis=1)

    print(metrics_ds[dataset_name])

       uagat  uagat_std      uamf  uamf_std      dgat  dgat_std        mf  \
0   0.792683   0.063782  0.839358  0.012611  1.403568  0.094969  3.019853   
1   0.643560   0.033251  0.764375  0.014057  1.146264  0.065234  2.423954   
2   0.512850   0.018251  0.633830  0.014240  0.530086  0.015690  0.637788   
3   0.209794   0.012553  0.283522  0.017228  0.220826  0.008153  0.264058   
4   0.489426   0.023930  0.661568  0.013853  0.511136  0.025553  0.602192   
5   0.208006   0.008842  0.280524  0.018774  0.215854  0.009184  0.269636   
6   0.504544   0.034951  0.784090  0.021380  0.523636  0.042597  0.610452   
7   0.203082   0.006377  0.164318  0.020481  0.211526  0.013946  0.262164   
8   0.775250   0.039720  0.846898  0.011412  0.770504  0.017494  0.858260   
9   0.306666   0.015816  0.286262  0.015373  0.305374  0.011233  0.260834   
10  0.593272   0.035336  0.727732  0.016433  0.591648  0.018426  0.729076   
11  0.301068   0.015206  0.329360  0.007850  0.305560  0.018721  0.305448   

In [46]:
metrics_ds['amazon-beauty']

Unnamed: 0,uagat,uagat_std,uamf,uamf_std,dgat,dgat_std,mf,mf_std,p < 0.05
0,0.792683,0.063782,0.839358,0.012611,1.403568,0.094969,3.019853,0.117848,False
1,0.64356,0.033251,0.764375,0.014057,1.146264,0.065234,2.423954,0.094398,True
2,0.51285,0.018251,0.63383,0.01424,0.530086,0.01569,0.637788,0.008145,True
3,0.209794,0.012553,0.283522,0.017228,0.220826,0.008153,0.264058,0.010827,False
4,0.489426,0.02393,0.661568,0.013853,0.511136,0.025553,0.602192,0.01399,False
5,0.208006,0.008842,0.280524,0.018774,0.215854,0.009184,0.269636,0.009536,True
6,0.504544,0.034951,0.78409,0.02138,0.523636,0.042597,0.610452,0.019851,True
7,0.203082,0.006377,0.164318,0.020481,0.211526,0.013946,0.262164,0.014292,False
8,0.77525,0.03972,0.846898,0.011412,0.770504,0.017494,0.85826,0.028868,
9,0.306666,0.015816,0.286262,0.015373,0.305374,0.011233,0.260834,0.013158,


In [18]:

print(generate_latex_table_from_dataframe(metrics_ds['amazon-beauty'],
                                          'Models performance over test split of amazon-beauty dataset.',
                                          "tab:amazon-beauty-ranking", model_names))

print(generate_latex_table_from_dataframe(metrics_ds['ml-1m'], 'Models performance over test split of ml-1m dataset.', "tab:ml-1m-ranking", model_names))

print(generate_latex_table_from_dataframe(metrics_ds['jester-joke'], 'Metrics of the models in test split of jester-joke.',"tab:jester-joke-ranking", model_names))

print(generate_latex_table_from_dataframe(metrics_ds['rotten-tomatoes'], 'Metrics of the models in test split of rotten-tomatoes.',"tab:rotten-tomatoes-ranking", model_names))


\begin{table}
\caption{Models performance over test split of amazon-beauty dataset.}
\label{tab:amazon-beauty-ranking}
\begin{tabular}{cccccccccc}
\toprule
metric & uagat & uamf & dgat & mf \\
\midrule
rmse & 0.7927 $ \pm $ 0.0638 & 0.8394 $ \pm $ 0.0126 & 1.4036 $ \pm $ 0.095 & \textbf{3.0199} $ \pm $ 0.1178 \\
mae & 0.6436 $ \pm $ 0.0333 & 0.7644 $ \pm $ 0.0141 & 1.1463 $ \pm $ 0.0652 & \textbf{2.424} $ \pm $ 0.0944 \\
mNDCG@10 & 0.5128 $ \pm $ 0.0183 & 0.6338 $ \pm $ 0.0142 & 0.5301 $ \pm $ 0.0157 & \textbf{0.6378} $ \pm $ 0.0081 \\
mAP@10 & 0.4894 $ \pm $ 0.0239 & \textbf{0.6616} $ \pm $ 0.0139 & 0.5111 $ \pm $ 0.0256 & 0.6022 $ \pm $ 0.014 \\
mRecall@10 & 0.5045 $ \pm $ 0.035 & \textbf{0.7841} $ \pm $ 0.0214 & 0.5236 $ \pm $ 0.0426 & 0.6105 $ \pm $ 0.0199 \\
MRR@10 & 0.7752 $ \pm $ 0.0397 & 0.8469 $ \pm $ 0.0114 & 0.7705 $ \pm $ 0.0175 & \textbf{0.8583} $ \pm $ 0.0289 \\
mNDCG@3 & 0.5933 $ \pm $ 0.0353 & 0.7277 $ \pm $ 0.0164 & 0.5916 $ \pm $ 0.0184 & \textbf{0.7291} $ \pm $ 0.010