In [1]:
import os
import pandas as pd
import json
import glob
from scipy import stats

from recsysconfident.utils.files import sort_paths_by_datetime

def find_subfolders_with_prefix(root_folder: str, prefix: str):
    """Recursively collect directories below ``root_folder`` whose name starts with ``prefix``.

    Args:
        root_folder: Directory where the recursive walk begins.
        prefix: Text a directory name must start with to be selected.

    Returns:
        list: One path per matching directory (its parent path joined with
        its name), in the order ``os.walk`` visits them.
    """
    return [
        os.path.join(parent, name)
        for parent, child_dirs, _files in os.walk(root_folder)
        for name in child_dirs
        if name.startswith(prefix)
    ]

def read_json(path: str) -> dict:
    """Load and return the JSON document stored at ``path``.

    The file is decoded explicitly as UTF-8, the standard JSON encoding, so
    the result does not depend on the platform's locale default.

    Args:
        path: Location of the JSON file.

    Returns:
        The parsed content, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)
    
def generate_latex_table_from_dataframe(df: pd.DataFrame, caption: str, label: str, columns: list):
    """Render a metrics frame as a LaTeX table with "value +/- std" cells.

    Rows whose index label contains "std" are dropped. Every column whose name
    contains "std" is treated as a standard-deviation column. Values are
    rounded to 4 decimals, the largest value of each row (among ``columns``)
    is wrapped in ``\\textbf{}`` (ties are all bolded), and each reported
    column that has a matching ``<column>_std`` column gets that deviation
    appended after a ``$ \\pm $`` separator.

    Args:
        df: Frame indexed by metric name (string index), with one column per
            model and optional ``<model>_std`` columns. It is not modified.
        caption: LaTeX caption of the table.
        label: LaTeX label of the table.
        columns: Columns to report, in order. When empty/None, every column
            whose name does not contain "std" is reported.

    Returns:
        str: LaTeX source of the table; the first column is named ``metric``.
    """
    # Copy the filtered frame so the rounding below neither raises
    # SettingWithCopyWarning nor writes through to the caller's data.
    table = df[~df.index.str.contains("std")].copy()

    std_columns = [col for col in table.columns if "std" in col]
    if not columns:
        columns = [col for col in table.columns if "std" not in col]

    for col in columns + std_columns:
        table[col] = table[col].astype(float).round(4)

    formatted = table.astype(str)

    # Bold the best (largest) value of every metric row.
    for idx, row in table[columns].iterrows():
        best_value = row.max()
        for col in columns:
            if row[col] == best_value:
                formatted.at[idx, col] = "\\textbf{" + str(row[col]) + "}"

    # Append the deviation to each reported column that has a "<column>_std"
    # companion; looking the name up explicitly avoids fragile suffix slicing.
    for col in columns:
        std_col = f"{col}_std"
        if std_col in formatted.columns:
            formatted[col] = formatted[col] + " $ \\pm $ " + formatted[std_col]

    formatted = formatted[columns].reset_index().rename(columns={'index': 'metric'})

    return formatted.to_latex(
        label=label,
        caption=caption,
        index=False,
        escape=False,  # Keep \textbf{} and $ \pm $ as raw LaTeX
        # One centred column per rendered column, INCLUDING the leading
        # "metric" column added by reset_index().
        column_format="c" * len(formatted.columns),
    )

def get_models_metrics(dataset_uris, split=None) -> dict:
    """Gather, per model, the metrics of every run folder in ``dataset_uris``.

    For each folder the last ``setup-*.json`` and ``metrics-*.json`` (as
    ordered by ``sort_paths_by_datetime`` - presumably chronological, confirm
    against ``recsysconfident.utils.files``) are read. The setup file provides
    ``model_name``; the metrics file provides the values stored under the
    selected split key.

    Args:
        dataset_uris: Iterable of run-folder paths. Paths containing
            "data_splits" are ignored.
        split: Key of the metrics JSON to read (e.g. "test"). When ``None``
            the notebook-level ``split_name`` variable is used, which keeps
            existing calls working.

    Returns:
        dict: ``model_name`` -> DataFrame with one row per run and one column
        per metric.

    Raises:
        KeyError: If a setup file lacks ``model_name`` or a metrics file lacks
            the selected split.
    """
    import warnings  # local import: keeps the block self-contained

    # Resolve the split once; the global is only a backward-compatible fallback.
    selected_split = split_name if split is None else split

    frames_by_model = {}
    for path in dataset_uris:
        if "data_splits" in path:
            continue

        setup_paths = glob.glob(f"{path}/setup-*.json")
        metrics_paths = glob.glob(f"{path}/metrics-*.json")
        if not setup_paths or not metrics_paths:
            # An incomplete run would otherwise fail with a bare IndexError on [-1].
            warnings.warn(f"Skipping run folder without setup/metrics files: {path}")
            continue

        setup = read_json(sort_paths_by_datetime(setup_paths)[-1])
        model_name = setup['model_name']

        latest_metrics = read_json(sort_paths_by_datetime(metrics_paths)[-1])
        metrics_df = pd.DataFrame([latest_metrics[selected_split]])
        frames_by_model.setdefault(model_name, []).append(metrics_df)

    # Concatenate once per model instead of growing a frame inside the loop.
    return {
        model_name: pd.concat(frames, axis=0)
        for model_name, frames in frames_by_model.items()
    }



In [2]:
# Sub-folder of ../runs whose experiments are summarised; with the empty
# string the search path is "../runs//".
group_name = ""

# Models reported as table columns, and the ranking metrics collected per model.
model_names = ["dgat", "mf", "mf-cluster", "att"]
metrics = [
    "mNDCG@10", "mAP@10", "mRecall@10", "MRR@10",
    "mNDCG@3", "mAP@3", "mRecall@3", "MRR@3",
]

# Key of each metrics JSON file that get_models_metrics reads.
split_name = "test"

# dataset name -> run folders (found recursively) whose name starts with it
datasets_uris = {
    dataset: find_subfolders_with_prefix(f"../runs/{group_name}/", dataset)
    for dataset in ("amazon-beauty", "amazon-movies-tvs", "jester-joke", "ml-1m")
}

In [3]:
# dataset name -> frame indexed by metric name with, for every model, a
# "<model>" column (mean over runs) and a "<model>_std" column (std over runs).
metrics_ds = {}

for dataset_name, run_dirs in datasets_uris.items():

    models_metrics_dfs_dict = get_models_metrics(run_dirs)

    # metric -> [{"model_name": ..., "value": mean}, ...] for the current dataset
    metrics_model_rank = {}
    summary_frames = []

    for model_name in model_names:

        runs_df = models_metrics_dfs_dict[model_name].astype(float)

        # index: metric names, single column named after the model
        mean_metrics_df = runs_df.mean().to_frame(name=model_name)
        std_metrics_df = runs_df.std().to_frame(name=f'{model_name}_std')
        summary_frames += [mean_metrics_df, std_metrics_df]

        for metric in metrics:
            metrics_model_rank.setdefault(metric, []).append(
                {"model_name": model_name, "value": float(mean_metrics_df.loc[metric].values[0])}
            )

    # Nothing is stored for a dataset when there are no models to report.
    if summary_frames:
        metrics_ds[dataset_name] = pd.concat(summary_frames, axis=1)


In [4]:
# Echo the models that are passed as the column list to the LaTeX table generator.
model_names

['dgat', 'mf', 'mf-cluster', 'att']

In [5]:

# (key in metrics_ds, LaTeX caption, LaTeX label) of every table to print.
# Keeping the three values side by side prevents caption/dataset mismatches
# such as the former "amazon-beauty" caption on the amazon-movies-tvs table.
latex_tables = [
    ('amazon-movies-tvs', 'Models performance over test split of amazon-movies-tvs dataset.', "tab:amazon-movies-tvs"),
    ('ml-1m', 'Models performance over test split of ml-1m dataset.', "tab:ml-1m-ranking"),
    ('jester-joke', 'Metrics of the models in test split of jester-joke.', "tab:jester-joke-ranking"),
    # Disabled: 'rotten-tomatoes' is not among the keys of datasets_uris.
    # ('rotten-tomatoes', 'Metrics of the models in test split of rotten-tomatoes.', "tab:rotten-tomatoes-ranking"),
]

for table_dataset, table_caption, table_label in latex_tables:
    print(generate_latex_table_from_dataframe(metrics_ds[table_dataset], table_caption, table_label, model_names))


\begin{table}
\caption{Models performance over test split of amazon-beauty dataset.}
\label{tab:amazon-movies-tvs}
\begin{tabular}{cccc}
\toprule
metric & dgat & mf & mf-cluster & att \\
\midrule
mNDCG@10 & \textbf{0.2053} $ \pm $ 0.2162 & 0.1477 $ \pm $ 0.0674 & 0.1589 $ \pm $ 0.0428 & 0.1717 $ \pm $ 0.0154 \\
mAP@10 & \textbf{0.231} $ \pm $ 0.2549 & 0.1503 $ \pm $ 0.0621 & 0.1617 $ \pm $ 0.0373 & 0.1695 $ \pm $ 0.0083 \\
mRecall@10 & 0.7604 $ \pm $ 0.2741 & \textbf{0.8102} $ \pm $ 0.0263 & 0.6763 $ \pm $ 0.3249 & 0.696 $ \pm $ 0.2976 \\
MRR@10 & 0.2235 $ \pm $ 0.1234 & 0.2808 $ \pm $ 0.1365 & 0.3047 $ \pm $ 0.0862 & \textbf{0.3236} $ \pm $ 0.0443 \\
mNDCG@3 & 0.1403 $ \pm $ 0.1167 & 0.1426 $ \pm $ 0.0792 & 0.1525 $ \pm $ 0.0561 & \textbf{0.1635} $ \pm $ 0.0324 \\
mAP@3 & 0.1533 $ \pm $ 0.1393 & 0.1433 $ \pm $ 0.0795 & 0.1546 $ \pm $ 0.0527 & \textbf{0.1659} $ \pm $ 0.029 \\
mRecall@3 & \textbf{0.8018} $ \pm $ 0.2398 & 0.7509 $ \pm $ 0.157 & 0.6695 $ \pm $ 0.3406 & 0.704 $ \pm $ 0.297