In [None]:
import os
import pandas as pd
import json
import glob
from recsysconfident.utils.files import sort_paths_by_datetime

def find_subfolders_with_prefix(root_folder: str, prefix: str):
  """Recursively collect directories below ``root_folder`` named with ``prefix``.

  Args:
    root_folder: Directory at which the recursive walk starts.
    prefix: Leading string a directory *name* (not its full path) must have.

  Returns:
    List of matching directory paths, each built by joining the parent
    directory reported by ``os.walk`` with the matching name, in walk order.
    Empty when nothing matches.
  """
  return [
    os.path.join(parent_dir, child_name)
    for parent_dir, child_names, _ in os.walk(root_folder)
    for child_name in child_names
    if child_name.startswith(prefix)
  ]

def read_json(path: str) -> dict:
    """Parse the JSON file at ``path`` and return the decoded object.

    Args:
        path: Location of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Pin the encoding: without it open() falls back to the platform locale,
    # which mis-decodes non-ASCII JSON on systems whose default is not UTF-8.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)
    
def generate_latex_table_from_dataframe(df: pd.DataFrame, caption: str, label: str, columns: list):

    df = df[~df.index.str.contains("std")]
    std_columns = []
    columns1 = []
    for col in list(df.columns):
        if "std" in col:
            std_columns.append(col)
        else:
            columns1.append(col)

    if not columns:
        columns = columns1

    for col in columns + std_columns:
        df.loc[:, col] = df[col].astype(float).round(4)

    df_bolded = df.astype(str)
    for idx, row in df[columns].iterrows():

        if "rmse" in idx or "mae" in idx:
            bold_value = row.min()
        else:
            bold_value = row.max()

        for col in columns:
            if row[col] == bold_value:
                df_bolded.at[idx, col] = "\\textbf{"+str(row[col])+"}"

    for std_col in std_columns:
        col_name = std_col[:-4]
        formatted_col = df_bolded[col_name].astype(str) + " $ \\pm $ " + df_bolded[std_col].astype(str)
        df_bolded[col_name] = formatted_col

    df_bolded = df_bolded[columns]
    df_bolded = df_bolded.reset_index().rename(columns={'index': 'metric'})

    latex_code = df_bolded.to_latex(
        label=label,
        caption=caption,
        index=False,
        escape=False,  # Prevent escaping special characters
        column_format="c" * len(df.columns)  # Center align columns
    )
    return latex_code

def get_models_metrics(dataset_uris, split=None) -> dict:
    """Gather the metrics of every run folder, grouped by model name.

    For each folder the last entry of the sorted ``setup-*.json`` files gives
    the model name and the last entry of the sorted ``metrics-*.json`` files
    gives the metrics; only the sub-dictionary stored under ``split`` is kept.
    Folders whose path contains ``"data_splits"`` are ignored.

    Args:
        dataset_uris: Iterable of run-folder paths.
        split: Key of the metrics JSON to extract. When None, the
            notebook-level ``split_name`` variable is used, as before.

    Returns:
        Dict mapping model name -> DataFrame with one row per run folder and
        one column per metric (row labels are all 0, as each run contributes
        a single-row frame).

    Raises:
        IndexError: If a folder has no ``setup-*.json`` or ``metrics-*.json``.
        KeyError: If the setup lacks ``model_name`` or the metrics lack the
            requested split.
    """
    frames_by_model = {}
    for path in dataset_uris:
        if "data_splits" in path:
            continue

        # Resolved lazily so an empty input never touches the notebook global.
        selected_split = split_name if split is None else split

        # NOTE(review): presumably sort_paths_by_datetime orders oldest ->
        # newest so [-1] is the most recent file; confirm against the helper.
        setup = read_json(sort_paths_by_datetime(glob.glob(f"{path}/setup-*.json"))[-1])
        metrics_list = sort_paths_by_datetime(glob.glob(f"{path}/metrics-*.json"))
        run_metrics = read_json(metrics_list[-1])[selected_split]

        frames_by_model.setdefault(setup['model_name'], []).append(pd.DataFrame([run_metrics]))

    # Concatenate once per model instead of re-copying the accumulated frame
    # for every additional run.
    return {
        model_name: pd.concat(run_frames, axis=0)
        for model_name, run_frames in frames_by_model.items()
    }



In [None]:
# Experiment group to summarise and the model columns shown for it, in
# display order. Alternative configuration:
#   group_name = "proposal"
#   columns = ['cpgat', 'mf-not-reg', 'gnn', 'gnn-mf']
group_name = "distribution_based"
columns = ['ordrec', 'cpmf', 'cbpmf', 'lbd']

# Key selected from each metrics JSON when the runs are loaded.
split_name = "test"

# Dataset name -> folders under ../runs/<group_name>/ whose directory name
# starts with that dataset name.
datasets_uris = {
    dataset: find_subfolders_with_prefix(f"../runs/{group_name}/", dataset)
    for dataset in ("amazon-beauty", "jester-joke", "ml-1m")
}

In [None]:
# Per dataset, build one frame indexed by metric name holding, for every
# model, a mean column (<model>) followed by a std column (<model>_std),
# both computed across that model's runs.
metrics_ds = {}
for dataset_name, run_folders in datasets_uris.items():

    models_metrics_dfs_dict = get_models_metrics(run_folders)

    for model_name, model_runs_df in models_metrics_dfs_dict.items():

        model_runs_float = model_runs_df.astype(float)

        # index: metric names, single column named after the model
        mean_metrics_df = model_runs_float.mean().to_frame(name=model_name)
        std_metrics_df = model_runs_float.std().to_frame(name=f'{model_name}_std')

        # Append to whatever was already collected for this dataset, if any.
        side_by_side = [mean_metrics_df, std_metrics_df]
        if dataset_name in metrics_ds:
            side_by_side.insert(0, metrics_ds[dataset_name])
        metrics_ds[dataset_name] = pd.concat(side_by_side, axis=1)


In [None]:

# Emit one LaTeX table per dataset: (dataset key, caption, label).
for table_dataset, table_caption, table_label in (
    ('amazon-beauty', 'Models performance over test split of amazon-beauty dataset.', "tab:amazon-beauty-ranking"),
    ('ml-1m', 'Models performance over test split of ml-1m dataset.', "tab:ml-1m-ranking"),
    ('jester-joke', 'Metrics of the models in test split of jester-joke.', "tab:jester-joke-ranking"),
):
    print(generate_latex_table_from_dataframe(metrics_ds[table_dataset], table_caption, table_label, columns))
