In [1]:
import pandas as pd
import numpy as np
import os
import tqdm
import shutil

In [2]:
dataset = 'onion' # onion or emma
results_dir = f'../results/' + dataset + '/performance/'
cutoffs = [1, 5, 10, 20]
metrics = ['Recall', 'Precision', 'nDCG', 'MRR', 'HR', 'EFD', 'EPC', 'ARP', 'PopREO', 'PopRSP', 'ItemCoverage', 'UserCoverage']
#metrics = ['Recall', 'Precision', 'nDCG', 'MRR', 'HR']

In [3]:
def find_modality(string):
    keyword = "modalites="
    start = string.find(keyword)
    if start == -1:
        return None

    start += len(keyword)
    end = string.find("_", start)
    if end == -1:
        end = len(string)

    modality = string[start:end].strip("'")
    return modality

In [4]:
# read all files in result folder
files = os.listdir(results_dir)
df = pd.DataFrame()

for cutoff in cutoffs:
    df_cut = pd.DataFrame()
    result_files_cutoff = [f for f in files if f.startswith(f'rec_cutoff_{cutoff}_')]
    for f in result_files_cutoff:
        df_tmp = pd.read_csv(results_dir + f, sep='\t')
        tmp_metrics = [m for m in metrics if m in df_tmp.columns]
        df_tmp.rename(columns={m: f'{m}@{cutoff}' for m in tmp_metrics}, inplace=True)
        df_tmp['modality'] = find_modality(df_tmp['model'].values[0])
        df_tmp['model'] = df_tmp['model'].str.split('_').str[0]
        df_tmp['date'] = f[-23:-4]
        df_cut = pd.concat([df_cut, df_tmp], axis=0)

    # keep only newest model results
    df_cut = df_cut.sort_values('date').drop_duplicates(subset=['model', 'modality'], keep='last')

    # merge with previous cutoffs
    if not df.empty:
        df = pd.merge(df, df_cut, on=['model', 'date', 'modality'], how='inner')
        print(df.shape)
    else:
        df = df_cut

df = df[['model', 'modality', 'date'] + [c for c in df.columns if '@' in c]]
df.sort_values('nDCG@5', inplace=True)
# display columns with @10 only
#df[['model', 'modality', 'date'] + [c for c in df.columns if '@1' in c]]
df[['model', 'modality', 'date'] + ['Recall@1', 'Recall@5', 'Recall@10', 'nDCG@1', 'nDCG@5', 'nDCG@10']]

(55, 13)
(55, 18)
(55, 23)


Unnamed: 0,model,modality,date,Recall@1,Recall@5,Recall@10,nDCG@1,nDCG@5,nDCG@10
40,LATTICE,textual'-'emotion,2025_02_05_18_45_41,0.0,0.000227,0.000582,0.0,0.000159,0.000331
18,LATTICE,textual,2025_01_15_00_39_49,0.0,0.000227,0.000582,0.0,0.000159,0.000331
3,Random,,2025_01_14_16_21_46,0.000128,0.000409,0.000811,0.000293,0.000379,0.000546
50,BiVAECFM,audio'-'emotion,2025_02_16_04_39_06,0.00353,0.008013,0.013081,0.010415,0.008621,0.010406
32,BiVAECFM,audio,2025_01_15_11_25_11,0.003553,0.008731,0.013279,0.010488,0.00931,0.010757
33,BiVAECFM,textual,2025_01_15_11_56_46,0.002917,0.011437,0.017016,0.009755,0.010793,0.012841
30,BiVAECFM,visual,2025_01_15_10_58_45,0.004085,0.010451,0.016258,0.01192,0.010928,0.012975
2,MostPop,,2025_01_14_16_21_46,0.003608,0.011235,0.019006,0.010782,0.011125,0.013836
19,NeuMF,,2025_01_15_01_27_09,0.003813,0.012371,0.018402,0.011222,0.011878,0.013911
35,BiVAECFM,emotion,2025_02_04_10_50_50,0.004451,0.013108,0.022818,0.013349,0.013458,0.017165


In [13]:
# export to comma seperated file
df.to_csv(f'../results/{dataset}_performance.csv', index=False)

In [14]:
for cutoff in cutoffs:
    df_cut = df[['model'] + [c for c in df.columns if f'@{cutoff}' in c]]
    df_cut.to_csv(f'../results/{dataset}_performance_{cutoff}.csv', index=False)