In [2]:
import pandas as pd
import numpy as np
import os
import tqdm
import shutil

In [3]:
# Which dataset's results to aggregate: 'onion' or 'emma'.
dataset = 'emma'
# Folder that holds the per-cutoff performance files of that dataset.
results_dir = f'../results/{dataset}/performance/'
# Cutoff values whose result files are collected.
cutoffs = [1, 5, 10, 20]
# Metric column names looked up in each result file.
metrics = [
    'Recall', 'Precision', 'nDCG', 'MRR', 'HR', 'EFD',
    'EPC', 'ARP', 'PopREO', 'PopRSP', 'ItemCoverage', 'UserCoverage',
]

In [4]:
# Collect the newest result row per model for every cutoff and join the
# cutoffs into one wide table (one row per model, metric columns named
# '<metric>@<cutoff>').
files = os.listdir(results_dir)
df = pd.DataFrame()

for cutoff in cutoffs:
    prefix = f'rec_cutoff_{cutoff}_'

    # Read every result file of this cutoff; frames are concatenated once
    # after the loop instead of growing a DataFrame file by file.
    frames = []
    for f in files:
        if not f.startswith(prefix):
            continue
        df_tmp = pd.read_csv(os.path.join(results_dir, f), sep='\t')
        # Suffix the metric columns with the cutoff so all cutoffs can sit
        # side by side after merging.
        df_tmp = df_tmp.rename(
            columns={m: f'{m}@{cutoff}' for m in metrics if m in df_tmp.columns}
        )
        # Keep only the part of the model identifier before the first underscore.
        df_tmp['model'] = df_tmp['model'].str.split('_').str[0]
        # Assumes file names end in a 19-character timestamp followed by a
        # 4-character extension (e.g. '2024_12_12_09_52_08.tsv') - TODO confirm.
        df_tmp['date'] = f[-23:-4]
        frames.append(df_tmp)

    if not frames:
        # Without this guard the sort below fails with an opaque KeyError: 'date'.
        raise FileNotFoundError(
            f"no result files starting with '{prefix}' found in {results_dir}"
        )

    df_cut = pd.concat(frames, axis=0)

    # keep only newest model results
    # (zero-padded timestamps sort chronologically as strings; a stable sort
    # makes ties between equal timestamps resolve to the row read last)
    df_cut = (
        df_cut
        .sort_values('date', kind='mergesort')
        .drop_duplicates(subset='model', keep='last')
    )

    # merge with previous cutoffs
    # NOTE(review): the inner join on (model, date) silently drops a model whose
    # newest run does not carry the same timestamp for every cutoff.
    if not df.empty:
        df = pd.merge(df, df_cut, on=['model', 'date'], how='inner')
        print(df.shape)
    else:
        df = df_cut

# Keep the identifying columns plus all metric columns, ordered by nDCG@10
# (ascending). Assigning the sorted result avoids an in-place sort on a
# column-selected frame.
metric_cols = [c for c in df.columns if '@' in c]
df = df[['model', 'date'] + metric_cols].sort_values('nDCG@10')
df

(12, 26)
(12, 38)
(12, 50)


Unnamed: 0,model,date,Recall@1,Precision@1,nDCG@1,MRR@1,HR@1,EFD@1,EPC@1,ARP@1,...,nDCG@20,HR@20,EFD@20,EPC@20,ARP@20,PopREO@20,PopRSP@20,MRR@20,ItemCoverage@20,UserCoverage@20
9,AttributeItemKNN,2024_12_12_09_52_08,0.000529,0.001201,0.001201,0.001201,0.001201,0.007378,0.001135,135.205146,...,0.003942,0.019726,0.007221,0.001007,94.807847,0.037349,0.085207,0.003958,238.0,5830.0
4,Random,2024_12_05_08_44_19,0.000826,0.003251,0.003251,0.003251,0.003251,0.022331,0.002928,1547.408065,...,0.008769,0.062558,0.023339,0.003037,1539.072372,0.01501,0.001399,0.011535,283.0,63061.0
10,FM,2024_12_12_10_18_32,0.003491,0.00789,0.00789,0.00789,0.00789,0.036658,0.006676,850.621784,...,0.014363,0.061235,0.020537,0.003432,413.787221,0.963069,0.990243,0.016755,76.0,5830.0
11,DeepFM,2024_12_12_10_27_24,0.003709,0.007547,0.007547,0.007547,0.007547,0.033885,0.006276,990.466552,...,0.015009,0.061235,0.02018,0.003432,449.401166,1.0,0.99999,0.017101,52.0,5830.0
0,NeuMF,2024_12_04_05_13_28,0.005667,0.020663,0.020663,0.020663,0.020663,0.117448,0.01753,8853.810866,...,0.041345,0.239356,0.090926,0.013167,6627.880913,0.886589,0.945522,0.055942,64.0,63061.0
8,BPRMF,2024_12_05_15_23_54,0.010724,0.039343,0.039343,0.039343,0.039343,0.188547,0.028539,17570.489526,...,0.060849,0.301993,0.123947,0.01852,8943.725977,1.0,0.999922,0.087156,77.0,63061.0
5,MostPop,2024_12_05_08_44_19,0.010756,0.040754,0.040754,0.040754,0.040754,0.195213,0.029564,17757.799813,...,0.061848,0.305482,0.128119,0.019148,9114.17736,1.0,0.999993,0.089518,81.0,63061.0
7,BM3,2024_12_05_10_26_28,0.015571,0.053044,0.053044,0.053044,0.053044,0.28563,0.042481,12035.639476,...,0.072335,0.328713,0.158206,0.022648,6338.068649,0.575857,0.89894,0.10754,234.0,63061.0
1,MMGCN,2024_12_04_21_06_23,0.016333,0.057183,0.057183,0.057183,0.057183,0.343249,0.048628,7997.436339,...,0.077299,0.352627,0.179842,0.025096,6119.846592,0.466337,0.883578,0.115309,272.0,63061.0
6,ItemKNN,2024_12_05_08_44_19,0.01837,0.062416,0.062416,0.062416,0.062416,0.353644,0.051174,11453.979226,...,0.081008,0.353182,0.177436,0.025511,7608.82973,0.728495,0.964367,0.122259,270.0,63061.0


In [6]:
# Write the combined table to a comma-separated file, without the row index.
performance_csv = f'../results/{dataset}_performance.csv'
df.to_csv(performance_csv, index=False)

In [31]:
# Write one CSV per cutoff containing the model name and only that cutoff's
# metric columns.
for cutoff in cutoffs:
    # Match the column suffix exactly: a substring test on '@1' would also
    # select every '@10' column (and '@2' would select '@20').
    suffix = f'@{cutoff}'
    cutoff_cols = [c for c in df.columns if c.endswith(suffix)]
    df_cut = df[['model'] + cutoff_cols]
    df_cut.to_csv(f'../results/{dataset}_performance_{cutoff}.csv', index=False)