In [10]:
import pandas as pd
import numpy as np
import os
import tqdm
import shutil

In [11]:
# Notebook configuration: everything a reader might want to change lives here.
dataset = 'emma'  # onion or emma
# Directory that is scanned for the per-cutoff result files of the chosen dataset.
results_dir = f'../results/{dataset}/performance/'
# Cutoffs to aggregate; each one selects the files named 'rec_cutoff_<cutoff>_...'.
cutoffs = [1, 5, 10, 20]
# Metric columns that receive an '@<cutoff>' suffix when the cutoffs are combined.
metrics = ['Recall', 'Precision', 'nDCG', 'MRR', 'HR', 'EFD', 'EPC', 'ARP', 'PopREO', 'PopRSP', 'ItemCoverage', 'UserCoverage']

In [13]:
# Build one wide results table: for every cutoff, read all matching result
# files, keep one (the newest) row per model, and join the cutoffs side by side.
files = os.listdir(results_dir)
df = None  # becomes the merged table once the first cutoff has been loaded

for cutoff in cutoffs:
    # Trailing underscore so that cutoff 1 does not also match 'rec_cutoff_10_...'.
    prefix = f'rec_cutoff_{cutoff}_'
    frames = []
    for f in files:
        if not f.startswith(prefix):
            continue
        df_tmp = pd.read_csv(os.path.join(results_dir, f), sep='\t')
        # Suffix the known metric columns with the cutoff, e.g. 'nDCG' -> 'nDCG@10'.
        tmp_metrics = [m for m in metrics if m in df_tmp.columns]
        df_tmp = df_tmp.rename(columns={m: f'{m}@{cutoff}' for m in tmp_metrics})
        # Keep only the part of the model identifier before the first underscore.
        df_tmp['model'] = df_tmp['model'].str.split('_').str[0]
        # The 19 characters in front of the 4-character extension are the run
        # timestamp (e.g. '2024_12_12_19_06_31'); it sorts chronologically as text.
        df_tmp['date'] = f[-23:-4]
        frames.append(df_tmp)

    if not frames:
        # Fail with a clear message instead of an opaque KeyError on 'date' below.
        raise FileNotFoundError(
            f"no result files starting with '{prefix}' found in '{results_dir}'"
        )

    # keep only newest model results; concatenate once instead of growing a
    # frame inside the loop, and sort stably so ties on 'date' resolve in a
    # deterministic order.
    df_cut = (
        pd.concat(frames, axis=0)
        .sort_values('date', kind='stable')
        .drop_duplicates(subset='model', keep='last')
    )

    # merge with previous cutoffs
    if df is None:
        df = df_cut
    else:
        # Inner join: a model survives only if its newest run carries the same
        # timestamp for every cutoff processed so far.
        df = pd.merge(df, df_cut, on=['model', 'date'], how='inner')
        print(df.shape)

# Keep the identifying columns plus every '<metric>@<cutoff>' column, ordered
# by nDCG@10 ascending. No inplace sort: the selection returns a derived frame.
metric_columns = [c for c in df.columns if '@' in c]
df = df[['model', 'date'] + metric_columns].sort_values('nDCG@10')
df

(17, 26)
(17, 38)
(17, 50)


Unnamed: 0,model,date,Recall@1,Precision@1,nDCG@1,MRR@1,HR@1,EFD@1,EPC@1,ARP@1,...,nDCG@20,MRR@20,HR@20,EFD@20,EPC@20,ARP@20,PopREO@20,PopRSP@20,ItemCoverage@20,UserCoverage@20
4,Random,2024_12_12_19_06_31,0.000257,0.000343,0.000343,0.000343,0.000343,0.001819,0.000305,88.735849,...,0.002126,0.001712,0.010806,0.003543,0.000497,86.471072,0.006886,0.004202,243.0,5830.0
16,FeatureItemKNN,2024_12_18_10_06_21,0.000343,0.000515,0.000515,0.000515,0.000515,0.003531,0.000498,93.472384,...,0.003476,0.003281,0.017839,0.006382,0.000874,97.048971,0.005163,0.093612,243.0,5830.0
15,AttributeItemKNN,2024_12_18_10_01_33,0.000343,0.000515,0.000515,0.000515,0.000515,0.003531,0.000498,93.472384,...,0.003476,0.003281,0.017839,0.006382,0.000874,97.048971,0.005163,0.093612,243.0,5830.0
6,NeuMF,2024_12_12_21_13_23,0.002497,0.005489,0.005489,0.005489,0.005489,0.027342,0.004827,665.116123,...,0.011886,0.01258,0.053002,0.016699,0.002753,371.37958,0.670937,0.872638,41.0,5830.0
0,FM,2024_12_12_10_18_32,0.003491,0.00789,0.00789,0.00789,0.00789,0.036658,0.006676,850.621784,...,0.014363,0.016755,0.061235,0.020537,0.003432,413.787221,0.963069,0.990243,76.0,5830.0
5,BPRMF,2024_12_12_19_34_09,0.003571,0.007204,0.007204,0.007204,0.007204,0.032551,0.006004,996.799485,...,0.014884,0.016677,0.061407,0.019862,0.003383,453.513491,1.0,0.99999,53.0,5830.0
3,MostPop,2024_12_12_19_06_31,0.003464,0.007033,0.007033,0.007033,0.007033,0.031659,0.005849,998.42693,...,0.015,0.01687,0.064837,0.020572,0.003506,468.675266,1.0,1.0,53.0,5830.0
1,DeepFM,2024_12_12_10_27_24,0.003709,0.007547,0.007547,0.007547,0.007547,0.033885,0.006276,990.466552,...,0.015009,0.017101,0.061235,0.02018,0.003432,449.401166,1.0,0.99999,52.0,5830.0
13,MMGCN,2024_12_14_22_54_22,0.003629,0.008233,0.008233,0.008233,0.008233,0.043557,0.007305,481.624357,...,0.016582,0.01892,0.07187,0.026481,0.004112,305.387864,0.518667,0.836373,206.0,5830.0
7,MultiVAE,2024_12_13_03_27_55,0.00508,0.011149,0.011149,0.011149,0.011149,0.062546,0.010137,442.090051,...,0.016906,0.021236,0.065523,0.026654,0.004177,300.894451,0.706261,0.916863,243.0,5830.0


In [9]:
# Write the combined table (all cutoffs) to one comma-separated file, without the index.
combined_csv_path = f'../results/{dataset}_performance.csv'
df.to_csv(combined_csv_path, index=False)

In [31]:
# Write one CSV per cutoff containing the model name and that cutoff's metrics.
for cutoff in cutoffs:
    # Match on the exact '@<cutoff>' suffix: a substring test would make
    # cutoff 1 also select every '...@10' column.
    suffix = f'@{cutoff}'
    cutoff_columns = [c for c in df.columns if c.endswith(suffix)]
    df_cut = df[['model'] + cutoff_columns]
    df_cut.to_csv(f'../results/{dataset}_performance_{cutoff}.csv', index=False)