In [1]:
import pandas as pd
import numpy as np
import os
import tqdm
import shutil

In [6]:
# Dataset whose results are aggregated: 'onion' or 'emma'.
dataset = 'onion'
# Directory holding that dataset's per-cutoff performance files.
results_dir = f'../results/{dataset}/performance/'
# Cutoff values that appear in the result file names and metric column suffixes.
cutoffs = [1, 5, 10, 20]
# Metric columns that receive an '@<cutoff>' suffix when the files are combined.
metrics = [
    'Recall', 'Precision', 'nDCG', 'MRR', 'HR',
    'EFD', 'EPC', 'ARP', 'PopREO', 'PopRSP',
    'ItemCoverage', 'UserCoverage',
]

In [7]:
# Combine the per-cutoff result files into one wide table with one row per
# model and one column per metric and cutoff (e.g. 'nDCG@10').
files = os.listdir(results_dir)
df = pd.DataFrame()

for cutoff in cutoffs:
    prefix = f'rec_cutoff_{cutoff}_'

    # Collect the frames in a list and concatenate once: growing a DataFrame
    # with pd.concat inside the loop is quadratic and starts from an empty
    # frame, which recent pandas versions warn about.
    frames = []
    for f in files:
        if not f.startswith(prefix):
            continue
        df_tmp = pd.read_csv(results_dir + f, sep='\t')
        tmp_metrics = [m for m in metrics if m in df_tmp.columns]
        df_tmp = df_tmp.rename(columns={m: f'{m}@{cutoff}' for m in tmp_metrics})
        # keep only the part of the model identifier before the first underscore
        df_tmp['model'] = df_tmp['model'].str.split('_').str[0]
        # the 19 characters before the 4-character extension are used as the
        # run timestamp (values look like 2024_12_19_21_21_26)
        df_tmp['date'] = f[-23:-4]
        frames.append(df_tmp)

    if not frames:
        # Without this guard the sort below fails with an opaque KeyError: 'date'.
        raise FileNotFoundError(
            f'no result files starting with {prefix!r} found in {results_dir!r}'
        )

    df_cut = pd.concat(frames, axis=0)

    # keep only newest model results (the timestamp strings are compared as text)
    df_cut = df_cut.sort_values('date').drop_duplicates(subset='model', keep='last')

    # merge with previous cutoffs; the inner join on model AND date drops any
    # model whose newest run does not carry the same timestamp for every cutoff
    if not df.empty:
        df = pd.merge(df, df_cut, on=['model', 'date'], how='inner')
        print(df.shape)
    else:
        df = df_cut

# keep the identifying columns plus every '<metric>@<cutoff>' column
df = df[['model', 'date'] + [c for c in df.columns if '@' in c]]
df = df.sort_values('nDCG@10')
df

(13, 26)
(13, 38)
(13, 50)


Unnamed: 0,model,date,Recall@1,Precision@1,nDCG@1,MRR@1,HR@1,EFD@1,EPC@1,ARP@1,...,nDCG@20,MRR@20,HR@20,EFD@20,EPC@20,ARP@20,PopREO@20,PopRSP@20,ItemCoverage@20,UserCoverage@20
3,Random,2024_12_19_21_21_26,2.5e-05,0.000816,0.000816,0.000816,0.000816,0.012156,0.000811,36.048335,...,0.000558,0.001957,0.010087,0.007824,0.000522,35.491432,0.13799,0.000575,48117.0,17151.0
0,AttributeItemKNN,2024_12_19_18_50_19,2.4e-05,0.0007,0.0007,0.0007,0.0007,0.009872,0.000692,32.13626,...,0.000698,0.002327,0.012594,0.009402,0.000634,33.721946,0.080795,0.045619,32167.0,17151.0
6,NeuMF,2024_12_23_14_42_28,0.000806,0.022739,0.022739,0.022739,0.022739,0.243401,0.021372,1045.753659,...,0.018303,0.054086,0.199405,0.177873,0.015356,781.802347,1.0,1.0,103.0,17151.0
2,MostPop,2024_12_19_21_21_26,0.000739,0.021107,0.021107,0.021107,0.021107,0.225369,0.019817,1051.036208,...,0.018753,0.054143,0.207335,0.181183,0.015698,821.33515,1.0,1.0,54.0,17151.0
4,FM,2024_12_20_19_13_08,0.001237,0.023672,0.023672,0.023672,0.023672,0.294375,0.023062,192.002974,...,0.03598,0.083312,0.356073,0.391541,0.030298,228.801154,0.966723,0.943944,15262.0,17151.0
9,BiVAECF,2024_12_26_23_13_01,0.001773,0.044021,0.044021,0.044021,0.044021,0.547302,0.043058,350.189377,...,0.041426,0.105295,0.375955,0.465329,0.036166,328.261396,0.993174,0.999177,3725.0,17151.0
8,DeepFM,2024_12_25_09_00_43,0.004025,0.06023,0.06023,0.06023,0.06023,0.767233,0.058966,270.859017,...,0.057222,0.134543,0.440849,0.593512,0.045096,234.373319,0.836169,0.87263,22064.0,17151.0
5,MMGCN,2024_12_21_20_54_38,0.007638,0.116087,0.116087,0.116087,0.116087,1.519791,0.113956,267.762696,...,0.096928,0.213068,0.567605,1.037661,0.076857,215.839966,0.743169,0.879656,25553.0,17151.0
7,BM3,2024_12_23_16_08_06,0.008724,0.125415,0.125415,0.125415,0.125415,1.600713,0.122989,307.278409,...,0.09602,0.219102,0.556527,0.99238,0.074332,210.04863,0.92669,0.980828,13723.0,17151.0
12,GRCN,2024_12_31_14_14_56,0.00853,0.12903,0.12903,0.12903,0.12903,1.775852,0.12777,153.041805,...,0.107666,0.233442,0.611101,1.198477,0.086047,146.178756,0.801593,0.918991,24798.0,17151.0


In [9]:
# Write the combined table (all cutoffs) to a comma-separated file.
performance_csv = f'../results/{dataset}_performance.csv'
df.to_csv(performance_csv, index=False)

In [31]:
# Write one CSV per cutoff containing the model name and only that cutoff's
# metric columns.
for cutoff in cutoffs:
    suffix = f'@{cutoff}'
    # Match on the column suffix: a substring test would make '@1' also select
    # every '@10' column, mixing two cutoffs into the cutoff-1 file.
    cutoff_columns = [c for c in df.columns if c.endswith(suffix)]
    df_cut = df[['model'] + cutoff_columns]
    df_cut.to_csv(f'../results/{dataset}_performance_{cutoff}.csv', index=False)