In [3]:
import pandas as pd
import numpy as np
import os
import tqdm
import shutil

In [4]:
# Notebook configuration: which dataset's results to aggregate and which
# cutoffs / metric columns to collect from the per-cutoff result files.
dataset = 'emma'  # m4a or emma
# Directory holding the per-cutoff performance files (keeps the trailing slash).
results_dir = f'../results/{dataset}/performance/'
# Cutoff values k; files are matched by the prefix 'rec_cutoff_{k}_'.
cutoffs = [1, 5, 10, 20]
# Metric column names that get suffixed with '@{k}' when present in a file.
metrics = [
    'Recall', 'Precision', 'nDCG', 'MRR', 'HR', 'EFD', 'EPC', 'ARP',
    'PopREO', 'PopRSP', 'ItemCoverage', 'UserCoverage',
]

In [5]:
# Build one wide table of performance metrics: one row per model, one
# '<metric>@<cutoff>' column per metric and cutoff found in the result files.
#
# For every cutoff, all files in `results_dir` whose name starts with
# 'rec_cutoff_{cutoff}_' are read as tab-separated tables, the newest row per
# model is kept, and the per-cutoff tables are inner-joined on (model, date).

# Sorted listing so that the outcome does not depend on directory order.
files = sorted(os.listdir(results_dir))
df = pd.DataFrame()

for cutoff_idx, cutoff in enumerate(cutoffs):
    result_files_cutoff = [f for f in files if f.startswith(f'rec_cutoff_{cutoff}_')]
    if not result_files_cutoff:
        # Without this guard the code below fails with an opaque KeyError: 'date'.
        raise FileNotFoundError(
            f"no result files starting with 'rec_cutoff_{cutoff}_' found in {results_dir}"
        )

    # Collect the frames and concatenate once instead of growing a frame in the loop.
    frames = []
    for f in result_files_cutoff:
        df_tmp = pd.read_csv(os.path.join(results_dir, f), sep='\t')
        # Suffix only the metric columns that this file actually contains.
        tmp_metrics = [m for m in metrics if m in df_tmp.columns]
        df_tmp = df_tmp.rename(columns={m: f'{m}@{cutoff}' for m in tmp_metrics})
        # Keep only the part of the model identifier before the first underscore.
        df_tmp['model'] = df_tmp['model'].str.split('_').str[0]
        # The 19 characters before the 4-character extension are used as the run
        # timestamp (assumes names end in 'YYYY_MM_DD_HH_MM_SS' + extension --
        # TODO confirm against the files on disk).
        df_tmp['date'] = f[-23:-4]
        frames.append(df_tmp)
    df_cut = pd.concat(frames, axis=0)

    # keep only newest model results; the stable sort makes ties on 'date'
    # resolve deterministically (last file / last row wins)
    df_cut = (
        df_cut
        .sort_values('date', kind='mergesort')
        .drop_duplicates(subset='model', keep='last')
    )

    # merge with previous cutoffs
    # The first cutoff is identified by position, not by `df.empty`: an inner
    # merge that comes back empty must stay empty rather than being silently
    # replaced by the next cutoff's table.
    if cutoff_idx == 0:
        df = df_cut
    else:
        df = pd.merge(df, df_cut, on=['model', 'date'], how='inner')
        print(df.shape)

# Keep the identifying columns plus every '<metric>@<cutoff>' column.
df = df[['model', 'date'] + [c for c in df.columns if '@' in c]]
df

(6, 26)
(6, 38)
(6, 50)


Unnamed: 0,model,date,Recall@1,Precision@1,nDCG@1,MRR@1,HR@1,EFD@1,EPC@1,ARP@1,...,nDCG@20,HR@20,EFD@20,EPC@20,ARP@20,PopREO@20,PopRSP@20,MRR@20,ItemCoverage@20,UserCoverage@20
0,BPRMF,2024_12_03_20_18_30,0.010724,0.039343,0.039343,0.039343,0.039343,0.188547,0.028539,17570.489526,...,0.060849,0.301993,0.123947,0.01852,8943.725977,1.0,0.999922,0.087156,77.0,63061.0
1,ItemKNN,2024_12_04_01_52_53,0.01837,0.062416,0.062416,,0.062416,0.353644,0.051174,11453.979226,...,0.081008,0.353182,0.177436,0.025511,7608.82973,0.728495,0.964367,,,
2,MostPop,2024_12_04_01_52_53,0.010756,0.040754,0.040754,,0.040754,0.195213,0.029564,17757.799813,...,0.061848,0.305482,0.128119,0.019148,9114.17736,1.0,0.999993,,,
3,Random,2024_12_04_01_52_53,0.000825,0.003378,0.003378,,0.003378,0.023648,0.003072,1532.790917,...,0.008469,0.06083,0.023133,0.002992,1538.221555,0.048127,0.001618,,,
4,NeuMF,2024_12_04_05_13_28,0.005667,0.020663,0.020663,0.020663,0.020663,0.117448,0.01753,8853.810866,...,0.041345,0.239356,0.090926,0.013167,6627.880913,0.886589,0.945522,0.055942,64.0,63061.0
5,BM3,2024_12_04_13_11_14,0.015671,0.052965,0.052965,0.052965,0.052965,0.290987,0.043246,11042.16381,...,0.072386,0.329142,0.159529,0.022735,6032.110917,0.553655,0.899206,0.107506,232.0,63061.0


In [6]:
# Write the aggregated metrics table to a comma-separated file (row index omitted).
df.to_csv(
    path_or_buf=f'../results/{dataset}_performance.csv',
    index=False,
)