In [3]:
import pandas as pd
import numpy as np
import os
import tqdm
import shutil

In [7]:
# Dataset whose evaluation results are summarised (m4a or emma).
dataset = 'm4a_sample'

# Folder that is scanned for the per-cutoff result files of that dataset.
results_dir = f'../results/{dataset}/performance/'

# Cutoff values to collect; each becomes the '@<cutoff>' suffix of a metric column.
cutoffs = [1, 5, 10, 20]

# Metric columns that are renamed to '<metric>@<cutoff>' when present in a result file.
metrics = [
    'Recall', 'Precision', 'nDCG', 'MRR', 'HR',
    'EFD', 'EPC', 'ARP', 'PopREO', 'PopRSP',
    'ItemCoverage', 'UserCoverage',
]

In [8]:
# Build one wide table with a row per model and a '<metric>@<cutoff>' column
# for every metric/cutoff pair, using the newest result file of each model.
files = os.listdir(results_dir)
df = None  # accumulated wide table; None until the first cutoff has been loaded

for cutoff in cutoffs:
    # The trailing '_' keeps cutoff 1 from also matching 'rec_cutoff_10_...'.
    result_files_cutoff = [f for f in files if f.startswith(f'rec_cutoff_{cutoff}_')]
    if not result_files_cutoff:
        # Without this guard the failure would surface later as KeyError: 'date'.
        raise FileNotFoundError(
            f"no 'rec_cutoff_{cutoff}_*' result files found in {results_dir}"
        )

    # Collect the frames and concatenate once instead of growing a DataFrame
    # inside the loop.
    frames = []
    for f in result_files_cutoff:
        df_tmp = pd.read_csv(os.path.join(results_dir, f), sep='\t')
        tmp_metrics = [m for m in metrics if m in df_tmp.columns]
        df_tmp = df_tmp.rename(columns={m: f'{m}@{cutoff}' for m in tmp_metrics})
        # Keep only the part of the model identifier before the first '_'.
        df_tmp['model'] = df_tmp['model'].str.split('_').str[0]
        # The 19 characters preceding the last 4 of the file name; presumably a
        # 'YYYY_MM_DD_HH_MM_SS' timestamp in front of the file extension.
        df_tmp['date'] = f[-23:-4]
        frames.append(df_tmp)
    df_cut = pd.concat(frames, axis=0)

    # keep only newest model results
    df_cut = df_cut.sort_values('date').drop_duplicates(subset='model', keep='last')

    # merge with previous cutoffs
    if df is None:
        df = df_cut
    else:
        # Inner join on model AND date: a model is kept only if its newest run
        # carries the same timestamp for every cutoff.
        df = pd.merge(df, df_cut, on=['model', 'date'], how='inner')
        print(df.shape)

df = df[['model', 'date'] + [c for c in df.columns if '@' in c]]
# Reassign rather than sorting in place: the frame above is a column selection
# of the merged table, and in-place edits on it can trigger SettingWithCopyWarning.
df = df.sort_values('nDCG@10')
df

(12, 26)
(12, 38)
(12, 50)


Unnamed: 0,model,date,Recall@1,Precision@1,nDCG@1,MRR@1,HR@1,EFD@1,EPC@1,ARP@1,...,nDCG@20,MRR@20,HR@20,EFD@20,EPC@20,ARP@20,PopREO@20,PopRSP@20,ItemCoverage@20,UserCoverage@20
7,Random,2024_12_04_20_56_16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.146758,...,0.001229,0.003748,0.027304,0.016917,0.001202,4.632423,0.430312,0.007231,4741.0,293.0
0,BPRMF,2024_12_04_19_24_12,0.000234,0.006826,0.006826,0.006826,0.006826,0.086158,0.006593,13.242321,...,0.005123,0.017786,0.085324,0.060892,0.004721,14.438396,0.676263,0.74912,1104.0,293.0
3,NeuMF,2024_12_04_20_11_29,2.8e-05,0.003413,0.003413,0.003413,0.003413,0.036637,0.002982,35.460751,...,0.010233,0.029993,0.16041,0.113312,0.009182,27.604949,1.0,1.0,38.0,293.0
2,BM3,2024_12_04_19_55_07,0.000221,0.027304,0.027304,0.027304,0.027304,0.375053,0.026733,4.535836,...,0.015847,0.056092,0.228669,0.214056,0.015335,4.533276,0.128336,0.182547,2298.0,293.0
6,MostPop,2024_12_04_20_56_16,0.000112,0.017065,0.017065,0.017065,0.017065,0.178427,0.014444,46.440273,...,0.016828,0.058474,0.228669,0.180394,0.014658,34.901365,1.0,1.0,47.0,293.0
9,MMGCN,2024_12_04_21_00_24,0.000121,0.013652,0.013652,0.013652,0.013652,0.156036,0.012522,18.392491,...,0.022999,0.062484,0.327645,0.272178,0.021236,14.845222,0.825104,0.888649,2065.0,293.0
11,MultiVAE,2024_12_04_22_48_56,0.001827,0.05802,0.05802,0.05802,0.05802,0.690827,0.05427,17.703072,...,0.039058,0.125107,0.430034,0.452381,0.035403,14.108362,1.0,0.956188,2428.0,293.0
1,LightGCN,2024_12_04_19_46_14,0.000639,0.051195,0.051195,0.051195,0.051195,0.592924,0.04692,21.320819,...,0.042103,0.123178,0.457338,0.49056,0.038387,16.630205,0.933793,0.87946,1578.0,293.0
8,GRCN,2024_12_04_20_57_22,0.000811,0.051195,0.051195,0.051195,0.051195,0.624827,0.048562,11.853242,...,0.045234,0.132408,0.484642,0.558508,0.042682,10.438908,0.827243,0.811967,2760.0,293.0
4,FREEDOM,2024_12_04_20_31_40,0.000324,0.037543,0.037543,0.037543,0.037543,0.460015,0.035481,15.83959,...,0.04707,0.139424,0.508532,0.55635,0.042787,13.024915,0.803165,0.849989,2743.0,293.0


In [6]:
# Write the combined table to a comma-separated file, without the index column.
performance_csv = f'../results/{dataset}_performance.csv'
df.to_csv(performance_csv, index=False)

In [31]:
# Write one CSV per cutoff holding the model name and that cutoff's metric columns.
for cutoff in cutoffs:
    # Match the '@<cutoff>' suffix exactly: a substring test would let '@1'
    # also select every '@10' column.
    suffix = f'@{cutoff}'
    df_cut = df[['model'] + [c for c in df.columns if c.endswith(suffix)]]
    df_cut.to_csv(f'../results/{dataset}_performance_{cutoff}.csv', index=False)