In [3]:
import pandas as pd
import numpy as np
import os
import tqdm
import shutil

In [8]:
# Notebook configuration: which dataset to summarise and which metrics/cutoffs to collect.
dataset = 'emma'  # onion or emma
results_dir = f'../results/{dataset}/performance/'
cutoffs = [1, 5, 10, 20]
# Metric columns that get an `@<cutoff>` suffix when the result files are loaded.
metrics = [
    'Recall', 'Precision', 'nDCG', 'MRR', 'HR',
    'EFD', 'EPC', 'ARP', 'PopREO', 'PopRSP',
    'ItemCoverage', 'UserCoverage',
]
# Alternative: restrict the summary to the accuracy metrics only.
#metrics = ['Recall', 'Precision', 'nDCG', 'MRR', 'HR']

In [16]:
def find_modality(string):
    """Return the value that follows ``modalites=`` in ``string``.

    The value runs from just after the first occurrence of the keyword up to
    the next underscore (or the end of the string), with any leading/trailing
    single quotes removed.

    Args:
        string: Text to search.

    Returns:
        The extracted value as a ``str`` (possibly empty), or ``None`` when
        the keyword does not occur in ``string``.
    """
    # NOTE(review): the keyword spelling is kept exactly as-is; presumably it
    # mirrors the spelling used in the model names being parsed -- verify
    # before "correcting" it.
    _, found, remainder = string.partition("modalites=")
    if not found:
        return None

    # Everything up to the next "_" belongs to the value.
    raw_value = remainder.split("_", 1)[0]
    return raw_value.strip("'")

In [25]:
# Read every result file in `results_dir` and build one wide table:
# one row per model, one `<metric>@<cutoff>` column per metric and cutoff.
files = os.listdir(results_dir)
df = pd.DataFrame()

for cutoff in cutoffs:
    # The trailing underscore keeps cutoff 1 from also matching `rec_cutoff_10_...`.
    result_files_cutoff = [f for f in files if f.startswith(f'rec_cutoff_{cutoff}_')]
    if not result_files_cutoff:
        # Fail with a readable message instead of an opaque KeyError further down.
        raise FileNotFoundError(
            f"no 'rec_cutoff_{cutoff}_*' result files found in {results_dir!r}"
        )

    # Collect the per-file frames and concatenate once (avoids re-copying the
    # accumulated frame on every iteration).
    frames = []
    for f in result_files_cutoff:
        df_tmp = pd.read_csv(os.path.join(results_dir, f), sep='\t')
        tmp_metrics = [m for m in metrics if m in df_tmp.columns]
        df_tmp = df_tmp.rename(columns={m: f'{m}@{cutoff}' for m in tmp_metrics})
        # Parse the modality per row: a file can hold several models, so the
        # first row's value must not be broadcast to all of them. This also
        # works for files without any rows.
        df_tmp['modality'] = df_tmp['model'].map(find_modality, na_action='ignore')
        # The model name is the part of the identifier before the first underscore.
        df_tmp['model'] = df_tmp['model'].str.split('_').str[0]
        # Timestamp taken from the file name: the 19 characters in front of a
        # 4-character extension (e.g. `2024_12_22_18_48_47`).
        # NOTE(review): assumes every file name ends that way -- confirm.
        df_tmp['date'] = f[-23:-4]
        frames.append(df_tmp)
    df_cut = pd.concat(frames, axis=0)

    # keep only newest model results; the stable sort makes the choice among
    # rows sharing a timestamp deterministic (the last one read wins)
    df_cut = df_cut.sort_values('date', kind='stable').drop_duplicates(subset='model', keep='last')

    # merge with previous cutoffs
    if not df.empty:
        df = pd.merge(df, df_cut, on=['model', 'date', 'modality'], how='inner')
        print(df.shape)
    else:
        df = df_cut

df = df[['model', 'modality', 'date'] + [c for c in df.columns if '@' in c]]
# Re-assign instead of sorting in place: `df` is a column selection of the
# merged frame, and in-place mutation of such a selection can trigger
# SettingWithCopy warnings.
df = df.sort_values('nDCG@10')
# display columns with @10 only
df[['model', 'modality', 'date'] + [c for c in df.columns if c.endswith('@10')]]

(20, 27)
(20, 39)
(20, 51)


Unnamed: 0,model,modality,date,Recall@10,Precision@10,nDCG@10,MRR@10,HR@10,EFD@10,EPC@10,ARP@10,PopREO@10,PopRSP@10,ItemCoverage@10,UserCoverage@10
5,Random,,2024_12_22_18_48_47,0.05687,0.01123,0.030133,0.030727,0.10665,0.073342,0.010221,40.30389,0.033423,0.000266,179.0,1594.0
0,AttributeItemKNN,,2024_12_19_14_16_39,0.083841,0.016499,0.051222,0.056101,0.151819,0.117691,0.016933,50.066688,0.095469,0.213448,174.0,1594.0
7,NeuMF,,2024_12_22_19_32_46,0.185293,0.03394,0.107198,0.112744,0.309912,0.202549,0.032672,127.289084,0.816847,0.850359,19.0,1594.0
1,FM,,2024_12_19_14_18_52,0.292749,0.057277,0.180118,0.189197,0.476788,0.326818,0.054832,198.062798,1.0,0.999534,28.0,1594.0
2,DeepFM,,2024_12_19_14_28_18,0.294494,0.057654,0.184197,0.193952,0.472396,0.331193,0.055642,199.645169,1.0,1.0,28.0,1594.0
13,GRCN,,2024_12_23_14_57_04,0.296256,0.059285,0.184243,0.192857,0.466123,0.392073,0.059886,114.680928,0.434161,0.787792,179.0,1594.0
8,MultiVAE,,2024_12_22_21_30_00,0.297946,0.060163,0.186641,0.199826,0.481179,0.379135,0.060551,141.940464,0.960999,0.980218,155.0,1594.0
4,MostPop,,2024_12_22_18_48_47,0.301127,0.059473,0.188548,0.201581,0.489962,0.341135,0.057485,204.656462,1.0,1.0,26.0,1594.0
10,BiVAECF,,2024_12_22_22_06_09,0.319294,0.061731,0.195243,0.199832,0.496236,0.371596,0.060397,162.223965,0.984661,0.987725,69.0,1594.0
15,MMGCN,,2024_12_23_15_20_02,0.315005,0.062233,0.196086,0.207447,0.499373,0.390283,0.062283,151.326976,0.70657,0.955615,81.0,1594.0


In [9]:
# Write the full merged table (all cutoffs) to a comma-separated file, without the index column.
df.to_csv(path_or_buf=f'../results/{dataset}_performance.csv', index=False)

In [31]:
# Write one CSV per cutoff holding the model name and that cutoff's metric columns.
for cutoff in cutoffs:
    # Match on the exact `@<cutoff>` suffix: a substring test would make
    # cutoff 1 also pick up every `@10` column.
    suffix = f'@{cutoff}'
    cutoff_cols = [c for c in df.columns if c.endswith(suffix)]
    df_cut = df[['model'] + cutoff_cols]
    df_cut.to_csv(f'../results/{dataset}_performance_{cutoff}.csv', index=False)