In [1]:
import pandas as pd
import numpy as np
import os
import tqdm
import shutil

In [47]:
# Settings read by the cells below.
dataset = 'emma'  # onion or emma
# Folder that holds the tab-separated result files of the chosen dataset.
results_dir = f'../results/{dataset}/performance/'
# Cutoffs to aggregate: files are selected by the prefix 'rec_cutoff_<cutoff>_'
# and their metric columns are renamed to '<metric>@<cutoff>'.
#cutoffs = [1, 5, 10, 20]
cutoffs = [5]
# Results whose file timestamp sorts before this 'YYYY_MM_DD' string are dropped.
cutoff_date = '2025_03_20'
# Metric columns that get the '@<cutoff>' suffix when present in a result file.
metrics = ['Recall', 'Precision', 'nDCG', 'MRR', 'HR', 'EFD', 'EPC', 'ARP', 'PopREO', 'PopRSP', 'ItemCoverage', 'UserCoverage']
#metrics = ['Recall', 'Precision', 'nDCG', 'MRR', 'HR']

In [48]:
def find_modality(string):
    """Return the value that follows ``modalites=`` in a run name.

    The value runs from the end of the keyword up to the next underscore
    (or the end of the string when no underscore follows). Only the
    outermost single quotes are stripped, so a multi-modality value keeps
    its inner quotes, e.g. ``audio'-'textual'-'visual``.

    Args:
        string: Run/model name to search.

    Returns:
        The modality text, or ``None`` when the keyword does not occur.
    """
    # The keyword is spelled "modalites" on purpose: that is the token searched for.
    _, marker, remainder = string.partition("modalites=")
    if not marker:
        return None

    # Everything up to the next "_" belongs to the value.
    raw_value = remainder.split("_", 1)[0]
    return raw_value.strip("'")

In [49]:
# Read every result file in the results folder and build one table that holds,
# for the newest run of each (model, modality), the metric columns of all cutoffs.
files = os.listdir(results_dir)
merge_keys = ['model', 'date', 'modality']
df = pd.DataFrame()

for pos, cutoff in enumerate(cutoffs):
    prefix = f'rec_cutoff_{cutoff}_'
    frames = []
    for f in files:
        if not f.startswith(prefix):
            continue
        df_tmp = pd.read_csv(os.path.join(results_dir, f), sep='\t')
        # tag the metric columns with the cutoff, e.g. 'nDCG' -> 'nDCG@5'
        df_tmp = df_tmp.rename(columns={m: f'{m}@{cutoff}' for m in metrics if m in df_tmp.columns})
        # derive the modality row by row, so a file with several rows (or none)
        # is handled instead of broadcasting the first row's modality
        df_tmp['modality'] = df_tmp['model'].map(find_modality)
        # keep the full run name in 'path'; 'model' becomes the part before the first '_'
        df_tmp['path'] = df_tmp['model']
        df_tmp['model'] = df_tmp['model'].str.split('_').str[0]
        # the 19 characters before the file extension ('YYYY_MM_DD_hh_mm_ss')
        df_tmp['date'] = os.path.splitext(f)[0][-19:]
        frames.append(df_tmp)

    if not frames:
        # fail with a clear message instead of a KeyError on the missing 'date' column
        raise FileNotFoundError(f'no result files starting with {prefix!r} found in {results_dir!r}')

    # concatenate once instead of growing the frame inside the loop
    df_cut = pd.concat(frames, axis=0, ignore_index=True)

    # keep only newest model results
    df_cut = df_cut.sort_values('date').drop_duplicates(subset=['model', 'modality'], keep='last')

    # drop models results before cutoff date (string comparison on the timestamp)
    df_cut = df_cut[df_cut['date'] >= cutoff_date]

    # merge with previous cutoffs
    if pos == 0:
        df = df_cut
    else:
        # bring in only the columns not present yet, so shared non-key columns
        # such as 'path' are not duplicated as 'path_x' / 'path_y'
        # NOTE(review): joining on 'date' requires the files of the different
        # cutoffs to carry the same timestamp for a given run — confirm.
        new_cols = [c for c in df_cut.columns if c not in df.columns]
        df = pd.merge(df, df_cut[merge_keys + new_cols], on=merge_keys, how='inner')
        print(df.shape)

# sort by nDCG@5 when that cutoff was loaded, otherwise by nDCG at the first cutoff
sort_col = 'nDCG@5' if 'nDCG@5' in df.columns else f'nDCG@{cutoffs[0]}'
df = df.sort_values(sort_col).reset_index(drop=True)
# display the identifying columns followed by every metric column (all cutoffs)
df[['model', 'modality', 'date', 'path'] + [c for c in df.columns if '@' in c]]
# alternative compact view (needs cutoffs 1, 5 and 10 to be loaded):
#df[['model', 'modality', 'date'] + ['Recall@1', 'Recall@5', 'Recall@10', 'nDCG@1', 'nDCG@5', 'nDCG@10']]

Unnamed: 0,model,modality,date,path,Recall@5,Precision@5,nDCG@5,MRR@5,HR@5,EFD@5,EPC@5,ARP@5,PopREO@5,PopRSP@5,ItemCoverage@5,UserCoverage@5
0,RerankerLightGCN,,2025_03_27_09_46_17,RerankerLightGCN_cosine_5_emotion,0.298826,0.110964,0.202942,0.214288,0.461864,0.481007,0.083088,705.811706,0.918588,0.989939,65.0,3776.0
0,RerankerItemKNN,,2025_03_27_09_44_12,RerankerItemKNN_cosine_5_emotion,0.369804,0.136176,0.263704,0.288873,0.556939,0.676527,0.112266,595.153867,0.738335,0.969334,153.0,3776.0
0,LightGCNM,emotion,2025_03_27_09_02_20,LightGCNM_seed=123_e=200_bs=128_lr=0$0005_fact...,0.337636,0.124417,0.275314,0.326028,0.503972,0.659074,0.112551,627.076801,0.864993,0.985631,78.0,3776.0
0,LightGCN,,2025_03_25_15_09_16,LightGCN_seed=123_e=200_bs=128_lr=0$0005_facto...,0.356637,0.131727,0.289113,0.339773,0.529661,0.726921,0.121405,566.985646,0.76502,0.973364,130.0,3776.0
0,LightGCNM,audio'-'textual'-'visual,2025_03_26_17_01_40,LightGCNM_seed=123_e=200_bs=128_lr=0$0005_fact...,0.358919,0.132203,0.289294,0.338533,0.534958,,,,,,,
0,LightGCNM,audio'-'textual'-'visual'-'emotion,2025_03_26_17_17_14,LightGCNM_seed=123_e=200_bs=128_lr=0$001_facto...,0.355832,0.130667,0.289309,0.340356,0.529926,,,,,,,
0,ItemKNN,,2025_03_25_13_29_56,ItemKNN_nn=50_sim=cosine_imp=standard_bin=Fals...,0.369804,0.136176,0.302827,0.358148,0.556939,0.75556,0.126132,595.153867,0.738335,0.969334,153.0,3776.0
0,FeatureItemKNN,emotion,2025_03_25_13_29_34,FeatureItemKNN_nn=50_sim=cosine_msf=0$01_bin=F...,0.374032,0.1375,0.303772,0.356669,0.558263,0.758201,0.126557,592.976324,0.72967,0.967141,155.0,3776.0
0,FeatureItemKNN,audio'-'textual'-'visual,2025_03_25_13_29_12,FeatureItemKNN_nn=70_sim=cosine_msf=0$01_bin=F...,0.375215,0.137553,0.30747,0.363436,0.559057,0.761916,0.127434,606.391419,0.746195,0.972911,149.0,3776.0
0,FeatureItemKNN,audio'-'textual'-'visual'-'emotion,2025_03_25_13_30_21,FeatureItemKNN_nn=70_sim=cosine_msf=0$01_bin=F...,0.375215,0.137553,0.30747,0.363436,0.559057,0.761916,0.127434,606.391419,0.746195,0.972911,149.0,3776.0


In [9]:
# List the full run name of every row in the aggregated table, one per line.
for run_name in df['path'].tolist():
    print(run_name)

FeatureItemKNN_nn=20_sim=cosine_msf=0$15_bin=False_modalites='audio'-'textual'-'visual'_aggregation=ensemble_loads='AudioAttribute'-'TextualAttribute'-'VisualAttribute'
FeatureItemKNN_nn=20_sim=cosine_msf=0$15_bin=False_modalites='audio'-'textual'-'visual'-'emotion'_aggregation=concat_loads='AudioAttribute'-'TextualAttribute'-'VisualAttribute'-'EmotionAttribute'
LightGCN_seed=123_e=200_bs=128_lr=0$0005_factors=64_l_w=1e-05_n_layers=3_normalize=True
FeatureItemKNN_nn=20_sim=cosine_msf=0$01_bin=False_modalites='emotion'_aggregation=concat_loads='EmotionAttribute'
ItemKNN_nn=20_sim=cosine_imp=standard_bin=False_shrink=0_norm=True_asymalpha=_tvalpha=_tvbeta=_rweights=


In [13]:
# Write the aggregated table to a comma-separated file; the row index is left out.
performance_csv = f'../results/{dataset}_performance.csv'
df.to_csv(performance_csv, index=False)

In [16]:
# Write one CSV per cutoff with the model name and that cutoff's metric columns.
# NOTE(review): 'modality' is not exported, so rows of the same model with
# different modalities cannot be told apart in these files — confirm intended.
for cutoff in cutoffs:
    suffix = f'@{cutoff}'
    # match the suffix exactly: a substring test would let '@1' also pick '@10' columns
    df_cut = df[['model'] + [c for c in df.columns if c.endswith(suffix)]]
    df_cut.to_csv(f'../results/{dataset}_performance_{cutoff}.csv', index=False)