In [1]:
import pandas as pd
import numpy as np
import os
import tqdm
import shutil

In [12]:
# Name of the result set to aggregate; it selects the sub-folder under ../results/
# (original note: "onion or emma").
dataset = 'session_onion'
results_dir = f'../results/{dataset}/performance/'

# Ranking cutoffs to load; each value k is matched against files named rec_cutoff_{k}_*.
# Alternative: cutoffs = [1, 5, 10, 20]
cutoffs = [5]

# Results whose timestamp string sorts before this value are discarded.
cutoff_date = '2025_03_25'

# Metric columns that receive an '@k' suffix when a result file is loaded.
# Alternative: metrics = ['Recall', 'Precision', 'nDCG', 'MRR', 'HR']
metrics = [
    'Recall', 'Precision', 'nDCG', 'MRR', 'HR',
    'EFD', 'EPC', 'ARP', 'PopREO', 'PopRSP',
    'ItemCoverage', 'UserCoverage',
]

In [13]:
def find_modality(string):
    """Extract the modality value from a run name.

    Looks for the first occurrence of ``modalites=`` (spelled as it appears in
    the run names) and returns the text between it and the next underscore,
    or the end of the string if there is no underscore.

    Only leading and trailing single quotes are stripped, so a multi-modality
    value keeps its inner quotes, e.g. ``audio'-'textual'-'visual``.

    Args:
        string: Run name to search.

    Returns:
        The modality text, or ``None`` when the keyword is absent.
    """
    _, marker, remainder = string.partition("modalites=")
    if not marker:
        # Keyword not present in the run name.
        return None

    # Everything up to the first underscore (or the whole remainder) is the value.
    value, _, _ = remainder.partition("_")
    return value.strip("'")

In [14]:
# Build `df`: for every cutoff, load all matching result files, keep the newest
# result per (model, modality), and join the cutoffs into one wide frame.
files = os.listdir(results_dir)
df = pd.DataFrame()

for cutoff in cutoffs:
    result_files_cutoff = [f for f in files if f.startswith(f'rec_cutoff_{cutoff}_')]
    if not result_files_cutoff:
        # Fail with a clear message instead of a KeyError on the missing 'date' column below.
        raise FileNotFoundError(f'No rec_cutoff_{cutoff}_* files found in {results_dir}')

    # Gather the per-file frames and concatenate once; growing a DataFrame with
    # pd.concat inside the loop re-copies all previously loaded rows on every pass.
    frames = []
    for f in result_files_cutoff:
        df_tmp = pd.read_csv(os.path.join(results_dir, f), sep='\t')
        run_names = df_tmp['model']  # full run identifier as stored in the file
        tmp_metrics = [m for m in metrics if m in df_tmp.columns]
        frames.append(
            df_tmp.rename(columns={m: f'{m}@{cutoff}' for m in tmp_metrics}).assign(
                # Parse the modality per row so that a multi-row file is not
                # labelled entirely with its first row's modality.
                modality=run_names.map(find_modality, na_action='ignore'),
                path=run_names,
                # Short model name = text before the first underscore.
                model=run_names.str.split('_').str[0],
                # Assumes the file name ends with a 19-character timestamp
                # (YYYY_MM_DD_HH_MM_SS) followed by '.tsv'.
                date=f[-23:-4],
            )
        )
    df_cut = pd.concat(frames, axis=0, ignore_index=True)

    # keep only the newest result per (model, modality)
    df_cut = df_cut.sort_values('date').drop_duplicates(subset=['model', 'modality'], keep='last')

    # drop results produced before the cutoff date (string comparison on the timestamp)
    df_cut = df_cut[df_cut['date'] >= cutoff_date]

    # merge with previous cutoffs
    if not df.empty:
        # 'path' is already present from the first cutoff. Dropping it here keeps a
        # single 'path' column instead of path_x/path_y, which would make the
        # column selection at the end of this cell raise a KeyError.
        df = pd.merge(df, df_cut.drop(columns='path'), on=['model', 'date', 'modality'], how='inner')
        print(df.shape)
    else:
        df = df_cut

# NOTE: the sort key is fixed, so 5 must be one of the configured cutoffs.
df = df.sort_values('nDCG@5')
# display the identifying columns plus every metric column (all cutoffs)
df[['model', 'modality', 'date', 'path'] + [c for c in df.columns if '@' in c]]

Unnamed: 0,model,modality,date,path,Recall@5,Precision@5,nDCG@5,MRR@5,HR@5,EFD@5,EPC@5,ARP@5,PopREO@5,PopRSP@5,ItemCoverage@5,UserCoverage@5
0,LightGCNM,audio'-'textual'-'visual,2025_04_15_09_42_44,LightGCNM_seed=123_e=200_bs=128_lr=0$001_facto...,0.186049,0.056677,0.156791,0.181844,0.257279,0.941197,0.072792,65.569067,0.251166,0.798914,13152.0,62819.0
0,LightGCNM,audio'-'textual'-'visual'-'emotion,2025_04_15_09_42_41,LightGCNM_seed=123_e=200_bs=128_lr=0$001_facto...,0.18618,0.056664,0.157193,0.182537,0.257549,0.943416,0.0729,62.312046,0.247074,0.800811,13452.0,62819.0
0,FeatureItemKNN,audio'-'textual'-'visual'-'emotion,2025_04_15_09_43_04,FeatureItemKNN_nn=70_sim=cosine_msf=0$2_bin=Fa...,0.193102,0.059406,0.163643,0.191544,0.267658,1.016845,0.076796,34.928464,0.102487,0.472422,16499.0,62819.0
0,FeatureItemKNN,audio'-'textual'-'visual,2025_04_15_09_43_07,FeatureItemKNN_nn=20_sim=cosine_msf=0$3_bin=Fa...,0.19379,0.059597,0.164079,0.19169,0.268072,1.020292,0.076949,33.905911,0.092566,0.432537,16620.0,62819.0
0,LightGCNM,emotion,2025_04_15_09_43_32,LightGCNM_seed=123_e=200_bs=128_lr=0$001_facto...,0.383097,0.121236,0.296644,0.29109,0.438863,1.796751,0.139653,66.243771,0.339585,0.826639,14459.0,68379.0
0,LightGCN,,2025_04_15_09_43_12,LightGCN_seed=123_e=200_bs=128_lr=0$001_factor...,0.388486,0.122865,0.300647,0.29412,0.444537,1.821277,0.141387,73.832244,0.311187,0.841498,14594.0,68379.0
0,FeatureItemKNN,emotion,2025_04_15_09_43_21,FeatureItemKNN_nn=20_sim=dot_msf=0$3_bin=False...,0.399666,0.127688,0.311816,0.303297,0.447711,1.915567,0.14785,56.631875,0.254536,0.709633,16854.0,68379.0
0,ItemKNN,,2025_04_15_09_43_44,ItemKNN_nn=20_sim=dot_imp=standard_bin=False_s...,0.400555,0.127893,0.312604,0.304038,0.448383,1.920175,0.148158,56.52725,0.250617,0.699215,17270.0,68379.0


In [11]:
def shell_safe_string(s):
    """Return ``s`` as a single-quoted shell word.

    Every single quote inside ``s`` becomes ``'\\''`` (close the quote, emit an
    escaped quote, reopen the quote) and the result is wrapped in single quotes.
    """
    # Splitting on the quote and re-joining with the escape sequence is the
    # same substitution as a plain replace.
    pieces = s.split("'")
    return "'" + "'\\''".join(pieces) + "'"

# Print one shell-quoted '<run name>.tsv' file name per row of `df`.
# NOTE(review): this cell's execution count is lower than that of the cell that
# builds `df`, so the stored output may come from an earlier `df` — re-run to confirm.
quoted_names = [shell_safe_string(run_name + '.tsv') for run_name in df['path'].values]
for quoted_name in quoted_names:
    print(quoted_name)

'LightGCNM_seed=123_e=200_bs=128_lr=0$001_factors=64_l_w=1e-05_modalites='\''emotion'\''_n_layers=2_normalize=True_aggregation=concat_loads='\''EmotionAttribute'\''.tsv'
'LightGCNM_seed=123_e=200_bs=128_lr=0$001_factors=64_l_w=1e-05_modalites='\''audio'\''-'\''textual'\''-'\''visual'\''_n_layers=2_normalize=True_aggregation=concat_loads='\''AudioAttribute'\''-'\''TextualAttribute'\''-'\''VisualAttribute'\''.tsv'
'LightGCNM_seed=123_e=200_bs=128_lr=0$001_factors=64_l_w=1e-05_modalites='\''audio'\''-'\''textual'\''-'\''visual'\''-'\''emotion'\''_n_layers=2_normalize=True_aggregation=concat_loads='\''AudioAttribute'\''-'\''TextualAttribute'\''-'\''VisualAttribute'\''-'\''EmotionAttribute'\''.tsv'
'LightGCN_seed=123_e=200_bs=128_lr=0$0005_factors=64_l_w=1e-05_n_layers=3_normalize=True.tsv'
'FeatureItemKNN_nn=100_sim=cosine_msf=0$01_bin=False_modalites='\''emotion'\''_aggregation=ensemble_loads='\''EmotionAttribute'\''.tsv'
'FeatureItemKNN_nn=100_sim=cosine_msf=0$05_bin=False_modalites='\''

In [15]:
# Export to a comma-separated file: model, modality and every metric column,
# with float values rounded to 4 decimals. `df` itself is left unrounded.
metric_columns = [c for c in df.columns if '@' in c]
df_export = df.copy()
float_columns = df_export.select_dtypes(include=['float']).columns
rounded_values = df_export[float_columns].round(4)
df_export[float_columns] = rounded_values
export_path = f'../results/{dataset}_performance.csv'
df_export[['model', 'modality'] + metric_columns].to_csv(export_path, index=False)

In [16]:
# Write one CSV per cutoff containing the model name and that cutoff's metrics.
# NOTE(review): 'modality' is not exported here, so rows of the same model with
# different modalities cannot be told apart in these files — confirm this is intended.
for cutoff in cutoffs:
    # Match the '@k' suffix exactly; a substring test would make '@1' also
    # select the '@10' columns (and '@5' select '@50').
    suffix = f'@{cutoff}'
    df_cut = df[['model'] + [c for c in df.columns if c.endswith(suffix)]]
    df_cut.to_csv(f'../results/{dataset}_performance_{cutoff}.csv', index=False)