In [1]:
import os
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import json

import matplotlib as mpl
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from matplotlib.lines import Line2D
from matplotlib.legend_handler import HandlerTuple
from matplotlib import gridspec
import seaborn as sns

from scipy.stats import wilcoxon
#check if 1.7 (old statistic) or 1.11 (new statistic)
import scipy
print(scipy.__version__)

1.11.1


In [None]:
# Compile the HEAR-EVAL results of the group models from the per-benchmark JSON files
# into a single table (one row per result directory) and save it as CSV.
path = '/home/maellef/Results/finefriends/group_model/HEAReval'
pair_task_test = '/home/maellef/git/cNeuromod_encoding_2020/benchmark/HEAR-EVAL/task-test.csv'

# Lookup table: for each benchmark ('task' column), the metric to report ('test' column).
task_test_df = pd.read_csv(pair_task_test, sep=',')

# One dict per result directory; the DataFrame is built once at the end instead of
# being grown with pd.concat inside the loop.
group_records = []

# sorted(): os.listdir returns entries in arbitrary order, sorting makes the row and
# column order of the CSV reproducible.
for run_dir in sorted(os.listdir(path)):
    atlas = 'STG' if 'auditory_Voxels' in run_dir else 'wholebrain'
    sub_dict = {
        'model': 'group',
        'finetune': 'conv4',
        'subject': run_dir[:6],  # directory names start with the subject id, e.g. 'sub-01'
        'atlas': atlas,
    }

    model_path = os.path.join(path, run_dir, 'soundnetbrain_hear')
    for benchmark in sorted(os.listdir(model_path)):
        test = task_test_df['test'].loc[task_test_df['task'] == benchmark].values[0]
        benchmark_score_path = os.path.join(model_path, benchmark, 'test.predicted-scores.json')
        with open(benchmark_score_path, 'r') as file:
            data = json.load(file)

        # The scores are stored under 'test' or under 'aggregated_scores'.
        if 'test' in data:
            scores = data['test']
        elif 'aggregated_scores' in data:
            scores = data['aggregated_scores']
        else:
            # Previously this case silently reused the scores of the previous benchmark.
            raise KeyError(
                f"neither 'test' nor 'aggregated_scores' found in {benchmark_score_path}"
            )
        sub_dict[f'{benchmark}_{test}'] = scores[test]

    group_records.append(sub_dict)

group_metrics_df = pd.DataFrame(group_records)

# The index is written on purpose: the merge cell drops the resulting 'Unnamed: 0' column.
group_metrics_df.to_csv('/home/maellef/git/cNeuromod_encoding_2020/benchmark/HEAR-EVAL/metrics_group.csv', sep=',')
print(group_metrics_df.columns)

In [2]:
# Paths and global settings shared by the cells below.
subs = [f'sub-{sub_id:02d}' for sub_id in range(1, 7)]
ft_models = [
    'conv4',
    'conv5',
    'conv6',
    'conv7',
    'no',
]
scales = ['wholebrain', 'STG']
baseline = 'no'
path_to_repo = '/home/maellef/git' #'/home/maelle/GitHub_repositories'

# All HEAR-EVAL result tables live in the same directory of the repository.
hear_eval_dir = f'{path_to_repo}/cNeuromod_encoding_2020/benchmark/HEAR-EVAL'
csv_path_sub236 = f'{hear_eval_dir}/metrics.csv'
csv_path_sub145 = f'{hear_eval_dir}/metrics_145.csv'
csv_path_group = f'{hear_eval_dir}/metrics_group.csv'
csv_path_leaderboard = f'{hear_eval_dir}/leaderboard.csv'

In [3]:
#merge & format all models results from HEAREVAL
# `eq` maps the raw HEAR-EVAL column names to the display names used in this notebook.
# Each entry below is (raw column, display name, has_std). When has_std is True the
# mapping gets two entries: '<raw>_mean' -> '<display>' and '<raw>_std' -> '<display>_std'.
_hear_columns = [
    ('beehive_states_fold0-v2-full_test_aucroc', 'Beehive', False),
    ('beehive_states_fold1-v2-full_test_aucroc', 'Beehive_fold1', False),
    ('beijing_opera-v1.0-hear2021-full_test_top1_acc', 'Beijing Opera', True),
    ('tfds_crema_d-1.0.0-full_test_top1_acc', 'CREMA-D', True),
    ('dcase2016_task2-hear2021-full_test_event_onset_200ms_fms', 'DCASE 2016', False),
    ('esc50-v2.0.0-full_test_top1_acc', 'ESC-50', True),
    ('fsd50k-v1.0-full_test_mAP', 'FSD50K', False),
    ('tfds_gtzan-1.0.0-full_test_top1_acc', 'GTZAN Genre', True),
    ('tfds_gtzan_music_speech-1.0.0-full_test_top1_acc', 'GTZAN Music/Speech', True),
    ('gunshot_triangulation-v1.0-full_test_top1_acc', 'Gunshot', True),
    ('libricount-v1.0.0-hear2021-full_test_top1_acc', 'Libricount', True),
    ('maestro-v3.0.0-5h_test_event_onset_50ms_fms', 'Maestro 5h', True),
    ('mridangam_stroke-v1.5-full_test_top1_acc', 'Mridangam Stroke', True),
    ('mridangam_tonic-v1.5-full_test_top1_acc', 'Mridangam Tonic', True),
    ('nsynth_pitch-v2.2.3-50h_test_pitch_acc', 'NSynth Pitch 50h', False),
    ('nsynth_pitch-v2.2.3-5h_test_pitch_acc', 'NSynth Pitch 5h', False),
    ('speech_commands-v0.0.2-5h_test_top1_acc', 'Speech commands 5h', False),
    ('speech_commands-v0.0.2-full_test_top1_acc', 'Speech commands full', False),
    ('vocal_imitation-v1.1.3-full_test_mAP', 'Vocal Imitation', True),
    ('vox_lingua_top10-hear2021-full_test_top1_acc', 'VoxLingua107 top 10', True),
]

eq = {'model': 'Model'}
for raw_name, display_name, has_std in _hear_columns:
    if has_std:
        eq[f'{raw_name}_mean'] = display_name
        eq[f'{raw_name}_std'] = f'{display_name}_std'
    else:
        eq[raw_name] = display_name

# Load the three result tables (subjects 2/3/6, subjects 1/4/5, group models).
df_236, df_145, df_group = [
    pd.read_csv(csv_path)
    for csv_path in (csv_path_sub236, csv_path_sub145, csv_path_group)
]

# Stack them, order the rows by subject then model, drop the index column saved in the
# CSV files and switch to the display names defined in `eq`.
HEAREVAL_df = (
    pd.concat([df_236, df_145, df_group], ignore_index=True)
    .sort_values(by=['subject', 'model'])
    .drop('Unnamed: 0', axis='columns')
    .rename(columns=eq)
)
models = HEAREVAL_df['Model'].copy()

In [4]:
# Divide the HEAR-EVAL leaderboard into two categories: small models vs big models.
# Every leaderboard model that is not listed here is treated as a big model.
small_models = [
    'OpenL3',
    'Descript/MARL Wav2CLIP',
    'IUT-CSE MLP (audio)',
    'IUT-CSE MLP (keyword)',
    'Logitech AI SERAB BYOL-S',
    'RedRice EfficientNet-B2',
    'Sony UDONS ViT',
    'Soundsensing YAMNet',
]

# The 'URL' column is not a score and is removed right after loading.
HEAREVAL_leaderboard_full = pd.read_csv(csv_path_leaderboard).drop(columns='URL')

is_small_model = HEAREVAL_leaderboard_full['Model'].isin(small_models)
HEAREVAL_leaderboard_small = HEAREVAL_leaderboard_full[is_small_model]
HEAREVAL_leaderboard_big = HEAREVAL_leaderboard_full[~is_small_model]

In [5]:
# Divide the tasks by the size of their training dataset.
# Sizes are stored in the 'size(s)' column and converted to minutes and hours below;
# the dict is ordered from the smallest to the largest training set.
tasks_training_size = {
    'Gunshot':114,
    'Beijing Opera':902,
    'GTZAN Music/Speech':3456,
    'Mridangam Stroke':4521,
    'Mridangam Tonic':4521,
    'DCASE 2016':6912,
    'ESC-50':8000,
    'VoxLingua107 top 10':14494,
    'NSynth Pitch 5h':16000,
    'Maestro 5h':17760,
    'Speech commands 5h':18312,
    'Libricount':22880,
    'GTZAN Genre':27000,
    'CREMA-D':29755,
    'Vocal Imitation':42044,
    'Speech commands full':80402,
    'NSynth Pitch 50h':156992,
    'Beehive':276480,
    'FSD50K':289440
}
tasks = list(tasks_training_size.keys())
sizes = list(tasks_training_size.values())
tasks_training = {'task': tasks,
                  'size(s)': sizes}

task_training_df = pd.DataFrame(data=tasks_training)
task_training_df['size(min)'] = task_training_df['size(s)'] / 60
task_training_df['size(hour)'] = task_training_df['size(s)'] / 60 / 60

# Three half-open bins on the size in minutes: [0, 90), [90, 600) and [600, inf).
# The lower bounds are inclusive so that a task lying exactly on a threshold still
# belongs to one group (with strict inequalities on both sides it was dropped from all).
size_min = task_training_df['size(min)']
task_training_small = task_training_df.loc[size_min < 90]
task_training_medium = task_training_df.loc[(size_min >= 90) & (size_min < 600)]
task_training_big = task_training_df.loc[size_min >= 600]

In [6]:
#function for ranking :
def rank_models_amongst_HEAREVAL(models, small=False, big=False, leaderboard=None):
    """Rank the given models against the HEAR-EVAL leaderboard models.

    Parameters
    ----------
    models : pandas.DataFrame
        Rows to rank, with a 'Model' column and score columns named like the
        leaderboard ones. Columns missing on either side are filled with NaN.
    small, big : bool
        Select the reference leaderboard when `leaderboard` is not given:
        both True -> full leaderboard, only `big` True -> big models,
        anything else (including both False) -> small models.
    leaderboard : pandas.DataFrame, optional
        Reference table to rank against; overrides `small` / `big`.

    Returns
    -------
    pandas.DataFrame
        Per-column ranks (rank 1 = highest value, ties get the average rank) with
        the 'Model' column restored to the model names. The leaderboard rows come
        first and the rows of `models` are always the LAST rows, in their original
        order; callers rely on this to retrieve them with `[-1:]` / `[-2:]`.
    """
    if leaderboard is None:
        if small and big:
            leaderboard = HEAREVAL_leaderboard_full
        elif big:
            leaderboard = HEAREVAL_leaderboard_big
        else:
            leaderboard = HEAREVAL_leaderboard_small

    # Rows are stacked with concat rather than an outer merge: pandas documents that an
    # outer merge sorts the join keys lexicographically, which does not guarantee that
    # the appended models stay at the end of the table.
    merged_df = pd.concat([leaderboard, models], axis='rows', join='outer', ignore_index=True)

    # numeric_only=False ranks every column, 'Model' included; the names are put back
    # right after so the rows can still be identified.
    ranked_df = merged_df.rank(numeric_only=False, ascending=False)
    ranked_df['Model'] = merged_df['Model']
    return ranked_df

In [7]:
# Build the SoundNet baselines: the rows without finetuning ('finetune' == 'no') and
# their column-wise median and mean over the numeric columns.
is_not_finetuned = HEAREVAL_df['finetune'].eq('no')
soundnet_HE_results = HEAREVAL_df.loc[is_not_finetuned]
soundnet_median_results = soundnet_HE_results.median(numeric_only=True)
soundnet_mean_results = soundnet_HE_results.mean(numeric_only=True)

In [None]:
# HEAR EVAL Task scores for every model
# Builds one table indexed by task with summary statistics over all models, the
# SoundNet baseline score, the score of every conv4-finetuned model and its difference
# to the baseline, then saves it as CSV.
#atlas = 'STG' #'wholebrain' 

# Baseline row: median over the non-finetuned runs, labelled 'soundnet'.
soundnet_model = soundnet_median_results.to_frame().T
soundnet_model['Model'] = 'soundnet'
others_models = HEAREVAL_leaderboard_full
finetuned_models = HEAREVAL_df.loc[(HEAREVAL_df['finetune']=='conv4')].copy() #& (HEAREVAL_df['atlas']==atlas)
# Give each finetuned row a unique 'Model' label '<subject>_<finetune>_<atlas>',
# with a '_group' suffix when the original 'Model' value is 'group'.
for i, row in finetuned_models.iterrows():
    model_name = f'{row["subject"]}_{row["finetune"]}_{row["atlas"]}'
    finetuned_models.at[i, 'Model'] = model_name+'_group' if row['Model']=='group' else model_name
finetuned_models.drop(['atlas', 'finetune'], axis='columns', inplace=True)

# join='inner' keeps only the columns shared by the leaderboard, the finetuned models
# and the baseline.
all_models = pd.concat([others_models, finetuned_models, soundnet_model], axis='rows', join='inner')
# Per-column summaries; the result is indexed by column name (one row per task, plus a
# 'Model' row that is removed below).
HEAREVAL_tasks = pd.DataFrame({'nb_models': all_models.count(), 
                               'min':all_models.min(), 
                               'max':all_models.max(),
                               'median':all_models.median(numeric_only=True)})
HEAREVAL_tasks['Soundnet'] = soundnet_model.T
HEAREVAL_tasks.drop('Model', axis='rows', inplace=True)

# One score column and one 'diff_' column (score minus SoundNet baseline) per finetuned model.
# NOTE(review): `row` also carries non-score entries ('Model', 'subject', ...); the
# assignments presumably rely on pandas aligning on the task index so that only task
# rows are kept - confirm.
for i, row in finetuned_models.iterrows():
    col_name = row['Model']
    diff_name = 'diff_'+row['Model']
    
    HEAREVAL_tasks[col_name] = row
    HEAREVAL_tasks[diff_name] = row-HEAREVAL_tasks['Soundnet']

# Metric reported for each task, stored in the 'test' column of the table.
type_test = {
    'Beehive':'aucroc',
    'Beijing Opera':'top1_acc',
    'CREMA-D':'top1_acc',
    'DCASE 2016':'event_onset_200ms',
    'ESC-50':'top1_acc',
    'FSD50K':'mAP',
    'GTZAN Genre':'top1_acc',
    'GTZAN Music/Speech':'top1_acc',
    'Gunshot':'top1_acc',
    'Libricount':'top1_acc',
    'Maestro 5h':'event_onset_50ms',
    'Mridangam Stroke':'top1_acc',
    'Mridangam Tonic':'top1_acc',
    'NSynth Pitch 5h':'pitch_acc',
    'NSynth Pitch 50h':'pitch_acc',
    'Speech commands 5h':'top1_acc',
    'Speech commands full':'top1_acc',
    'Vocal Imitation':'mAP',
    'VoxLingua107 top 10':'top1_acc', 
}
test_df = pd.DataFrame(type_test.values(), index = type_test.keys())
HEAREVAL_tasks['test'] = test_df
print(HEAREVAL_tasks)
# This CSV is read back by the statistics cells further down.
HEAREVAL_tasks.to_csv('/home/maellef/Results/finefriends/figures/figure_papier/HEAREVAL_tasks_data.csv')

In [33]:
# Per-task Wilcoxon signed-rank test on the score differences (finetuned model minus
# SoundNet baseline) stored in the 'diff_*' columns of the task table.
HEAR_tasks = pd.read_csv('/home/maellef/Results/finefriends/figures/figure_papier/HEAREVAL_tasks_data.csv', index_col=0)

#atlas = 'STG' #'wholebrain'
include_group = True
include_indvd = True

all_diff = HEAR_tasks.filter(like='diff', axis=1)
#all_diff = all_diff.filter(like=atlas, axis=1)
if include_group and not include_indvd:
    all_diff = all_diff.filter(like='group', axis=1)
elif include_indvd and not include_group:
    # Build a new frame instead of dropping in place on the result of .filter().
    group_columns = list(all_diff.filter(regex="group", axis=1).columns)
    all_diff = all_diff.drop(columns=group_columns)

# Denominator of the summary, taken from the data rather than hard-coded.
n_tasks = len(all_diff)

diffpos, diffneg, same = 0, 0, 0
negtasks, nosigtasks = [], []
for task, row in all_diff.iterrows():
    _, pvalue = wilcoxon(row.values, nan_policy='propagate')
    significant = pvalue < 0.05
    median_diff = np.median(row.values)
    print(task, pvalue, significant)
    # NOTE(review): a task whose p-value is NaN, or whose median difference is exactly
    # 0 (even with p < 0.05), falls through to the "no change" bucket. The other
    # Wilcoxon cells of this notebook use nan_policy='omit' - confirm which policy is
    # intended here.
    if significant and median_diff > 0:
        diffpos += 1
    elif significant and median_diff < 0:
        diffneg += 1
        negtasks.append(task)
    else:
        same += 1
        nosigtasks.append(task)
print(f'number of tasks with significantly improved performance: {diffpos}/{n_tasks}, decreased performance: {diffneg}/{n_tasks}, {negtasks}, no change: {same}/{n_tasks}, {nosigtasks}')

Beehive 0.07379257678985596 False
Beijing Opera 1.6689300537109375e-06 True
CREMA-D 1.633167266845703e-05 True
DCASE 2016 0.0035042762756347656 True
ESC-50 5.960464477539062e-07 True
FSD50K 2.384185791015625e-07 True
GTZAN Genre 1.1920928955078125e-07 True
GTZAN Music/Speech 0.04248917102813721 True
Gunshot 1.6689300537109375e-06 True
Libricount 1.1920928955078125e-07 True
Maestro 5h 0.027281171477617997 True
Mridangam Stroke 0.7682963609695435 False
Mridangam Tonic nan False
NSynth Pitch 50h 1.1920928955078125e-07 True
NSynth Pitch 5h 0.00027811527252197266 True
Speech commands 5h 0.0008462667465209961 True
Speech commands full 1.633167266845703e-05 True
Vocal Imitation nan False
VoxLingua107 top 10 0.005897641181945801 True
number of tasks with significantly improved performance: 12/19, decreased performance: 2/19, ['DCASE 2016', 'VoxLingua107 top 10'], no change: 5/19, ['Beehive', 'Maestro 5h', 'Mridangam Stroke', 'Mridangam Tonic', 'Vocal Imitation']




In [None]:
#HEAR EVAL boxplot: distribution of the task scores for each family of models
all_models2 = all_models.copy()
big_models = set(HEAREVAL_leaderboard_big['Model'])

# Label every model with a 'subject' and a 'scale' (the family it is plotted under).
# Both labels are assigned in every branch, so no value can leak from a previous
# iteration (or be undefined on the first one).
subject, scale = [], []
for model_name in all_models2['Model']:
    if model_name == 'soundnet':
        model_sub = 'Soundnet'
        model_scale = 'Soundnet'
    elif 'sub-' in model_name:
        # Finetuned models are named '<subject>_<finetune>_<atlas>[_group]'.
        model_sub = model_name[:6]
        model_scale = 'STG' if 'STG' in model_name else 'Whole Brain'
        if 'group' in model_name:
            model_scale += ' group'
    elif model_name in small_models:
        model_scale = 'HEAR EVAL small'
        model_sub = model_scale
    elif model_name in big_models:
        model_scale = 'HEAR EVAL big'
        model_sub = model_scale
    else:
        raise ValueError(f'cannot assign a model family to {model_name!r}')

    scale.append(model_scale)
    subject.append(model_sub)

all_models2['subject'] = subject
all_models2['scale'] = scale

df_graph = all_models2.melt(id_vars=['Model','subject', 'scale'], var_name = 'task', value_name = 'result')
print(df_graph)

# Optionally keep only one family of leaderboard models ('small' or 'big'); any other
# value keeps both. The family is stored in the 'scale' column.
models_size = 'both'
excluded_scale = {'small': 'HEAR EVAL big', 'big': 'HEAR EVAL small'}.get(models_size)
if excluded_scale is not None:
    df_graph = df_graph.loc[df_graph['scale'] != excluded_scale]

palette = {'HEAR EVAL big':'dimgray',
           'HEAR EVAL small':'darkgray',
           'STG':'teal',
           'STG group':'turquoise',
           'Whole Brain':'indigo',
           'Whole Brain group':'mediumpurple',
           'Soundnet':'firebrick',
          }
hue_order = ['HEAR EVAL big', 'HEAR EVAL small', 'STG', 'STG group', 'Whole Brain', 'Whole Brain group', 'Soundnet']

fig, ax = plt.subplots(figsize=(35, 20))
sns.boxplot(data=df_graph, x="task", y="result", hue="scale", hue_order=hue_order, dodge=True,
            order=tasks, palette=palette, linewidth=0.2, medianprops={"color": "w", "linewidth": 0.5},
            ax=ax)

plt.xticks(rotation = 90)
plt.yticks(rotation = 90)
ax.set(ylabel="results for {} models".format(models_size))
ax.set(xlabel="")
plt.savefig('/home/maellef/Results/finefriends/figures/figure_papier/HEAR_EVAL_{}_models_with_group_boxplot.png'.format(models_size), 
            bbox_inches='tight', transparent=False, dpi=300)

In [None]:
#confidence interval for soundnet model : score/rank

# Rank every non-finetuned SoundNet run separately among the leaderboard models
# (default arguments of rank_models_amongst_HEAREVAL) and keep its rank row, which is
# the last row of the ranked table. The rows are collected in a list and concatenated
# once, and the global `soundnet_model` defined by another cell is left untouched.
soundnet_rank_rows = []
for _, run in soundnet_HE_results.iterrows():
    run_df = run.to_frame().T
    all_models_ranked_df = rank_models_amongst_HEAREVAL(models = run_df)
    soundnet_rank_rows.append(all_models_ranked_df[-1:])
soundnet_ranks = pd.concat(soundnet_rank_rows)

soundnet_ranks = soundnet_ranks.set_index(keys='Model')
# Keep only the task columns: remove the '_std' / '_fold1' variants and the run metadata.
extra_columns = list(soundnet_ranks.filter(regex='_std|_fold1')) + ['subject', 'finetune', 'atlas']
soundnet_ranks = soundnet_ranks.drop(extra_columns, axis='columns')

# One row per task with the descriptive statistics of the ranks over the runs.
a = soundnet_ranks.describe().T.drop('count', axis=1)
a['75-25'] = a['75%'] - a['25%']

# For each task: number of runs whose rank lies outside the [25%, 75%] interval, and
# the raw rank values.
outliers_nb = []
all_ranks = []
for test in soundnet_ranks.columns:
    test_results = soundnet_ranks[test].values
    low_threshold = a.loc[test, '25%']
    high_threshold = a.loc[test, '75%']
    outside_quartiles = (test_results < low_threshold) | (test_results > high_threshold)
    outliers_nb.append(outside_quartiles.sum())
    all_ranks.append(test_results)

a['outliers_nb'] = outliers_nb
a['ranks_values'] = all_ranks
print(a)
# NOTE(review): other cells write under '/home/maellef/Results/finefriends/figures/' -
# confirm that this path (without 'finefriends') is intended.
a.to_csv('/home/maellef/Results/figures/figure_papier/Soundnet_data.csv')

In [8]:
# Rank every finetuned model (individual and group training data, for each subject and
# brain scale) against the HEAR-EVAL leaderboard together with the SoundNet median
# baseline, and collect rank and rank difference per task in the long table `all_df`.
soundnet_baseline = soundnet_median_results.to_frame().T
soundnet_baseline['Model'] = 'soundnet_median'
selected_finetune = 'conv4'
all_df = pd.DataFrame()
# Reference leaderboard: both True -> full leaderboard (see rank_models_amongst_HEAREVAL).
small = True
big = True
# False: the finetuned model and the baseline are ranked in the same table;
# True: each of them is ranked alone against the leaderboard.
solo_soundnet = False

for sub in subs:
    for scale in scales:
        # Individual-data model first, then group-data model: later cells rely on this
        # order when they read the two rows of a (subject, scale, task) triplet.
        for group in [False, True]:
            #1 - model selection & baseline addition
            conditions_list = [(HEAREVAL_df['subject'] == sub),
                               (HEAREVAL_df['atlas'] == scale), 
                               (HEAREVAL_df['finetune'] == selected_finetune)]
            if group:
                conditions_list.append((HEAREVAL_df['Model'] == 'group'))
            else:
                conditions_list.append((HEAREVAL_df['Model'] != 'group'))
    
            # Combine all the conditions into a single boolean mask.
            full_cdt = True
            for condition in conditions_list:
                full_cdt &= condition     
            selected_df = HEAREVAL_df.loc[full_cdt]
            
            # Hyper-parameters of the selected model, indexed by model name so they can
            # be joined back to the rank rows below.
            model_hp = selected_df[['Model', 'subject','atlas','finetune']].set_index(keys='Model')
            model_hp['training_data'] = 'group' if group else 'individual'
            if not solo_soundnet:
                #Version 1 - rank both soundnet and finetune at the same time
                selected_df = pd.concat([selected_df, soundnet_baseline]).drop(labels=['subject','atlas','finetune'], axis='columns')
            else:
                #version 2 - rank each model individually
                selected_df = selected_df.drop(labels=['subject','atlas','finetune'], axis='columns')
                
            #2 - ranking of selected model + baseline amongst HEAREVAL models
            all_models_ranked_df = rank_models_amongst_HEAREVAL(models = selected_df, small=small, big=big)
            if solo_soundnet:
                soundnet_rank_df = rank_models_amongst_HEAREVAL(models = soundnet_baseline, small=small, big=big)
            
            # The ranked models are expected at the end of the ranked table: the last two
            # rows (finetuned model, then baseline) or the last row of each table.
            # NOTE(review): if `selected_df` matched no row, [-2:] would pick up a
            # leaderboard model instead - confirm every combination exists.
            if not solo_soundnet:
                finetuned_ranked_df = all_models_ranked_df[-2:].set_index(keys='Model')
            else:
                finetuned_ranked_df = all_models_ranked_df[-1:].set_index(keys='Model')
                soundnet_rank_df = soundnet_rank_df[-1:].set_index(keys='Model')
                finetuned_ranked_df = pd.concat([finetuned_ranked_df, soundnet_rank_df])
            
            # diff(periods=-1) is row - next row; multiplied by -1 the first row becomes
            # baseline rank - finetuned rank, i.e. positive when the finetuned model is
            # ranked better (rank 1 is the highest score).
            ranked_diff = finetuned_ranked_df.diff(periods=-1).mul(-1)
            
            #3 - melt all test columns into one column 'test' + add both rank and diff rank columns
            a = []
            # Keep the rank row of the baseline for a later cell; it is overwritten on
            # every scale/group iteration of sub-01.
            if sub == 'sub-01':
                soundnet_median_ranks = finetuned_ranked_df.loc['soundnet_median'].to_frame().T
                
            ft_ranked = finetuned_ranked_df.iloc[0].to_frame().T
            ft_ranked_diff = ranked_diff.iloc[0].to_frame().T
            for result, df in zip(('rank', 'diff_rank'), [ft_ranked, ft_ranked_diff]):
                model_diff = pd.concat([model_hp, df], axis='columns', join='inner')
                model_diff.drop(list(model_diff.filter(regex='_std|_fold1')), axis='columns', inplace=True)
                model_melt = model_diff.melt(id_vars=['subject','atlas','finetune', 'training_data'], 
                                                   var_name = 'test', value_name = result)
                a.append(model_melt)
                
            #merge all df for visualisation
            # a[0] holds the ranks and a[1] the rank differences; they are merged on
            # their shared columns.
            sub_results_df = pd.merge(a[0], a[1], how = 'inner')   
            all_df = pd.concat([all_df, sub_results_df], axis=0, ignore_index=True)

In [31]:
# Wilcoxon signed-rank test on the score difference between the two models of each
# subject and brain scale, then on all the differences pooled together.
HEAR_tasks = pd.read_csv('/home/maellef/Results/finefriends/figures/figure_papier/HEAREVAL_tasks_data.csv', index_col=0)
all_dif_score = []
for scale in scales:
    for sub in subs:
        # Columns whose name starts with the subject id (this excludes the 'diff_*'
        # columns) and contains the scale; diff(axis=1) subtracts each column from the
        # next one, so the last column holds "last score column - previous one".
        # NOTE(review): the sign of the difference depends on the column order of the
        # CSV - presumably group minus individual, confirm.
        sub_scores_diff = HEAR_tasks.filter(regex=f'^{sub}').filter(regex=scale).diff(axis=1)
        diff_val = sub_scores_diff.iloc[:,-1].values
        all_dif_score.extend(diff_val)
        _, pvalue = wilcoxon(diff_val, nan_policy='omit')
        print(sub, scale, np.nanmean(diff_val), pvalue, pvalue<0.05 )

# Pooled test over every subject and scale; `all_dif_score` was collected for it but
# the test itself was missing from the cell.
_, pvalue = wilcoxon(all_dif_score, nan_policy='omit')
print(np.nanmean(all_dif_score), pvalue, pvalue<0.05)

sub-01 wholebrain -0.00010423713589790339 0.7771175732880233 False
sub-02 wholebrain -0.004733746158422963 0.35197884167136195 False
sub-03 wholebrain 0.0021345388053102842 0.8647045760375567 False
sub-04 wholebrain 0.004415382073170103 0.060207366943359375 False
sub-05 wholebrain 0.010610329711872995 0.4413337707519531 False
sub-06 wholebrain -0.0003287551422034214 0.4037628173828125 False
sub-01 STG -0.009115566916777288 0.001176563999937216 True
sub-02 STG -6.127998180037207e-05 0.7173808884143212 False
sub-03 STG -0.0029346280492379135 0.3778228759765625 False
sub-04 STG -0.00667588016093364 0.28597919066405664 False
sub-05 STG -0.013841588133978044 0.001176563999937216 True
sub-06 STG -0.007775743340593757 0.46911315243545415 False
-0.002372282807529862 0.03917421676388495 True




In [47]:
# Compare the rank gains ('diff_rank') of the group and individual models with Wilcoxon
# signed-rank tests: per subject, per brain scale, over everything, and between scales.
tasks = list(tasks_training_size.keys())
all_diff_group_ind, all_group, all_ind = [], [], []
both_scale = []
for scale in scales:
    scale_diff, scale_group, scale_ind = [], [], []
    scale_rows = all_df['atlas'] == scale
    for sub in subs:
        sub_diff, sub_ind, sub_group = [], [], []
        sub_rows = scale_rows & (all_df['subject'] == sub)
        for task in tasks:
            # The first matching row is treated as the individual model and the second
            # one as the group model.
            ranks = all_df.loc[sub_rows & (all_df['test'] == task), 'diff_rank'].values
            ind_rank, group_rank = ranks[0], ranks[1]
            diff_group_ind = group_rank - ind_rank

            # Each value feeds the subject-, scale- and global-level lists.
            for ind_values in (sub_ind, scale_ind, all_ind):
                ind_values.append(ind_rank)
            for group_values in (sub_group, scale_group, all_group):
                group_values.append(group_rank)
            for diff_values in (sub_diff, scale_diff, all_diff_group_ind):
                diff_values.append(diff_group_ind)

        _, pvalue = wilcoxon(sub_diff, nan_policy='omit')
        print(sub, scale, np.nanmean(sub_diff), 'mean group: ', np.nanmean(sub_group),', mean ind: ', np.nanmean(sub_ind), pvalue, pvalue<0.05, )

    _, pvalue = wilcoxon(scale_diff, nan_policy='omit')
    print(scale, np.nanmean(scale_diff), 'mean group: ', np.nanmean(scale_group),', mean ind: ', np.nanmean(scale_ind), pvalue, pvalue<0.05,)
    both_scale.append(scale_ind + scale_group)

_, pvalue = wilcoxon(all_diff_group_ind, nan_policy='omit')
print(np.nanmean(all_diff_group_ind), 'mean group: ', np.nanmean(all_group),', mean ind: ', np.nanmean(all_ind), pvalue, pvalue<0.05)

# Paired comparison between the two scales (first entry of `scales` minus second).
first_scale_ranks, second_scale_ranks = both_scale[0], both_scale[1]
diff_scale = np.array(first_scale_ranks) - np.array(second_scale_ranks)
_, pvalue = wilcoxon(diff_scale, nan_policy='omit')
print('mean wholebrain: ', np.nanmean(first_scale_ranks), ', stg: ', np.nanmean(second_scale_ranks), np.nanmean(diff_scale), pvalue, pvalue<0.05)


sub-01 wholebrain -0.8947368421052632 mean group:  1.1578947368421053 , mean ind:  2.0526315789473686 0.3844120797513729 False
sub-02 wholebrain 1.088235294117647 mean group:  2.236842105263158 , mean ind:  1.2941176470588236 0.0649777819760697 False
sub-03 wholebrain 0.0 mean group:  2.0526315789473686 , mean ind:  2.235294117647059 0.5486660989062389 False
sub-04 wholebrain -0.9473684210526315 mean group:  1.868421052631579 , mean ind:  2.8157894736842106 0.018036975635797568 True
sub-05 wholebrain -0.4473684210526316 mean group:  1.6842105263157894 , mean ind:  2.1315789473684212 0.9719968677847196 False
sub-06 wholebrain -0.9705882352941176 mean group:  1.763157894736842 , mean ind:  2.8823529411764706 0.37173026702997414 False
wholebrain -0.38425925925925924 mean group:  1.793859649122807 , mean ind:  2.240740740740741 0.49927819105799587 False
sub-01 STG 1.105263157894737 mean group:  2.8421052631578947 , mean ind:  1.736842105263158 0.005118643800037987 True
sub-02 STG 0.3529411

In [None]:
# Reshape the rank row of the SoundNet median baseline to the long format of `all_df`
# (columns 'subject', 'test', 'rank') and append it to the table.
print(soundnet_median_ranks)
std_and_fold_columns = list(soundnet_median_ranks.filter(regex='_std|_fold1'))
soundnet_median_ranks = (
    soundnet_median_ranks
    .assign(subject='Soundnet')
    .drop(std_and_fold_columns, axis='columns')
    .melt(id_vars=['subject'], var_name='test', value_name='rank')
)
all_df = pd.concat([all_df, soundnet_median_ranks], axis=0, ignore_index=True)

In [None]:
# Display the rank table built by the cells above.
all_df

In [None]:
#stat
# Wilcoxon signed-rank test on the rank differences of each subject, for each scale.
conv='conv4'
for scale in scales:
    scale_rows = (all_df['finetune'] == conv) & (all_df['atlas'] == scale)
    for sub in subs:
        diff_values = all_df.loc[scale_rows & (all_df['subject'] == sub), 'diff_rank'].values
        _, pvalue = wilcoxon(diff_values, nan_policy='omit')
        print(conv, scale, sub, pvalue, pvalue<0.05)

In [None]:
# Preview the first rows of the rank table.
all_df.head()

In [None]:
# Export the ranks and rank differences of one brain scale as a wide table:
# one row per task, one column per (subject, training data, rank / diff_rank).
atlas = 'STG' #'wholebrain'
# Chained, non in-place operations: dropping columns in place on a .loc selection
# triggers pandas' SettingWithCopyWarning.
df_wip = (
    all_df.loc[all_df['atlas'] == atlas]
    .drop(['atlas', 'finetune'], axis='columns')
    .melt(id_vars=['subject', 'test', 'training_data'], var_name='type', value_name='result')
    .pivot(index="test", columns=["subject", 'training_data', "type"], values="result")
)
# NOTE(review): `columns_order` is not used in the visible cells, and its 2-level tuples
# do not match the 3-level columns produced by the pivot - confirm whether it can go.
columns_order = [('sub-01', 'rank'),('sub-01', 'diff_rank'),
                 ('sub-02', 'rank'),('sub-02', 'diff_rank'),
                 ('sub-03', 'rank'),('sub-03', 'diff_rank'),
                 ('sub-04', 'rank'),('sub-04', 'diff_rank'),
                 ('sub-05', 'rank'),('sub-05', 'diff_rank'),
                 ('sub-06', 'rank'),('sub-06', 'diff_rank')]

# Row order used for the exported table.
rows_order = ['Gunshot','Beijing Opera','NSynth Pitch 5h','NSynth Pitch 50h','ESC-50',
              'Speech commands 5h','CREMA-D','FSD50K','GTZAN Genre','GTZAN Music/Speech',
              'Speech commands full','Mridangam Stroke','Libricount','Maestro 5h',
              'Beehive','VoxLingua107 top 10','DCASE 2016','Vocal Imitation','Mridangam Tonic']
df_wip = df_wip.reindex(rows_order)
df_wip.to_csv(f'/home/maellef/Results/finefriends/figures/figure_papier/HEAREVAL_full_rank_data_with_group_{atlas}.csv')

Test: full-benchmark model comparison with the new scale legend

In [None]:
# Display the rank table used by the figures below.
all_df

In [None]:
#HEAREVAL big
# Bubble chart of the rank difference (finetuned model vs SoundNet baseline) per subject
# and task; colour and size of the markers both encode the rank difference.
rank_range = np.arange(-20, 21)
#define a new colormap (colors from -20-0-20 = 41 colors)
redblue = mpl.colormaps['RdYlBu_r'].resampled(len(rank_range))
newcolors = redblue(np.linspace(0, 1, len(rank_range)))

color_values = [0, 0.09, 0.18, 0.27, 0.36, 
                0.45, 
                0.54, 0.65, 0.75, 0.89, 0.99]

colors = [redblue(i) for i in color_values]

# One (start, stop) slice of `newcolors` per legend bin; index i is rank i - 20.
# The bins are contiguous: the third one now ends at 17, so that rank -4 (index 16)
# gets the colour of the '-4 to -6' bin instead of keeping its resampled colour.
color_bins = [(0, 10), (10, 14), (14, 17), (17, 19), (19, 20), (20, 21),
              (21, 22), (22, 24), (24, 27), (27, 31), (31, len(rank_range))]
for bin_color, (start, stop) in zip(colors, color_bins):
    newcolors[start:stop, :] = bin_color

newcmp = ListedColormap(newcolors)

#select and reorder dataframe
df = all_df
atlas = 'STG' #'wholebrain'    
training_data= 'individual' #'group'
task_training_size = 'small_train' #'medium_train' #'big_train' 
comparaison = 'duo' if not solo_soundnet else 'solo'
if big and small:
    models_size = 'all'
elif big:
    models_size = 'big'
else:
    models_size = 'small'

if task_training_size == 'small_train':
    tasks_list=list(task_training_small['task'])
elif task_training_size == 'medium_train':
    tasks_list=list(task_training_medium['task'])
else:
    tasks_list=list(task_training_big['task'])

# .copy(): the selection is modified below, which would otherwise be an assignment on
# a slice of `all_df` (SettingWithCopyWarning).
df_selected = df.loc[(df['finetune'] == 'conv4')
                     & (df['atlas'] == atlas)
                     & (df['training_data'] == training_data)
                     & df['test'].isin(tasks_list)].copy()
# Ordered categorical so that the tasks are sorted and plotted in the order of `tasks_list`.
test_order = pd.CategoricalDtype(tasks_list, ordered=True)
df_selected['test'] = df_selected['test'].astype(test_order)
df_selected = df_selected.sort_values(['test', 'subject'])

#automatic sizes range for graph
# Marker size indexed by |rank difference| (0 to 20); one size is emitted per distinct
# non-NaN value, in sorted order, as expected by seaborn for a list of sizes.
unique_rank_values = np.sort(df_selected['diff_rank'].unique())
sizes=[100, 150]+[200]*2+[250]*3+[300]*4+[350]*10
sizes_range=[]
for rank in unique_rank_values:
    if not np.isnan(rank):
        # Differences beyond the last bin use the largest size, like the colour scale
        # which is clipped by hue_norm.
        i = min(int(abs(rank)), len(sizes) - 1)
        sizes_range.append(sizes[i])

#legend
labels = ['-11 to -20', '-7 to -10', '-4 to -6', '-2 to -3', '-1', '0', '+1', '+2 to +3', '+4 to +6', '+7 to +10', '+11 to +20'] #HEAREVAL full
markersizes = [18, 16.8, 15.5, 14.2, 12.5, 10, 12.5, 14.2, 15.5, 16.8, 18]
legend_circles = [Line2D([0], [0], marker='o', color='w', markerfacecolor=colors[i], markersize=markersizes[i], label=labels[i]) 
                  for i in range(len(markersizes))]

#graph 
# Figure height scales with the number of tasks (455 px for the 19 tasks).
graph_height = 455/19*len(tasks_list)
px = 1/plt.rcParams['figure.dpi']  # pixel in inches
fig, ax = plt.subplots(figsize=(841*px, graph_height*px))

sns.scatterplot(data=df_selected, x="subject", y="test", 
                hue="diff_rank", size="diff_rank",sizes=sizes_range, palette = newcmp, hue_norm = (-20, 20),
                legend=True, ax=ax)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), reverse=True, handles=legend_circles) 
ax.margins(y=0.2)
ax.set_title(atlas)
plt.savefig('/home/maellef/Results/finefriends/figures/figure_papier/HEAR_EVAL_{}_{}_{}_{}_{}_models_sept2024.png'.format(training_data, atlas, 
                                                                                                                task_training_size,
                                                                                                             comparaison, models_size), 
            bbox_inches='tight', transparent=True)

rank figures - different scales:

In [None]:
#HEAREVAL small
# Bubble chart of the rank difference per subject and task, with a colour scale
# covering rank differences from -10 to +10.
rank_range = np.arange(-10, 11)
#define a new colormap (colors from -10-0-10 = 21 colors)
redblue = mpl.colormaps['RdYlBu_r'].resampled(len(rank_range))
newcolors = redblue(np.linspace(0, 1, len(rank_range)))

color_values = [0, 0.11, 0.22, 0.33, 0.45, 0.58, 0.72, 0.85, 0.99]
colors = [redblue(i) for i in color_values]

# One (start, stop) slice of `newcolors` per legend bin; index i is rank i - 10.
color_bins = [(0, 5), (5, 7), (7, 9), (9, 10), (10, 11),
              (11, 12), (12, 14), (14, 16), (16, len(rank_range))]
for bin_color, (start, stop) in zip(colors, color_bins):
    newcolors[start:stop, :] = bin_color
newcmp = ListedColormap(newcolors)

#select and reorder dataframe
df = all_df
atlas = 'wholebrain' #'STG'     
tasks_list=list(task_training_small['task']) #task_training_small task_training_medium task_training_big
training_Size = 'small_tr'

# .copy(): the selection is modified below, which would otherwise be an assignment on
# a slice of `all_df` (SettingWithCopyWarning).
df_selected = df.loc[(df['finetune'] == 'conv4')
                     & (df['atlas'] == atlas)
                     & df['test'].isin(tasks_list)].copy()
# Ordered categorical so that the tasks are sorted and plotted in the order of `tasks_list`.
test_order = pd.CategoricalDtype(tasks_list, ordered=True)
df_selected['test'] = df_selected['test'].astype(test_order)
df_selected = df_selected.sort_values(['test', 'subject'])

#automatic sizes range for graph
# Marker size indexed by |rank difference| (0 to 10); one size is emitted per distinct
# non-NaN value, in sorted order, as expected by seaborn for a list of sizes.
unique_rank_values = np.sort(df_selected['diff_rank'].unique())
sizes=[100, 150]+[200]*2+[275]*2+[350]*5
sizes_range=[]
for rank in unique_rank_values:
    if not np.isnan(rank):
        # Differences larger than 10 (possible when `all_df` was computed against the
        # full leaderboard) use the largest size instead of raising an IndexError.
        i = min(int(abs(rank)), len(sizes) - 1)
        sizes_range.append(sizes[i])

#legend
labels = ['-6 to -10', '-4 to -5', '-2 to -3', '-1', '0', '+1', '+2 to +3', '+4 to +5', '+6 to +10']#HEAREVAL small
markersizes = [18, 16.3, 14.2, 12.5, 10, 12.5, 14.2, 16.3, 18]
legend_circles = [Line2D([0], [0], marker='o', color='w', markerfacecolor=colors[i], markersize=markersizes[i], label=labels[i]) 
                  for i in range(len(markersizes))]

#graph 
# Figure height scales with the number of tasks (455 px for the 19 tasks).
graph_height = 455/19*len(tasks_list)
px = 1/plt.rcParams['figure.dpi']  # pixel in inches
fig, ax = plt.subplots(figsize=(841*px, graph_height*px))

sns.scatterplot(data=df_selected, x="subject", y="test", 
                hue="diff_rank", size="diff_rank",sizes=sizes_range, palette = newcmp, hue_norm = (-10, 10),
                legend=True, ax=ax)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), reverse=True, handles=legend_circles) 
ax.set_title(atlas)
ax.margins(y=0.2)
plt.savefig('/home/maellef/Results/figures/figure_papier/HEAR_EVAL_{}_{}_duo_small_model_diff_2024.png'.format(atlas, training_Size), 
            bbox_inches='tight', transparent=True)

In [None]:
#HEAREVAL big
# NOTE(review): the header says "big", but tasks_list / training_Size below
# select the *small* training set -- confirm which one is intended.
rank_range = np.arange(-20, 21)
#define a new colormap (colors from -20-0-20 = 41 colors)
redblue = mpl.colormaps['RdYlBu_r'].resampled(len(rank_range))
newcolors = redblue(np.linspace(0, 1, len(rank_range)))

# Nine representative colours, one per legend bin.
color_values = [0, 0.11, 0.22, 0.33, 0.45, 0.58, 0.72, 0.85, 0.99]
colors = [redblue(value) for value in color_values]

# Collapse the 41 per-rank colours into 9 bins (row k of newcolors <-> rank k-20).
newcolors[:10, :] = colors[0]     # -20 .. -11
newcolors[10:15, :] = colors[1]   # -10 .. -6
newcolors[15:19, :] = colors[2]   #  -5 .. -2
newcolors[19, :] = colors[3]      #  -1
newcolors[20, :] = colors[4]      #   0
newcolors[21, :] = colors[5]      #  +1
newcolors[22:26, :] = colors[6]   #  +2 .. +5
newcolors[26:31, :] = colors[7]   #  +6 .. +10
newcolors[31:, :] = colors[8]     # +11 .. +20
newcmp = ListedColormap(newcolors)

#select and reorder dataframe
df = all_df
atlas = 'STG' #'wholebrain'
tasks_list = list(task_training_small['task']) #task_training_small task_training_medium task_training_big
training_Size = 'small_tr'

# Ordered categorical: rows are sorted following the order of tasks_list.
test_order = pd.CategoricalDtype(tasks_list, ordered=True)
# Single boolean mask + .copy(): df_selected is an independent frame, so the
# column assignment below does not trigger pandas' SettingWithCopyWarning and
# never writes through to all_df.
is_selected = (df['finetune'] == 'conv4') & (df['atlas'] == atlas) & df['test'].isin(tasks_list)
df_selected = df.loc[is_selected].copy()
df_selected['test'] = df_selected['test'].astype(test_order)
df_selected = df_selected.sort_values(['test', 'subject'])

#automatic sizes range for graph
# sizes is indexed by |diff_rank| (0..20). seaborn wants one size per distinct
# value of the `size` variable, so build the list from the sorted unique
# non-NaN ranks actually present in the selection.
unique_rank_values = np.sort(df_selected['diff_rank'].unique())
sizes = [100, 150]+[200]*4+[275]*5+[350]*10
sizes_range = [sizes[int(abs(rank))] for rank in unique_rank_values if not np.isnan(rank)]

#legend
# Hand-built legend: one circle per colour bin, replacing seaborn's automatic one.
labels = ['-11 to -20', '-6 to -10', '-2 to -5', '-1', '0', '+1', '+2 to +5', '+6 to +10', '+11 to +20'] #HEAREVAL full
markersizes = [18, 16.3, 14.2, 12.5, 10, 12.5, 14.2, 16.3, 18]
legend_circles = [Line2D([0], [0], marker='o', color='w', markerfacecolor=color, markersize=markersize, label=label)
                  for color, markersize, label in zip(colors, markersizes, labels)]

#graph
# Figure is 841 px wide; height scales with the number of tasks (455 px for 19 tasks).
graph_height = 455/19*len(tasks_list)
px = 1/plt.rcParams['figure.dpi']  # pixel in inches
fig, ax = plt.subplots(figsize=(841*px, graph_height*px))

# Draw on the explicit axes instead of relying on pyplot's "current axes" state.
sns.scatterplot(data=df_selected, x="subject", y="test",
                hue="diff_rank", size="diff_rank", sizes=sizes_range, palette=newcmp, hue_norm=(-20, 20),
                legend=True, ax=ax)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), reverse=True, handles=legend_circles)
ax.margins(y=0.2)
ax.set_title(atlas)
fig.savefig('/home/maellef/Results/figures/figure_papier/HEAR_EVAL_{}_{}_duo_small_model_diff2_2024.png'.format(atlas, training_Size),
            bbox_inches='tight', transparent=True)

In [None]:
#HEAREVAL small rank
rank_range = np.arange(10, 0, -1)
#define a new colormap (one color per rank, 10 colors for ranks 10..1)
redblue = mpl.colormaps['RdYlBu_r'].resampled(len(rank_range))
newcolors = redblue(np.linspace(0, 1, len(rank_range)))

color_values = [0, 0.11, 0.22, 0.33, 0.45, 0.58, 0.64, 0.72, 0.85, 0.99]
colors = [redblue(value) for value in color_values]

# Reverse the colour order: row 0 of the colormap (rank 1 under hue_norm=(1, 10))
# takes colors[9], and row 9 (rank 10) takes colors[0].
newcolors[:, :] = colors[::-1]

newcmp = ListedColormap(newcolors)

#select and reorder dataframe
df = all_df
atlas = 'wholebrain'#'STG'
df_selected = df.loc[(df['finetune'] == 'conv4') & (df['atlas'] == atlas)]
# Append the soundnet_median_ranks rows to the selection.
df_selected = pd.concat([df_selected, soundnet_median_ranks], axis=0, ignore_index=True)
# Fixed, hand-picked task order used for sorting the rows.
test_order = pd.CategoricalDtype(['Gunshot','Beijing Opera','NSynth Pitch 5h','NSynth Pitch 50h','ESC-50',
              'Speech commands 5h','CREMA-D','FSD50K','GTZAN Genre','GTZAN Music/Speech',
              'Speech commands full','Mridangam Stroke','Libricount','Maestro 5h',
              'Beehive','VoxLingua107 top 10','DCASE 2016','Vocal Imitation','Mridangam Tonic'], ordered=True)

df_selected['test'] = df_selected['test'].astype(test_order)
df_selected = df_selected.sort_values(['test', 'subject'])

#automatic sizes range for graph
# Every rank is drawn with the same marker size (sizes[2]); seaborn still needs
# one entry per distinct non-NaN value of the `size` variable.
unique_rank_values = np.sort(df_selected['rank'].unique())
sizes=[350, 275, 200, 150, 100, 100, 150, 200, 275, 350]
sizes_range = [sizes[2] for rank in unique_rank_values if not np.isnan(rank)]

#legend
# One legend entry per rank. The previous 9-element markersizes list silently
# dropped the last entry (rank '1'); all ten labels are now shown, with a
# uniform marker size matching the uniform point size used in the plot
# (14.2 is the legend size the other cells pair with point size 200).
labels = ['10', '9', '8', '7', '6', '5', '4', '3', '2', '1']
markersizes = [14.2]*len(labels)
legend_circles = [Line2D([0], [0], marker='o', color='w', markerfacecolor=color, markersize=markersize, label=label)
                  for color, markersize, label in zip(colors, markersizes, labels)]

#graph
# Explicit figure/axes so the plot never lands on a figure left over from another cell.
fig, ax = plt.subplots()
sns.scatterplot(data=df_selected, x="subject", y="test",
                hue="rank", size="rank", sizes=sizes_range, palette=newcmp, hue_norm=(1, 10),
                legend=True, ax=ax)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), reverse=True, handles=legend_circles)
ax.set_title(atlas)
fig.savefig('/home/maellef/Results/figures/figure_papier/HEAR_EVAL_{}_small_rank_2024.png'.format(atlas),
            bbox_inches='tight', transparent=True)