In [1]:
import os
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.legend_handler import HandlerTuple
from matplotlib import gridspec

In [2]:
#paths + global variables
subs = ['sub-01','sub-02', 'sub-03','sub-04', 'sub-05', 'sub-06']
ft_models = ['conv4', 'conv5', 'conv6', 'conv7', 'no']
atlas = ['wholebrain', 'STG']
baseline = 'no'

csv_path_sub236 = '/home/maelle/GitHub_repositories/cNeuromod_encoding_2020/benchmark/HEAR-EVAL/metrics.csv'
csv_path_sub145 = '/home/maelle/GitHub_repositories/cNeuromod_encoding_2020/benchmark/HEAR-EVAL/metrics_145.csv'
csv_path_leaderboard = '/home/maelle/GitHub_repositories/cNeuromod_encoding_2020/benchmark/HEAR-EVAL/leaderboard.csv'

In [3]:
def list_positions_in_matrix2D(nb_rows, nb_col):
    i_rows = np.repeat(np.arange(nb_rows), nb_col)
    i_col = np.tile(np.arange(nb_col), nb_rows)
    pos = [(row, col) for row, col in zip(i_rows, i_col)]
    return pos

In [4]:
#dataframe with difference in accuracy with baseline (no_ft) in reference 
def diff_df(df, subs, atlas):
    all_diff_df = pd.DataFrame([], columns=df.columns)
    for sub in subs:
        for a in atlas:
            selected_df = df.loc[(df['subject'] == sub) & (df['atlas'] == a)]
            subject_serie = selected_df.pop('subject').reset_index()
            atlas_serie = selected_df.pop('atlas').reset_index()
            finetune_serie = selected_df.pop('finetune').reset_index()
            diff_df = pd.DataFrame(selected_df.values-selected_df.values[-1], columns=selected_df.columns, index=None)
            diff_df = pd.concat([subject_serie, finetune_serie, atlas_serie, diff_df], axis='columns')
            diff_df.pop('index')
            all_diff_df = all_diff_df.append(diff_df)
    return all_diff_df

In [5]:
df_236 = pd.read_csv(csv_path_sub236)
df_145 = pd.read_csv(csv_path_sub145)
HEAREVAL_leaderboard = pd.read_csv(csv_path_leaderboard)
HEAREVAL_leaderboard.pop('URL')

0     https://github.com/hearbenchmark/hear2021-subm...
1     https://github.com/hearbenchmark/hear2021-subm...
2     https://github.com/hearbenchmark/hear2021-subm...
3     https://github.com/hearbenchmark/hear2021-subm...
4     https://github.com/hearbenchmark/hear2021-subm...
5     https://github.com/hearbenchmark/hear2021-subm...
6     https://github.com/hearbenchmark/hear2021-subm...
7     https://github.com/hearbenchmark/hear2021-subm...
8     https://github.com/hearbenchmark/hear2021-subm...
9     https://github.com/hearbenchmark/hear2021-subm...
10    https://github.com/hearbenchmark/hear2021-subm...
11    https://github.com/hearbenchmark/hear2021-subm...
12    https://github.com/hearbenchmark/hear2021-subm...
13    https://github.com/hearbenchmark/hear2021-subm...
14    https://github.com/hearbenchmark/hear2021-subm...
15    https://github.com/hearbenchmark/hear2021-subm...
16    https://github.com/hearbenchmark/hear2021-subm...
17    https://github.com/hearbenchmark/hear2021-

In [6]:
#dataframe with accuracies for all tests, for each subject
HEAREVAL_df = pd.concat([df_236,df_145], ignore_index=True)
HEAREVAL_df.sort_values(by=['subject', 'model'], inplace=True)
HEAREVAL_df.drop('Unnamed: 0', axis='columns', inplace=True)
models = HEAREVAL_df.pop('model')

In [7]:
#HEAREVAL_df.set_index('model', inplace=True)
acc_diff_df = diff_df(HEAREVAL_df, subs, atlas)
print(acc_diff_df)

  subject finetune       atlas  \
0  sub-01    conv4  wholebrain   
1  sub-01    conv5  wholebrain   
2  sub-01    conv6  wholebrain   
3  sub-01    conv7  wholebrain   
4  sub-01       no  wholebrain   
0  sub-01    conv4         STG   
1  sub-01    conv5         STG   
2  sub-01    conv6         STG   
3  sub-01    conv7         STG   
4  sub-01       no         STG   
0  sub-02    conv4  wholebrain   
1  sub-02    conv5  wholebrain   
2  sub-02    conv6  wholebrain   
3  sub-02    conv7  wholebrain   
4  sub-02       no  wholebrain   
0  sub-02    conv4         STG   
1  sub-02    conv5         STG   
2  sub-02    conv6         STG   
3  sub-02    conv7         STG   
4  sub-02       no         STG   
0  sub-03    conv4  wholebrain   
1  sub-03    conv5  wholebrain   
2  sub-03    conv6  wholebrain   
3  sub-03    conv7  wholebrain   
4  sub-03       no  wholebrain   
0  sub-03    conv4         STG   
1  sub-03    conv5         STG   
2  sub-03    conv6         STG   
3  sub-03    c

In [8]:
#rename columns based on Hear Eval leaderboard
eq = {
    'model':'Model',
    'beehive_states_fold0-v2-full_test_aucroc':'Beehive',
    'beehive_states_fold1-v2-full_test_aucroc':'Beehive_fold1',
    'beijing_opera-v1.0-hear2021-full_test_top1_acc_mean':'Beijing Opera',
    'beijing_opera-v1.0-hear2021-full_test_top1_acc_std':'Beijing Opera_std',
    'tfds_crema_d-1.0.0-full_test_top1_acc_mean':'CREMA-D',
    'tfds_crema_d-1.0.0-full_test_top1_acc_std':'CREMA-D_std',
    'dcase2016_task2-hear2021-full_test_event_onset_200ms_fms':'DCASE 2016',
    'esc50-v2.0.0-full_test_top1_acc_mean':'ESC-50',
    'esc50-v2.0.0-full_test_top1_acc_std':'ESC-50_std',
    'fsd50k-v1.0-full_test_mAP':'FSD50K',
    'tfds_gtzan-1.0.0-full_test_top1_acc_mean':'GTZAN Genre',
    'tfds_gtzan-1.0.0-full_test_top1_acc_std' : 'GTZAN Genre_std',    
    'tfds_gtzan_music_speech-1.0.0-full_test_top1_acc_mean':'GTZAN Music/Speech',
    'tfds_gtzan_music_speech-1.0.0-full_test_top1_acc_std' :'GTZAN Music/Speech_std',
    'gunshot_triangulation-v1.0-full_test_top1_acc_mean':'Gunshot',
    'gunshot_triangulation-v1.0-full_test_top1_acc_std':'Gunshot_std',
    'libricount-v1.0.0-hear2021-full_test_top1_acc_mean':'Libricount',
    'libricount-v1.0.0-hear2021-full_test_top1_acc_std':'Libricount_std',
    'maestro-v3.0.0-5h_test_event_onset_50ms_fms_mean':'Maestro 5h',
    'maestro-v3.0.0-5h_test_event_onset_50ms_fms_std':'Maestro 5h_std',
    'mridangam_stroke-v1.5-full_test_top1_acc_mean':'Mridangam Stroke',
    'mridangam_stroke-v1.5-full_test_top1_acc_std':'Mridangam Stroke_std',
    'mridangam_tonic-v1.5-full_test_top1_acc_mean':'Mridangam Tonic',
    'mridangam_tonic-v1.5-full_test_top1_acc_std':'Mridangam Tonic_std',
    'nsynth_pitch-v2.2.3-50h_test_pitch_acc':'NSynth Pitch 50h',
    'nsynth_pitch-v2.2.3-5h_test_pitch_acc':'NSynth Pitch 5h',
    'speech_commands-v0.0.2-5h_test_top1_acc':'Speech commands 5h',
    'speech_commands-v0.0.2-full_test_top1_acc':'Speech commands full',
    'vocal_imitation-v1.1.3-full_test_mAP_mean':'Vocal Imitation',
    'vocal_imitation-v1.1.3-full_test_mAP_std':'Vocal Imitation_std',
    'vox_lingua_top10-hear2021-full_test_top1_acc_mean':'VoxLingua107 top 10', 
    'vox_lingua_top10-hear2021-full_test_top1_acc_std':'VoxLingua107 top 10_std'
}

renamed_df = pd.concat([models, HEAREVAL_df], axis='columns')
renamed_df.rename(columns = eq, inplace=True)

In [9]:
#dataframe with ranks of all models in regards of other models from HEAREVAL benchmark 
#(for each conv : ranks of baseline + selected conv --> leaderboard)

all_sub_rank = pd.DataFrame([])
all_sub_rank_diff = pd.DataFrame([])
for sub in subs:
    for a in atlas:
        selected_df = renamed_df.loc[(renamed_df['subject'] == sub) & (renamed_df['atlas'] == a)]
        for ft_model in ft_models:
            diff_models = selected_df.loc[(selected_df['finetune'] == baseline) | (selected_df['finetune'] == ft_model)]
            merged_df = pd.merge(left=HEAREVAL_leaderboard, right=diff_models, how='outer')
            ranked_df = merged_df.rank(numeric_only=True, ascending=False)
            models_hp = merged_df[['Model', 'subject','atlas','finetune']]
            all_hp_ranked_df = pd.concat([models_hp, ranked_df], axis='columns')
            models_ranks_df = all_hp_ranked_df.loc[all_hp_ranked_df['subject'].notna()]
            all_sub_rank = all_sub_rank.append(models_ranks_df.iloc[0])
        
            model_serie= models_ranks_df.pop('Model')
            subject_serie= models_ranks_df.pop('subject')
            atlas_serie= models_ranks_df.pop('atlas')
            finetune_serie= models_ranks_df.pop('finetune')
            model_diff_rank = models_ranks_df.diff(periods=-1, axis='rows').mul(-1)
            model_diff_rank = pd.concat([model_serie, subject_serie, atlas_serie, finetune_serie, model_diff_rank],
                                       axis='columns')
            all_sub_rank_diff = pd.concat([all_sub_rank_diff, model_diff_rank])
all_sub_rank_diff.drop_duplicates(keep='last', inplace=True) 
print(all_sub_rank_diff)

                                                Model subject       atlas  \
29  sub-01_friends_MIST_ROI_SoundNetEncoding_conv_...  sub-01  wholebrain   
29  sub-01_friends_MIST_ROI_SoundNetEncoding_conv_...  sub-01  wholebrain   
29  sub-01_friends_MIST_ROI_SoundNetEncoding_conv_...  sub-01  wholebrain   
29  sub-01_friends_MIST_ROI_SoundNetEncoding_conv_...  sub-01  wholebrain   
29  sub-01_friends_MIST_ROI_SoundNetEncoding_conv_...  sub-01  wholebrain   
29  sub-01_friends_auditory_Voxels_SoundNetEncodin...  sub-01         STG   
29  sub-01_friends_auditory_Voxels_SoundNetEncodin...  sub-01         STG   
29  sub-01_friends_auditory_Voxels_SoundNetEncodin...  sub-01         STG   
29  sub-01_friends_auditory_Voxels_SoundNetEncodin...  sub-01         STG   
29  sub-01_friends_auditory_Voxels_SoundNetEncodin...  sub-01         STG   
29  sub-02_friends_MIST_ROI_SoundNetEncoding_conv_...  sub-02  wholebrain   
29  sub-02_friends_MIST_ROI_SoundNetEncoding_conv_...  sub-02  wholebrain   

In [11]:
def concat_all_tasks_in_df_with_task_column(df, std=False):
    df_basis = pd.DataFrame([], columns=['subject', 'finetune',  'atlas'])
    task = []
    std_values = pd.Series(name='std', dtype=int)
    value = pd.Series(name='value', dtype=int)

    for task_name in df.columns[4:]:
        if not 'std' in task_name:  
            df_basis = df_basis.append(df[['subject', 'finetune', 'atlas']]) 
            value = value.append(df[task_name])
            task += len(df[task_name]) * [task_name]
            if std : 
                std_column = task_name+'_std'
                if std_column in df.columns:
                    std_values = std_values.append(df[std_column])
                else : 
                    std_values = std_values.append(pd.Series(len(df[task_name])*[None]))
                    
    df_basis['task'] = task
    df_basis['value']= value.values
    if std : 
        df_basis['std']=std_values.values
    df_result = df_basis.reset_index()
    df_result.pop('index')
    return df_result

In [12]:
#rename all df in conventional ways
df_acc = HEAREVAL_df.set_index(models).rename(columns = eq)
df_acc_diff = acc_diff_df.set_index(models).rename(columns = eq)
df_rank = all_sub_rank.set_index(models).rename(columns = eq)
df_rank_diff = all_sub_rank_diff.set_index('Model')

df_fig = df_acc
df_fig['Beehive_std'] = [sorted(serie.values) for model, serie in df_fig[['Beehive', 'Beehive_fold1']].iterrows()]
df_fig['Beehive'] = df_fig[['Beehive', 'Beehive_fold1']].mean(axis='columns')
df_fig = df_fig.drop(labels='Beehive_fold1', axis='columns')

df_result_acc = concat_all_tasks_in_df_with_task_column(df_fig, std=True)
a = df_result_acc.loc[(df_result_acc['finetune'] == 'conv4') & (df_result_acc['atlas'] == 'wholebrain')]
pd.set_option('display.max_rows', None)
print(a)


     subject finetune       atlas                  task     value  \
0     sub-01    conv4  wholebrain                FSD50K  0.186010   
10    sub-02    conv4  wholebrain                FSD50K  0.186697   
20    sub-03    conv4  wholebrain                FSD50K  0.185791   
30    sub-04    conv4  wholebrain                FSD50K  0.187901   
40    sub-05    conv4  wholebrain                FSD50K  0.185619   
50    sub-06    conv4  wholebrain                FSD50K  0.182396   
60    sub-01    conv4  wholebrain           GTZAN Genre  0.442000   
70    sub-02    conv4  wholebrain           GTZAN Genre  0.454000   
80    sub-03    conv4  wholebrain           GTZAN Genre  0.460000   
90    sub-04    conv4  wholebrain           GTZAN Genre  0.459000   
100   sub-05    conv4  wholebrain           GTZAN Genre  0.448000   
110   sub-06    conv4  wholebrain           GTZAN Genre  0.441000   
120   sub-01    conv4  wholebrain               Gunshot  0.809524   
130   sub-02    conv4  wholebrain 

In [None]:

g = sns.catplot(data=df_result_acc, x='task', y='value', hue='subject', row='finetune', col='atlas', kind='bar',
            row_order=ft_models, hue_order=subs, sharex=False)
#plt.errorbar(x='task',y=df['value'], yerr=,)

#for i, ax in enumerate(g.fig.axes):
#    ax.set_xticklabels(ax.get_xticklabels(), rotation = 45)
#    ax.

In [13]:
#mentionner l'accuracy moyenne a coté, et choisir quelques taches
categories = ['subject', 'atlas', 'finetune',
              'CREMA-D','Speech commands 5h', 'Speech commands full', 'Vocal Imitation','VoxLingua107 top 10',  
              'ESC-50', 'FSD50K', 'GTZAN Genre','GTZAN Music/Speech', 'Libricount', 
              'Beijing Opera', 'Mridangam Stroke','Mridangam Tonic', 'Beehive','Gunshot',
              'DCASE 2016', 'Maestro 5h', 'NSynth Pitch 50h', 'NSynth Pitch 5h']
df_acc = df_acc.reindex(columns=categories)
df_acc_stg = df_acc.loc[(df_acc['finetune']=='conv4') & (df_acc['atlas']=='STG')]
df_acc_stg = df_acc_stg[df_acc_stg.columns[3:]].median(axis='rows')
med_acc_stg = df_acc_stg.tolist()
df_acc_wb = df_acc.loc[(df_acc['finetune']=='conv4') & (df_acc['atlas']=='wholebrain')]
df_acc_wb = df_acc_wb[df_acc_wb.columns[3:]].median(axis='rows')
med_acc_wb = df_acc_wb.tolist()

df2 = df_rank_diff
df_ccn = df2.loc[(df2['finetune']=='conv4')]
df_ccn = df_ccn.reindex(columns=categories).reset_index()
df_ccn_wb = df_ccn.loc[df_ccn['atlas']=='wholebrain'].median(axis='rows').sort_values(ascending=False)
df_ccn_stg = df_ccn.loc[df_ccn['atlas']=='STG'].median(axis='rows').sort_values(ascending=False)
order_stg = df_ccn_stg.index
order_wb = df_ccn_wb.index

df_result = concat_all_tasks_in_df_with_task_column(df=df_ccn)

df_wb = df_result.loc[df_result['atlas']=='wholebrain']
df_stg = df_result.loc[df_result['atlas']=='STG']

df_wb["task"] = pd.Categorical(df_wb["task"], categories = order_wb)
df_stg["task"] = pd.Categorical(df_stg["task"], categories = order_stg)
df_wb.sort_values(by = "task")
df_stg.sort_values(by = "task")

pd.set_option('display.max_rows', None)
print(df_stg)

    subject finetune atlas                  task  value
1    sub-01    conv4   STG               CREMA-D    2.0
3    sub-02    conv4   STG               CREMA-D    3.0
5    sub-03    conv4   STG               CREMA-D    3.0
7    sub-04    conv4   STG               CREMA-D    2.0
9    sub-05    conv4   STG               CREMA-D   -2.0
11   sub-06    conv4   STG               CREMA-D    2.0
13   sub-01    conv4   STG    Speech commands 5h    1.0
15   sub-02    conv4   STG    Speech commands 5h    4.0
17   sub-03    conv4   STG    Speech commands 5h    4.0
19   sub-04    conv4   STG    Speech commands 5h    2.0
21   sub-05    conv4   STG    Speech commands 5h   -1.0
23   sub-06    conv4   STG    Speech commands 5h    4.0
25   sub-01    conv4   STG  Speech commands full   -1.0
27   sub-02    conv4   STG  Speech commands full    1.0
29   sub-03    conv4   STG  Speech commands full    1.0
31   sub-04    conv4   STG  Speech commands full   -3.0
33   sub-05    conv4   STG  Speech commands full

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [None]:
#-----args---------------------------
df = df_result_acc
df_rank = df_wb
order = order_wb #order_stg
atlas = 'wholebrain'#STG
finetune = 'conv'
n = 7
subs = subs
#------------------------------------
best_n_tasks = order[:n]
best_df = df.loc[(df['task'].isin(best_n_tasks)) & (df['finetune']==finetune) & (df['atlas']==atlas)]
print(best_df)
#ig, axs = plt.subplots(1, 6)

for sub in subs : 
    best_sub_df = best_df.loc[best_df['subject'] == sub]

In [None]:
#figure
sns.set_theme(context='poster', style='whitegrid', font_scale=2) 
fig = plt.figure(figsize=(70, 50), constrained_layout=True)
grid = fig.add_gridspec(ncols=1, nrows=2)

ax_wb = fig.add_subplot(grid[0,0])
ax_wb.set_title('change of rank for Whole brain models')
ax_stg = fig.add_subplot(grid[1,0])
ax_stg.set_title('change of rank for Auditory cortex models')

for (ax, df_a, med) in [(ax_wb, df_wb, med_acc_wb), (ax_stg, df_stg, med_acc_stg)]:
    sns.boxplot(data = df_a, x='task',  y='value', ax=ax, boxprops={'alpha': 0.4, 'color':'gainsboro'})
    sns.stripplot(data = df_a, x='task', y='value', hue="subject", ax=ax, palette="Set2", size=30)
    ax.legend(loc='best',markerscale=3)
    ax.tick_params(rotation = 45)
    ax.set_ylabel('change in rank')
    ax.set_xlabel('tasks from HEAR Benchmark')
    ax.axhline(0, ls='--')
    ax.title.set_size(75)
    
    major_ticks = np.arange(-5, 21, 5)
    minor_ticks = np.arange(-5, 21, 1)
    ax.set_yticks(major_ticks)
    ax.set_yticks(minor_ticks, minor=True)
    ax.grid(which='minor', alpha=0.5)
    ax.grid(which='major', alpha=1)
    
    med = ["%.2f" % val for val in med]
    for xtick in ax.get_xticks():
        ax.text(xtick,-6,med[xtick], horizontalalignment='center', weight='semibold')
grid.update(hspace = 0.1)

savepath = '/home/maelle/Results/figures/heareval'
savename = 'fig_heareval_ccn2022.jpg'
#plt.savefig(os.path.join(savepath, savename))
#fig.suptitle('difference of rank between the model Conv4 and the baseline (median accuracy of Conv4 above task label)')