In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seml
import pandas as pd
import json
from collections import defaultdict
from functools import reduce

  from tqdm.autonotebook import tqdm


In [2]:
collection_name = 'week8_cora_full'
collection = seml.database.get_collection(collection_name)

# Keep only the runs whose status is COMPLETED. At this point 'result' still
# holds whatever the database stored, which is opened as a file path below.
experiments = []
for record in collection.find():
    if record['status'] in ('COMPLETED',):
        experiments.append({
            'config': record['config'],
            'result': record['result'],
            'id': record['_id'],
        })

# Replace every result path with the parsed JSON content of that file.
for entry in experiments:
    with open(entry['result']) as result_file:
        entry['result'] = json.load(result_file)

print(f'Number of finished experiments : {len(experiments)}')

Number of finished experiments : 16


In [3]:
def get_experiment(experiments, residual=True, spectral_norm=True, train_labels_remove_other=True):
    """Return the first experiment whose configuration matches the given flags.

    Args:
        experiments: Iterable of experiment dicts. Each one must provide
            ``config['model']['residual']``,
            ``config['model']['use_spectral_norm']`` and
            ``config['data']['train_labels_remove_other']``.
        residual: Required value of ``config['model']['residual']``.
        spectral_norm: Required value of ``config['model']['use_spectral_norm']``.
        train_labels_remove_other: Required value of
            ``config['data']['train_labels_remove_other']``.

    Returns:
        The first matching experiment dict, in iteration order.

    Raises:
        IndexError: If no experiment matches. The message lists the requested
            flags so the failing lookup can be identified.
    """
    for candidate in experiments:
        model_cfg = candidate['config']['model']
        if (
            model_cfg['residual'] == residual
            and model_cfg['use_spectral_norm'] == spectral_norm
            and candidate['config']['data']['train_labels_remove_other'] == train_labels_remove_other
        ):
            # Stop at the first hit; later matches are never needed.
            return candidate

    raise IndexError(
        'No experiment found with '
        f'residual={residual!r}, spectral_norm={spectral_norm!r}, '
        f'train_labels_remove_other={train_labels_remove_other!r}'
    )

In [4]:
# Pick one run (residual + spectral norm + 'remove other' all enabled) to
# inspect which result keys are available.
experiment = get_experiment(
    experiments,
    residual=True,
    spectral_norm=True,
    train_labels_remove_other=True,
)

In [5]:
# Show every result key of the selected run that mentions 'acc'.
print([metric_name for metric_name in experiment['result'] if 'acc' in metric_name])

['val_accuracy-val-train-labels-0', 'val_accuracy-val-reduced-0', 'accuracy_val-reduced', 'accuracy_id_val-reduced', 'accuracy_ood_val-reduced', 'accuracy_val-reduced_no-edges', 'accuracy_id_val-reduced_no-edges', 'accuracy_ood_val-reduced_no-edges', 'accuracy_val_loc', 'accuracy_id_val_loc', 'accuracy_ood_val_loc', 'accuracy_val_loc-no-edges', 'accuracy_id_val_loc-no-edges', 'accuracy_ood_val_loc-no-edges', 'accuracy_val-reduced-bernoulli_bernoulli', 'accuracy_id_val-reduced-bernoulli_bernoulli', 'accuracy_ood_val-reduced-bernoulli_bernoulli', 'accuracy_val-reduced-bernoulli_bernoulli-no-edges', 'accuracy_id_val-reduced-bernoulli_bernoulli-no-edges', 'accuracy_ood_val-reduced-bernoulli_bernoulli-no-edges', 'accuracy_val-reduced-normal_normal', 'accuracy_id_val-reduced-normal_normal', 'accuracy_ood_val-reduced-normal_normal', 'accuracy_val-reduced-normal_normal-no-edges', 'accuracy_id_val-reduced-normal_normal-no-edges', 'accuracy_ood_val-reduced-normal_normal-no-edges']


In [39]:

def _parse_auroc_key(key):
    """Split an AUROC result key into ``(no_edges, ood_type, name)``.

    The text after the last '_' is taken as the OOD experiment type. A
    'no-edges' marker (joined with '_' or '-') is removed from the key and
    reported through the ``no_edges`` flag instead.

    Returns ``None`` for keys that contain ':' but not 'no' once the trailing
    OOD-type token has been removed; such keys are left out of the table.
    """
    no_edges = 'no-edges' in key
    if no_edges:
        key = key.replace('_no-edges', '').replace('-no-edges', '')
    tokens = key.split('_')
    stem, ood_type = '_'.join(tokens[:-1]), tokens[-1]
    if ':' in stem and 'no' not in stem:
        return None
    return no_edges, ood_type, stem.replace('auroc_', '').replace('-', ' ')


def _parse_accuracy_key(key):
    """Split an accuracy result key into ``(no_edges, ood_type, name)``.

    Handles keys such as 'accuracy_id_val_loc-no-edges': an 'id'/'ood'
    qualifier is merged into the metric token, a 'no-edges' marker in the last
    token becomes the ``no_edges`` flag, and the last remaining token is the
    OOD experiment type.

    Returns ``None`` unless exactly three tokens (metric, dataset, OOD type)
    remain, i.e. keys without an OOD experiment type are skipped.
    """
    tokens = key.split('_')
    # Guard the index so a key without any '_' is skipped instead of raising.
    if len(tokens) > 1 and tokens[1] in ('id', 'ood'):
        tokens = ['_'.join(tokens[:2])] + tokens[2:]
    no_edges = 'no-edges' in tokens[-1]
    if no_edges:
        # Drop the marker and the separator character that preceded it.
        tokens[-1] = tokens[-1].replace('no-edges', '')[:-1]
    tokens = [token for token in tokens if token]
    if len(tokens) != 3:
        return None
    name, _dataset, ood_type = tokens
    return no_edges, ood_type, name.replace('accuracy', '').replace('_', '')


# Maps a 9-tuple
#   (ood classes, remove_other, residual, spectral_norm,
#    no_edges, ood_type, metric, name, statistic)
# to a one-element list holding the statistic's value.
d = {}
# The loop variable is deliberately not called `experiment`, so the run
# selected earlier in the notebook is not overwritten by this cell.
for run in experiments:
    model_cfg = run['config']['model']
    data_cfg = run['config']['data']
    residual = model_cfg['residual']
    spectral_norm = model_cfg['use_spectral_norm']
    remove_other = data_cfg['train_labels_remove_other']
    # Runs whose validation labels mention 'Operating_Systems' are tagged
    # 'os'; every other run is tagged 'ai'.
    if any('Operating_Systems' in label for label in data_cfg['val_labels']):
        ood = 'os'
    else:
        ood = 'ai'

    prefix = (ood, remove_other, residual, spectral_norm)

    for key, values in run['result'].items():
        if 'auroc' in key:
            metric, parsed = 'AUC-ROC', _parse_auroc_key(key)
        elif key.startswith('accuracy'):
            metric, parsed = 'accuracy', _parse_accuracy_key(key)
        else:
            # All other result keys are not part of the summary table.
            continue
        if parsed is None:
            continue

        no_edges, ood_type, name = parsed
        # Statistics are computed only for keys that end up in the table.
        samples = np.array(values)
        row = prefix + (no_edges, ood_type, metric, name)
        d[row + ('mean',)] = [samples.mean()]
        d[row + ('std',)] = [samples.std()]

# One row per key tuple; the single value column is labelled 0.
df = pd.DataFrame(d).T.sort_index()

In [40]:
# Long-format summary: the index holds the 9-part key built above
# (configuration, no-edges flag, OOD type, metric, name, statistic),
# column 0 holds the value.
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,0
ai,False,False,False,False,bernoulli,AUC-ROC,7 mog:no,mean,0.48
ai,False,False,False,False,bernoulli,AUC-ROC,7 mog:no,std,0.08
ai,False,False,False,False,bernoulli,AUC-ROC,gpc full:no,mean,0.48
ai,False,False,False,False,bernoulli,AUC-ROC,gpc full:no,std,0.09
ai,False,False,False,False,bernoulli,AUC-ROC,logit energy,mean,0.59
...,...,...,...,...,...,...,...,...,...
os,True,True,True,True,normal,accuracy,,std,0.04
os,True,True,True,True,normal,accuracy,id,mean,0.77
os,True,True,True,True,normal,accuracy,id,std,0.04
os,True,True,True,True,normal,accuracy,ood,mean,0.13


In [41]:
# Move the four configuration levels (0-3), the OOD experiment type (5) and
# the statistic (last level) from the index into regular columns; the
# no-edges flag, metric and metric name stay in the index.
# NOTE: this rebinds `df`, so re-run the cell that builds `df` before
# re-running this one.
df = df.reset_index(level=[0, 1, 2, 3, 5, -1])

In [42]:
# (rows, columns) after moving six index levels into columns.
df.shape

(1536, 7)

In [43]:
# Same data as before; the moved levels now appear as the level_* columns.
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,level_0,level_1,level_2,level_3,level_5,level_8,0
False,AUC-ROC,7 mog:no,ai,False,False,False,bernoulli,mean,0.48
False,AUC-ROC,7 mog:no,ai,False,False,False,bernoulli,std,0.08
False,AUC-ROC,gpc full:no,ai,False,False,False,bernoulli,mean,0.48
False,AUC-ROC,gpc full:no,ai,False,False,False,bernoulli,std,0.09
False,AUC-ROC,logit energy,ai,False,False,False,bernoulli,mean,0.59
...,...,...,...,...,...,...,...,...,...
True,accuracy,,os,True,True,True,normal,std,0.04
True,accuracy,id,os,True,True,True,normal,mean,0.77
True,accuracy,id,os,True,True,True,normal,std,0.04
True,accuracy,ood,os,True,True,True,normal,mean,0.13


In [44]:
# Split the long table into one single-column frame per
# (configuration, OOD experiment, statistic) combination. Each frame's only
# column is labelled with the group's key tuple.
group_columns = ['level_0', 'level_1', 'level_2', 'level_3', 'level_5', 'level_8']
subdfs, names = [], []
for group_key, group_rows in df.groupby(group_columns):
    value_frame = pd.DataFrame(group_rows[0])
    value_frame.columns = [group_key]
    subdfs.append(value_frame)

In [45]:
# Put the per-group value columns side by side, aligned on the remaining
# index (no-edges flag, metric, metric name).
df_cat = pd.concat(subdfs, axis=1)

# Give the column key tuples readable level names.
idx = pd.MultiIndex.from_tuples(
    df_cat.columns,
    names=('OOD Classes', 'Remove OOD', 'Residual', 'Spectral Norm', 'OOD Experiment', 'Stat'),
)
df_cat.columns = idx
df_cat.index.names = ('Remove-Edges', '', '')

pd.set_option("display.precision", 2)

# Order the columns lexicographically by their level values.
df_cat = df_cat.sort_index(axis=1)

In [46]:
# Number of distinct values each column level contributes to one displayed
# block of `df_cat` columns.
exp_sizes = [
    1,  # ood data
    1,  # remove ood vertices from train
    2,  # residual,
    2,  # Spectral norm,
    3,  # ood type
    2,  # stats
]

# Width of one block = product of the per-level counts.
exp_size = 1
for level_size in exp_sizes:
    exp_size *= level_size
exp_size

24

In [47]:


# First block of exp_size columns, taking every second column. In the output
# below these are the 'mean' columns for OOD Classes 'ai', Remove OOD False.
df_cat.iloc[:, 0 * exp_size : 1 * exp_size : 2]

Unnamed: 0_level_0,Unnamed: 1_level_0,OOD Classes,ai,ai,ai,ai,ai,ai,ai,ai,ai,ai,ai,ai
Unnamed: 0_level_1,Unnamed: 1_level_1,Remove OOD,False,False,False,False,False,False,False,False,False,False,False,False
Unnamed: 0_level_2,Unnamed: 1_level_2,Residual,False,False,False,False,False,False,True,True,True,True,True,True
Unnamed: 0_level_3,Unnamed: 1_level_3,Spectral Norm,False,False,False,True,True,True,False,False,False,True,True,True
Unnamed: 0_level_4,Unnamed: 1_level_4,OOD Experiment,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal
Unnamed: 0_level_5,Unnamed: 1_level_5,Stat,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Remove-Edges,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6
False,AUC-ROC,7 mog:no,0.481,0.71,1.0,0.419,0.59,0.998,0.39,0.63,1.0,0.44,0.43,1.0
False,AUC-ROC,gpc full:no,0.484,0.73,1.0,0.479,0.76,0.998,0.4,0.66,1.0,0.56,0.64,1.0
False,AUC-ROC,logit energy,0.586,0.82,0.21,0.572,0.87,0.279,0.64,0.84,0.0618,0.64,0.79,0.0729
False,AUC-ROC,max score,0.573,0.83,0.4,0.569,0.88,0.377,0.64,0.84,0.255,0.67,0.87,0.134
False,AUC-ROC,total predictive entropy,0.582,0.83,0.35,0.572,0.87,0.274,0.66,0.85,0.211,0.69,0.85,0.0459
False,accuracy,,0.8,0.8,0.74,0.806,0.81,0.766,0.79,0.8,0.717,0.79,0.79,0.732
False,accuracy,id,0.808,0.78,0.79,0.813,0.79,0.802,0.8,0.75,0.779,0.8,0.79,0.792
False,accuracy,ood,0.731,,0.32,0.743,,0.44,0.74,,0.16,0.69,,0.197
True,AUC-ROC,7 mog:no,0.00129,0.49,1.0,0.0,0.29,1.0,0.12,0.56,1.0,0.29,0.35,1.0
True,AUC-ROC,gpc full:no,0.00295,0.51,1.0,0.0083,0.42,1.0,0.12,0.58,1.0,0.53,0.54,1.0


In [49]:

# Second block of exp_size columns, taking every second column. In the output
# below these are the 'mean' columns for OOD Classes 'ai', Remove OOD True.
df_cat.iloc[:, 1 * exp_size : 2 * exp_size : 2]

Unnamed: 0_level_0,Unnamed: 1_level_0,OOD Classes,ai,ai,ai,ai,ai,ai,ai,ai,ai,ai,ai,ai
Unnamed: 0_level_1,Unnamed: 1_level_1,Remove OOD,True,True,True,True,True,True,True,True,True,True,True,True
Unnamed: 0_level_2,Unnamed: 1_level_2,Residual,False,False,False,False,False,False,True,True,True,True,True,True
Unnamed: 0_level_3,Unnamed: 1_level_3,Spectral Norm,False,False,False,True,True,True,False,False,False,True,True,True
Unnamed: 0_level_4,Unnamed: 1_level_4,OOD Experiment,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal
Unnamed: 0_level_5,Unnamed: 1_level_5,Stat,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Remove-Edges,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6
False,AUC-ROC,7 mog:no,0.462,0.79,0.997,0.415,0.73,0.997,0.39,0.64,0.999,0.45,0.5,0.999
False,AUC-ROC,gpc full:no,0.472,0.84,0.997,0.479,0.89,0.997,0.41,0.73,0.999,0.55,0.7,1.0
False,AUC-ROC,logit energy,0.6,0.9,0.233,0.595,0.87,0.279,0.66,0.88,0.0757,0.69,0.83,0.047
False,AUC-ROC,max score,0.589,0.9,0.472,0.594,0.9,0.416,0.65,0.89,0.299,0.7,0.89,0.142
False,AUC-ROC,total predictive entropy,0.594,0.9,0.422,0.595,0.89,0.297,0.67,0.89,0.258,0.71,0.88,0.0437
False,accuracy,,0.827,0.82,0.765,0.84,0.83,0.796,0.82,0.81,0.747,0.83,0.83,0.77
False,accuracy,id,0.832,0.85,0.814,0.842,0.86,0.83,0.82,0.82,0.808,0.84,0.83,0.83
False,accuracy,ood,0.789,,0.32,0.817,,0.489,0.78,,0.2,0.79,,0.231
True,AUC-ROC,7 mog:no,0.00458,0.48,1.0,0.000975,0.35,1.0,0.11,0.54,1.0,0.27,0.39,1.0
True,AUC-ROC,gpc full:no,0.00458,0.53,1.0,0.0253,0.52,1.0,0.14,0.6,1.0,0.52,0.61,1.0


In [50]:

# Third block of exp_size columns, taking every second column. In the output
# below these are the 'mean' columns for OOD Classes 'os', Remove OOD False.
df_cat.iloc[:, 2 * exp_size : 3 * exp_size : 2]

Unnamed: 0_level_0,Unnamed: 1_level_0,OOD Classes,os,os,os,os,os,os,os,os,os,os,os,os
Unnamed: 0_level_1,Unnamed: 1_level_1,Remove OOD,False,False,False,False,False,False,False,False,False,False,False,False
Unnamed: 0_level_2,Unnamed: 1_level_2,Residual,False,False,False,False,False,False,True,True,True,True,True,True
Unnamed: 0_level_3,Unnamed: 1_level_3,Spectral Norm,False,False,False,True,True,True,False,False,False,True,True,True
Unnamed: 0_level_4,Unnamed: 1_level_4,OOD Experiment,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal
Unnamed: 0_level_5,Unnamed: 1_level_5,Stat,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Remove-Edges,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6
False,AUC-ROC,7 mog:no,0.481,0.74,1.0,0.418,0.57,0.998,0.39,0.65,1.0,0.44,0.41,1.0
False,AUC-ROC,gpc full:no,0.484,0.77,1.0,0.476,0.78,0.998,0.4,0.68,1.0,0.57,0.63,1.0
False,AUC-ROC,logit energy,0.586,0.92,0.21,0.575,0.94,0.279,0.64,0.92,0.0618,0.64,0.79,0.0778
False,AUC-ROC,max score,0.573,0.91,0.4,0.57,0.96,0.377,0.64,0.91,0.255,0.67,0.93,0.132
False,AUC-ROC,total predictive entropy,0.582,0.92,0.35,0.573,0.95,0.275,0.66,0.93,0.211,0.68,0.9,0.0488
False,accuracy,,0.8,0.8,0.74,0.805,0.81,0.764,0.79,0.8,0.717,0.79,0.79,0.73
False,accuracy,id,0.808,0.78,0.79,0.811,0.79,0.8,0.8,0.75,0.779,0.8,0.78,0.788
False,accuracy,ood,0.731,,0.32,0.749,,0.446,0.74,,0.16,0.69,,0.206
True,AUC-ROC,7 mog:no,0.00129,0.51,1.0,0.0,0.28,1.0,0.12,0.58,1.0,0.31,0.34,1.0
True,AUC-ROC,gpc full:no,0.00295,0.54,1.0,0.00989,0.42,1.0,0.12,0.61,1.0,0.54,0.52,1.0


In [53]:

# Fourth block of exp_size columns, taking every second column. In the output
# below these are the 'mean' columns for OOD Classes 'os', Remove OOD True.
df_cat.iloc[:, 3 * exp_size : 4 * exp_size : 2]

Unnamed: 0_level_0,Unnamed: 1_level_0,OOD Classes,os,os,os,os,os,os,os,os,os,os,os,os
Unnamed: 0_level_1,Unnamed: 1_level_1,Remove OOD,True,True,True,True,True,True,True,True,True,True,True,True
Unnamed: 0_level_2,Unnamed: 1_level_2,Residual,False,False,False,False,False,False,True,True,True,True,True,True
Unnamed: 0_level_3,Unnamed: 1_level_3,Spectral Norm,False,False,False,True,True,True,False,False,False,True,True,True
Unnamed: 0_level_4,Unnamed: 1_level_4,OOD Experiment,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal
Unnamed: 0_level_5,Unnamed: 1_level_5,Stat,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Remove-Edges,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6
False,AUC-ROC,7 mog:no,0.462,0.82,0.997,0.42,0.72,0.997,0.39,0.68,0.999,0.45,0.48,0.999
False,AUC-ROC,gpc full:no,0.472,0.85,0.997,0.48,0.88,0.997,0.41,0.78,0.999,0.57,0.72,1.0
False,AUC-ROC,logit energy,0.6,0.92,0.233,0.6,0.87,0.276,0.66,0.9,0.0757,0.69,0.83,0.0543
False,AUC-ROC,max score,0.589,0.93,0.472,0.6,0.93,0.415,0.65,0.92,0.299,0.69,0.89,0.155
False,AUC-ROC,total predictive entropy,0.594,0.93,0.422,0.6,0.9,0.294,0.67,0.92,0.258,0.71,0.88,0.0498
False,accuracy,,0.827,0.82,0.765,0.84,0.84,0.797,0.82,0.81,0.747,0.82,0.83,0.765
False,accuracy,id,0.832,0.85,0.814,0.85,0.86,0.832,0.82,0.82,0.808,0.83,0.83,0.823
False,accuracy,ood,0.789,,0.32,0.81,,0.483,0.78,,0.2,0.75,,0.237
True,AUC-ROC,7 mog:no,0.00458,0.51,1.0,0.0,0.35,1.0,0.11,0.57,1.0,0.29,0.36,1.0
True,AUC-ROC,gpc full:no,0.00463,0.57,1.0,0.02,0.52,1.0,0.14,0.64,1.0,0.56,0.62,1.0
