In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seml
import pandas as pd
import json
from collections import defaultdict
from functools import reduce

  from tqdm.autonotebook import tqdm


In [2]:
# Name of the seml collection to analyse.
#collection_name = 'week8_cora_full_ensemble'
collection_name = 'week9_cora_full_dropout'

collection = seml.database.get_collection(collection_name)

# Keep only runs whose status is COMPLETED, and of each run only its
# config, its result entry and its database id.
experiments = [
    {'config' : run['config'], 'result' : run['result'], 'id' : run['_id']}
    for run in collection.find()
    if run['status'] in ('COMPLETED',)
]

# Each 'result' entry is used as a path to a JSON file; replace it in place
# with the parsed contents of that file.
for ex in experiments:
    with open(ex['result']) as result_file:
        ex['result'] = json.load(result_file)

print(f'Number of finished experiments : {len(experiments)}')

Number of finished experiments : 13


In [3]:
def get_experiment(experiments, residual=True, spectral_norm=True, train_labels_remove_other=True):
    """Return the first experiment whose configuration matches the given flags.

    Parameters
    ----------
    experiments : iterable of dict
        Experiment records. Each record is read at
        ``['config']['model']['residual']``,
        ``['config']['model']['use_spectral_norm']`` and
        ``['config']['data']['train_labels_remove_other']``.
    residual, spectral_norm, train_labels_remove_other
        Values the three configuration entries must be equal to.

    Returns
    -------
    dict
        The first matching record, in iteration order.

    Raises
    ------
    IndexError
        If no record matches. The message names the requested flags so the
        failure is easy to diagnose.
    """
    for candidate in experiments:
        config = candidate['config']
        # Conditions are chained with `and`, so later keys are only looked up
        # when the earlier comparisons succeeded.
        if (
            config['model']['residual'] == residual
            and config['model']['use_spectral_norm'] == spectral_norm
            and config['data']['train_labels_remove_other'] == train_labels_remove_other
        ):
            # Stop at the first hit instead of scanning the remaining records.
            return candidate
    raise IndexError(
        f'No experiment with residual={residual!r}, '
        f'spectral_norm={spectral_norm!r}, '
        f'train_labels_remove_other={train_labels_remove_other!r}'
    )

In [4]:
# Select one run for inspection: residual, spectral norm and
# train_labels_remove_other all enabled.
experiment = get_experiment(
    experiments,
    residual=True,
    spectral_norm=True,
    train_labels_remove_other=True,
)

In [5]:
# Show which result keys of the selected run contain 'auroc'.
auroc_keys = [key for key in experiment['result'] if 'auroc' in key]
print(auroc_keys)

['auroc_total-predictive-entropy_loc', 'auroc_max-score_loc', 'auroc_expected-softmax-entropy_loc', 'auroc_mutual-information_loc', 'auroc_predicted-class-variance_loc', 'auroc_total-predictive-entropy_loc-no-edges', 'auroc_max-score_loc-no-edges', 'auroc_expected-softmax-entropy_loc-no-edges', 'auroc_mutual-information_loc-no-edges', 'auroc_predicted-class-variance_loc-no-edges', 'auroc_total-predictive-entropy_bernoulli', 'auroc_max-score_bernoulli', 'auroc_expected-softmax-entropy_bernoulli', 'auroc_mutual-information_bernoulli', 'auroc_predicted-class-variance_bernoulli', 'auroc_total-predictive-entropy_bernoulli-no-edges', 'auroc_max-score_bernoulli-no-edges', 'auroc_expected-softmax-entropy_bernoulli-no-edges', 'auroc_mutual-information_bernoulli-no-edges', 'auroc_predicted-class-variance_bernoulli-no-edges', 'auroc_total-predictive-entropy_normal', 'auroc_max-score_normal', 'auroc_expected-softmax-entropy_normal', 'auroc_mutual-information_normal', 'auroc_predicted-class-varianc

In [6]:

# Flatten the results of all experiments into one long-format table.
#
# Every row of `d` is keyed by a 9-tuple
#   (ood classes, remove_other, residual, spectral_norm,
#    no_edges, ood_type, metric family, metric name, statistic)
# and holds a single value (the mean or the std over the stored values).
d = {}
# The loop variable is `ex` (not `experiment`) so that the run selected with
# get_experiment() earlier in the notebook is not overwritten by this cell.
for ex in experiments:
    residual = ex['config']['model']['residual']
    spectral_norm = ex['config']['model']['use_spectral_norm']
    remove_other = ex['config']['data']['train_labels_remove_other']
    # The OOD class set is inferred from the validation label names.
    if any('Operating_Systems' in label for label in ex['config']['data']['val_labels']):
        ood = 'os'
    else:
        ood = 'ai'

    prefix = (ood, remove_other, residual, spectral_norm)

    for k, v in ex['result'].items():
        if 'auroc' in k:
            # A 'no-edges' marker (joined by '_' or '-') becomes a flag and is
            # stripped from the key.
            if 'no-edges' in k:
                no_edges = True
                k = k.replace('_no-edges', '').replace('-no-edges', '')
            else:
                no_edges = False

            # The last '_'-separated token is the OOD experiment type.
            k_tokens = k.split('_')
            k, ood_type = '_'.join(k_tokens[:-1]), k_tokens[-1]

            # NOTE(review): presumably a leftover filter for an older key
            # format that used ':' - confirm it is still needed.
            if 'no' not in k and ':' in k:
                continue
            family = 'AUC-ROC'
            metric = k.replace('auroc_', '').replace('-', ' ')
        elif k.startswith('accuracy'):
            k_tokens = k.split('_')
            # Fold an 'id' / 'ood' qualifier into the first token so that it
            # ends up in the metric name. The length guard keeps a key without
            # any '_' from raising; it is skipped below like other
            # unrecognised shapes.
            if len(k_tokens) > 1 and k_tokens[1] in ('id', 'ood'):
                k_tokens = ['_'.join(k_tokens[:2])] + k_tokens[2:]
            if 'no-edges' in k_tokens[-1]:
                # Drop the marker plus the separator character preceding it.
                k_tokens[-1] = k_tokens[-1].replace('no-edges', '')[:-1]
                no_edges = True
            else:
                no_edges = False
            k_tokens = [token for token in k_tokens if len(token) > 0]
            # Only keys of the form <name>_<dataset>_<ood type> are tabulated.
            if len(k_tokens) != 3:
                continue
            ood_type = k_tokens[-1]
            # NOTE(review): the dataset token (k_tokens[1]) is not part of the
            # row key, so keys that differ only in the dataset overwrite each
            # other - confirm this is intended.
            name = k_tokens[0]
            family = 'accuracy'
            metric = name.replace('accuracy', '').replace('_', '')
        else:
            # All other keys are not tabulated. This includes 'ece...' keys,
            # whose handling was already disabled in the previous version of
            # this cell.
            continue

        # Statistics are computed only for keys that are actually stored.
        values = np.array(v)
        row = prefix + (no_edges, ood_type, family, metric)
        d[row + ('mean',)] = [values.mean()]
        d[row + ('std',)] = [values.std()]

# One row per key tuple, sorted lexicographically by the tuple.
df = pd.DataFrame(d).T.sort_index()

In [7]:
# Display the long-format table: one row per 9-part key, one value column (0).
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,0
ai,False,False,True,False,bernoulli,AUC-ROC,expected softmax entropy,mean,0.625079
ai,False,False,True,False,bernoulli,AUC-ROC,expected softmax entropy,std,0.071041
ai,False,False,True,False,bernoulli,AUC-ROC,logit energy,mean,0.604104
ai,False,False,True,False,bernoulli,AUC-ROC,logit energy,std,0.063007
ai,False,False,True,False,bernoulli,AUC-ROC,max score,mean,0.582494
...,...,...,...,...,...,...,...,...,...
os,True,True,True,True,normal,accuracy,,std,0.035966
os,True,True,True,True,normal,accuracy,id,mean,0.670159
os,True,True,True,True,normal,accuracy,id,std,0.039846
os,True,True,True,True,normal,accuracy,ood,mean,0.117143


In [8]:
# Move index levels 0-3, level 5 and the last level out of the index into
# ordinary columns (they show up as level_0 ... level_3, level_5, level_8).
# The remaining levels stay as the row index.
df = df.reset_index(level=[0, 1, 2, 3, 5, -1])

In [9]:
# Sanity check: (rows, columns) of the table after the index reset.
df.shape

(1404, 7)

In [10]:
# Display the table again, now with the level_* columns next to the values.
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,level_0,level_1,level_2,level_3,level_5,level_8,0
False,AUC-ROC,expected softmax entropy,ai,False,False,True,bernoulli,mean,0.625079
False,AUC-ROC,expected softmax entropy,ai,False,False,True,bernoulli,std,0.071041
False,AUC-ROC,logit energy,ai,False,False,True,bernoulli,mean,0.604104
False,AUC-ROC,logit energy,ai,False,False,True,bernoulli,std,0.063007
False,AUC-ROC,max score,ai,False,False,True,bernoulli,mean,0.582494
...,...,...,...,...,...,...,...,...,...
True,accuracy,,os,True,True,True,normal,std,0.035966
True,accuracy,id,os,True,True,True,normal,mean,0.670159
True,accuracy,id,os,True,True,True,normal,std,0.039846
True,accuracy,ood,os,True,True,True,normal,mean,0.117143


In [11]:
# Build one single-column frame per combination of the level_* columns.
# Each frame keeps the value column (0) and is relabelled with the group key,
# so the frames can later be placed side by side.
names = []
subdfs = []
group_columns = ['level_0', 'level_1', 'level_2', 'level_3', 'level_5', 'level_8']
for group_key, group_rows in df.groupby(group_columns):
    value_column = group_rows[0].to_frame()
    value_column.columns = [group_key]
    subdfs.append(value_column)

In [12]:
# Place the per-group columns side by side.
df_cat = pd.concat(subdfs, axis=1)

# Give the column tuples named levels, in the order used for the grouping.
column_level_names = (
    'OOD Classes',
    'Remove OOD',
    'Residual',
    'Spectral Norm',
    'OOD Experiment',
    'Stat',
)
idx = pd.MultiIndex.from_tuples(df_cat.columns, names=column_level_names)
df_cat.columns = idx
df_cat.index.names = ('Remove-Edges', '', '')

# Display setting: limit the precision used when frames are rendered.
pd.set_option("display.precision", 2)

# Sort the columns by transposing, sorting the (then row) index and
# transposing back.
df_cat = df_cat.transpose().sort_index().transpose()

In [13]:
# Number of entries each column level contributes to one display chunk,
# listed in the order of the column levels.
exp_sizes = [
    1,  # OOD data: one setting per chunk
    1,  # removal of OOD vertices from train: one setting per chunk
    2,  # residual
    2,  # spectral norm
    3,  # OOD type
    2,  # statistics
]
# NOTE(review): slicing the columns in steps of exp_size presumes that every
# residual x spectral-norm combination exists for each (OOD Classes,
# Remove OOD) pair. The recorded tables below contain a chunk that mixes 'ai'
# and 'os' columns, so confirm the grid is complete before relying on the
# chunk boundaries.
exp_size = int(np.prod(exp_sizes))
exp_size

24

In [14]:


# First chunk of exp_size columns, taking every second column.
df_cat.iloc[:, slice(0 * exp_size, 1 * exp_size, 2)]

Unnamed: 0_level_0,Unnamed: 1_level_0,OOD Classes,ai,ai,ai,ai,ai,ai,ai,ai,ai,ai,ai,ai
Unnamed: 0_level_1,Unnamed: 1_level_1,Remove OOD,False,False,False,False,False,False,False,False,False,True,True,True
Unnamed: 0_level_2,Unnamed: 1_level_2,Residual,False,False,False,True,True,True,True,True,True,False,False,False
Unnamed: 0_level_3,Unnamed: 1_level_3,Spectral Norm,True,True,True,False,False,False,True,True,True,False,False,False
Unnamed: 0_level_4,Unnamed: 1_level_4,OOD Experiment,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal
Unnamed: 0_level_5,Unnamed: 1_level_5,Stat,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Remove-Edges,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6
False,AUC-ROC,expected softmax entropy,0.63,0.73,0.215,0.637,0.82,0.213,0.73,0.76,0.0208,0.606,0.86,0.463
False,AUC-ROC,logit energy,0.6,0.67,0.22,0.7,0.8,0.039,0.65,0.68,0.0593,0.624,0.86,0.172
False,AUC-ROC,max score,0.58,0.79,0.391,0.601,0.82,0.552,0.67,0.8,0.139,0.591,0.86,0.66
False,AUC-ROC,mutual information,0.39,0.45,0.794,0.451,0.64,0.817,0.28,0.4,0.992,0.508,0.62,0.818
False,AUC-ROC,predicted class variance,0.4,0.31,0.748,0.469,0.5,0.825,0.33,0.31,0.946,0.522,0.46,0.793
False,AUC-ROC,total predictive entropy,0.59,0.77,0.311,0.613,0.81,0.452,0.68,0.79,0.0555,0.597,0.86,0.617
False,accuracy,,0.76,0.78,0.737,0.803,0.81,0.736,0.74,0.76,0.691,0.823,0.83,0.797
False,accuracy,id,0.77,0.76,0.769,0.811,0.77,0.795,0.76,0.75,0.744,0.825,0.85,0.828
False,accuracy,ood,0.7,,0.446,0.734,,0.206,0.61,,0.209,0.806,,0.517
True,AUC-ROC,expected softmax entropy,0.98,0.69,0.00492,0.997,0.63,0.00896,0.95,0.67,0.00186,0.991,0.68,0.28


In [15]:

# Second chunk of exp_size columns, taking every second column.
df_cat.iloc[:, slice(1 * exp_size, 2 * exp_size, 2)]

Unnamed: 0_level_0,Unnamed: 1_level_0,OOD Classes,ai,ai,ai,ai,ai,ai,ai,ai,ai,os,os,os
Unnamed: 0_level_1,Unnamed: 1_level_1,Remove OOD,True,True,True,True,True,True,True,True,True,False,False,False
Unnamed: 0_level_2,Unnamed: 1_level_2,Residual,False,False,False,True,True,True,True,True,True,False,False,False
Unnamed: 0_level_3,Unnamed: 1_level_3,Spectral Norm,True,True,True,False,False,False,True,True,True,False,False,False
Unnamed: 0_level_4,Unnamed: 1_level_4,OOD Experiment,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal
Unnamed: 0_level_5,Unnamed: 1_level_5,Stat,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Remove-Edges,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6
False,AUC-ROC,expected softmax entropy,0.63,0.77,0.214,0.662,0.84,0.252,0.75,0.74,0.0238,0.593,0.91,0.414
False,AUC-ROC,logit energy,0.63,0.71,0.199,0.71,0.84,0.0438,0.68,0.7,0.0458,0.608,0.93,0.2
False,AUC-ROC,max score,0.62,0.84,0.419,0.619,0.86,0.638,0.7,0.82,0.159,0.582,0.89,0.614
False,AUC-ROC,mutual information,0.38,0.43,0.833,0.478,0.71,0.881,0.28,0.44,0.991,0.513,0.49,0.798
False,AUC-ROC,predicted class variance,0.39,0.29,0.766,0.497,0.53,0.889,0.3,0.34,0.947,0.499,0.35,0.778
False,AUC-ROC,total predictive entropy,0.61,0.82,0.348,0.633,0.86,0.521,0.71,0.8,0.0637,0.588,0.89,0.569
False,accuracy,,0.81,0.81,0.769,0.822,0.82,0.755,0.78,0.78,0.728,0.801,0.81,0.775
False,accuracy,id,0.81,0.81,0.809,0.825,0.84,0.816,0.8,0.78,0.787,0.807,0.78,0.802
False,accuracy,ood,0.77,,0.414,0.794,,0.206,0.66,,0.203,0.749,,0.534
True,AUC-ROC,expected softmax entropy,0.97,0.71,0.00846,0.999,0.67,0.0134,0.93,0.69,0.00245,0.993,0.66,0.256


In [16]:

# Third chunk of exp_size columns, taking every second column.
df_cat.iloc[:, slice(2 * exp_size, 3 * exp_size, 2)]

Unnamed: 0_level_0,Unnamed: 1_level_0,OOD Classes,os,os,os,os,os,os,os,os,os,os,os,os
Unnamed: 0_level_1,Unnamed: 1_level_1,Remove OOD,False,False,False,True,True,True,True,True,True,True,True,True
Unnamed: 0_level_2,Unnamed: 1_level_2,Residual,True,True,True,False,False,False,False,False,False,True,True,True
Unnamed: 0_level_3,Unnamed: 1_level_3,Spectral Norm,True,True,True,False,False,False,True,True,True,False,False,False
Unnamed: 0_level_4,Unnamed: 1_level_4,OOD Experiment,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal
Unnamed: 0_level_5,Unnamed: 1_level_5,Stat,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Remove-Edges,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6
False,AUC-ROC,expected softmax entropy,0.74,0.83,0.0183,0.6,0.91,0.459,0.62,0.85,0.214,0.679,0.87,0.251
False,AUC-ROC,logit energy,0.63,0.72,0.0515,0.63,0.93,0.172,0.64,0.79,0.198,0.715,0.89,0.0515
False,AUC-ROC,max score,0.68,0.86,0.13,0.59,0.9,0.666,0.61,0.9,0.428,0.634,0.88,0.638
False,AUC-ROC,mutual information,0.27,0.34,0.991,0.525,0.53,0.823,0.4,0.32,0.85,0.48,0.7,0.889
False,AUC-ROC,predicted class variance,0.32,0.28,0.956,0.533,0.39,0.796,0.4,0.21,0.786,0.506,0.51,0.899
False,AUC-ROC,total predictive entropy,0.69,0.86,0.0551,0.594,0.89,0.615,0.61,0.89,0.354,0.65,0.88,0.534
False,accuracy,,0.74,0.76,0.694,0.823,0.83,0.797,0.81,0.81,0.769,0.822,0.82,0.755
False,accuracy,id,0.76,0.73,0.747,0.825,0.85,0.828,0.81,0.81,0.808,0.825,0.84,0.816
False,accuracy,ood,0.62,,0.214,0.806,,0.517,0.76,,0.414,0.794,,0.206
True,AUC-ROC,expected softmax entropy,0.95,0.7,0.000975,0.996,0.7,0.298,0.97,0.74,0.0104,0.999,0.69,0.014


In [17]:

# Fourth chunk of exp_size columns, taking every second column.
df_cat.iloc[:, slice(3 * exp_size, 4 * exp_size, 2)]

Unnamed: 0_level_0,Unnamed: 1_level_0,OOD Classes,os,os,os
Unnamed: 0_level_1,Unnamed: 1_level_1,Remove OOD,True,True,True
Unnamed: 0_level_2,Unnamed: 1_level_2,Residual,True,True,True
Unnamed: 0_level_3,Unnamed: 1_level_3,Spectral Norm,True,True,True
Unnamed: 0_level_4,Unnamed: 1_level_4,OOD Experiment,bernoulli,loc,normal
Unnamed: 0_level_5,Unnamed: 1_level_5,Stat,mean,mean,mean
Remove-Edges,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6
False,AUC-ROC,expected softmax entropy,0.75,0.81,0.0299
False,AUC-ROC,logit energy,0.69,0.75,0.0659
False,AUC-ROC,max score,0.69,0.87,0.168
False,AUC-ROC,mutual information,0.28,0.36,0.988
False,AUC-ROC,predicted class variance,0.32,0.28,0.953
False,AUC-ROC,total predictive entropy,0.71,0.86,0.0737
False,accuracy,,0.79,0.79,0.725
False,accuracy,id,0.8,0.78,0.785
False,accuracy,ood,0.67,,0.189
True,AUC-ROC,expected softmax entropy,0.93,0.72,0.00113
