In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seml
import pandas as pd
import json
from collections import defaultdict
from functools import reduce

  from tqdm.autonotebook import tqdm


In [2]:
collection_name = 'week9_cora_full'
collection = seml.database.get_collection(collection_name)
experiments = [{'config' : r['config'], 'result' : r['result'], 'id' : r['_id']} for r in collection.find() if r['status'] in ('COMPLETED',)]
for ex in experiments:
    with open(ex['result']) as f:
        ex['result'] = json.load(f)

print(f'Number of finished experiments : {len(experiments)}')

Number of finished experiments : 14


In [3]:
def get_experiment(experiments, residual=True, spectral_norm=True, train_labels_remove_other=True):
    exs = [
        r for r in experiments if r['config']['model']['residual'] == residual and r['config']['model']['use_spectral_norm'] == spectral_norm and r['config']['data']['train_labels_remove_other'] == train_labels_remove_other
    ]
    return exs[0]

In [4]:
experiment = get_experiment(experiments, residual=True, spectral_norm=True, train_labels_remove_other=True)

In [5]:
print([k for k in experiment['result'].keys() if 'acc' in k])

['val_accuracy-val-train-labels-0', 'val_accuracy-val-reduced-0', 'accuracy_val-reduced', 'accuracy_id_val-reduced', 'accuracy_ood_val-reduced', 'accuracy_val-reduced_no-edges', 'accuracy_id_val-reduced_no-edges', 'accuracy_ood_val-reduced_no-edges', 'accuracy_val_loc', 'accuracy_id_val_loc', 'accuracy_ood_val_loc', 'accuracy_val_loc-no-edges', 'accuracy_id_val_loc-no-edges', 'accuracy_ood_val_loc-no-edges', 'accuracy_val-reduced-bernoulli_bernoulli', 'accuracy_id_val-reduced-bernoulli_bernoulli', 'accuracy_ood_val-reduced-bernoulli_bernoulli', 'accuracy_val-reduced-bernoulli_bernoulli-no-edges', 'accuracy_id_val-reduced-bernoulli_bernoulli-no-edges', 'accuracy_ood_val-reduced-bernoulli_bernoulli-no-edges', 'accuracy_val-reduced-normal_normal', 'accuracy_id_val-reduced-normal_normal', 'accuracy_ood_val-reduced-normal_normal', 'accuracy_val-reduced-normal_normal-no-edges', 'accuracy_id_val-reduced-normal_normal-no-edges', 'accuracy_ood_val-reduced-normal_normal-no-edges']


In [6]:

d = {}
for experiment in experiments:
    residual = experiment['config']['model']['residual']
    spectral_norm = experiment['config']['model']['use_spectral_norm']
    remove_other = experiment['config']['data']['train_labels_remove_other']
    if any('Operating_Systems' in label for label in experiment['config']['data']['val_labels']):
        ood = 'os'
    else:
        ood = 'ai'
    
    prefix = (ood, remove_other, residual, spectral_norm)
    
    for k, v in experiment['result'].items():
        mean, std = np.array(v).mean(), np.array(v).std()
        med = np.median(np.array(v))
        if 'auroc' in k:
            if 'no-edges' in k:
                no_edges = True
                k = k.replace('_no-edges', '')
                k = k.replace('-no-edges', '')
            else:
                no_edges = False
            
            k_tokens = k.split('_')
            k, ood_type = '_'.join(k_tokens[:-1]), k_tokens[-1]
            prefix_edges = prefix + (no_edges, ood_type,)
                
            if not 'no' in k and ':' in k:
                continue
            d[prefix_edges + ('AUC-ROC', k.replace('auroc_', '').replace('-', ' '), 'mean')] = [mean]
            d[prefix_edges + ('AUC-ROC', k.replace('auroc_', '').replace('-', ' '), 'std')] = [std]
            #d[prefix_edges + ('feature density', k.replace('auroc:', ''), 'median')] = [med]
        elif k.startswith('accuracy'):
            k_tokens = k.split('_')
            if k_tokens[1] in ('id', 'ood'):
                k_tokens = ['_'.join(k_tokens[:2])] + k_tokens[2:]
            if 'no-edges' in k_tokens[-1]:
                k_tokens[-1] = k_tokens[-1].replace('no-edges', '')[:-1]
                no_edges = True
            else:
                no_edges = False
            k_tokens = [token for token in k_tokens if len(token) > 0]
            if len(k_tokens) == 3:
                ood_type = k_tokens[-1]
                k_tokens = k_tokens[:-1]
            else:
                #ood_type = ''
                continue
            name, dataset = k_tokens
            
            d[prefix + (no_edges, ood_type, 'accuracy', name.replace('accuracy', '').replace('_', ''), 'mean')] = [mean]
            d[prefix + (no_edges, ood_type, 'accuracy', name.replace('accuracy', '').replace('_', ''), 'std')] = [std]
            #d[prefix + (no_edges, ood_type, 'accuracy', name.replace('accuracy', '').replace('_', ''), 'med')] = [med]
        elif k.startswith('ece') and False:
            if 'no_edges' in k:
                k = k.replace('no_edges', '')[:-1]
                no_edges = True
            else:
                no_edges = False
            
            k_tokens = k.split('_')
            d[prefix + (no_edges, '', 'ece', '', 'mean')] = [mean]
            d[prefix + (no_edges, '', 'ece', '', 'std')] = [std]
            
            #print(name, dataset, prefix + (no_edges, ood_type,))
            
            
            #d[(ood, remove_other, residual, spectral_norm, '', k, '', 'mean')] = [mean]
            #d[(ood, remove_other, residual, spectral_norm, '', k, '', 'std')] = [std]
            #d[(ood, remove_other, residual, spectral_norm, '', k, '', 'median')] = [med]
                
df = pd.DataFrame(d).T.sort_index()

In [7]:
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,0
ai,False,False,False,False,bernoulli,AUC-ROC,7 mog:no,mean,0.432812
ai,False,False,False,False,bernoulli,AUC-ROC,7 mog:no,std,0.065340
ai,False,False,False,False,bernoulli,AUC-ROC,gpc full:no,mean,0.455578
ai,False,False,False,False,bernoulli,AUC-ROC,gpc full:no,std,0.077607
ai,False,False,False,False,bernoulli,AUC-ROC,logit energy,mean,0.583401
...,...,...,...,...,...,...,...,...,...
os,True,True,True,True,normal,accuracy,,std,0.023602
os,True,True,True,True,normal,accuracy,id,mean,0.762222
os,True,True,True,True,normal,accuracy,id,std,0.027353
os,True,True,True,True,normal,accuracy,ood,mean,0.142857


In [8]:
df = df.reset_index((0, 1, 2, 3, 5, -1))

In [9]:
df.shape

(1344, 7)

In [10]:
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,level_0,level_1,level_2,level_3,level_5,level_8,0
False,AUC-ROC,7 mog:no,ai,False,False,False,bernoulli,mean,0.432812
False,AUC-ROC,7 mog:no,ai,False,False,False,bernoulli,std,0.065340
False,AUC-ROC,gpc full:no,ai,False,False,False,bernoulli,mean,0.455578
False,AUC-ROC,gpc full:no,ai,False,False,False,bernoulli,std,0.077607
False,AUC-ROC,logit energy,ai,False,False,False,bernoulli,mean,0.583401
...,...,...,...,...,...,...,...,...,...
True,accuracy,,os,True,True,True,normal,std,0.023602
True,accuracy,id,os,True,True,True,normal,mean,0.762222
True,accuracy,id,os,True,True,True,normal,std,0.027353
True,accuracy,ood,os,True,True,True,normal,mean,0.142857


In [11]:
subdfs, names = [], []
for n, g in df.groupby(['level_0', 'level_1', 'level_2', 'level_3', 'level_5', 'level_8']):
    g = pd.DataFrame(g[0])
    g.columns = [n]
    subdfs.append(g)

In [12]:
df_cat = pd.concat(subdfs, axis=1)
idx = pd.MultiIndex.from_tuples(df_cat.columns, names=('OOD Classes', 'Remove OOD', 'Residual', 'Spectral Norm', 'OOD Experiment', 'Stat'))
df_cat.columns = idx
df_cat.index.names = ('Remove-Edges', '', '')
pd.set_option("display.precision", 2)
df_cat = df_cat.T.sort_index().T

In [13]:
exp_sizes = [
    1, # ood data
    1, # remove ood vertices from train
    2, # residual,
    2, # Spectral norm,
    3, # ood type
    2, # stats
]
exp_size = reduce(lambda x, y: x * y, exp_sizes)
exp_size

24

In [14]:


df_cat.iloc[:, 0 * exp_size : 1 * exp_size : 2]

Unnamed: 0_level_0,Unnamed: 1_level_0,OOD Classes,ai,ai,ai,ai,ai,ai,ai,ai,ai,ai,ai,ai
Unnamed: 0_level_1,Unnamed: 1_level_1,Remove OOD,False,False,False,False,False,False,True,True,True,True,True,True
Unnamed: 0_level_2,Unnamed: 1_level_2,Residual,False,False,False,False,False,False,False,False,False,False,False,False
Unnamed: 0_level_3,Unnamed: 1_level_3,Spectral Norm,False,False,False,True,True,True,False,False,False,True,True,True
Unnamed: 0_level_4,Unnamed: 1_level_4,OOD Experiment,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal
Unnamed: 0_level_5,Unnamed: 1_level_5,Stat,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Remove-Edges,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6
False,AUC-ROC,7 mog:no,0.433,0.67,0.998,0.39,0.62,0.998,0.419,0.74,0.997,0.396,0.73,0.998
False,AUC-ROC,gpc full:no,0.456,0.73,0.998,0.45,0.76,0.998,0.444,0.84,0.997,0.438,0.88,0.998
False,AUC-ROC,logit energy,0.583,0.83,0.249,0.57,0.7,0.305,0.6,0.88,0.289,0.599,0.75,0.283
False,AUC-ROC,max score,0.575,0.83,0.438,0.58,0.82,0.383,0.585,0.87,0.492,0.592,0.86,0.422
False,AUC-ROC,total predictive entropy,0.582,0.84,0.395,0.58,0.79,0.288,0.594,0.87,0.451,0.594,0.84,0.304
False,accuracy,,0.807,0.81,0.759,0.8,0.8,0.752,0.836,0.83,0.779,0.834,0.83,0.791
False,accuracy,id,0.812,0.78,0.798,0.81,0.79,0.79,0.839,0.86,0.827,0.837,0.85,0.83
False,accuracy,ood,0.763,,0.409,0.74,,0.414,0.811,,0.346,0.811,,0.44
True,AUC-ROC,7 mog:no,0.000317,0.41,1.0,0.0,0.38,1.0,0.0,0.42,1.0,0.0,0.44,1.0
True,AUC-ROC,gpc full:no,0.0,0.44,1.0,0.0,0.48,1.0,0.000317,0.5,1.0,0.00213,0.55,1.0


In [15]:

df_cat.iloc[:, 1 * exp_size : 2 * exp_size : 2]

Unnamed: 0_level_0,Unnamed: 1_level_0,OOD Classes,ai,ai,ai,ai,ai,ai,os,os,os,os,os,os
Unnamed: 0_level_1,Unnamed: 1_level_1,Remove OOD,True,True,True,True,True,True,False,False,False,False,False,False
Unnamed: 0_level_2,Unnamed: 1_level_2,Residual,True,True,True,True,True,True,False,False,False,False,False,False
Unnamed: 0_level_3,Unnamed: 1_level_3,Spectral Norm,False,False,False,True,True,True,False,False,False,True,True,True
Unnamed: 0_level_4,Unnamed: 1_level_4,OOD Experiment,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal
Unnamed: 0_level_5,Unnamed: 1_level_5,Stat,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Remove-Edges,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6
False,AUC-ROC,7 mog:no,0.42,0.63,0.999,0.5,0.47,1.0,0.433,0.67,0.998,0.39,0.6,0.998
False,AUC-ROC,gpc full:no,0.46,0.75,0.999,0.66,0.68,1.0,0.456,0.73,0.998,0.45,0.78,0.998
False,AUC-ROC,logit energy,0.66,0.86,0.0936,0.65,0.75,0.0656,0.583,0.94,0.249,0.57,0.76,0.305
False,AUC-ROC,max score,0.65,0.87,0.341,0.7,0.85,0.107,0.575,0.93,0.438,0.58,0.93,0.384
False,AUC-ROC,total predictive entropy,0.67,0.87,0.303,0.71,0.83,0.0373,0.582,0.94,0.395,0.58,0.92,0.288
False,accuracy,,0.82,0.81,0.743,0.82,0.82,0.756,0.807,0.81,0.759,0.8,0.8,0.753
False,accuracy,id,0.82,0.82,0.805,0.83,0.83,0.819,0.812,0.78,0.798,0.81,0.78,0.79
False,accuracy,ood,0.81,,0.189,0.74,,0.189,0.763,,0.409,0.74,,0.423
True,AUC-ROC,7 mog:no,0.09,0.53,1.0,0.34,0.41,1.0,0.000317,0.4,1.0,0.0,0.4,1.0
True,AUC-ROC,gpc full:no,0.11,0.62,1.0,0.67,0.59,1.0,0.0,0.43,1.0,0.0,0.52,1.0


In [16]:

df_cat.iloc[:, 2 * exp_size : 3 * exp_size : 2]

Unnamed: 0_level_0,Unnamed: 1_level_0,OOD Classes,os,os,os,os,os,os,os,os,os,os,os,os
Unnamed: 0_level_1,Unnamed: 1_level_1,Remove OOD,False,False,False,False,False,False,True,True,True,True,True,True
Unnamed: 0_level_2,Unnamed: 1_level_2,Residual,True,True,True,True,True,True,False,False,False,False,False,False
Unnamed: 0_level_3,Unnamed: 1_level_3,Spectral Norm,False,False,False,True,True,True,False,False,False,True,True,True
Unnamed: 0_level_4,Unnamed: 1_level_4,OOD Experiment,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal,bernoulli,loc,normal
Unnamed: 0_level_5,Unnamed: 1_level_5,Stat,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Remove-Edges,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6
False,AUC-ROC,7 mog:no,0.42,0.65,1.0,0.45,0.38,1.0,0.419,0.72,0.997,0.399,0.69,0.998
False,AUC-ROC,gpc full:no,0.42,0.68,0.999,0.59,0.56,1.0,0.444,0.84,0.997,0.44,0.86,0.998
False,AUC-ROC,logit energy,0.65,0.91,0.0778,0.59,0.73,0.072,0.6,0.91,0.289,0.599,0.79,0.283
False,AUC-ROC,max score,0.64,0.91,0.305,0.67,0.88,0.0969,0.585,0.92,0.492,0.593,0.92,0.422
False,AUC-ROC,total predictive entropy,0.66,0.92,0.263,0.67,0.86,0.0388,0.594,0.91,0.451,0.592,0.89,0.305
False,accuracy,,0.8,0.8,0.731,0.76,0.77,0.707,0.836,0.83,0.779,0.835,0.83,0.789
False,accuracy,id,0.81,0.77,0.792,0.77,0.77,0.763,0.839,0.86,0.827,0.837,0.85,0.829
False,accuracy,ood,0.73,,0.183,0.65,,0.203,0.811,,0.346,0.814,,0.434
True,AUC-ROC,7 mog:no,0.09,0.56,1.0,0.32,0.34,1.0,0.0,0.41,1.0,0.0,0.44,1.0
True,AUC-ROC,gpc full:no,0.09,0.59,1.0,0.52,0.48,1.0,0.000317,0.5,1.0,0.00213,0.54,1.0


In [17]:

df_cat.iloc[:, 3 * exp_size : 4 * exp_size : 2]

Unnamed: 0_level_0,Unnamed: 1_level_0,OOD Classes,os,os,os,os,os,os
Unnamed: 0_level_1,Unnamed: 1_level_1,Remove OOD,True,True,True,True,True,True
Unnamed: 0_level_2,Unnamed: 1_level_2,Residual,True,True,True,True,True,True
Unnamed: 0_level_3,Unnamed: 1_level_3,Spectral Norm,False,False,False,True,True,True
Unnamed: 0_level_4,Unnamed: 1_level_4,OOD Experiment,bernoulli,loc,normal,bernoulli,loc,normal
Unnamed: 0_level_5,Unnamed: 1_level_5,Stat,mean,mean,mean,mean,mean,mean
Remove-Edges,Unnamed: 1_level_6,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6
False,AUC-ROC,7 mog:no,0.42,0.64,0.999,0.49,0.46,1.0
False,AUC-ROC,gpc full:no,0.46,0.76,0.999,0.67,0.69,1.0
False,AUC-ROC,logit energy,0.66,0.89,0.0936,0.65,0.76,0.0634
False,AUC-ROC,max score,0.65,0.89,0.341,0.7,0.88,0.107
False,AUC-ROC,total predictive entropy,0.67,0.9,0.303,0.71,0.86,0.0356
False,accuracy,,0.82,0.81,0.743,0.82,0.82,0.759
False,accuracy,id,0.82,0.82,0.805,0.83,0.82,0.82
False,accuracy,ood,0.81,,0.189,0.74,,0.203
True,AUC-ROC,7 mog:no,0.09,0.53,1.0,0.33,0.38,1.0
True,AUC-ROC,gpc full:no,0.11,0.62,1.0,0.67,0.6,1.0
