In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seml
import pandas as pd
import json
from collections import defaultdict
from functools import reduce
import seaborn as sns
from scipy.stats import binned_statistic
from matplotlib.lines import Line2D

In [2]:
# Change into the parent directory before importing project-local modules.
# NOTE(review): presumably the notebook lives one level below the repository root — confirm.
%cd ..

/nfs/homedirs/fuchsgru/MastersThesis


In [3]:
import data.constants as dc

In [4]:
# Load all completed runs of the sweep from the seml collection.
collection_name = 'week18_dropout_all_datasets'
collection = seml.database.get_collection(collection_name)
# Filter by status inside the query so that non-completed runs are never
# transferred, instead of fetching every document and filtering in Python.
experiments = [
    {
        'config' : r['config'],
        'result' : r['result'],
        'id' : r['_id'],
        # Shortcut to the metric dictionary that the later cells read from.
        'metrics' : r['result']['results'],
    }
    for r in collection.find({'status' : 'COMPLETED'})
]

In [5]:
# Runs that were killed; selected by the database query rather than by
# fetching every document and filtering client-side.
experiments_killed = list(collection.find({'status' : 'KILLED'}))

In [6]:
# Datasets for which at least one run was killed.
{killed['config']['data']['dataset'] for killed in experiments_killed}

set()

In [7]:
# Inspect which metric names are stored (looking at the first experiment only).
experiments[0]['metrics'].keys()

dict_keys(['accuracy_val_val', 'accuracy_id_val_val', 'accuracy_ood_val_val', 'accuracy_val_no-edges_val', 'accuracy_id_val_no-edges_val', 'accuracy_ood_val_no-edges_val', 'ece_val_val', 'ece_val_no-edges_val', 'accuracy_ood-val_loc-no-edges_val', 'accuracy_id_ood-val_loc-no-edges_val', 'accuracy_ood_ood-val_loc-no-edges_val', 'ood_auroc_total-predictive-entropy_loc-no-edges_val', 'ood_aucpr_total-predictive-entropy_loc-no-edges_val', 'misclassification_auroc_total-predictive-entropy_loc-no-edges_val', 'misclassification_aucpr_total-predictive-entropy_loc-no-edges_val', 'ood_auroc_max-score_loc-no-edges_val', 'ood_aucpr_max-score_loc-no-edges_val', 'misclassification_auroc_max-score_loc-no-edges_val', 'misclassification_aucpr_max-score_loc-no-edges_val', 'ood_auroc_expected-softmax-entropy_loc-no-edges_val', 'ood_aucpr_expected-softmax-entropy_loc-no-edges_val', 'misclassification_auroc_expected-softmax-entropy_loc-no-edges_val', 'misclassification_aucpr_expected-softmax-entropy_loc-no

In [8]:
# Display labels used as column names and cell values in the result tables.

# Metric names.
METRIC = 'Metric'
OOD_AUROC = 'OOD AUC-ROC'
OOD_AUCPR = 'OOD AUC-PR'
# NOTE(review): the two identifiers below misspell "MISCLASSIFICATION"; they are
# left unchanged because other cells reference them by this name.
MISCLASSICIFACTION_AUROC = 'Misclassification AUC-ROC'
MISCLASSICIFACTION_AUCPR = 'Misclassification AUC-PR'

DATASET = 'Dataset'

# Data setting and experiment labels.
SETTING = 'Setting'
# The hybrid setting is displayed under the label 'Inductive'.
HYBRID = 'Inductive'
TRANSDUCTIVE = 'Transductive'
LOC = 'Leave Out Classes'
NORMAL = 'Normal'
BERNOULLI = 'Bernoulli'
NO_EDGES = 'No Edges'
PROXY = 'Proxy'
EXPERIMENT = 'Experiment'

OOD_TYPE = 'OOD Type'


# Model-configuration and Lipschitz-related labels.
RESIDUAL = 'Residual'
SPECTRAL_NORM = 'Spectral Norm'
WEIGHT_SCALE = 'Weight Scale'
LOWER_LIPSCHITZ = 'Empirical Lower Lipschitz Bound'
UPPER_LIPSCHITZ = 'Empirical Upper Lipschitz Bound'



# Uncertainty types.
EPISTEMIC = 'Epistemic'
ALEATORIC = 'Aleatoric'

# Predictive-performance metrics.
ACCURACY = 'Accuracy'
ACCURACY_ID = 'In-distribution Accuracy'
ECE = 'Expected Calibration Error'

# Model labels.
MODEL = 'Model'
DROPOUT = 'Dropout'
DROP_EDGE = 'Drop Edge'

In [9]:
# Sanity check: which `residual` / `use_spectral_norm` values occur in the sweep?
(
    {ex['config']['model']['residual'] for ex in experiments},
    {ex['config']['model']['use_spectral_norm'] for ex in experiments},
)

({False}, {False})

In [10]:
# Collect every metric name containing 'accuracy' across all experiments.
metrics = {
    name
    for ex in experiments
    for name in ex['metrics'].keys()
    if 'accuracy' in name
}

list(metrics)

['accuracy_normal-test_normal-no-edges_test',
 'accuracy_id_ood-test_loc-no-edges_test',
 'accuracy_ood_ood-test_loc_test',
 'accuracy_id_ber-test_ber-no-edges_test',
 'accuracy_id_ber-val_ber-no-edges_val',
 'accuracy_id_normal-val_normal-no-edges_val',
 'accuracy_ood_test_test',
 'accuracy_ood_ood-val_loc_val',
 'accuracy_ber-test_ber-no-edges_test',
 'accuracy_ood_val_no-edges_val',
 'accuracy_ood_ood-val_loc-no-edges_val',
 'accuracy_ber-val_ber-no-edges_val',
 'accuracy_ood_ber-val_ber_val',
 'accuracy_ood_val_val',
 'accuracy_id_normal-test_normal_test',
 'accuracy_ood_normal-val_normal-no-edges_val',
 'accuracy_ood_ber-val_ber-no-edges_val',
 'accuracy_id_normal-val_normal_val',
 'accuracy_ood-val_loc-no-edges_val',
 'accuracy_ood_test_no-edges_test',
 'accuracy_ood-val_loc_val',
 'accuracy_ber-val_ber_val',
 'accuracy_normal-val_normal-no-edges_val',
 'accuracy_ber-test_ber_test',
 'accuracy_ood-test_loc-no-edges_test',
 'accuracy_normal-test_normal_test',
 'accuracy_ood_ber-te

In [11]:
# Split whose metrics are read; it is interpolated into the metric keys below.
mode = 'test'

In [12]:
# Flatten the experiments into two lists of records:
#   * data_acc_ece: one row per experiment with accuracy and ECE,
#   * data_proxy:   one row per (experiment, OOD experiment, edge variant, proxy)
#                   with the OOD- and misclassification-detection scores.
data_proxy, data_acc_ece = [], []

# Loop-invariant lookup tables, built once instead of once per experiment.
setting_labels = {dc.HYBRID : HYBRID, dc.TRANSDUCTIVE : TRANSDUCTIVE}
edge_variants = (('-no-edges', True), ('', False))
proxies = ((ALEATORIC, 'expected-softmax-entropy'), (EPISTEMIC, 'mutual-information'))

for ex in experiments:
    cfg = ex['config']
    ex_metrics = ex['metrics']

    # The model label is derived from which of the two rates is positive.
    if cfg['model']['dropout'] > 0:
        model = DROPOUT
    elif cfg['model']['drop_edge'] > 0:
        model = DROP_EDGE
    else:
        # Name the offending run so that it can be looked up in the collection.
        raise RuntimeError(
            f"Experiment {ex['id']}: neither 'dropout' nor 'drop_edge' is > 0, "
            "cannot assign a model label"
        )
    base = {
        SETTING : setting_labels[cfg['data']['setting']],
        DATASET : cfg['data']['dataset'],
        MODEL : model,
    }
    # NOTE(review): accuracy is read as `[0]` while ECE and the AUC metrics are
    # read as `[0]['value']` — confirm that the stored formats really differ.
    data_acc_ece.append(base | {
        OOD_TYPE : cfg['data']['ood_type'],
        ACCURACY : ex_metrics[f'accuracy_{mode}_{mode}'][0],
        ECE : ex_metrics[f'ece_{mode}_{mode}'][0]['value'],
    })

    # Each OOD type maps to one or more (label, metric-key fragment) pairs.
    if cfg['data']['ood_type'] == dc.PERTURBATION:
        ood_types = (
            (BERNOULLI, 'ber'),
            (NORMAL, 'normal'),
        )
    elif cfg['data']['ood_type'] == dc.LEFT_OUT_CLASSES:
        ood_types = (
            (LOC, 'loc'),
        )
    else:
        raise ValueError(cfg['data']['ood_type'])

    for no_edge_suffix, no_edges in edge_variants:
        for ood_type, ood_name in ood_types:
            for proxy, proxy_name in proxies:
                # All four metric keys share this suffix.
                key_suffix = f'{proxy_name}_{ood_name}{no_edge_suffix}_{mode}'
                data_proxy.append(base | {
                    EXPERIMENT : ood_type,
                    PROXY : proxy,
                    OOD_AUROC : ex_metrics[f'ood_auroc_{key_suffix}'][0]['value'],
                    OOD_AUCPR : ex_metrics[f'ood_aucpr_{key_suffix}'][0]['value'],
                    MISCLASSICIFACTION_AUROC : ex_metrics[f'misclassification_auroc_{key_suffix}'][0]['value'],
                    MISCLASSICIFACTION_AUCPR : ex_metrics[f'misclassification_aucpr_{key_suffix}'][0]['value'],
                    NO_EDGES : no_edges,
                })

df_proxy = pd.DataFrame(data_proxy)
df_acc_ece = pd.DataFrame(data_acc_ece)

In [13]:
def agg_mean_and_std(group):
    """Format a group of numeric values as ``'<mean> ± <std>'``.

    Parameters
    ----------
    group
        Object providing ``mean()`` and ``std()`` (e.g. a pandas Series holding
        the values of one column within one group).

    Returns
    -------
    str
        The results of ``group.mean()`` and ``group.std()``, each formatted
        with two decimals and joined by ``' ± '``.
    """
    mean = group.mean()
    std = group.std()
    return f'{mean:.2f} ± {std:.2f}'

# Aggregate repeated runs of the same configuration into "mean ± std" strings.
df_proxy_agg = (
    df_proxy
    .groupby(by=[SETTING, MODEL, DATASET, EXPERIMENT, NO_EDGES, PROXY])
    .agg(agg_mean_and_std)
)

In [14]:
# OOD-detection table: one row per (setting, dataset, model, proxy) and one
# column per (metric, OOD experiment, edge variant).
df_tmp = (
    df_proxy_agg
    .reset_index()
    # Melt only the two OOD metrics rather than melting all four and filtering.
    .melt(
        id_vars=[MODEL, SETTING, DATASET, EXPERIMENT, NO_EDGES, PROXY],
        value_vars=[OOD_AUROC, OOD_AUCPR],
        var_name=METRIC,
    )
    .pivot(index=[SETTING, DATASET, MODEL, PROXY], columns=[METRIC, EXPERIMENT, NO_EDGES])
    # Sort the column MultiIndex directly instead of transposing twice.
    .sort_index(axis=1)
)
df_tmp.to_csv('~/dropout_all_datasets_ood_detection.csv')

In [15]:
# Display the pivoted OOD-detection table built in the previous cell.
df_tmp

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,value,value,value,value,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Metric,OOD AUC-PR,OOD AUC-PR,OOD AUC-PR,OOD AUC-PR,OOD AUC-PR,OOD AUC-PR,OOD AUC-ROC,OOD AUC-ROC,OOD AUC-ROC,OOD AUC-ROC,OOD AUC-ROC,OOD AUC-ROC
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Experiment,Bernoulli,Bernoulli,Leave Out Classes,Leave Out Classes,Normal,Normal,Bernoulli,Bernoulli,Leave Out Classes,Leave Out Classes,Normal,Normal
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,No Edges,False,True,False,True,False,True,False,True,False,True,False,True
Setting,Dataset,Model,Proxy,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4
Inductive,amazon_photo,Drop Edge,Aleatoric,0.50 ± 0.02,1.00 ± 0.00,0.61 ± 0.09,0.36 ± 0.04,0.49 ± 0.02,0.66 ± 0.04,0.50 ± 0.01,1.00 ± 0.00,0.74 ± 0.07,0.62 ± 0.03,0.50 ± 0.01,0.68 ± 0.04
Inductive,amazon_photo,Drop Edge,Epistemic,0.49 ± 0.01,0.30 ± 0.02,0.44 ± 0.07,0.19 ± 0.02,0.49 ± 0.02,0.37 ± 0.04,0.48 ± 0.01,0.00 ± 0.00,0.52 ± 0.05,0.33 ± 0.03,0.49 ± 0.02,0.26 ± 0.04
Inductive,amazon_photo,Dropout,Aleatoric,0.47 ± 0.03,1.00 ± 0.00,0.48 ± 0.10,0.39 ± 0.06,0.49 ± 0.04,0.52 ± 0.05,0.50 ± 0.02,1.00 ± 0.00,0.69 ± 0.07,0.65 ± 0.04,0.50 ± 0.02,0.51 ± 0.04
Inductive,amazon_photo,Dropout,Epistemic,0.46 ± 0.02,0.30 ± 0.02,0.21 ± 0.04,0.23 ± 0.02,0.49 ± 0.03,0.62 ± 0.06,0.48 ± 0.01,0.00 ± 0.00,0.37 ± 0.06,0.46 ± 0.03,0.50 ± 0.02,0.61 ± 0.04
Inductive,citeseer,Drop Edge,Aleatoric,0.75 ± 0.01,1.00 ± 0.00,0.66 ± 0.11,0.46 ± 0.09,0.59 ± 0.02,0.53 ± 0.03,0.55 ± 0.03,1.00 ± 0.01,0.82 ± 0.06,0.66 ± 0.07,0.21 ± 0.02,0.10 ± 0.03
Inductive,citeseer,Drop Edge,Epistemic,0.67 ± 0.03,0.51 ± 0.02,0.15 ± 0.01,0.19 ± 0.03,0.90 ± 0.03,0.97 ± 0.01,0.45 ± 0.04,0.00 ± 0.00,0.14 ± 0.04,0.32 ± 0.06,0.79 ± 0.04,0.94 ± 0.02
Inductive,citeseer,Dropout,Aleatoric,0.74 ± 0.02,0.99 ± 0.00,0.59 ± 0.08,0.45 ± 0.09,0.62 ± 0.03,0.52 ± 0.02,0.54 ± 0.03,0.98 ± 0.01,0.77 ± 0.05,0.66 ± 0.06,0.31 ± 0.03,0.06 ± 0.02
Inductive,citeseer,Dropout,Epistemic,0.68 ± 0.04,0.52 ± 0.02,0.18 ± 0.02,0.20 ± 0.02,0.79 ± 0.06,0.98 ± 0.01,0.44 ± 0.04,0.03 ± 0.01,0.33 ± 0.07,0.37 ± 0.05,0.65 ± 0.06,0.96 ± 0.03
Inductive,coauthor_cs,Drop Edge,Aleatoric,0.55 ± 0.01,1.00 ± 0.00,0.26 ± 0.07,0.27 ± 0.03,0.55 ± 0.01,0.67 ± 0.02,0.51 ± 0.01,1.00 ± 0.00,0.65 ± 0.10,0.71 ± 0.04,0.50 ± 0.01,0.61 ± 0.02
Inductive,coauthor_cs,Drop Edge,Epistemic,0.55 ± 0.02,0.34 ± 0.01,0.18 ± 0.03,0.10 ± 0.01,0.57 ± 0.01,0.51 ± 0.02,0.51 ± 0.02,0.00 ± 0.00,0.41 ± 0.02,0.28 ± 0.04,0.55 ± 0.02,0.39 ± 0.03


In [16]:
# Misclassification-detection table: one row per (setting, dataset, model, proxy)
# and one column per (metric, OOD experiment, edge variant).
df_tmp = (
    df_proxy_agg
    .reset_index()
    # Melt only the two misclassification metrics rather than melting all four
    # and filtering afterwards.
    .melt(
        id_vars=[MODEL, SETTING, DATASET, EXPERIMENT, NO_EDGES, PROXY],
        value_vars=[MISCLASSICIFACTION_AUROC, MISCLASSICIFACTION_AUCPR],
        var_name=METRIC,
    )
    .pivot(index=[SETTING, DATASET, MODEL, PROXY], columns=[METRIC, EXPERIMENT, NO_EDGES])
    # Sort the column MultiIndex directly instead of transposing twice.
    .sort_index(axis=1)
)
df_tmp.to_csv('~/dropout_all_datasets_misclassification_detection.csv')