In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seml
import pandas as pd
import json
from collections import defaultdict
from functools import reduce
import seaborn as sns
from itertools import product
from scipy.stats import binned_statistic
from matplotlib.lines import Line2D

In [30]:
%cd ..

/nfs/homedirs/fuchsgru/MastersThesis


In [31]:
import data.constants as dc

In [9]:
SETTING = 'Setting'
RESIDUAL = 'Residual'
ACCURACY = 'Accuracy'
ECE = 'Expected Calibration Error'
WEIGHT_SCALE = 'Weight Scale'
LIPSCHITZ = 'Median Empirical Lipschitz Constant'
LIPSCHITZ_MAX = 'Empirical Upper Lipschitz Constant'
LIPSCHITZ_MIN = 'Empirical Lower Lipschitz Constant'
OOD = 'Out of Distribution Setting'
PROXY = 'Proxy'
LOC = 'Leave out Classes'
BERNOULLI = 'Bernoulli'
NORMAL = 'Normal'
GPC = 'Feature Density'
MAX_SCORE = 'Max Score'
AUROC = 'AUC-ROC'
TRANSDUCTIVE = 'Transductive'
HYBRID = 'Hybrid'
NO_EDGES = 'Remove Edges'

Number of finished experiments : 100


In [8]:
baselines = {} # Aggregate all results here

### Ensemble of GCNs

In [10]:
collection_name_ensemble = 'week15_ensemble'
collection_ensemble = seml.database.get_collection(collection_name_ensemble)
experiments_ensemble = [{'config' : r['config'], 'result' : r['result']['results'], 'id' : r['_id']} for r in collection_ensemble.find() if r['status'] in ('COMPLETED',)]

print(f'Number of finished experiments : {len(experiments_ensemble)}')

Number of finished experiments : 100


In [34]:
results_ensemble = []
for ex in experiments_ensemble:
    config = ex['config']
    metrics = {}
    for k, vs in ex['result'].items():
        if len(vs) != 1:
            raise RuntimeError(f'{k} : {vs}')
        v = vs[0]
        if isinstance(v, dict):
            v = v['value']
        metrics[k] = v
    
    base = {
        SETTING : {dc.TRANSDUCTIVE : TRANSDUCTIVE, dc.HYBRID : HYBRID}[config['data']['setting']],
        LIPSCHITZ : metrics['empirical_lipschitz_val_slope_median_perturbation_noise'],
        LIPSCHITZ_MAX : metrics['empirical_lipschitz_val_slope_max_perturbation_noise'],
        LIPSCHITZ_MIN : metrics['empirical_lipschitz_val_slope_min_perturbation_noise'],
    }
    results_ensemble.append({
        ECE : metrics['ece_val'],
        ACCURACY : metrics['accuracy_val'],
    } | base)
    for k, v in metrics.items():
        if '-no-edges' in k or '_no-edges' in k:
            k = k.replace('-no-edges', '').replace('_no-edges', '')
            no_edges = True
        else:
            no_edges = False
        
        # AUROCs
        if not 'auroc' in k:
            continue
        
        print(k)
            
        if k.endswith('loc'):
            ood = LOC
        elif k.endswith('ber'):
            ood = BERNOULLI
        elif k.endswith('normal'):
            ood = NORMAL
    break
results_ensemble

auroc_total-predictive-entropy_loc
auroc_max-score_loc
auroc_expected-softmax-entropy_loc
auroc_mutual-information_loc
auroc_predicted-class-variance_loc
auroc_logit-energy_loc
auroc_total-predictive-entropy_loc
auroc_max-score_loc
auroc_expected-softmax-entropy_loc
auroc_mutual-information_loc
auroc_predicted-class-variance_loc
auroc_logit-energy_loc


[{'Expected Calibration Error': 0.14169385348046526,
  'Accuracy': 0.8357142806053162,
  'Setting': 'Transductive',
  'Median Empirical Lipschitz Constant': 0.11281829274418162,
  'Empirical Upper Lipschitz Constant': 0.755159330368042,
  'Empirical Lower Lipschitz Constant': 0.0036934067858225205}]

In [6]:
experiments[0]['result']

{'val_member_0_cross_entropy-val-4': [0.5912452340126038],
 'val_member_0_accuracy-val-4': [0.8357142806053162],
 'val_member_0_loss-val-4': [0.5912452340126038],
 'val_member_1_cross_entropy-val-4': [0.5859426259994507],
 'val_member_1_accuracy-val-4': [0.8500000238418579],
 'val_member_1_loss-val-4': [0.5859426259994507],
 'val_member_2_cross_entropy-val-4': [0.5993340611457825],
 'val_member_2_accuracy-val-4': [0.8357142806053162],
 'val_member_2_loss-val-4': [0.5993340611457825],
 'val_member_3_cross_entropy-val-4': [0.6111315488815308],
 'val_member_3_accuracy-val-4': [0.8428571224212646],
 'val_member_3_loss-val-4': [0.6111315488815308],
 'val_member_4_cross_entropy-val-4': [0.5939404368400574],
 'val_member_4_accuracy-val-4': [0.8357142806053162],
 'val_member_4_loss-val-4': [0.5939404368400574],
 'ensemble_accuracy-val-4': [0.8357142806053162],
 'accuracy_val': [0.8357142806053162],
 'accuracy_id_val': [0.8357142806053162],
 'accuracy_ood_val': [nan],
 'accuracy_val_no-edges': 

In [7]:
experiments[0]['config']

{'overwrite': 1,
 'db_collection': 'week15_ensemble',
 'data': {'base_labels': ['Artificial_Intelligence/Machine_Learning/Case-Based',
   'Artificial_Intelligence/Machine_Learning/Theory',
   'Artificial_Intelligence/Machine_Learning/Genetic_Algorithms',
   'Artificial_Intelligence/Machine_Learning/Probabilistic_Methods',
   'Artificial_Intelligence/Machine_Learning/Neural_Networks',
   'Artificial_Intelligence/Machine_Learning/Rule_Learning',
   'Artificial_Intelligence/Machine_Learning/Reinforcement_Learning',
   'Operating_Systems/Distributed',
   'Operating_Systems/Memory_Management',
   'Operating_Systems/Realtime',
   'Operating_Systems/Fault_Tolerance'],
  'corpus_labels': ['Artificial_Intelligence/Machine_Learning/Case-Based',
   'Artificial_Intelligence/Machine_Learning/Theory',
   'Artificial_Intelligence/Machine_Learning/Genetic_Algorithms',
   'Artificial_Intelligence/Machine_Learning/Probabilistic_Methods',
   'Artificial_Intelligence/Machine_Learning/Neural_Networks',
   