In [1]:
import json
import re
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seml
from tqdm.autonotebook import tqdm


In [105]:
# Pull every run of the experiment collection and keep only the finished ones.
collection_name = 'week5_density_feature_space_grid'
collection = seml.database.get_collection(collection_name)
results = [
    dict(config=run['config'], result=run['result'], id=run['_id'])
    for run in collection.find()
    if run['status'] == 'COMPLETED'
]

In [106]:
# Number of runs with status 'COMPLETED' that were loaded from the collection.
len(results)

4

In [107]:
def flatten_dict(d):
    """Flatten a nested dict into a single-level dict.

    Keys of nested dicts are joined to their parent key with a '.', e.g.
    ``{'a': {'b': 1}}`` becomes ``{'a.b': 1}``. Values that are not dicts are
    copied unchanged; an empty nested dict contributes no entries.

    Args:
        d: The (possibly nested) dict to flatten.

    Returns:
        A new dict with dot-joined keys; `d` itself is not modified.
    """
    flat = {}
    for key, value in d.items():
        if not isinstance(value, dict):
            flat[key] = value
            continue
        # Recurse first, then prefix every flattened sub-key with the parent key.
        nested = flatten_dict(value)
        flat.update({key + '.' + sub_key: nested[sub_key] for sub_key in nested})
    return flat
    

In [109]:
# Collect everything into a data frame: one row per completed run, holding the
# flattened config of the run plus the mean / std of every metric in its result file.
config_records = []
metrics_list = []
for result in results:
    config_records.append(flatten_dict(result['config']))
    # result['result'] is opened as a path to a JSON file whose items are
    # (metric name, array-like of numbers).
    with open(result['result']) as f:
        metrics = json.load(f)
    metrics_dict = dict()
    for metric, values in metrics.items():
        values = np.array(values)
        metrics_dict[metric + '.mean'] = values.mean()
        metrics_dict[metric + '.std'] = values.std()
    metrics_list.append(metrics_dict)

# Union of the names in first-seen order. A set of strings would iterate in an
# order that depends on per-process hash randomisation, so the column order of
# the frame would change between kernel restarts.
config_names = list(dict.fromkeys(key for config in config_records for key in config))
metric_names = list(dict.fromkeys(name for run_metrics in metrics_list for name in run_metrics))

# Every column gets exactly one entry per run; values a run does not provide
# become NaN (for config keys as well as for metrics).
columns = {key: [config.get(key, np.nan) for config in config_records] for key in config_names}
columns.update({name: [run_metrics.get(name, np.nan) for run_metrics in metrics_list] for name in metric_names})

df = pd.DataFrame(columns)
# Label each run with the first character of the string form of its two config values.
df.index = [
    f"removeOOD:{str(df.loc[i]['data.train_labels_remove_other'])[0]}-residual:{str(df.loc[i]['model.residual'])[0]}"
    for i in df.index
]

In [191]:
def get_hyperparams(name):
    """Parse the hyperparameters encoded in a density-metric name.

    Examples of names this parser handles:
        'auroc_mog6-8isomap-ft[train].std'
        'auroc_gpc-16isomap-pcT-dF-f[train-val-reduced-test-reduced].mean'

    Everything after the first '.' and the 'auroc_' prefix are discarded, and
    the remainder is split on '-' into tokens:
        token 0: density type, 'mog<k>' (k = number of components) or 'gpc'
        token 1: 'no' or '<dim><method>' (dimensionality reduction)
        for 'gpc' with a dimensionality reduction, tokens 2 and 3 carry the
        per-class and diagonal-covariance flags (a trailing 'T' means True)
        remaining tokens: contain '[<fit-to>]'

    Args:
        name: Metric name as described above.

    Returns:
        dict with the keys 'density', 'num-components',
        'dimensionality-reduction', 'dimensionality', 'per-class',
        'diagonal-covariance' and 'fit-to'. Options that are not encoded in
        the name are np.nan ('dimensionality-reduction' is None for 'no').

    Raises:
        RuntimeError: If the density type is neither 'mog' nor 'gpc'.
    """
    name = name.split('.')[0].replace('auroc_', '')
    tokens = name.split('-')
    # Raw strings keep the regex escapes from being read as (invalid) string escapes.
    density_type = re.match(r'([a-zA-Z]+)[0-9]*', tokens[0]).groups()[0]
    if density_type == 'mog':
        num_components = int(tokens[0].replace('mog', ''))
    elif density_type == 'gpc':
        num_components = np.nan
    else:
        raise RuntimeError(f'Dont understand density {density_type}')

    # The flags are only parsed for 'gpc' with a dimensionality reduction;
    # in every other case they stay NaN.
    per_class = np.nan
    diagonal_covariance = np.nan
    if tokens[1] == 'no':
        dim_red = None
        dim_red_dim = np.nan
    else:
        dim_red_dim, dim_red = re.match(r'([0-9]+)([a-zA-Z]+)', tokens[1]).groups()
        dim_red_dim = int(dim_red_dim)
        if density_type == 'gpc':
            per_class = tokens[2][2:] == 'T'
            diagonal_covariance = tokens[3][1:] == 'T'
            # Drop the two flag tokens so that the fit-to part starts at
            # token 2 for both density types.
            tokens = tokens[:2] + tokens[4:]

    # The fit-to value may itself contain '-', so re-join before matching the brackets.
    fit_to = re.match(r'.*\[(.*)\].*', '-'.join(tokens[2:])).groups()[0]
    return {
        'density' : density_type, 'num-components' : num_components,
        'dimensionality-reduction' : dim_red, 'dimensionality' : dim_red_dim,
        'per-class' : per_class, 'diagonal-covariance' : diagonal_covariance,
        'fit-to' : fit_to
    }


# Build a table with one row per density configuration and one 'mean ± std'
# column per run.
# After the transpose every row is one metric ('<config>.mean' or '<config>.std')
# and every column is one run (the index labels of df).
df_auroc = df[[metric for metric in df.columns if 'auroc' in metric]].T.reset_index()
N, _ = df_auroc.shape
auroc_dict = df_auroc.to_dict()
# by_index[<config name>]['mean' | 'std'][<run label>] -> value
# (the 'index' column is stored as well and skipped further down)
by_index = defaultdict(lambda: defaultdict(dict))
for i in range(N):
    index, type_ = auroc_dict['index'][i].split('.')
    for col in auroc_dict.keys():
        by_index[index][type_][col] = auroc_dict[col][i]

# One list entry per configuration: the mean / std of every run followed by
# the hyperparameters parsed from the configuration name.
new_dict = defaultdict(list)
for hps, m in by_index.items():
    for k in m['std'].keys():
        if k == 'index': continue
        new_dict[k + '.mean'].append(m['mean'][k])
        new_dict[k + '.std'].append(m['std'][k])
    for hp, value in get_hyperparams(hps).items():
        new_dict[hp].append(value)
df_auroc = pd.DataFrame(dict(new_dict))

# Rank the configurations by their mean AUROC averaged over the runs (best first);
# the helper column is only needed for sorting.
df_auroc['auroc_mean'] = df_auroc[[col for col in df_auroc.columns if '.mean' in col]].mean(axis=1)
df_auroc = df_auroc.sort_values(by=['auroc_mean'], ascending=False).drop('auroc_mean', axis=1)

df_auroc = df_auroc.round(2)
# De-duplicate while keeping the column order, so that the per-run columns come
# out in the same order on every execution (iterating a set of strings does not
# guarantee that across kernel restarts).
experiments = list(dict.fromkeys(col.replace('.mean', '') for col in df_auroc.columns if '.mean' in col))
for exp in experiments:
    # Merge the two numeric columns of a run into a single 'mean ± std' string column.
    df_auroc[exp] = df_auroc[exp + '.mean'].astype(str) + ' ± ' + df_auroc[exp + '.std'].astype(str)
    df_auroc = df_auroc.drop([exp + '.mean', exp + '.std'], axis=1)

In [192]:
# Preview of the AUROC table, sorted by the average of the per-run means (best first).
df_auroc

Unnamed: 0,density,num-components,dimensionality-reduction,dimensionality,per-class,diagonal-covariance,fit-to,removeOOD:F-residual:F,removeOOD:T-residual:F,removeOOD:T-residual:T,removeOOD:F-residual:T
230,gpc,,isomap,32.0,False,False,train,0.7 ± 0.1,0.73 ± 0.06,0.76 ± 0.09,0.73 ± 0.11
190,gpc,,isomap,8.0,False,False,train,0.7 ± 0.1,0.71 ± 0.08,0.76 ± 0.09,0.72 ± 0.13
268,gpc,,isomap,10.0,False,False,train,0.71 ± 0.11,0.71 ± 0.08,0.75 ± 0.1,0.72 ± 0.13
107,gpc,,pca,6.0,False,True,train,0.73 ± 0.08,0.66 ± 0.08,0.73 ± 0.09,0.75 ± 0.07
213,gpc,,isomap,6.0,False,False,train,0.69 ± 0.1,0.69 ± 0.09,0.76 ± 0.09,0.73 ± 0.11
...,...,...,...,...,...,...,...,...,...,...,...
73,gpc,,pca,10.0,True,True,train-val-reduced-test-reduced,nan ± nan,0.13 ± 0.05,0.13 ± 0.04,nan ± nan
156,gpc,,pca,8.0,True,False,train-val-reduced-test-reduced,nan ± nan,0.12 ± 0.05,0.13 ± 0.04,nan ± nan
234,gpc,,pca,8.0,True,True,train-val-reduced-test-reduced,nan ± nan,0.11 ± 0.04,0.12 ± 0.03,nan ± nan
280,gpc,,pca,6.0,True,False,train-val-reduced-test-reduced,nan ± nan,0.1 ± 0.04,0.12 ± 0.04,nan ± nan


In [102]:
# List the columns of df (config keys and metric names).
# Indexing with the empty string raised a KeyError; the keys view is what this
# cell is meant to show.
data = df.to_dict()
data.keys()

dict_keys(['overwrite', 'db_collection', 'data.dataset', 'data.num_dataset_splits', 'data.split_type', 'data.test_portion', 'data.test_portion_fixed', 'data.train_labels', 'data.train_labels_remove_other', 'data.train_portion', 'data.val_labels', 'data.val_portion', 'evaluation.pipeline', 'model.activation', 'model.freeze_residual_projection', 'model.hidden_sizes', 'model.leaky_relu_slope', 'model.model_type', 'model.num_initializations', 'model.residual', 'model.use_bias', 'model.use_spectral_norm', 'model.weight_scale', 'run.args', 'run.name', 'training.early_stopping.min_delta', 'training.early_stopping.mode', 'training.early_stopping.monitor', 'training.early_stopping.patience', 'training.gpus', 'training.learning_rate', 'training.max_epochs', 'seed', 'auroc_mog6-8isomap-ft[train].std', 'auroc_gpc-16isomap-pcT-dF-f[train-val-reduced-test-reduced].std', 'auroc_gpc-2isomap-pcF-dT-f[train-val-reduced-test-reduced].mean', 'auroc_mog3-32pca-ft[train].std', 'auroc_mog6-32pca-ft[train].st

In [173]:
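# NOTE: abandoned attempt at sorting by the mean AUROC. The sort is done with the
# temporary 'auroc_mean' column in the cell that builds df_auroc.
#mean_auroc = 
# df_auroc = df_auroc.round(2).sort_values(by=[col for col in df_auroc.columns if '.mean' in col], key=lambda row: row.mean(), ascending=False)  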


In [197]:
# Remove the row limit so the whole table is rendered instead of a truncated preview.
pd.options.display.max_rows = None
df_auroc

Unnamed: 0,density,num-components,dimensionality-reduction,dimensionality,per-class,diagonal-covariance,fit-to,removeOOD:F-residual:F,removeOOD:T-residual:F,removeOOD:T-residual:T,removeOOD:F-residual:T
230,gpc,,isomap,32.0,False,False,train,0.7 ± 0.1,0.73 ± 0.06,0.76 ± 0.09,0.73 ± 0.11
190,gpc,,isomap,8.0,False,False,train,0.7 ± 0.1,0.71 ± 0.08,0.76 ± 0.09,0.72 ± 0.13
268,gpc,,isomap,10.0,False,False,train,0.71 ± 0.11,0.71 ± 0.08,0.75 ± 0.1,0.72 ± 0.13
107,gpc,,pca,6.0,False,True,train,0.73 ± 0.08,0.66 ± 0.08,0.73 ± 0.09,0.75 ± 0.07
213,gpc,,isomap,6.0,False,False,train,0.69 ± 0.1,0.69 ± 0.09,0.76 ± 0.09,0.73 ± 0.11
111,gpc,,isomap,16.0,False,False,train,0.7 ± 0.09,0.69 ± 0.1,0.76 ± 0.08,0.72 ± 0.13
138,gpc,,isomap,32.0,False,True,train,0.67 ± 0.11,0.68 ± 0.07,0.75 ± 0.08,0.73 ± 0.11
0,mog,6.0,isomap,8.0,,,train,0.63 ± 0.14,0.69 ± 0.09,0.76 ± 0.09,0.72 ± 0.1
68,mog,2.0,isomap,32.0,,,train,0.66 ± 0.14,0.68 ± 0.1,0.74 ± 0.09,0.72 ± 0.13
145,mog,7.0,isomap,8.0,,,train,0.64 ± 0.13,0.68 ± 0.08,0.75 ± 0.09,0.72 ± 0.12
