In [67]:
import os
import pandas as pd

def load_results(experiment_folder):
  collection = {}
  for file in os.listdir(experiment_folder):
    if not file.endswith('.csv'):
      continue
    
    if "CIFAR10" in file:
      dataset = "CIFAR10"
    elif "Fashion MNIST" in file:
      dataset = "Fashion MNIST"
    elif "Motor Imagery BCI" in file:
      dataset = "Motor Imagery BCI"
    elif "Wine" in file:
      dataset = "Wine"
    else:
      dataset = None
        
    if "Deep Ensemble" in file:
      model = "Deep Ensemble"
    elif "MC-Dropout" in file:
      model = "MC-Dropout"
    elif "MC-DropConnect" in file:
      model = "MC-DropConnect"
    elif "Flipout" in file:
      model = "Flipout"
    else:
      model = None
  
    if "gaussian_logits" in file:
      disentanglement = "gaussian_logits"
    else:
      disentanglement = "information_theoretic"
    
    df = pd.read_csv(os.path.join(experiment_folder, file))
    
    if dataset and model:
      if dataset in collection:
        if model+"_"+disentanglement in collection[dataset]:
              collection[dataset][model+"_"+disentanglement].append(df)
        else:
          collection[dataset][model+"_"+disentanglement] = [df]
      else:
        if disentanglement == "gaussian_logits":
          collection[dataset] = {model + "_gaussian_logits": [df],
                                 model + "_information_theoretic": []
                                 }
        else:
          collection[dataset] = {model + "_gaussian_logits": [],
                                 model + "_information_theoretic": [df]
                                 }
  return collection
decreasing_dataset_collection = load_results(experiment_folder="./results_data_old_2/decreasing_dataset")
label_noise_collection = load_results(experiment_folder="./results_data_old_2/label_noise")

In [74]:
import numpy as np
import itertools

def calculate_disentanglement_error(decreasing_dataset_collection, label_noise_collection, dataset, model):
  disentanglement_errors = []
  for df_dataset, df_label_noise in zip(decreasing_dataset_collection[dataset][model], label_noise_collection[dataset][model]):
    ale_cor_dataset = df_dataset['accuracies'].corr(df_dataset['aleatoric_uncertainties'] * -1)
    epi_cor_dataset = df_dataset['accuracies'].corr(df_dataset['epistemic_uncertainties']* -1)
    
    ale_cor_label_noise = df_label_noise['accuracies'].corr(df_label_noise['aleatoric_uncertainties'] * -1)
    epi_cor_label_noise = df_label_noise['accuracies'].corr(df_label_noise['epistemic_uncertainties'] * -1)
    
    disentanglement_error = np.mean([
      np.abs(0 - ale_cor_dataset),
      np.abs(1 - epi_cor_dataset ),
      np.abs(1 - ale_cor_label_noise),
      np.abs(0 - epi_cor_label_noise),
    ]
    )
    disentanglement_errors.append(disentanglement_error)
    
  # print(f"{np.mean(disentanglement_errors):.3f}+-{(2 * np.std(disentanglement_errors)) / len(disentanglement_errors):.3f}")
  return np.mean(disentanglement_errors), (2 * np.std(disentanglement_errors)) / 5

  
datasets = list(decreasing_dataset_collection.keys())
models = list(decreasing_dataset_collection[datasets[0]].keys())

final_results_mean = {model: [] for model in models}
final_results_sem = {model: [] for model in models}
for dataset, model in itertools.product(datasets, models):
  dis_err_mean, dis_err_sem = calculate_disentanglement_error(decreasing_dataset_collection, label_noise_collection, dataset, model)
  final_results_mean[model].append(dis_err_mean)
  final_results_sem[model].append(dis_err_sem)
  print(f"Disentanglement Error for {dataset}, {model}: {np.mean(dis_err_mean):.3f} \pm {dis_err_sem:.3f}")
  
for model, means in final_results_mean.items():
  print(f"Mean Disentanglement Error for {model}: {np.mean(means):.3f}")
  
for model, sem in final_results_sem.items():
  print(f"2SEM Disentanglement Error for {model}: {np.mean(sem):.3f}")
  
# calculate_disentanglement_error(decreasing_dataset_collection, label_noise_collection, "CIFAR10", "Deep Ensemble_gaussian_logits")
# calculate_disentanglement_error(decreasing_dataset_collection, label_noise_collection, "CIFAR10", "Deep Ensemble_information_theoretic")
# calculate_disentanglement_error(decreasing_dataset_collection, label_noise_collection, "CIFAR10", "Flipout_gaussian_logits")
# calculate_disentanglement_error(decreasing_dataset_collection, label_noise_collection, "CIFAR10", "Flipout_information_theoretic")


Disentanglement Error for Fashion MNIST, Deep Ensemble_gaussian_logits: 0.439 \pm 0.013
Disentanglement Error for Fashion MNIST, Deep Ensemble_information_theoretic: 0.363 \pm 0.030
Disentanglement Error for Fashion MNIST, MC-Dropout_information_theoretic: 0.294 \pm 0.032
Disentanglement Error for Fashion MNIST, MC-Dropout_gaussian_logits: 0.673 \pm 0.034
Disentanglement Error for Fashion MNIST, Flipout_information_theoretic: 0.404 \pm 0.016
Disentanglement Error for Fashion MNIST, MC-DropConnect_gaussian_logits: 0.399 \pm 0.022
Disentanglement Error for Fashion MNIST, MC-DropConnect_information_theoretic: 0.320 \pm 0.035
Disentanglement Error for Fashion MNIST, Flipout_gaussian_logits: 0.480 \pm 0.004
Disentanglement Error for Wine, Deep Ensemble_gaussian_logits: 0.524 \pm 0.054
Disentanglement Error for Wine, Deep Ensemble_information_theoretic: 0.342 \pm 0.022
Disentanglement Error for Wine, MC-Dropout_information_theoretic: 0.645 \pm 0.041
Disentanglement Error for Wine, MC-Dropout

In [58]:
print(label_noise_collection['CIFAR10'].keys())
print(decreasing_dataset_collection['CIFAR10'].keys())


dict_keys(['MC-Dropout_gaussian_logits', 'MC-Dropout_information_theoretic', 'Flipout_gaussian_logits', 'Flipout_information_theoretic', 'Deep Ensemble_gaussian_logits', 'Deep Ensemble_information_theoretic'])
dict_keys(['Deep Ensemble_gaussian_logits', 'Deep Ensemble_information_theoretic', 'Flipout_gaussian_logits', 'Flipout_information_theoretic'])
