In [7]:
import sys, os
import warnings
from collections import defaultdict

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [12]:
folders   = [
    "tests/results/MNIST-OOD",
    # "tests/results/R-MNIST",
    # "tests/results/CIFAR-10-C",
    "tests/results/CIFAR-10-OOD"
]
methods = ["map", "ensemble", "csghmc", "swg", "laplace_all", "laplace_last_layer", "subspace", "swag_laplace", "bbb"]

def get_buckets(folders, methods):
    buckets = []
    for folder in folders:
        bucket = defaultdict(list)
        for fname in os.listdir(folder):
            hit = next((m for m in methods if fname.startswith(m)), None)
            if hit:
                bucket[hit].append(os.path.join(folder, fname))
        buckets.append(bucket)
    return buckets

buckets = get_buckets(folders, methods)

In [13]:
buckets

[defaultdict(list,
             {'ensemble': ['tests/results/MNIST-OOD/ensemble_13.npy',
               'tests/results/MNIST-OOD/ensemble_6.npy',
               'tests/results/MNIST-OOD/ensemble_12.npy',
               'tests/results/MNIST-OOD/ensemble_972394.npy',
               'tests/results/MNIST-OOD/ensemble_523.npy'],
              'map': ['tests/results/MNIST-OOD/map_13.npy',
               'tests/results/MNIST-OOD/map_12.npy',
               'tests/results/MNIST-OOD/map_972394.npy',
               'tests/results/MNIST-OOD/map_6.npy',
               'tests/results/MNIST-OOD/map_523.npy'],
              'laplace_last_layer': ['tests/results/MNIST-OOD/laplace_last_layer_kron_523.npy',
               'tests/results/MNIST-OOD/laplace_last_layer_kron_13.npy',
               'tests/results/MNIST-OOD/laplace_last_layer_kron_6.npy',
               'tests/results/MNIST-OOD/laplace_last_layer_kron_12.npy',
               'tests/results/MNIST-OOD/laplace_last_layer_kron_972394.npy'],
     

In [16]:
def get_results(buckets, methods):
    tables=[]
    for bucket in buckets:
        results = []
        for method in methods:
            runs = bucket.get(method, [])
            conf_runs = []
            auroc_runs = []
            time_runs = []
            for path in runs:
                arr = np.load(path, allow_pickle=True)[1:]
                conf_runs.append(np.mean([d["conf"] for d in arr]))
                auroc_runs.append(np.mean([d["auroc"] for d in arr]))
                time_runs.append(np.mean([d["test_time"] for d in arr]))
            with warnings.catch_warnings(action='ignore'):
                results.append({
                    "Method": method,
                    "conf_mean": np.mean(conf_runs),
                    "conf_std": np.std(conf_runs),
                    "auroc_mean": np.mean(auroc_runs),
                    "auroc_std": np.std(auroc_runs),
                    "time_mean": np.mean(time_runs),
                    "time_std": np.std(time_runs),
                })
        df = pd.DataFrame(results)
        df["Confidence"] = (df["conf_mean"] * 100).round(1).astype(str) + "±" + (df["conf_std"] * 100).round(1).astype(str)
        df["AUROC"]      = (df["auroc_mean"] * 100).round(1).astype(str) + "±" + (df["auroc_std"] * 100).round(1).astype(str)
        df["Test time (s)"] = df["time_mean"].round(2).astype(str) + "±" + df["time_std"].round(2).astype(str)
        df = df.set_index("Method").loc[methods].reset_index()
        table = df[["Method", "Confidence", "AUROC", "Test time (s)"]]
        tables.append(table)
    return tables


tables = get_results(buckets, methods)
print(tables[0].to_markdown(index=False))


| Method             | Confidence   | AUROC    | Test time (s)   |
|:-------------------|:-------------|:---------|:----------------|
| map                | 75.0±0.6     | 96.5±0.2 | 0.64±0.01       |
| ensemble           | 65.7±0.5     | 97.5±0.0 | 0.68±0.05       |
| csghmc             | 69.2±3.2     | 96.1±0.3 | 0.66±0.01       |
| swg                | 76.8±0.0     | 96.3±0.0 | 1.25±0.0        |
| laplace_all        | nan±nan      | nan±nan  | nan±nan         |
| laplace_last_layer | 43.1±0.9     | 95.7±0.4 | 0.68±0.04       |
| subspace           | 68.2±0.0     | 95.8±0.0 | 55.91±0.0       |
| swag_laplace       | 11.8±0.0     | 95.9±0.0 | 56.37±0.0       |
| bbb                | 73.3±1.4     | 95.9±0.3 | 1.76±0.01       |


In [15]:
# data = np.load("tests/results/MNIST-OOD/ensemble_13.npy", allow_pickle=True)
data = np.load("tests/results/MNIST-OOD/subspace_6.npy", allow_pickle=True)

data

array([{'conf': 0.9860426187515259, 'test_time': 18.564986328125},
       {'conf': 0.7627784609794617, 'test_time': 122.63265625, 'auroc': 0.9225487640625001, 'fpr95': 0.316875},
       {'conf': 0.5911074280738831, 'test_time': 22.581904296875, 'auroc': 0.98396110625, 'fpr95': 0.0808},
       {'conf': 0.6924473643302917, 'test_time': 22.50443359375, 'auroc': 0.9686708624999999, 'fpr95': 0.1665}],
      dtype=object)