In [2]:
import json
import os
from pathlib import Path

import numpy as np
import torch
from omegaconf import OmegaConf

# Root directory that holds the active-learning experiment results.
# The location can be overridden without editing the notebook by setting the
# DAL_TOOLBOX_RESULTS_PATH environment variable; when it is not set, the
# original local default below is used unchanged.
base_results_path = os.environ.get(
    "DAL_TOOLBOX_RESULTS_PATH",
    "D:\\Dokumente\\Git\\dal-toolbox\\experiments\\active_learning\\results",
)

In [3]:
def load_json(json_file):
    """Read a JSON file and return its decoded content.

    Args:
        json_file: Path (string or path-like) of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's content.

    Raises:
        FileNotFoundError: If ``json_file`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Pin the encoding: without it ``open`` falls back to the locale default
    # (e.g. cp1252 on Windows), which mis-decodes non-ASCII UTF-8 content.
    with open(json_file, 'r', encoding='utf-8') as f:
        return json.load(f)


def load_results(path):
    """Return the parsed ``results.json`` of one experiment directory.

    Args:
        path: Experiment directory (string or path-like).

    Returns:
        Whatever ``load_json`` returns for ``<path>/results.json``.

    Raises:
        AssertionError: If ``path`` is not an existing directory.
    """
    exp_dir = Path(path)
    assert exp_dir.is_dir(), 'Path does not exist.'
    return load_json(exp_dir / 'results.json')


def load_args(path):
    """Return the configuration stored under ``.hydra/config.yaml``.

    Args:
        path: Experiment directory (string or path-like).

    Returns:
        The object produced by ``OmegaConf.load`` for
        ``<path>/.hydra/config.yaml``.

    Raises:
        AssertionError: If ``path`` is not an existing directory.
    """
    exp_dir = Path(path)
    assert exp_dir.is_dir(), 'Path does not exist.'
    config_file = exp_dir / '.hydra' / 'config.yaml'
    return OmegaConf.load(config_file)


def load_checkpoint(path, final=True, map_location=None):
    """Load a checkpoint file from an experiment directory via ``torch.load``.

    Args:
        path: Experiment directory (string or path-like).
        final: If true, read ``model_final.pth``; otherwise read
            ``checkpoint.pth``.
        map_location: Forwarded to ``torch.load``. The default ``None`` keeps
            torch's standard behaviour; pass e.g. ``"cpu"`` to open a
            checkpoint that was saved on a GPU on a machine without one.

    Returns:
        The object returned by ``torch.load`` for the selected file.

    Raises:
        AssertionError: If ``path`` is not an existing directory.
    """
    path = Path(path)
    assert path.is_dir(), 'Path does not exist.'
    file_name = 'model_final.pth' if final else 'checkpoint.pth'
    # NOTE: torch.load is pickle-based; only open checkpoints from sources
    # you trust.
    return torch.load(path / file_name, map_location=map_location)


def get_experiments(result_path, glob_pattern, train_results=False):
    """Collect the results of every experiment directory matching a pattern.

    Used to aggregate over several runs of the same setting, e.g. all
    ``seed*`` sub-directories of ``result_path``.

    Args:
        result_path: Directory to search (string or ``Path``).
        glob_pattern: Glob pattern, relative to ``result_path``, selecting the
            experiment directories.
        train_results: Currently unused; kept so existing callers keep working.

    Returns:
        A list with one ``load_results`` value per matching directory, ordered
        by path.

    Raises:
        AssertionError: If no results could be loaded, or (propagated from
            ``load_results``) if a match is not a directory.
    """
    # Accept plain strings as well as Path objects.
    result_path = Path(result_path)
    experiments = []
    # Sort the matches: raw glob order depends on the filesystem, which would
    # make positional access such as ``experiments[0]`` irreproducible.
    for exp_path in sorted(result_path.glob(glob_pattern)):
        try:
            experiments.append(load_results(exp_path))
        except FileNotFoundError:
            # The directory matched but holds no results file; skip it.
            print(f"{exp_path} does not exist!")
    assert len(experiments) != 0, f'No experiments found for {result_path}.'
    return experiments


def create_le_results_path(dataset, model, path=base_results_path):
    """Build the results directory of a linear-evaluation experiment.

    Args:
        dataset: Dataset directory name.
        model: Model directory name.
        path: Root results directory; defaults to ``base_results_path``.

    Returns:
        ``Path`` pointing to ``<path>/<dataset>/linearevaluation/<model>``.
    """
    joined = os.path.join(path, dataset, "linearevaluation", model)
    return Path(joined)

def create_al_results_path(dataset, model, strategy, budget, path=base_results_path):
    """Build the results directory of an active-learning performance test.

    Args:
        dataset: Dataset directory name.
        model: Model directory name.
        strategy: Query strategy name; first part of the last directory name.
        budget: Budget, appended directly to ``strategy``. May be a string or
            any value whose ``str()`` form is the directory suffix (e.g. an
            int).
        path: Root results directory; defaults to ``base_results_path``.

    Returns:
        ``Path`` pointing to
        ``<path>/<dataset>/al_performance_test/<model>/<strategy><budget>``.
    """
    # Format instead of ``strategy + budget`` so a numeric budget does not
    # raise TypeError; for string arguments the result is identical.
    run_dir = f"{strategy}{budget}"
    return Path(os.path.join(path, dataset, "al_performance_test", model, run_dir))

In [4]:
# Sanity check: show the train statistics stored under "cycle0" for the first
# loaded run of the vits14 linear evaluation on imagenet100.
path = create_le_results_path(dataset="imagenet100", model="vits14")
exp_results = get_experiments(path, glob_pattern='seed*')
first_run = exp_results[0]
print(first_run["cycle0"]["train_stats"])

{'accuracy': 0.9903774857521057, 'nll': 0.03581102192401886, 'brier': 0.016905449330806732, 'ece': 0.010587764903903008, 'ace': 0.012491952627897263}


In [5]:
# Linear evaluation: mean and sample standard deviation of the accuracy over
# all loaded runs, reported per model and data split.
models = ["vits14", "vits16"]

for model in models:
    path = create_le_results_path("imagenet100", model)
    exp_results = get_experiments(path, 'seed*')

    # One accuracy value per run and split, read from the "cycle0" entry.
    train_accuracies, val_accuracies, test_accuracies = [], [], []
    for exp_result in exp_results:
        cycle_stats = exp_result["cycle0"]
        train_accuracies.append(cycle_stats["train_stats"]["accuracy"])
        val_accuracies.append(cycle_stats["validation_stats"]["accuracy"])
        test_accuracies.append(cycle_stats["test_stats"]["accuracy"])

    # ddof=1 gives the sample (not population) standard deviation.
    train_accuracy, train_std = np.mean(train_accuracies), np.std(train_accuracies, ddof=1)
    val_accuracy, val_std = np.mean(val_accuracies), np.std(val_accuracies, ddof=1)
    test_accuracy, test_std = np.mean(test_accuracies), np.std(test_accuracies, ddof=1)

    print(f"Linear evaluation accuracy of model {model} - Train: {train_accuracy:.4f} (± {train_std:.4f}) - Val: {val_accuracy:.4f} (± {val_std:.4f}) - Test: {test_accuracy:.4f} (± {test_std:.4f})")

Linear evaluation accuracy of model vits14 - Train: 0.9905 (± 0.0001) - Val: 0.9282 (± 0.0012) - Test: 0.9277 (± 0.0001)
Linear evaluation accuracy of model vits16 - Train: 0.9910 (± 0.0002) - Val: 0.8910 (± 0.0018) - Test: 0.8811 (± 0.0012)


In [12]:
# Active-learning performance: mean and sample standard deviation of the
# accuracy over all loaded runs, for every (strategy, budget) scenario and model.
models = ["vits16", "vits14"]
scenarios = [("random", "100"), ("random", "1000"), ("typiclust", "100"), ("typiclust", "1000")]
for scenario in scenarios:
    strategy, budget = scenario
    for model in models:
        path = create_al_results_path("imagenet100", model, strategy, budget)
        exp_results = get_experiments(path, 'seed*')

        # One accuracy value per run and split, read from the "cycle0" entry.
        train_accuracies, val_accuracies, test_accuracies = [], [], []
        for exp_result in exp_results:
            cycle_stats = exp_result["cycle0"]
            train_accuracies.append(cycle_stats["train_stats"]["accuracy"])
            val_accuracies.append(cycle_stats["validation_stats"]["accuracy"])
            test_accuracies.append(cycle_stats["test_stats"]["accuracy"])

        # ddof=1 gives the sample (not population) standard deviation.
        train_accuracy, train_std = np.mean(train_accuracies), np.std(train_accuracies, ddof=1)
        val_accuracy, val_std = np.mean(val_accuracies), np.std(val_accuracies, ddof=1)
        test_accuracy, test_std = np.mean(test_accuracies), np.std(test_accuracies, ddof=1)

        print(f"AL performance of model {model} with scenario {scenario} - Train: {train_accuracy:.4f} (± {train_std:.4f}) - Val: {val_accuracy:.4f} (± {val_std:.4f}) - Test: {test_accuracy:.4f} (± {test_std:.4f})")

AL performance of model vits16 with scenario ('random', '100') - Train: 1.0000 (± 0.0000) - Val: 0.3510 (± 0.0347) - Test: 0.3376 (± 0.0356)
AL performance of model vits14 with scenario ('random', '100') - Train: 1.0000 (± 0.0000) - Val: 0.4673 (± 0.0123) - Test: 0.4614 (± 0.0108)
AL performance of model vits16 with scenario ('random', '1000') - Train: 1.0000 (± 0.0000) - Val: 0.7663 (± 0.0075) - Test: 0.7494 (± 0.0044)
AL performance of model vits14 with scenario ('random', '1000') - Train: 1.0000 (± 0.0000) - Val: 0.8547 (± 0.0057) - Test: 0.8563 (± 0.0047)
AL performance of model vits16 with scenario ('typiclust', '100') - Train: 1.0000 (± 0.0000) - Val: 0.5952 (± 0.0140) - Test: 0.5837 (± 0.0178)
AL performance of model vits14 with scenario ('typiclust', '100') - Train: 1.0000 (± 0.0000) - Val: 0.7599 (± 0.0142) - Test: 0.7549 (± 0.0197)
AL performance of model vits16 with scenario ('typiclust', '1000') - Train: 0.9998 (± 0.0004) - Val: 0.7715 (± 0.0046) - Test: 0.7604 (± 0.0047)
A