In [1]:
import os
import json
import tqdm
import yaml

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from omegaconf import OmegaConf
from pathlib import Path
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
%%bash
# (Re)mount the remote results directory at ./results via sshfs and list it.
# Stop at the first failing step so a failed mount is not hidden by the final `ls`.
set -e
mkdir -p results
# Drop a previous mount if there is one (-u unmount, -z lazy). On a first run
# nothing is mounted and fusermount fails; that is expected, so ignore it.
fusermount -uz results 2>/dev/null || true
sshfs compute.ies:/mnt/work/deep_al/results/ssl results
ls results

CIFAR10


In [3]:
def load_json(json_file):
    """Read ``json_file`` and return its parsed JSON content."""
    with open(json_file, 'r') as handle:
        return json.load(handle)

def load_results(path, train_results=False):
    """Load the Hydra config and the results JSON of a single experiment run.

    Args:
        path: Directory of one run. The function reads ``results.json`` and
            ``.hydra/config.yaml`` from it.
        train_results: Currently unused; kept so that existing callers that
            pass it keep working.

    Returns:
        A dict with the keys ``'cfg'`` (the loaded config) and ``'results'``
        (the parsed JSON), or ``None`` if either file could not be loaded.

    Raises:
        AssertionError: If ``path`` is not an existing directory.
    """
    path = Path(path)
    assert path.is_dir(), 'Path does not exist.'

    exp_json = path / 'results.json'
    exp_cfg = path / '.hydra' / 'config.yaml'
    try:
        cfg = OmegaConf.load(exp_cfg)
        data = load_json(exp_json)
    except Exception as exc:
        # Best-effort loading: an incomplete run is reported and skipped.
        # Catch ``Exception`` rather than everything so that KeyboardInterrupt
        # and SystemExit still stop the notebook, and print the cause so a
        # corrupt file is distinguishable from a missing one.
        print(f'{path} has missing results. ({type(exc).__name__}: {exc})')
        return None

    return {'cfg': cfg, 'results': data}

def collect_results_by_pattern(result_path, glob_pattern, train_results=False):
    """Load every experiment run below ``result_path`` that matches a glob.

    Args:
        result_path: Directory to search (``str`` or ``Path``).
        glob_pattern: Pattern passed to ``Path.glob``, e.g. ``'seed*'``.
        train_results: Forwarded unchanged to ``load_results``.

    Returns:
        A non-empty list with the dict returned by ``load_results`` for each
        matching run that could be loaded, ordered by path.

    Raises:
        AssertionError: If no matching run could be loaded.
    """
    result_path = Path(result_path)
    experiments = []
    # Sort the matches: glob order depends on the file system, and a stable
    # order keeps the per-seed lists reproducible between runs.
    for exp_path in sorted(result_path.glob(glob_pattern)):
        loaded = load_results(exp_path, train_results=train_results)
        if loaded is None:
            # load_results already reported the incomplete run. Skip it so
            # callers never have to index into ``None``.
            continue
        experiments.append(loaded)
    assert len(experiments) != 0, f'No experiments found for {result_path}.'
    return experiments

In [7]:
dataset = 'CIFAR10'
n_labeled_samples = 250
# Result directory per method; comment a line in or out to change what is compared.
experiments = {
    'fully_supervised': f'results/{dataset}/wideresnet2810/fully_supervised/{n_labeled_samples}labeled_samples/',
    'pseudo_labels': f'results/{dataset}/wideresnet2810/pseudo_labels/{n_labeled_samples}labeled_samples/',
    # 'pi_model': f'results/{dataset}/wideresnet2810/pi_model/{n_labeled_samples}labeled_samples/',
}

# Maps each method name to a list holding the last 'test_history' entry of
# every run matched by 'seed*' (presumably one run directory per seed).
aggregated_results = {}
for exp_name, exp_path in tqdm(experiments.items()):
    all_results = collect_results_by_pattern(Path(exp_path), 'seed*')
    aggregated_results[exp_name] = [
        results_seed['results']['test_history'][-1]
        for results_seed in all_results
    ]

100%|██████████| 2/2 [00:00<00:00, 12.16it/s]


In [8]:
# For every method, reduce the per-seed metric dicts to a mean and a standard
# deviation per metric. The metric names are taken from the first seed's entry.
avg_results = {}
for exp_name, results_per_seed in aggregated_results.items():
    avg_result = {}
    for key in results_per_seed[0]:
        values = [d[key] for d in results_per_seed]
        avg_result[key] = np.mean(values)
        avg_result[key + '_std'] = np.std(values)
    avg_results[exp_name] = avg_result

# One row per method, one column per metric and per metric '_std'.
df = pd.DataFrame(avg_results).T
# USB benchmark
# fully_supervised : 22.82 
# pseudo label     : 24.05 
# pi model         : 24.42 
df

Unnamed: 0,test_acc1,test_acc1_std,test_prec,test_prec_std,test_loss,test_loss_std,test_nll,test_nll_std,test_tce,test_tce_std,test_mce,test_mce_std
fully_supervised,40.549998,0.302435,0.403557,0.001637,2.936891,0.01487,2.936891,0.01487,0.398173,0.00226,0.141157,0.000175
pseudo_labels,42.383333,0.681193,0.422148,0.005609,3.549727,0.072354,3.549727,0.072354,0.43806,0.005338,0.154019,0.002574
