In [1]:
import pandas as pd
import math
import numpy as np

%matplotlib inline

from pathlib import Path
from omegaconf import OmegaConf

In [2]:
# Collect predictions

def check_num_epochs(run):
    """Print a warning when a run's validation log looks shorter than configured.

    Args:
        run: ``pathlib.Path`` of a run directory. It is expected to contain
            ``valid_log.csv`` and ``.hydra/config.yaml``.

    Returns:
        None. Problems are only reported on stdout; nothing is raised for a
        missing log file.
    """
    log_path = Path(run / "valid_log.csv")

    # Guard clause: without a log there is nothing to compare against.
    if not log_path.is_file():
        print("valid_log.csv not exists in run: {}".format(run))
        return

    hydra_cfg = OmegaConf.to_container(OmegaConf.load(run / '.hydra' / 'config.yaml'))
    expected_epochs = hydra_cfg['optim']['epochs']

    logged = pd.read_csv(log_path, header=None, index_col=0)
    # NOTE(review): the offset of 3 presumably accounts for non-epoch rows in
    # the log (header / extra evaluations) -- confirm against the training code.
    logged_epochs = len(logged.index) - 3
    if logged_epochs < expected_epochs:
        print("Wrong number of epochs in run: {}".format(run))

def check_only_one_tensorboard(run):
    """Print a warning when ``run / "runs"`` holds more than one entry.

    Each entry under ``runs`` is treated as one TensorBoard folder (as the
    printed message states); more than one is reported on stdout.

    Args:
        run: ``pathlib.Path`` of a run directory.

    Returns:
        None. A missing ``runs`` directory yields no entries and no warning.
    """
    tensorboard_dirs = list(Path(run / "runs").glob('*'))
    if len(tensorboard_dirs) > 1:
        print("More than 1 tensorboard folder in run: {}".format(run))

def collect_one(model_name, run, csv_file):
    """Load the prediction CSV of a single run and tag it with run metadata.

    Args:
        model_name: Label stored in the ``model`` column.
        run: ``pathlib.Path`` of the run directory. Its parent directory name
            must end in ``-<regime>`` and the grandparent's in ``-<inv_temp>``
            (both parsed as floats).
        csv_file: File name looked up inside ``run / 'test_predictions'``.

    Returns:
        The loaded DataFrame with the extra columns ``model``, ``run_number``,
        ``inv_temp`` and ``regime``; an empty DataFrame when the CSV is missing.
    """
    # Sanity checks: both only print warnings.
    check_num_epochs(run)
    check_only_one_tensorboard(run)

    hydra_cfg = OmegaConf.to_container(OmegaConf.load(run / '.hydra' / 'config.yaml'))
    run_number = hydra_cfg['data']['train']['cross_val_bucket_validation_index']

    # Hyper-parameters are encoded in the directory names above the run.
    parent_parts = run.parent.parts
    regime = float(parent_parts[-1].rsplit('-', 1)[1])
    inv_temp = float(parent_parts[-2].rsplit('-', 1)[1])

    csv_path = run / 'test_predictions' / csv_file
    if csv_path.exists():
        data = pd.read_csv(csv_path)
        if data.empty:
            print(f'Pred file is empty: {csv_path}')

        # Broadcast the scalar metadata onto every prediction row.
        for column, value in (
            ('model', model_name),
            ('run_number', run_number),
            ('inv_temp', inv_temp),
            ('regime', regime),
        ):
            data[column] = value
        return data

    print(f'Skipping not found: {csv_path}')
    return pd.DataFrame()

def collect_all(model_name, root, csv_file):
    """Collect predictions of every run found below an experiment root.

    The directory layout scanned is
    ``root / inv_temp-* / regime-* / run-*``; each run is loaded with
    ``collect_one``.

    Args:
        model_name: Label stored in the ``model`` column of the result.
        root: Experiment root (string or ``pathlib.Path``).
        csv_file: Prediction file name passed on to ``collect_one``.

    Returns:
        One DataFrame concatenating all runs (fresh integer index). When no
        run directory matches, a warning is printed and an empty DataFrame is
        returned instead of letting ``pd.concat`` fail on an empty list.
    """
    root = Path(root)

    # sorted() makes the row order independent of filesystem listing order.
    frames = [
        collect_one(model_name, run, csv_file)
        for inv_temp_dir in sorted(root.glob("inv_temp-*"))
        for regime_dir in sorted(inv_temp_dir.glob("regime-*"))
        for run in sorted(regime_dir.glob("run-*"))
    ]

    if not frames:
        print(f'No runs found under: {root}')
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

def collect_all_regimes(model_name, root, csv_file, regimes=['0.1', '0.2', '0.5', '1.0']):
    """Collect predictions of all runs in one regime directory, if selected.

    Args:
        model_name: Label stored in the ``model`` column of the result.
        root: A regime directory (string or ``pathlib.Path``); the text after
            the last ``-`` in its name is the regime label.
        csv_file: Prediction file name passed on to ``collect_one``.
        regimes: Regime labels (strings) to keep. The comparison is textual,
            so ``'1'`` does not match ``'1.0'``. The default list is never
            mutated.

    Returns:
        One DataFrame concatenating all ``run-*`` sub-directories (fresh
        integer index). An empty DataFrame is returned when the directory's
        regime is not selected or it contains no run, so the caller can
        concatenate the results of many directories without special cases.
    """
    root = Path(root)

    # The regime is a property of the directory, not of the individual run:
    # decide once instead of once per run.
    regime_label = root.name.rsplit("-", 1)[-1]
    if regime_label not in regimes:
        return pd.DataFrame()

    # sorted() makes the row order independent of filesystem listing order.
    frames = [collect_one(model_name, run, csv_file) for run in sorted(root.glob("run-*"))]

    if not frames:
        print(f'No runs found under: {root}')
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

In [3]:
# Compute metrics for each detected run

def compute_metrics(data, grouping, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance']):
    """Average per-sample segmentation metrics within each group of rows.

    Args:
        data: DataFrame of predictions containing the ``grouping`` columns and
            one ``segm/<key>`` column per requested metric (``segm/dice``,
            ``segm/jaccard``, ``segm/95hd``, ``segm/asd``). It is not modified.
        grouping: Column names to group by. Group keys are unpacked as
            (model, run number, inverse temperature, regime), so four keys in
            that order are expected.
        metric_names: Display names of the metrics to compute. The default
            list is never mutated.

    Returns:
        DataFrame with columns ``Model``, ``# Run``, ``Inv Temp``, ``Regime``
        followed by one column per metric. Dice and Jaccard are NaN-ignoring
        means scaled to percent and rounded *up* to 2 decimals; the other
        metrics are NaN-ignoring means rounded *up* to 2 decimals. A metric
        whose mean is NaN (all values missing) or infinite is reported as is
        instead of raising inside ``math.ceil``.
    """
    metrics_dict_names = {
        'Dice': 'dice',
        'Jaccard': 'jaccard',
        'Hausdorff Distance': '95hd',
        'Average Surface Distance': 'asd'
    }
    percent_metrics = ('Dice', 'Jaccard')

    columns = ['Model', '# Run', 'Inv Temp', 'Regime']
    columns.extend(metric_names)
    metrics = []

    # reset_index() already returns a new frame, so the input stays untouched.
    grouped = data.reset_index().groupby(grouping)

    for model_group, predictions in grouped:
        model_name, run_number, inv_temp, regime = model_group[0], model_group[1], model_group[2], model_group[3]

        metric_values = []
        for metric_name in metric_names:
            column = 'segm/{}'.format(metrics_dict_names[metric_name])
            values = predictions[column].to_numpy(dtype=float)

            # np.nanmean warns on an all-NaN slice; short-circuit that case.
            mean_value = np.nan if np.isnan(values).all() else np.nanmean(values)

            if not np.isfinite(mean_value):
                # math.ceil cannot convert NaN/inf: pass the value through.
                metric_values.append(mean_value)
            elif metric_name in percent_metrics:
                # ceil to 4 decimals, expressed in percent. A single division
                # avoids the float noise of "/ 10000 * 100".
                metric_values.append(math.ceil(mean_value * 10000) / 100)
            else:
                metric_values.append(math.ceil(mean_value * 100) / 100)

        metrics.append([model_name, run_number, inv_temp, regime, *metric_values])

    metrics_df = pd.DataFrame(metrics, columns=columns)

    return metrics_df

def summarize_metrics(metrics, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance']):
    """Aggregate per-run metrics into mean and standard deviation.

    Args:
        metrics: DataFrame with the columns ``Model``, ``Inv Temp``,
            ``Regime`` and one column per entry of ``metric_names``.
        metric_names: Metric columns to aggregate.

    Returns:
        DataFrame indexed by (Model, Inv Temp, Regime) with a two-level column
        index: metric name, then ``mean`` / ``std``.
    """
    group_keys = ['Model', 'Inv Temp', 'Regime']
    per_configuration = metrics.groupby(group_keys)
    return per_configuration[metric_names].agg(['mean', 'std'])

<h1>Evaluation - Searching temperature hyperparameter</h1>

<p>Evaluate Hebbian models belonging to the SWTA paradigm to search for the best temperature value (this value is dataset-specific).</p>

<h2>GlaS Dataset</h2>

In [15]:
# Unsupervised learning models are evaluated over the whole dataset and a fraction of data
# for backpropagating over BN layers, while fine-tuned models are evaluated over a fraction of data.

EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation-new/runs"

# Model label -> list of matching experiment directories (UNet-based models).
# An entry whose directory does not exist maps to an empty list.
runs = {
    label: list(Path(EXP_ROOT + '/experiment=glas/').glob(folder))
    for label, folder in (
        ('H-UNet-BASE-SWTA-BA1', 'hunet-swta_ba1'),
        ('H-UNet-BASE-SWTA-T-BA1', 'hunet-swta_t_ba1'),
        ('H-UNet-BASE-SWTA-BA4', 'hunet-swta_ba4'),
        ('H-UNet-BASE-SWTA-T-BA4', 'hunet-swta_t_ba4'),
    )
}

In [16]:
# Scan every experiment directory and stack the per-run predictions into one frame.
# To use the other prediction file, replace the file name with 'preds_from_last.csv'
# (presumably the predictions of the last checkpoint rather than the best-Dice one).
predictions = pd.concat(
    [
        collect_all(model_name, model_root, 'preds_from_best_dice.csv')
        for model_name, model_roots in runs.items()
        for model_root in model_roots
    ],
    ignore_index=True,
)

In [17]:
# Per-run metrics: one row per (model, run, inverse temperature, regime).
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(data=predictions, grouping=model_grouper)

display(metrics)

# Mean / std across the runs of each configuration.
summary = summarize_metrics(metrics=metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA-BA1,0,1.0,0.2,62.56,46.11,43.24,6.51
1,H-UNet-BASE-SWTA-BA1,0,5.0,0.2,67.98,52.05,31.50,4.15
2,H-UNet-BASE-SWTA-BA1,0,10.0,0.2,68.29,52.60,39.40,5.43
3,H-UNet-BASE-SWTA-BA1,0,20.0,0.2,69.43,53.78,27.95,3.74
4,H-UNet-BASE-SWTA-BA1,0,50.0,0.2,72.66,57.64,28.95,3.89
...,...,...,...,...,...,...,...,...
115,H-UNet-BASE-SWTA-T-BA4,4,5.0,0.2,65.40,49.23,32.38,4.55
116,H-UNet-BASE-SWTA-T-BA4,4,10.0,0.2,68.29,52.78,33.07,4.88
117,H-UNet-BASE-SWTA-T-BA4,4,20.0,0.2,69.22,53.48,23.33,3.37
118,H-UNet-BASE-SWTA-T-BA4,4,50.0,0.2,68.90,53.28,31.12,4.38


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA-BA1,1.0,0.2,63.736,1.841176,48.076,2.227685,52.38,7.904451,8.118,1.271306
H-UNet-BASE-SWTA-BA1,5.0,0.2,66.166,2.556713,50.284,2.703531,36.852,10.231357,5.382,1.800519
H-UNet-BASE-SWTA-BA1,10.0,0.2,68.04,1.949679,52.666,2.100019,40.154,6.856619,5.914,1.292471
H-UNet-BASE-SWTA-BA1,20.0,0.2,68.946,1.134782,53.398,1.227424,33.068,8.813116,4.726,1.35574
H-UNet-BASE-SWTA-BA1,50.0,0.2,70.198,1.973733,54.862,2.248782,28.68,4.000412,4.07,0.736105
H-UNet-BASE-SWTA-BA1,100.0,0.2,69.272,1.894629,53.888,1.960783,33.708,7.276151,4.726,1.231536
H-UNet-BASE-SWTA-BA4,1.0,0.2,61.686,1.918627,45.368,1.979209,44.224,5.362008,6.72,0.989621
H-UNet-BASE-SWTA-BA4,5.0,0.2,63.084,4.519268,47.022,4.84994,44.664,5.215125,6.492,0.916717
H-UNet-BASE-SWTA-BA4,10.0,0.2,65.13,3.555643,49.414,3.834538,42.61,6.565771,6.638,1.632703
H-UNet-BASE-SWTA-BA4,20.0,0.2,65.868,2.031285,50.142,2.423813,40.632,3.999521,6.014,0.475374


In [18]:
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation-new/runs"

# Model label -> list of matching experiment directories (UNet-based, fine-tuned models).
# An entry whose directory does not exist maps to an empty list.
runs = {
    label: list(Path(EXP_ROOT + '/experiment=glas/').glob(folder))
    for label, folder in (
        ('H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1', 'hunet_base-swta_ft_fromBestDice_fromBa1'),
        ('H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE-FROM-BA1', 'hunet_base-swta_t_ft_fromBestDice_fromBa1'),
        ('H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA4', 'hunet_base-swta_ft_fromBestDice_fromBa4'),
        ('H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE-FROM-BA4', 'hunet_base-swta_t_ft_fromBestDice_fromBa4'),
    )
}

In [19]:
# Scan every experiment directory and stack the per-run predictions into one frame.
predictions = pd.concat(
    [
        collect_all(model_name, model_root, 'preds.csv')
        for model_name, model_roots in runs.items()
        for model_root in model_roots
    ],
    ignore_index=True,
)

In [20]:
# Per-run metrics: one row per (model, run, inverse temperature, regime).
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(data=predictions, grouping=model_grouper)

display(metrics)

# Mean / std across the runs of each configuration.
summary = summarize_metrics(metrics=metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,0,1.0,0.2,78.33,65.40,28.29,3.88
1,H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,0,5.0,0.2,78.68,65.89,23.97,3.81
2,H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,0,10.0,0.2,82.09,70.37,20.11,2.52
3,H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,0,20.0,0.2,83.28,72.13,16.33,2.34
4,H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,0,50.0,0.2,82.01,70.29,22.87,3.00
...,...,...,...,...,...,...,...,...
114,H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE-FROM-BA4,4,5.0,0.2,80.05,67.65,22.09,3.00
115,H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE-FROM-BA4,4,10.0,0.2,80.91,69.02,22.92,3.50
116,H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE-FROM-BA4,4,20.0,0.2,82.01,70.49,24.51,3.25
117,H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE-FROM-BA4,4,50.0,0.2,80.04,67.56,32.83,4.15


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,1.0,0.2,76.26,2.551206,62.69,3.317386,27.518,1.920825,3.91,0.286182
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,5.0,0.2,78.536,1.941811,65.778,2.557209,26.982,3.34541,3.932,0.548471
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,10.0,0.2,78.414,2.982923,65.69,3.803584,27.608,5.0756,3.864,0.834134
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,20.0,0.2,79.904,3.037438,67.674,3.967534,24.308,5.601814,3.472,0.800106
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,50.0,0.2,80.066,2.903813,67.958,3.781147,25.73,4.149964,3.63,0.766225
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,100.0,0.2,81.056,2.38852,69.176,3.09176,23.482,5.50603,3.236,0.619621
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA4,1.0,0.2,75.814,1.765922,62.148,2.293343,27.306,3.133086,3.948,0.456202
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA4,5.0,0.2,79.042,2.017318,66.532,2.578589,26.526,7.027804,3.738,0.879301
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA4,10.0,0.2,80.292,1.911745,68.098,2.340335,25.52,2.309091,3.366,0.458181
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA4,20.0,0.2,78.612,2.865078,65.85,3.793218,26.748,5.071299,3.644,0.461768


<h1>Evaluation - Hebbian Unsupervised Pretraining</h1>

<p>Evaluate Hebbian models pretrained in an unsupervised way on the datasets; only the best temperature values for SWTA are considered.</p>

In [32]:
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation-new/runs"

INV_TEMP_GlaS=50          # to be set accordingly, used by SWTA
# Not referenced by the paths below, which hard-code 'regime-0.2'.
REGIMES=['0.2']

# Model label -> list of run directories (UNet-based models).
runs = {
    # HPCA models: fixed 'inv_temp-1' directory.
    **{
        label: list(Path(EXP_ROOT + regime_dir).glob('run-*'))
        for label, regime_dir in (
            ('H-UNet-HPCA-BA1', '/experiment=glas/hunet-hpca_ba1/inv_temp-1/regime-0.2'),
            ('H-UNet-HPCA-T-BA1', '/experiment=glas/hunet-hpca_t_ba1/inv_temp-1/regime-0.2'),
            ('H-UNet-HPCA-BA4', '/experiment=glas/hunet-hpca_ba4/inv_temp-1/regime-0.2'),
            ('H-UNet-HPCA-T-BA4', '/experiment=glas/hunet-hpca_t_ba4/inv_temp-1/regime-0.2'),
        )
    },
    # SWTA models: directory selected by INV_TEMP_GlaS.
    **{
        label: list(Path(EXP_ROOT + regime_dir_template.format(INV_TEMP_GlaS)).glob('run-*'))
        for label, regime_dir_template in (
            ('H-UNet-BASE-SWTA-BA1', '/experiment=glas/hunet-swta_ba1/inv_temp-{}/regime-0.2'),
            ('H-UNet-BASE-SWTA-T-BA1', '/experiment=glas/hunet-swta_t_ba1/inv_temp-{}/regime-0.2'),
            ('H-UNet-BASE-SWTA-BA4', '/experiment=glas/hunet-swta_ba4/inv_temp-{}/regime-0.2'),
            ('H-UNet-BASE-SWTA-T-BA4', '/experiment=glas/hunet-swta_t_ba4/inv_temp-{}/regime-0.2'),
        )
    },
}

In [33]:
# Load the predictions of every listed run directory and stack them into one frame.
# To use the other prediction file, replace the file name with 'preds_from_last.csv'
# (presumably the predictions of the last checkpoint rather than the best-Dice one).
predictions = pd.concat(
    [
        collect_one(model_name, run_dir, 'preds_from_best_dice.csv')
        for model_name, run_dirs in runs.items()
        for run_dir in run_dirs
    ],
    ignore_index=True,
)

In [34]:
# Per-run metrics: one row per (model, run, inverse temperature, regime).
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(data=predictions, grouping=model_grouper)

display(metrics)

# Mean / std across the runs of each configuration.
summary = summarize_metrics(metrics=metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA-BA1,0,50.0,0.2,72.66,57.64,28.95,3.89
1,H-UNet-BASE-SWTA-BA1,1,50.0,0.2,70.87,55.44,22.97,3.15
2,H-UNet-BASE-SWTA-BA1,2,50.0,0.2,67.24,51.39,32.36,4.7
3,H-UNet-BASE-SWTA-BA1,3,50.0,0.2,69.72,54.68,32.41,4.93
4,H-UNet-BASE-SWTA-BA1,4,50.0,0.2,70.5,55.16,26.71,3.68
5,H-UNet-BASE-SWTA-BA4,0,50.0,0.2,68.39,52.79,38.51,5.29
6,H-UNet-BASE-SWTA-BA4,1,50.0,0.2,66.96,51.0,31.65,4.35
7,H-UNet-BASE-SWTA-BA4,2,50.0,0.2,64.22,48.12,38.73,5.34
8,H-UNet-BASE-SWTA-BA4,3,50.0,0.2,66.56,50.62,39.47,5.52
9,H-UNet-BASE-SWTA-BA4,4,50.0,0.2,69.64,54.3,31.49,4.23


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA-BA1,50.0,0.2,70.198,1.973733,54.862,2.248782,28.68,4.000412,4.07,0.736105
H-UNet-BASE-SWTA-BA4,50.0,0.2,67.154,2.043693,51.366,2.337901,35.97,4.032741,4.946,0.606407
H-UNet-BASE-SWTA-T-BA1,50.0,0.2,69.918,2.295402,54.626,2.569344,31.686,6.589843,4.474,0.970325
H-UNet-BASE-SWTA-T-BA4,50.0,0.2,67.666,2.455958,51.946,2.659413,32.73,4.596558,4.712,0.881346
H-UNet-HPCA-BA1,1.0,0.2,47.53,3.760943,31.871,3.434629,69.742,17.370862,12.326,2.574448
H-UNet-HPCA-BA4,1.0,0.2,48.79,2.747162,32.92,2.664977,61.252,19.444879,10.098,2.838727
H-UNet-HPCA-T-BA1,1.0,0.2,47.257,3.285592,31.533,3.004526,80.106,86.86066,18.071,27.624402
H-UNet-HPCA-T-BA4,1.0,0.2,48.326,1.664926,32.39,1.535855,51.83,15.945383,8.794,2.214742


<h1>Evaluation - Data regime variations</h1>

<p>Evaluate Hebbian models on the datasets while varying the quantity of training data. Only fine-tuned models are considered, since the whole dataset can be used during pre-training; only the best temperature values for SWTA are considered.</p>

<h2>GlaS Dataset</h2>

In [37]:
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation-new/runs"

REGIMES = ['0.2']       # regimes to be considered
INV_TEMP_GlaS=50          # to be set accordingly, used by SWTA

# Model label -> list of regime directories (UNet-based models).
# Commented-out entries are disabled model variants.
runs = {
    # Baselines and HPCA models: fixed 'inv_temp-1' directory.
    **{
        label: list(Path(EXP_ROOT + inv_temp_dir).glob('regime-*'))
        for label, inv_temp_dir in (
            ('UNet', '/experiment=glas/unet_base/inv_temp-1'),
            # ('UNet-256', '/experiment=glas/unet_base-256/inv_temp-1'),
            ('UNet-Pseudolabeling', '/experiment=glas/unet/inv_temp-1'),
            ('H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1', '/experiment=glas/hunet_base-hpca_ft_fromBestDice_fromBa1/inv_temp-1'),
            ('H-UNet-HPCA-T-FT-FROM-BEST-DICE-FROM-BA1', '/experiment=glas/hunet_base-hpca_t_ft_fromBestDice_fromBa1/inv_temp-1'),
            ('H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA4', '/experiment=glas/hunet_base-hpca_ft_fromBestDice_fromBa4/inv_temp-1'),
            ('H-UNet-HPCA-T-FT-FROM-BEST-DICE-FROM-BA4', '/experiment=glas/hunet_base-hpca_t_ft_fromBestDice_fromBa4/inv_temp-1'),
            # ('H-UNet-Pseudolabeling-HPCA-FT', '/experiment=glas/hunet-hpca_ft/inv_temp-1'),
            # ('H-UNet-Pseudolabeling-HPCA-T-FT', '/experiment=glas/hunet-hpca_t_ft/inv_temp-1'),
        )
    },
    # SWTA models: directory selected by INV_TEMP_GlaS.
    **{
        label: list(Path(EXP_ROOT + inv_temp_dir_template.format(INV_TEMP_GlaS)).glob('regime-*'))
        for label, inv_temp_dir_template in (
            ('H-UNet-SWTA-FT-FROM-BEST-DICE-FROM-BA1', '/experiment=glas/hunet_base-swta_ft_fromBestDice_fromBa1/inv_temp-{}'),
            ('H-UNet-SWTA-T-FT-FROM-BEST-DICE-FROM-BA1', '/experiment=glas/hunet_base-swta_t_ft_fromBestDice_fromBa1/inv_temp-{}'),
            ('H-UNet-SWTA-FT-FROM-BEST-DICE-FROM-BA4', '/experiment=glas/hunet_base-swta_ft_fromBestDice_fromBa4/inv_temp-{}'),
            ('H-UNet-SWTA-T-FT-FROM-BEST-DICE-FROM-BA4', '/experiment=glas/hunet_base-swta_t_ft_fromBestDice_fromBa4/inv_temp-{}'),
            # ('H-UNet-Pseudolabeling-SWTA-FT', '/experiment=glas/hunet-swta_ft/inv_temp-{}'),
            # ('H-UNet-Pseudolabeling-SWTA-T-FT', '/experiment=glas/hunet-swta_t_ft/inv_temp-{}'),
        )
    },
}

In [38]:
# Load the predictions of every regime directory selected by REGIMES and stack them.
predictions = pd.concat(
    [
        collect_all_regimes(model_name, regime_root, 'preds.csv', regimes=REGIMES)
        for model_name, regime_roots in runs.items()
        for regime_root in regime_roots
    ],
    ignore_index=True,
)

In [39]:
# Per-run metrics: one row per (model, run, inverse temperature, regime).
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(data=predictions, grouping=model_grouper)

display(metrics)

# Mean / std across the runs of each configuration.
summary = summarize_metrics(metrics=metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,0,1.0,0.2,80.2,67.72,17.15,2.78
1,H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,1,1.0,0.2,77.48,64.42,25.85,3.46
2,H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,2,1.0,0.2,78.93,66.21,23.1,3.34
3,H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,3,1.0,0.2,80.14,67.88,25.32,3.69
4,H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,4,1.0,0.2,78.87,65.98,19.77,2.6
5,H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,5,1.0,0.2,81.61,69.85,28.72,3.94
6,H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,6,1.0,0.2,79.89,67.49,18.83,2.63
7,H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,7,1.0,0.2,80.47,68.8,33.11,5.03
8,H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,8,1.0,0.2,79.58,67.18,23.25,3.68
9,H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,9,1.0,0.2,78.49,65.71,27.03,3.92


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,1.0,0.2,79.566,1.165706,67.124,1.590215,24.213,4.854315,3.507,0.737218
H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA4,1.0,0.2,77.818,2.42673,64.672,3.29666,23.906,2.919971,3.362,0.295415
H-UNet-HPCA-T-FT-FROM-BEST-DICE-FROM-BA1,1.0,0.2,78.482222,2.071043,65.662222,2.802752,25.763333,4.384849,3.595556,0.717027
H-UNet-HPCA-T-FT-FROM-BEST-DICE-FROM-BA4,1.0,0.2,78.766,2.863604,66.238,3.656586,24.546,3.11358,3.596,0.456213
H-UNet-SWTA-FT-FROM-BEST-DICE-FROM-BA1,50.0,0.2,80.066,2.903813,67.958,3.781147,25.73,4.149964,3.63,0.766225
H-UNet-SWTA-FT-FROM-BEST-DICE-FROM-BA4,50.0,0.2,80.462,1.228218,68.358,1.549119,24.098,0.857887,3.286,0.239541
H-UNet-SWTA-T-FT-FROM-BEST-DICE-FROM-BA1,50.0,0.2,80.2975,1.988372,68.315,2.612566,25.0075,3.172301,3.495,0.541018
H-UNet-SWTA-T-FT-FROM-BEST-DICE-FROM-BA4,50.0,0.2,79.054,1.63914,66.39,2.033495,28.028,4.115625,3.746,0.565049
