In [42]:
import pandas as pd
import math
import numpy as np

%matplotlib inline

from pathlib import Path
from omegaconf import OmegaConf
import scipy.stats as st

In [2]:
# Collect predictions

def check_num_epochs(run):
    """Print a warning when a run's validation log looks shorter than configured.

    Args:
        run: ``pathlib.Path`` of the run directory. It is expected to contain
            ``valid_log.csv`` and the Hydra config ``.hydra/config.yaml``.

    Returns:
        None. Problems are only reported on stdout; nothing is raised for a
        missing log or a short log.
    """
    log_path = Path(run / "valid_log.csv")

    # Guard clause: without a validation log there is nothing to compare.
    if not log_path.is_file():
        print("valid_log.csv not exists in run: {}".format(run))
        return

    config = OmegaConf.to_container(OmegaConf.load(run / '.hydra' / 'config.yaml'))
    expected_epochs = config['optim']['epochs']

    log_rows = pd.read_csv(log_path, header=None, index_col=0)
    # Three rows are not counted as epochs -- presumably non-epoch rows of the
    # log file; TODO confirm against the code that writes valid_log.csv.
    logged_epochs = len(log_rows.index) - 3
    if logged_epochs < expected_epochs:
        print("Wrong number of epochs in run: {}".format(run))

def check_only_one_tensorboard(run):
    """Print a warning when ``run/runs`` contains more than one entry.

    Args:
        run: ``pathlib.Path`` of the run directory.

    Returns:
        None. The finding is only reported on stdout.
    """
    # Every direct child of ``run/runs`` is counted (files and folders alike).
    tensorboard_entries = list(Path(run / "runs").glob('*'))
    if len(tensorboard_entries) > 1:
        print("More than 1 tensorboard folder in run: {}".format(run))

def collect_one(model_name, run, csv_file):
    """Load the prediction CSV of a single run and tag it with run metadata.

    Args:
        model_name: label stored in the ``model`` column of the result.
        run: ``pathlib.Path`` of the run directory. The text after the last
            ``-`` in the names of its parent and grandparent folders is parsed
            as the regime and the inverse temperature respectively
            (e.g. ``.../inv_temp-10/regime-0.2/run-0``).
        csv_file: file name to read inside ``run/test_predictions``.

    Returns:
        A ``pandas.DataFrame`` with the CSV content plus the columns
        ``model``, ``run_number``, ``inv_temp`` and ``regime``; an empty
        ``DataFrame`` (no columns) when the CSV file does not exist.
    """
    # Sanity checks only print warnings; they never stop the collection.
    check_num_epochs(run)
    check_only_one_tensorboard(run)

    config = OmegaConf.to_container(OmegaConf.load(run / '.hydra' / 'config.yaml'))
    fold_index = config['data']['train']['cross_val_bucket_validation_index']

    parent_parts = run.parent.parts
    regime_value = float(parent_parts[-1].rsplit('-', 1)[1])
    inv_temp_value = float(parent_parts[-2].rsplit('-', 1)[1])

    predictions_path = run / 'test_predictions' / csv_file
    if not predictions_path.exists():
        print(f'Skipping not found: {predictions_path}')
        return pd.DataFrame()

    frame = pd.read_csv(predictions_path)
    if frame.empty:
        # Reported but not skipped: the (empty) frame is still tagged and returned.
        print(f'Pred file is empty: {predictions_path}')

    return frame.assign(
        model=model_name,
        run_number=fold_index,
        inv_temp=inv_temp_value,
        regime=regime_value,
    )

def collect_all(model_name, root, csv_file):
    """Collect the predictions of every run found below an experiment folder.

    The folder is scanned for ``inv_temp-*/regime-*/run-*`` and each matching
    run is loaded with ``collect_one``.

    Args:
        model_name: label stored in the ``model`` column of the result.
        root: experiment folder (string or ``pathlib.Path``).
        csv_file: prediction file name forwarded to ``collect_one``.

    Returns:
        A single ``pandas.DataFrame`` with all collected rows (fresh integer
        index). An empty ``DataFrame`` is returned when no run matches, instead
        of letting ``pd.concat`` fail with "No objects to concatenate".
    """
    root = Path(root)

    # Glob order depends on the filesystem; sorting keeps the row order stable
    # between executions and machines.
    frames = [
        collect_one(model_name, run, csv_file)
        for inv_temp_dir in sorted(root.glob("inv_temp-*"))
        for regime_dir in sorted(inv_temp_dir.glob("regime-*"))
        for run in sorted(regime_dir.glob("run-*"))
    ]

    if not frames:
        print(f'No runs found under: {root}')
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

def collect_all_regimes(model_name, root, csv_file, regimes=['0.1', '0.2', '0.5', '1.0']):
    """Collect the runs of one regime folder, if that regime is selected.

    Args:
        model_name: label stored in the ``model`` column of the result.
        root: regime folder (string or ``pathlib.Path``); the text after the
            last ``-`` of its name (e.g. ``regime-0.2`` -> ``'0.2'``) is
            compared, as a string, against ``regimes``.
        csv_file: prediction file name forwarded to ``collect_one``.
        regimes: regime labels to keep. The default list is only read, never
            modified.

    Returns:
        A ``pandas.DataFrame`` with the rows of every ``run-*`` folder below
        ``root``. An empty ``DataFrame`` is returned when the regime is not
        selected or when the folder holds no runs; previously both cases
        crashed inside ``pd.concat`` with "No objects to concatenate".
    """
    root = Path(root)

    # The regime label does not depend on the run, so it is checked once.
    _, separator, regime_label = root.name.rpartition("-")
    if not separator or regime_label not in regimes:
        return pd.DataFrame()

    # Sorted for a row order that does not depend on the filesystem.
    frames = [collect_one(model_name, run, csv_file) for run in sorted(root.glob("run-*"))]
    if not frames:
        print(f'No runs found under: {root}')
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

In [46]:
# Compute metrics for each detected run

def compute_metrics(data, grouping, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance']):
    """Average the per-sample segmentation metrics of every group of predictions.

    Args:
        data: ``pandas.DataFrame`` with one row per prediction. It must hold
            the grouping columns and one ``segm/<key>`` column per requested
            metric (``segm/dice``, ``segm/jaccard``, ``segm/95hd``, ``segm/asd``).
        grouping: list of column names to group by. The first four keys are
            reported as model, run number, inverse temperature and regime.
        metric_names: display names of the metrics to compute. The default
            list is only read, never modified.

    Returns:
        A ``pandas.DataFrame`` with columns ``Model``, ``# Run``, ``Inv Temp``,
        ``Regime`` followed by ``metric_names``, one row per group. Dice and
        Jaccard are reported on a 0-100 scale; all values are rounded *up* to
        two decimals. A metric with no valid value in a group is NaN. An empty
        ``data`` yields an empty frame with the same columns.
    """
    metrics_dict_names = {
        'Dice': 'dice',
        'Jaccard': 'jaccard',
        'Hausdorff Distance': '95hd',
        'Average Surface Distance': 'asd'
    }

    columns = ['Model', '# Run', 'Inv Temp', 'Regime', *metric_names]

    # Nothing was collected (e.g. every run was skipped): keep the schema.
    if data.empty:
        return pd.DataFrame(columns=columns)

    rows = []
    for model_group, predictions in data.reset_index().groupby(grouping):
        model_name, run_number, inv_temp, regime = model_group[:4]

        metric_values = []
        for metric_name in metric_names:
            values = predictions['segm/{}'.format(metrics_dict_names[metric_name])].values

            # math.ceil() cannot handle NaN, and np.nanmean() warns on an
            # all-NaN slice, so a metric without any valid value is reported
            # as NaN for every metric kind.
            if pd.isna(values).all():
                metric_values.append(np.nan)
                continue

            mean_value = np.nanmean(values)
            if metric_name in ('Dice', 'Jaccard'):
                # Overlap scores: round up to 4 decimals, then scale to 0-100.
                metric_values.append((math.ceil(mean_value * 10000) / 10000) * 100)
            else:
                # Distance metrics: round up to 2 decimals.
                metric_values.append(math.ceil(mean_value * 100) / 100)

        rows.append([model_name, run_number, inv_temp, regime, *metric_values])

    return pd.DataFrame(rows, columns=columns)

def summarize_metrics(metrics, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance'], t_student=False):
    """Aggregate per-run metrics into mean and standard deviation per configuration.

    Args:
        metrics: ``pandas.DataFrame`` with the columns ``Model``, ``Inv Temp``,
            ``Regime`` and one column per entry of ``metric_names``.
        metric_names: metric columns to aggregate.
        t_student: accepted but currently not used by this function.

    Returns:
        A ``pandas.DataFrame`` indexed by (``Model``, ``Inv Temp``, ``Regime``)
        whose columns are a two-level index (metric, ``mean``/``std``).
    """
    per_configuration = metrics.groupby(['Model', 'Inv Temp', 'Regime'])
    return per_configuration[metric_names].agg(['mean', 'std'])

<h1>Evaluation - Searching temperature hyperparameter</h1>

<p>Evaluate Hebbian models belonging to the SWTA paradigm to find the best temperature value (this value is dataset-specific).</p>

<h2>GlaS Dataset</h2>

In [4]:
# Unsupervised learning models are evaluated over the whole dataset, while fine-tuned models are evaluated over a fraction of the data

EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs"

# Model label -> list of experiment folders found on disk (UNET-based models).
# Each folder is later scanned for inv_temp-*/regime-*/run-* sub-folders.
runs = {
    model_label: list(Path(EXP_ROOT + '/experiment=glas/').glob(experiment_dir))
    for model_label, experiment_dir in (
        ('H-UNet-BASE-SWTA-BA1', 'hunet_base-swta_ba1'),
        ('H-UNet-BASE-SWTA-T-BA1', 'hunet_base-swta_t_ba1'),
        ('H-UNet-BASE-SWTA-BA4', 'hunet_base-swta_ba4'),
        ('H-UNet-BASE-SWTA-T-BA4', 'hunet_base-swta_t_ba4'),
    )
}

In [5]:
# Collect predictions scanning runs: every experiment folder listed in `runs`
# is walked and the file 'preds_from_best_dice.csv' of each run is loaded.
# Alternative input: use 'preds_from_last.csv' as the file name instead.
predictions = pd.concat(
    [
        collect_all(model_label, experiment_root, 'preds_from_best_dice.csv')
        for model_label, experiment_roots in runs.items()
        for experiment_root in experiment_roots
    ],
    ignore_index=True,
)

In [6]:
# Computing metrics: one row per (model, run, inverse temperature, regime)
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(data=predictions, grouping=model_grouper)
display(metrics)

# Mean and standard deviation of every metric across the runs of a configuration
summary = summarize_metrics(metrics=metrics)
display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA-BA4,0,1.0,1.0,51.19,35.42,98.0,15.93
1,H-UNet-BASE-SWTA-BA4,0,5.0,1.0,57.13,41.3,75.36,11.96
2,H-UNet-BASE-SWTA-BA4,0,10.0,1.0,58.14,42.35,72.67,11.51
3,H-UNet-BASE-SWTA-BA4,0,20.0,1.0,58.31,42.53,72.16,11.42
4,H-UNet-BASE-SWTA-BA4,0,50.0,1.0,58.17,42.38,72.53,11.48
5,H-UNet-BASE-SWTA-BA4,0,100.0,1.0,58.39,42.61,72.0,11.39
6,H-UNet-BASE-SWTA-T-BA4,0,1.0,1.0,53.7,37.84,86.94,13.96
7,H-UNet-BASE-SWTA-T-BA4,0,5.0,1.0,61.6,46.11,65.87,10.31
8,H-UNet-BASE-SWTA-T-BA4,0,10.0,1.0,60.43,44.82,63.41,10.0
9,H-UNet-BASE-SWTA-T-BA4,0,20.0,1.0,61.81,46.37,64.47,10.11


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA-BA4,1.0,1.0,51.19,,35.42,,98.0,,15.93,
H-UNet-BASE-SWTA-BA4,5.0,1.0,57.13,,41.3,,75.36,,11.96,
H-UNet-BASE-SWTA-BA4,10.0,1.0,58.14,,42.35,,72.67,,11.51,
H-UNet-BASE-SWTA-BA4,20.0,1.0,58.31,,42.53,,72.16,,11.42,
H-UNet-BASE-SWTA-BA4,50.0,1.0,58.17,,42.38,,72.53,,11.48,
H-UNet-BASE-SWTA-BA4,100.0,1.0,58.39,,42.61,,72.0,,11.39,
H-UNet-BASE-SWTA-T-BA4,1.0,1.0,53.7,,37.84,,86.94,,13.96,
H-UNet-BASE-SWTA-T-BA4,5.0,1.0,61.6,,46.11,,65.87,,10.31,
H-UNet-BASE-SWTA-T-BA4,10.0,1.0,60.43,,44.82,,63.41,,10.0,
H-UNet-BASE-SWTA-T-BA4,20.0,1.0,61.81,,46.37,,64.47,,10.11,


In [31]:
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs"

# Model label -> list of experiment folders found on disk (UNET-based, fine-tuned models).
runs = {
    model_label: list(Path(EXP_ROOT + '/experiment=glas/').glob(experiment_dir))
    for model_label, experiment_dir in (
        ('H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1', 'hunet_base-swta_ft_fromBestDice_fromBa1'),
        ('H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE-FROM-BA1', 'hunet_base-swta_t_ft_fromBestDice_fromBa1'),
        ('H-UNet-BASE-SWTA-FT-FROM-LAST-FROM-BA1', 'hunet_base-swta_ft_fromLast_fromBa1'),
        ('H-UNet-BASE-SWTA-T-FT-FROM-LAST-FROM-BA1', 'hunet_base-swta_t_ft_fromLast_fromBa1'),
        ('H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA4', 'hunet_base-swta_ft_fromBestDice_fromBa4'),
        ('H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE-FROM-BA4', 'hunet_base-swta_t_ft_fromBestDice_fromBa4'),
        ('H-UNet-BASE-SWTA-FT-FROM-LAST-FROM-BA4', 'hunet_base-swta_ft_fromLast_fromBa4'),
        ('H-UNet-BASE-SWTA-T-FT-FROM-LAST-FROM-BA4', 'hunet_base-swta_t_ft_fromLast_fromBa4'),
        # Disabled entries (pseudo-labeling variants):
        # ('H-UNet-Pseudolabeling-SWTA-T', 'hunet-swta_t'),
        # ('H-UNet-Pseudolabeling-SWTA-T-FT', 'hunet-swta_t_ft'),
    )
}

In [32]:
# Collect predictions scanning runs: 'preds.csv' is loaded from every run
# found below the experiment folders listed in `runs`.
predictions = pd.concat(
    [
        collect_all(model_label, experiment_root, 'preds.csv')
        for model_label, experiment_roots in runs.items()
        for experiment_root in experiment_roots
    ],
    ignore_index=True,
)

In [33]:
# Computing metrics: one row per (model, run, inverse temperature, regime)
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(data=predictions, grouping=model_grouper)
display(metrics)

# Mean and standard deviation of every metric across the runs of a configuration
summary = summarize_metrics(metrics=metrics)
display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,0,1.0,0.2,78.80,66.08,30.37,4.49
1,H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,0,5.0,0.2,77.41,64.07,24.02,3.32
2,H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,0,10.0,0.2,81.86,70.11,19.71,3.08
3,H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,0,20.0,0.2,79.47,66.91,21.82,2.89
4,H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,0,50.0,0.2,80.74,68.56,21.99,3.11
...,...,...,...,...,...,...,...,...
175,H-UNet-BASE-SWTA-T-FT-FROM-LAST-FROM-BA4,4,5.0,0.2,83.21,72.13,19.54,2.71
176,H-UNet-BASE-SWTA-T-FT-FROM-LAST-FROM-BA4,4,10.0,0.2,83.06,71.99,19.65,2.99
177,H-UNet-BASE-SWTA-T-FT-FROM-LAST-FROM-BA4,4,20.0,0.2,81.60,69.98,25.96,3.60
178,H-UNet-BASE-SWTA-T-FT-FROM-LAST-FROM-BA4,4,50.0,0.2,85.14,74.73,21.20,2.64


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,1.0,0.2,77.414,2.79126,64.27,3.606598,26.346,4.023218,3.83,0.549545
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,5.0,0.2,77.898,1.195082,64.992,1.628149,26.67,1.592341,3.78,0.352349
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,10.0,0.2,80.956,1.882214,69.006,2.521394,23.346,3.797885,3.23,0.354824
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,20.0,0.2,79.46,3.953701,67.17,5.212447,24.904,5.278795,3.504,0.893465
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,50.0,0.2,80.902,1.72381,68.958,2.274581,24.44,4.522477,3.302,0.459206
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE-FROM-BA1,100.0,0.2,79.118,3.257571,66.616,4.263418,23.748,4.49933,3.406,0.64512
H-UNet-BASE-SWTA-FT-FROM-LAST-FROM-BA1,1.0,0.2,77.908,2.273152,64.846,2.924257,25.102,3.203298,3.478,0.435511
H-UNet-BASE-SWTA-FT-FROM-LAST-FROM-BA1,5.0,0.2,79.028,3.051388,66.52,3.953347,26.982,4.91184,3.714,0.674003
H-UNet-BASE-SWTA-FT-FROM-LAST-FROM-BA1,10.0,0.2,79.58,1.912106,67.064,2.56986,24.5,2.350979,3.27,0.188812
H-UNet-BASE-SWTA-FT-FROM-LAST-FROM-BA1,20.0,0.2,80.082,2.020587,67.826,2.67572,24.232,4.013816,3.3,0.516382


<h1>Evaluation - Hebbian Unsupervised Pretraining</h1>

<p>Evaluate Hebbian models pretrained in an unsupervised way over the datasets; only the best temperature values for SWTA are considered.</p>

In [4]:
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs"

INV_TEMP_GlaS = 100  # to be set accordingly, used by SWTA

# Model label -> list of run folders (run-*) found on disk (UNET-based models).
# HPCA entries always read inv_temp-1; SWTA entries read inv_temp-<INV_TEMP_GlaS>.
runs = {
    model_label: list(Path(EXP_ROOT + regime_dir).glob('run-*'))
    for model_label, regime_dir in (
        ('H-UNet-HPCA-BA1', '/experiment=glas/hunet_base-hpca_ba1/inv_temp-1/regime-1.0'),
        ('H-UNet-HPCA-T-BA1', '/experiment=glas/hunet_base-hpca_t_ba1/inv_temp-1/regime-1.0'),
        ('H-UNet-HPCA-BA4', '/experiment=glas/hunet_base-hpca_ba4/inv_temp-1/regime-1.0'),
        ('H-UNet-HPCA-T-BA4', '/experiment=glas/hunet_base-hpca_t_ba4/inv_temp-1/regime-1.0'),
        ('H-UNet-BASE-SWTA-BA1', '/experiment=glas/hunet_base-swta_ba1/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)),
        ('H-UNet-BASE-SWTA-T-BA1', '/experiment=glas/hunet_base-swta_t_ba1/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)),
        ('H-UNet-BASE-SWTA-BA4', '/experiment=glas/hunet_base-swta_ba4/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)),
        ('H-UNet-BASE-SWTA-T-BA4', '/experiment=glas/hunet_base-swta_t_ba4/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)),
    )
}

In [7]:
# Collect predictions scanning runs: `runs` already lists individual run
# folders here, so each one is loaded directly from 'preds_from_last.csv'.
# Alternative input: use 'preds_from_best_dice.csv' as the file name instead.
predictions = pd.concat(
    [
        collect_one(model_label, run_dir, 'preds_from_last.csv')
        for model_label, run_dirs in runs.items()
        for run_dir in run_dirs
    ],
    ignore_index=True,
)

Skipping not found: /home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs/experiment=glas/hunet_base-swta_ba1/inv_temp-100/regime-1.0/run-0/test_predictions/preds_from_last.csv
Skipping not found: /home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs/experiment=glas/hunet_base-swta_t_ba1/inv_temp-100/regime-1.0/run-0/test_predictions/preds_from_last.csv


In [8]:
# Computing metrics: one row per (model, run, inverse temperature, regime)
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(data=predictions, grouping=model_grouper)
display(metrics)

# Mean and standard deviation of every metric across the runs of a configuration
summary = summarize_metrics(metrics=metrics)
display(summary)

  mean_value = np.nanmean(values)


Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-HPCA-BA1,0,1.0,1.0,34.04,20.71,150.06,46.11
1,H-UNet-HPCA-BA4,0,1.0,1.0,34.94,21.37,206.94,65.3
2,H-UNet-HPCA-T-BA1,0,1.0,1.0,33.38,20.22,,
3,H-UNet-HPCA-T-BA4,0,1.0,1.0,35.21,21.58,,


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-HPCA-BA1,1.0,1.0,34.04,,20.71,,150.06,,46.11,
H-UNet-HPCA-BA4,1.0,1.0,34.94,,21.37,,206.94,,65.3,
H-UNet-HPCA-T-BA1,1.0,1.0,33.38,,20.22,,,,,
H-UNet-HPCA-T-BA4,1.0,1.0,35.21,,21.58,,,,,


<h1>Evaluation - Data regime variations</h1>

<p>Evaluate Hebbian models over the datasets while varying the quantity of training data; only fine-tuned models are considered, since pre-training can always use the whole dataset; only the best temperature values for SWTA are considered.</p>

<h2>GlaS Dataset</h2>

In [47]:
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs"

REGIMES = ['0.2']  # regimes to be considered
INV_TEMP_GlaS = 10  # to be set accordingly, used by SWTA

# Model label -> list of regime folders (regime-*) found on disk (UNET-based models).
# Baselines and HPCA entries read inv_temp-1; SWTA entries read inv_temp-<INV_TEMP_GlaS>.
runs = {
    model_label: list(Path(EXP_ROOT + inv_temp_dir).glob('regime-*'))
    for model_label, inv_temp_dir in (
        ('UNet', '/experiment=glas/unet_base/inv_temp-1'),
        # ('UNet-256', '/experiment=glas/unet_base-256/inv_temp-1'),
        ('UNet-Pseudolabeling', '/experiment=glas/unet/inv_temp-1'),
        ('H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1', '/experiment=glas/hunet_base-hpca_ft_fromBestDice_fromBa1/inv_temp-1'),
        ('H-UNet-HPCA-T-FT-FROM-BEST-DICE-FROM-BA1', '/experiment=glas/hunet_base-hpca_t_ft_fromBestDice_fromBa1/inv_temp-1'),
        ('H-UNet-HPCA-FT-FROM-LAST-FROM-BA1', '/experiment=glas/hunet_base-hpca_ft_fromLast_fromBa1/inv_temp-1'),
        ('H-UNet-HPCA-T-FT-FROM-LAST-FROM-BA1', '/experiment=glas/hunet_base-hpca_t_ft_fromLast_fromBa1/inv_temp-1'),
        ('H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA4', '/experiment=glas/hunet_base-hpca_ft_fromBestDice_fromBa4/inv_temp-1'),
        ('H-UNet-HPCA-T-FT-FROM-BEST-DICE-FROM-BA4', '/experiment=glas/hunet_base-hpca_t_ft_fromBestDice_fromBa4/inv_temp-1'),
        ('H-UNet-HPCA-FT-FROM-LAST-FROM-BA4', '/experiment=glas/hunet_base-hpca_ft_fromLast_fromBa4/inv_temp-1'),
        ('H-UNet-HPCA-T-FT-FROM-LAST-FROM-BA4', '/experiment=glas/hunet_base-hpca_t_ft_fromLast_fromBa4/inv_temp-1'),
        # ('H-UNet-Pseudolabeling-HPCA-FT', '/experiment=glas/hunet-hpca_ft/inv_temp-1'),
        # ('H-UNet-Pseudolabeling-HPCA-T-FT', '/experiment=glas/hunet-hpca_t_ft/inv_temp-1'),
        ('H-UNet-SWTA-FT-FROM-BEST-DICE-FROM-BA1', '/experiment=glas/hunet_base-swta_ft_fromBestDice_fromBa1/inv_temp-{}'.format(INV_TEMP_GlaS)),
        ('H-UNet-SWTA-T-FT-FROM-BEST-DICE-FROM-BA1', '/experiment=glas/hunet_base-swta_t_ft_fromBestDice_fromBa1/inv_temp-{}'.format(INV_TEMP_GlaS)),
        ('H-UNet-SWTA-FT-FROM-LAST-FROM-BA1', '/experiment=glas/hunet_base-swta_ft_fromLast_fromBa1/inv_temp-{}'.format(INV_TEMP_GlaS)),
        ('H-UNet-SWTA-T-FT-FROM-LAST-FROM-BA1', '/experiment=glas/hunet_base-swta_t_ft_fromLast_fromBa1/inv_temp-{}'.format(INV_TEMP_GlaS)),
        ('H-UNet-SWTA-FT-FROM-BEST-DICE-FROM-BA4', '/experiment=glas/hunet_base-swta_ft_fromBestDice_fromBa4/inv_temp-{}'.format(INV_TEMP_GlaS)),
        ('H-UNet-SWTA-T-FT-FROM-BEST-DICE-FROM-BA4', '/experiment=glas/hunet_base-swta_t_ft_fromBestDice_fromBa4/inv_temp-{}'.format(INV_TEMP_GlaS)),
        ('H-UNet-SWTA-FT-FROM-LAST-FROM-BA4', '/experiment=glas/hunet_base-swta_ft_fromLast_fromBa4/inv_temp-{}'.format(INV_TEMP_GlaS)),
        ('H-UNet-SWTA-T-FT-FROM-LAST-FROM-BA4', '/experiment=glas/hunet_base-swta_t_ft_fromLast_fromBa4/inv_temp-{}'.format(INV_TEMP_GlaS)),
        # ('H-UNet-Pseudolabeling-SWTA-FT', '/experiment=glas/hunet-swta_ft/inv_temp-{}'.format(INV_TEMP_GlaS)),
        # ('H-UNet-Pseudolabeling-SWTA-T-FT', '/experiment=glas/hunet-swta_t_ft/inv_temp-{}'.format(INV_TEMP_GlaS)),
    )
}

In [48]:
# Collect predictions scanning runs: `runs` lists regime folders here; only
# those whose regime label is in REGIMES are loaded (from 'preds.csv').
predictions = pd.concat(
    [
        collect_all_regimes(model_label, regime_dir, 'preds.csv', regimes=REGIMES)
        for model_label, regime_dirs in runs.items()
        for regime_dir in regime_dirs
    ],
    ignore_index=True,
)

In [50]:
# Computing metrics: one row per (model, run, inverse temperature, regime)
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(data=predictions, grouping=model_grouper)
display(metrics)

# Mean and standard deviation of every metric across the runs of a configuration
summary = summarize_metrics(metrics=metrics)
display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,0,1.0,0.2,78.87,65.96,22.38,2.98
1,H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,1,1.0,0.2,77.11,63.87,22.57,2.99
2,H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,2,1.0,0.2,79.25,66.61,22.91,2.96
3,H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,3,1.0,0.2,81.50,69.80,23.73,3.11
4,H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,4,1.0,0.2,79.35,66.87,22.41,3.60
...,...,...,...,...,...,...,...,...
125,UNet-Pseudolabeling,5,1.0,0.2,79.70,67.29,42.19,5.54
126,UNet-Pseudolabeling,6,1.0,0.2,82.17,71.05,28.57,3.99
127,UNet-Pseudolabeling,7,1.0,0.2,83.65,73.21,29.79,4.07
128,UNet-Pseudolabeling,8,1.0,0.2,83.05,72.48,24.12,3.84


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA1,1.0,0.2,79.495,1.218389,67.043,1.677121,26.071,4.496254,3.66,0.68326
H-UNet-HPCA-FT-FROM-BEST-DICE-FROM-BA4,1.0,0.2,78.817,3.197103,66.148,4.162726,26.175,2.651378,3.633,0.379006
H-UNet-HPCA-FT-FROM-LAST-FROM-BA1,1.0,0.2,79.945,1.418154,67.626,1.933967,25.719,5.813894,3.576,0.76378
H-UNet-HPCA-FT-FROM-LAST-FROM-BA4,1.0,0.2,79.072,2.641219,66.505,3.583624,26.582,4.438455,3.697,0.619893
H-UNet-HPCA-T-FT-FROM-BEST-DICE-FROM-BA1,1.0,0.2,78.402,1.321504,65.657,1.727581,30.809,6.65485,4.285,0.963134
H-UNet-HPCA-T-FT-FROM-BEST-DICE-FROM-BA4,1.0,0.2,77.644,3.431641,64.686,4.503826,28.994,7.894878,4.124,1.07319
H-UNet-HPCA-T-FT-FROM-LAST-FROM-BA1,1.0,0.2,80.54,3.52545,68.601,4.589274,24.523,6.0838,3.487,0.939622
H-UNet-HPCA-T-FT-FROM-LAST-FROM-BA4,1.0,0.2,80.011,1.746899,67.742,2.347224,25.912,5.878331,3.615,0.905946
H-UNet-SWTA-FT-FROM-BEST-DICE-FROM-BA1,10.0,0.2,80.956,1.882214,69.006,2.521394,23.346,3.797885,3.23,0.354824
H-UNet-SWTA-FT-FROM-LAST-FROM-BA1,10.0,0.2,79.58,1.912106,67.064,2.56986,24.5,2.350979,3.27,0.188812
