In [1]:
import pandas as pd
import math
import numpy as np

%matplotlib inline

from pathlib import Path
from omegaconf import OmegaConf
import scipy.stats as st

In [2]:
# Collect predictions

def check_num_epochs(run):
    if Path(run / "valid_log.csv").is_file():
        cfg = OmegaConf.load(run / '.hydra' / 'config.yaml')
        cfg = OmegaConf.to_container(cfg)
        num_epochs = cfg['optim']['epochs']

        valid_log = pd.read_csv(run / "valid_log.csv", header=None, index_col=0)
        if (len(valid_log.index)-3) < num_epochs:
            print("Wrong number of epochs in run: {}".format(run))
    else:
        print("valid_log.csv not exists in run: {}".format(run))

def check_only_one_tensorboard(run):
    len(list(Path('.').glob('*')))
    if len(list(Path(run / "runs").glob('*'))) > 1:
        print("More than 1 tensorboard folder in run: {}".format(run))

def check_image_size(run, image_size=480):
    cfg = OmegaConf.load(run / '.hydra' / 'config.yaml')
    cfg = OmegaConf.to_container(cfg)
    conf_image_size = cfg['data']['image_size']
    if int(conf_image_size) != image_size:
        print("Different image size in conf. Image size in conf: {}, Image size: {}".format(conf_image_size, image_size))

def collect_one(model_name, run, csv_file, image_size=480):
    check_num_epochs(run)
    check_only_one_tensorboard(run)
    check_image_size(run, image_size=image_size)
    
    cfg = OmegaConf.load(run / '.hydra' / 'config.yaml')
    cfg = OmegaConf.to_container(cfg)

    run_number = cfg['data']['train']['cross_val_bucket_validation_index']
    regime, inv_temp = float(run.parent.parts[-1].rsplit('-', 1)[1]), float(run.parent.parts[-2].rsplit('-', 1)[1])

    csv_path = run / 'test_predictions' / csv_file
    if not csv_path.exists():
        print(f'Skipping not found: {csv_path}')
        return pd.DataFrame()
    
    data = pd.read_csv(csv_path)
    if data.empty:
        print(f'Pred file is empty: {csv_path}')

    data['model'] = model_name
    data['run_number'] = run_number
    data['inv_temp'] = inv_temp
    data['regime'] = regime
    
    return data

def collect_all(model_name, root, csv_file, regimes=['0.01', '0.02', '0.05', '0.1', '0.2', '0.25', '1.0'], image_size=480, ignore_outliers=True):
    root = Path(root)

    metrics = []
    for inv_temp in list(root.glob("inv_temp-*")):
        for regime in list(inv_temp.glob("regime-*")):
            if regime.name.rsplit("-", 1)[1] in regimes:
                for run in list(regime.glob("run-*")):
                    if ignore_outliers and 'outlier' in run.as_posix():
                        continue
                    else:
                        metrics.append(collect_one(model_name, run, csv_file, image_size=image_size))
        
    metrics = pd.concat(metrics, ignore_index=True)
    
    return metrics

def collect_all_temperature(model_name, root, csv_file, regimes=['0.01', '0.02', '0.05', '0.1', '0.2', '0.25', '1.0'], image_size=480, ignore_outliers=True):
    root = Path(root)

    metrics = []
    for regime in list(root.glob("regime-*")):
        if regime.name.rsplit("-", 1)[1] in regimes:
            for run in list(regime.glob("run-*")):
                if ignore_outliers and 'outlier' in run.as_posix():
                    continue
                else:
                    metrics.append(collect_one(model_name, run, csv_file, image_size=image_size))
        
    metrics = pd.concat(metrics, ignore_index=True)
    
    return metrics

In [3]:
# Compute metrics for each detected run

def compute_metrics(data, grouping, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance']):
    metrics_dict_names = {
        'Dice': 'dice',
        'Jaccard': 'jaccard',
        'Hausdorff Distance': '95hd',
        'Average Surface Distance': 'asd'
    }

    columns = ['Model', '# Run', 'Inv Temp', 'Regime']
    columns.extend(metric_names)
    metrics = []
    
    data = data.copy().reset_index()
    grouped = data.groupby(grouping)
    
    for model_group, predictions in grouped:
        model_name, run_number, inv_temp, regime = model_group[0], model_group[1], model_group[2], model_group[3]
        
        metric_values = []
        for metric_name in metric_names:
            values = predictions['segm/{}'.format(metrics_dict_names[metric_name])].values
            mean_value = np.nanmean(values)
            if metric_name == 'Dice' or metric_name == 'Jaccard':
                metric_values.append((math.ceil(mean_value*10000)/10000)*100)
            else:
                if not np.isnan(mean_value):
                    metric_values.append(math.ceil(mean_value*100)/100)
                else:
                    metric_values.append(mean_value)
        
        metrics.append([model_name, run_number, inv_temp, regime, *metric_values])
        
    metrics_df = pd.DataFrame(metrics, columns=columns)
    
    return metrics_df

def summarize_metrics(metrics, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance'], confidence_level=0.90, return_ranges=False):

    def compute_ci(values, return_ranges=False):
        ci = st.t.interval(confidence_level, len(values)-1, loc=np.mean(values), scale=np.std(values)/(len(values)**0.5))

        if return_ranges:
            return ci
        else:
            return (ci[1]-ci[0]) / 2

    #mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
    mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)]) 

    return mean_metrics

<h1>GlaS Dataset</h1>

<h2>Evaluation - Searching temperature hyperparameter</h2>

<p>Evaluate Hebbian models belonging to SWTA paradigm to search best temperature values (this value is dataset-specific)</p>

In [13]:
# Unsupervised learning models are evaluate over the whole dataset while fine tuned models over a fraction of data

EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['1.0']
IMAGE_SIZE = 480

runs = {
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_t')),
}

In [14]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds_from_last.csv', regimes=REGIMES, image_size=IMAGE_SIZE, ignore_outliers=False) for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_all(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [15]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,1.0,1.0,52.31,36.16,41.72,8.55
1,H-UNet-BASE-SWTA,0,5.0,1.0,49.74,33.87,42.4,8.16
2,H-UNet-BASE-SWTA,0,10.0,1.0,45.79,30.24,45.59,8.88
3,H-UNet-BASE-SWTA,0,20.0,1.0,49.81,33.74,38.32,7.2
4,H-UNet-BASE-SWTA,0,50.0,1.0,49.98,34.01,42.11,7.88
5,H-UNet-BASE-SWTA,0,100.0,1.0,49.48,33.46,40.05,7.27
6,H-UNet-BASE-SWTA-T,0,1.0,1.0,45.6,30.22,44.54,9.98
7,H-UNet-BASE-SWTA-T,0,5.0,1.0,48.07,32.26,54.12,10.52
8,H-UNet-BASE-SWTA-T,0,10.0,1.0,49.61,33.6,42.02,8.29
9,H-UNet-BASE-SWTA-T,0,20.0,1.0,50.11,34.03,42.04,7.84


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])
  lower_bound = _a * scale + loc
  upper_bound = _b * scale + loc
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA,1.0,1.0,52.31,,36.16,,41.72,,8.55,
H-UNet-BASE-SWTA,5.0,1.0,49.74,,33.87,,42.4,,8.16,
H-UNet-BASE-SWTA,10.0,1.0,45.79,,30.24,,45.59,,8.88,
H-UNet-BASE-SWTA,20.0,1.0,49.81,,33.74,,38.32,,7.2,
H-UNet-BASE-SWTA,50.0,1.0,49.98,,34.01,,42.11,,7.88,
H-UNet-BASE-SWTA,100.0,1.0,49.48,,33.46,,40.05,,7.27,
H-UNet-BASE-SWTA-T,1.0,1.0,45.6,,30.22,,44.54,,9.98,
H-UNet-BASE-SWTA-T,5.0,1.0,48.07,,32.26,,54.12,,10.52,
H-UNet-BASE-SWTA-T,10.0,1.0,49.61,,33.6,,42.02,,8.29,
H-UNet-BASE-SWTA-T,20.0,1.0,50.11,,34.03,,42.04,,7.84,


In [34]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['0.01', '0.02', '0.05', '0.1', '0.2']
IMAGE_SIZE = 480

runs = {
    'H-UNet-BASE-SWTA-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_ft')),
    'H-UNet-BASE-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_t_ft')),
    'H-UNet-Pseudolabeling-SWTA-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet-swta_ft')),
    'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet-swta_t_ft')),
}

In [37]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE, ignore_outliers=False) for k, v in runs.items() for r in v], ignore_index=True)

In [38]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA-FT,0,1.0,0.01,64.24,49.26,57.84,9.26
1,H-UNet-BASE-SWTA-FT,0,1.0,0.02,67.96,52.81,51.52,7.29
2,H-UNet-BASE-SWTA-FT,0,1.0,0.05,72.36,57.89,42.10,5.75
3,H-UNet-BASE-SWTA-FT,0,1.0,0.10,72.69,57.82,28.34,3.90
4,H-UNet-BASE-SWTA-FT,0,1.0,0.20,78.29,65.23,25.62,3.60
...,...,...,...,...,...,...,...,...
1095,H-UNet-Pseudolabeling-SWTA-T-FT,9,100.0,0.01,66.95,51.33,40.67,6.38
1096,H-UNet-Pseudolabeling-SWTA-T-FT,9,100.0,0.02,72.61,59.59,41.55,6.94
1097,H-UNet-Pseudolabeling-SWTA-T-FT,9,100.0,0.05,65.87,50.95,61.45,9.47
1098,H-UNet-Pseudolabeling-SWTA-T-FT,9,100.0,0.10,78.39,65.91,40.10,5.96


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA-FT,1.0,0.01,65.742,0.977153,50.876,1.161506,56.590,2.489949,9.148,0.373108
H-UNet-BASE-SWTA-FT,1.0,0.02,64.977,1.902475,49.779,2.011092,52.690,5.107129,8.353,1.036061
H-UNet-BASE-SWTA-FT,1.0,0.05,70.160,1.702422,55.482,1.925587,45.307,4.653122,6.600,0.865517
H-UNet-BASE-SWTA-FT,1.0,0.10,74.023,0.951953,60.101,1.271095,33.106,2.614397,4.906,0.479240
H-UNet-BASE-SWTA-FT,1.0,0.20,77.890,1.098575,64.949,1.419287,28.680,1.919219,4.102,0.327690
...,...,...,...,...,...,...,...,...,...,...
H-UNet-Pseudolabeling-SWTA-T-FT,100.0,0.01,66.874,1.864946,52.035,2.057103,54.541,4.040427,8.364,0.655031
H-UNet-Pseudolabeling-SWTA-T-FT,100.0,0.02,70.349,1.294908,56.447,1.405975,50.192,2.363189,7.920,0.571784
H-UNet-Pseudolabeling-SWTA-T-FT,100.0,0.05,72.386,2.303595,58.900,2.854746,47.805,5.100017,7.322,0.867951
H-UNet-Pseudolabeling-SWTA-T-FT,100.0,0.10,78.894,1.685691,66.768,2.292143,35.750,3.831478,5.310,0.636758


<h2>Evaluation - Hebbian Unsupervised Pretraining</h2>

<p>Evaluate Hebbian models pretrained in an unsupervised way over the datasets; only best temperature values for SWTA are considered.</p>

In [39]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

IMAGE_SIZE = 480
INV_TEMP_GlaS=1          # to be set accordingly, used by SWTA

runs = {
    'H-UNet-HPCA': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-HPCA-T': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca_t/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)).glob('run-*')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta_t/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)).glob('run-*')),
}

In [40]:
# Collect predictions scanning runs
predictions = pd.concat([collect_one(k, r, 'preds_from_last.csv', image_size=IMAGE_SIZE) for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_one(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [41]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

  mean_value = np.nanmean(values)


Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,1.0,1.0,52.31,36.16,41.72,8.55
1,H-UNet-BASE-SWTA-T,0,1.0,1.0,45.6,30.22,44.54,9.98
2,H-UNet-HPCA,0,1.0,1.0,33.72,20.47,,
3,H-UNet-HPCA-T,0,1.0,1.0,34.84,21.3,,


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])
  lower_bound = _a * scale + loc
  upper_bound = _b * scale + loc
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA,1.0,1.0,52.31,,36.16,,41.72,,8.55,
H-UNet-BASE-SWTA-T,1.0,1.0,45.6,,30.22,,44.54,,9.98,
H-UNet-HPCA,1.0,1.0,33.72,,20.47,,,,,
H-UNet-HPCA-T,1.0,1.0,34.84,,21.3,,,,,


<h2>Evaluation - Data regime variations</h2>

<p>Evaluate Hebbian models over the datasets, by varying the quantity of training data; only fine-tuned models are considered since during pre-training we can consider the whole dataset; only best temperature values for SWTA are considered.</p>

In [42]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['0.01', '0.02', '0.05', '0.1', '0.2', '1.0']       # regimes to be considered
IMAGE_SIZE = 480
INV_TEMP_GlaS=20          # to be set accordingly, used by SWTA

runs = {
    'UNet': list(Path(EXP_ROOT + '/experiment=glas/').glob('unet_base')),
    #'UNet-Pseudolabeling': list(Path(EXP_ROOT + '/experiment=glas').glob('unet')),
    #'H-UNet-HPCA-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-hpca_ft')),
    #'H-UNet-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-hpca_t_ft')),
    #'H-UNet-Pseudolabeling-HPCA-FT': list(Path(EXP_ROOT + '/experiment=glas').glob('hunet-hpca_ft')),
    #'H-UNet-Pseudolabeling-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=glas').glob('hunet-hpca_t_ft')),
}

runs_swta = {
    #'H-UNet-SWTA-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta_ft').glob('inv_temp-{}'.format(INV_TEMP_GlaS))),
    #'H-UNet-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta_t_ft').glob('inv_temp-{}'.format(INV_TEMP_GlaS))),
    #'H-UNet-Pseudolabeling-SWTA-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet-swta_ft/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
    #'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet-swta_t_ft/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
}

In [43]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE, ignore_outliers=True) for k, v in runs.items() for r in v], ignore_index=True)
if bool([a for a in runs_swta.values() if a != []]):
    predictions_swta = pd.concat([collect_all_temperature(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE, ignore_outliers=True) for k, v in runs_swta.items() for r in v], ignore_index=True)
    predictions = pd.concat([predictions, predictions_swta])

In [44]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics, confidence_level=0.90)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,UNet,0,1.0,0.01,66.75,51.98,56.58,8.82
1,UNet,0,1.0,0.02,68.93,54.11,51.35,7.46
2,UNet,0,1.0,0.05,69.35,54.02,44.38,6.26
3,UNet,0,1.0,0.1,71.04,55.98,33.19,4.57
4,UNet,0,1.0,0.2,82.31,70.79,21.56,2.75
5,UNet,0,1.0,1.0,88.94,80.68,9.36,1.45
6,UNet,1,1.0,0.01,68.89,54.25,52.27,7.85
7,UNet,1,1.0,0.02,69.97,56.12,50.18,7.8
8,UNet,1,1.0,0.05,72.35,57.84,44.46,5.82
9,UNet,1,1.0,0.1,74.05,60.37,38.34,5.83


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
UNet,1.0,0.01,66.862,1.335991,52.148,1.500145,55.383,2.215164,8.879,0.664581
UNet,1.0,0.02,67.843,1.427688,53.129,1.620535,50.237,2.721977,7.811,0.412108
UNet,1.0,0.05,69.953,2.696365,55.235,3.05425,41.687,4.782975,6.202,0.773673
UNet,1.0,0.1,74.39,1.497551,60.615,1.953559,34.839,3.291246,5.017,0.503963
UNet,1.0,0.2,79.676,1.109602,67.314,1.470966,26.129,2.694007,3.541,0.432587
UNet,1.0,1.0,88.724,0.229545,80.376,0.354158,11.456,1.28313,1.618,0.12269


<h1>PH2 Dataset</h1>

<h2>Evaluation - Searching temperature hyperparameter</h2>

<p>Evaluate Hebbian models belonging to SWTA paradigm to search best temperature values (this value is dataset-specific)</p>

In [45]:
# Unsupervised learning models are evaluate over the whole dataset while fine tuned models over a fraction of data

EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['1.0']
IMAGE_SIZE = 480

runs = {
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-swta')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-swta_t')),
}

In [46]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds_from_last.csv', regimes=REGIMES, image_size=IMAGE_SIZE, ignore_outliers=False) for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_all(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [47]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,1.0,1.0,38.46,24.57,88.37,19.27
1,H-UNet-BASE-SWTA,0,5.0,1.0,48.69,33.11,128.4,17.75
2,H-UNet-BASE-SWTA,0,10.0,1.0,41.5,26.8,128.04,21.26
3,H-UNet-BASE-SWTA,0,20.0,1.0,50.86,35.08,118.39,16.46
4,H-UNet-BASE-SWTA,0,50.0,1.0,51.85,36.25,67.03,10.53
5,H-UNet-BASE-SWTA,0,100.0,1.0,49.54,34.06,128.33,18.33
6,H-UNet-BASE-SWTA-T,0,1.0,1.0,39.18,25.08,142.65,29.84
7,H-UNet-BASE-SWTA-T,0,5.0,1.0,47.29,31.72,128.85,18.26
8,H-UNet-BASE-SWTA-T,0,10.0,1.0,51.08,35.17,118.49,16.77
9,H-UNet-BASE-SWTA-T,0,20.0,1.0,45.4,30.08,128.75,20.37


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])
  lower_bound = _a * scale + loc
  upper_bound = _b * scale + loc
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA,1.0,1.0,38.46,,24.57,,88.37,,19.27,
H-UNet-BASE-SWTA,5.0,1.0,48.69,,33.11,,128.4,,17.75,
H-UNet-BASE-SWTA,10.0,1.0,41.5,,26.8,,128.04,,21.26,
H-UNet-BASE-SWTA,20.0,1.0,50.86,,35.08,,118.39,,16.46,
H-UNet-BASE-SWTA,50.0,1.0,51.85,,36.25,,67.03,,10.53,
H-UNet-BASE-SWTA,100.0,1.0,49.54,,34.06,,128.33,,18.33,
H-UNet-BASE-SWTA-T,1.0,1.0,39.18,,25.08,,142.65,,29.84,
H-UNet-BASE-SWTA-T,5.0,1.0,47.29,,31.72,,128.85,,18.26,
H-UNet-BASE-SWTA-T,10.0,1.0,51.08,,35.17,,118.49,,16.77,
H-UNet-BASE-SWTA-T,20.0,1.0,45.4,,30.08,,128.75,,20.37,


In [48]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['0.01', '0.02', '0.05', '0.1', '0.2']
IMAGE_SIZE = 480

runs = {
    'H-UNet-BASE-SWTA-FT': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-swta_ft')),
    'H-UNet-BASE-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-swta_t_ft')),
    'H-UNet-Pseudolabeling-SWTA-FT': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet-swta_ft')),
    'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet-swta_t_ft')),
}

In [49]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE, ignore_outliers=False) for k, v in runs.items() for r in v], ignore_index=True)

In [50]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA-FT,0,1.0,0.01,67.89,53.01,33.65,6.16
1,H-UNet-BASE-SWTA-FT,0,1.0,0.02,80.03,68.27,20.57,3.14
2,H-UNet-BASE-SWTA-FT,0,1.0,0.05,81.05,69.37,18.56,2.40
3,H-UNet-BASE-SWTA-FT,0,1.0,0.10,81.11,69.60,11.03,1.64
4,H-UNet-BASE-SWTA-FT,0,1.0,0.20,87.10,78.26,14.26,2.13
...,...,...,...,...,...,...,...,...
1165,H-UNet-Pseudolabeling-SWTA-T-FT,9,50.0,0.10,88.61,80.39,14.51,2.10
1166,H-UNet-Pseudolabeling-SWTA-T-FT,9,100.0,0.01,63.90,54.71,75.11,21.44
1167,H-UNet-Pseudolabeling-SWTA-T-FT,9,100.0,0.02,83.68,74.96,19.20,3.39
1168,H-UNet-Pseudolabeling-SWTA-T-FT,9,100.0,0.05,86.64,78.35,16.46,2.48


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA-FT,1.0,0.01,69.108,3.332321,56.034,3.837842,53.191,15.067495,9.968,3.768152
H-UNet-BASE-SWTA-FT,1.0,0.02,79.365,2.132228,67.474,2.546203,20.693,4.358541,3.227,0.843734
H-UNet-BASE-SWTA-FT,1.0,0.05,79.324,1.531145,67.333,2.087435,19.674,3.583420,2.907,0.530591
H-UNet-BASE-SWTA-FT,1.0,0.10,82.191,1.638170,70.886,2.286052,15.291,5.426397,1.957,0.634527
H-UNet-BASE-SWTA-FT,1.0,0.20,84.971,1.217873,74.865,1.543937,13.357,5.084979,1.982,0.844812
...,...,...,...,...,...,...,...,...,...,...
H-UNet-Pseudolabeling-SWTA-T-FT,50.0,0.10,88.333,1.383919,80.335,1.877844,14.662,3.586553,2.069,0.535215
H-UNet-Pseudolabeling-SWTA-T-FT,100.0,0.01,74.314,3.960167,63.876,4.651427,54.657,16.876555,10.160,3.588099
H-UNet-Pseudolabeling-SWTA-T-FT,100.0,0.02,80.598,3.135147,71.400,3.559263,31.792,8.662330,5.411,1.846039
H-UNet-Pseudolabeling-SWTA-T-FT,100.0,0.05,85.798,1.539773,77.046,1.968156,19.493,4.574220,2.859,0.712105


<h2>Evaluation - Hebbian Unsupervised Pretraining</h2>

<p>Evaluate Hebbian models pretrained in an unsupervised way over the datasets; only best temperature values for SWTA are considered.</p>

In [51]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

IMAGE_SIZE = 480
INV_TEMP_PH2=100          # to be set accordingly, used by SWTA

runs = {
    'H-UNet-HPCA': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-hpca/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-HPCA-T': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-hpca_t/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-swta/inv_temp-{}/regime-1.0'.format(INV_TEMP_PH2)).glob('run-*')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-swta_t/inv_temp-{}/regime-1.0'.format(INV_TEMP_PH2)).glob('run-*')),
}

In [52]:
# Collect predictions scanning runs
predictions = pd.concat([collect_one(k, r, 'preds_from_last.csv', image_size=IMAGE_SIZE) for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_one(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [53]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

  mean_value = np.nanmean(values)


Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,100.0,1.0,49.54,34.06,128.33,18.33
1,H-UNet-BASE-SWTA-T,0,100.0,1.0,53.49,37.81,92.96,12.72
2,H-UNet-HPCA,0,1.0,1.0,21.33,12.19,246.21,86.46
3,H-UNet-HPCA-T,0,1.0,1.0,21.98,12.62,,


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])
  lower_bound = _a * scale + loc
  upper_bound = _b * scale + loc
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA,100.0,1.0,49.54,,34.06,,128.33,,18.33,
H-UNet-BASE-SWTA-T,100.0,1.0,53.49,,37.81,,92.96,,12.72,
H-UNet-HPCA,1.0,1.0,21.33,,12.19,,246.21,,86.46,
H-UNet-HPCA-T,1.0,1.0,21.98,,12.62,,,,,


<h2>Evaluation - Data regime variations</h2>

<p>Evaluate Hebbian models over the datasets, by varying the quantity of training data; only fine-tuned models are considered since during pre-training we can consider the whole dataset; only best temperature values for SWTA are considered.</p>

In [54]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['0.01', '0.02', '0.05', '0.1', '0.2', '1.0']       # regimes to be considered
IMAGE_SIZE = 480
INV_TEMP_PH2=100          # to be set accordingly, used by SWTA

runs = {
    'UNet': list(Path(EXP_ROOT + '/experiment=ph2/').glob('unet_base')),
    #'UNet-Pseudolabeling': list(Path(EXP_ROOT + '/experiment=ph2').glob('unet')),
    #'H-UNet-HPCA-FT': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-hpca_ft')),
    #'H-UNet-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-hpca_t_ft')),
    #'H-UNet-Pseudolabeling-HPCA-FT': list(Path(EXP_ROOT + '/experiment=ph2').glob('hunet-hpca_ft')),
    #'H-UNet-Pseudolabeling-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2').glob('hunet-hpca_t_ft')),
}

runs_swta = {
    #'H-UNet-SWTA-FT': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-swta_ft').glob('inv_temp-{}'.format(INV_TEMP_PH2))),
    #'H-UNet-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-swta_t_ft').glob('inv_temp-{}'.format(INV_TEMP_PH2))),
    #'H-UNet-Pseudolabeling-SWTA-FT': list(Path(EXP_ROOT + '/experiment=ph2/hunet-swta_ft/inv_temp-{}'.format(INV_TEMP_PH2)).glob('regime-*')),
    #'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2/hunet-swta_t_ft/inv_temp-{}'.format(INV_TEMP_PH2)).glob('regime-*')),
}

In [55]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE) for k, v in runs.items() for r in v], ignore_index=True)
if bool([a for a in runs_swta.values() if a != []]):
    predictions_swta = pd.concat([collect_all_temperature(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE) for k, v in runs_swta.items() for r in v], ignore_index=True)
    predictions = pd.concat([predictions, predictions_swta])

In [56]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,UNet,0,1.0,0.01,82.1,71.65,19.81,3.04
1,UNet,0,1.0,0.02,77.74,65.58,23.6,4.04
2,UNet,0,1.0,0.05,74.41,62.28,45.76,7.49
3,UNet,0,1.0,0.1,85.51,75.59,7.23,0.99
4,UNet,0,1.0,0.2,79.33,67.1,14.08,1.87
5,UNet,0,1.0,1.0,89.11,81.31,12.49,1.77
6,UNet,1,1.0,0.01,71.05,60.77,53.91,10.17
7,UNet,1,1.0,0.02,67.47,55.72,67.38,12.53
8,UNet,1,1.0,0.05,73.6,60.44,52.82,7.93
9,UNet,1,1.0,0.1,74.55,61.64,47.74,7.36


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
UNet,1.0,0.01,73.764,2.927977,62.455,3.437301,49.3,13.638165,8.763,2.684645
UNet,1.0,0.02,74.482,3.562861,62.332,4.249428,36.842,10.258153,5.926,1.711405
UNet,1.0,0.05,77.064,2.173455,65.028,2.813769,30.054,7.139774,4.676,1.204499
UNet,1.0,0.1,80.056,2.133722,68.528,2.567291,19.371,6.9621,3.029,1.175288
UNet,1.0,0.2,84.177,1.276208,73.532,1.777131,11.118,3.234734,1.44,0.309052
UNet,1.0,1.0,90.982,0.94072,83.943,1.43154,6.673,3.755556,1.016,0.522339


<h1>TREND Dataset</h1>

<h2>Evaluation - Searching temperature hyperparameter</h2>

<p>Evaluate Hebbian models belonging to SWTA paradigm to search best temperature values (this value is dataset-specific)</p>

In [57]:
# Unsupervised learning models are evaluate over the whole dataset while fine tuned models over a fraction of data

EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['1.0']
IMAGE_SIZE = 480

runs = {
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=trend/').glob('hunet_base-swta')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=trend/').glob('hunet_base-swta_t')),
}

In [58]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds_from_last.csv', regimes=REGIMES, image_size=IMAGE_SIZE, ignore_outliers=False) for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_all(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [59]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

  mean_value = np.nanmean(values)


Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,1.0,1.0,6.23,3.22,,
1,H-UNet-BASE-SWTA,0,5.0,1.0,6.37,3.29,,
2,H-UNet-BASE-SWTA,0,10.0,1.0,6.4,3.31,,
3,H-UNet-BASE-SWTA,0,20.0,1.0,6.46,3.34,,
4,H-UNet-BASE-SWTA,0,50.0,1.0,6.53,3.38,,
5,H-UNet-BASE-SWTA,0,100.0,1.0,6.82,3.54,354.26,113.95
6,H-UNet-BASE-SWTA-T,0,1.0,1.0,6.18,3.19,,
7,H-UNet-BASE-SWTA-T,0,5.0,1.0,6.77,3.51,,
8,H-UNet-BASE-SWTA-T,0,10.0,1.0,6.99,3.62,286.18,84.99
9,H-UNet-BASE-SWTA-T,0,20.0,1.0,7.26,3.77,311.13,105.1


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])
  lower_bound = _a * scale + loc
  upper_bound = _b * scale + loc
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA,1.0,1.0,6.23,,3.22,,,,,
H-UNet-BASE-SWTA,5.0,1.0,6.37,,3.29,,,,,
H-UNet-BASE-SWTA,10.0,1.0,6.4,,3.31,,,,,
H-UNet-BASE-SWTA,20.0,1.0,6.46,,3.34,,,,,
H-UNet-BASE-SWTA,50.0,1.0,6.53,,3.38,,,,,
H-UNet-BASE-SWTA,100.0,1.0,6.82,,3.54,,354.26,,113.95,
H-UNet-BASE-SWTA-T,1.0,1.0,6.18,,3.19,,,,,
H-UNet-BASE-SWTA-T,5.0,1.0,6.77,,3.51,,,,,
H-UNet-BASE-SWTA-T,10.0,1.0,6.99,,3.62,,286.18,,84.99,
H-UNet-BASE-SWTA-T,20.0,1.0,7.26,,3.77,,311.13,,105.1,


In [63]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['0.02', '0.05', '0.1', '0.2']
IMAGE_SIZE = 480

runs = {
    'H-UNet-BASE-SWTA-FT': list(Path(EXP_ROOT + '/experiment=trend/').glob('hunet_base-swta_ft')),
    'H-UNet-BASE-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=trend/').glob('hunet_base-swta_t_ft')),
    'H-UNet-Pseudolabeling-SWTA-FT': list(Path(EXP_ROOT + '/experiment=trend/').glob('hunet-swta_ft')),
    'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=trend/').glob('hunet-swta_t_ft')),
}

In [64]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE, ignore_outliers=False) for k, v in runs.items() for r in v], ignore_index=True)

In [65]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA-FT,0,1.0,0.05,24.63,14.10,81.74,13.40
1,H-UNet-BASE-SWTA-FT,0,1.0,0.10,34.27,20.72,21.13,3.17
2,H-UNet-BASE-SWTA-FT,0,1.0,0.20,39.78,24.88,15.90,2.35
3,H-UNet-BASE-SWTA-FT,0,10.0,0.02,32.42,19.39,19.35,2.87
4,H-UNet-BASE-SWTA-FT,0,10.0,0.05,34.62,20.99,16.73,2.53
...,...,...,...,...,...,...,...,...
234,H-UNet-BASE-SWTA-T-FT,9,10.0,0.20,43.87,28.14,20.16,2.66
235,H-UNet-BASE-SWTA-T-FT,9,50.0,0.02,28.10,16.38,31.67,4.69
236,H-UNet-BASE-SWTA-T-FT,9,50.0,0.05,34.57,20.94,19.98,2.87
237,H-UNet-BASE-SWTA-T-FT,9,50.0,0.10,39.63,24.77,19.56,2.71


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA-FT,1.0,0.02,22.235556,1.427593,12.565556,0.910657,108.903333,19.957747,17.794444,3.655553
H-UNet-BASE-SWTA-FT,1.0,0.05,21.06,1.638229,11.835,1.027826,122.058,22.305929,20.638,4.362028
H-UNet-BASE-SWTA-FT,1.0,0.1,32.655,1.300713,19.586,0.917924,37.536,10.272998,5.477,1.507512
H-UNet-BASE-SWTA-FT,1.0,0.2,37.639,0.71708,23.254,0.544556,23.33,2.317725,3.303,0.291909
H-UNet-BASE-SWTA-FT,10.0,0.02,29.252,1.311002,17.186,0.895048,29.279,7.717465,4.167,1.031618
H-UNet-BASE-SWTA-FT,10.0,0.05,31.419,0.916741,18.689,0.651194,27.333,4.743971,3.852,0.624862
H-UNet-BASE-SWTA-FT,10.0,0.1,38.846,0.923733,24.161,0.711037,19.146,2.727046,2.681,0.340381
H-UNet-BASE-SWTA-FT,10.0,0.2,43.75,0.72159,28.062,0.593039,17.754,2.889537,2.437,0.331323
H-UNet-BASE-SWTA-FT,50.0,0.02,31.661,0.712383,18.865,0.504691,24.242,3.421773,3.556,0.498048
H-UNet-BASE-SWTA-FT,50.0,0.05,33.352,0.921083,20.067,0.676739,20.785,3.123256,2.961,0.416011


<h2>Evaluation - Hebbian Unsupervised Pretraining</h2>

<p>Evaluate Hebbian models pretrained in an unsupervised way over the datasets; only best temperature values for SWTA are considered.</p>

In [66]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

IMAGE_SIZE = 480
INV_TEMP_TREND=50          # to be set accordingly, used by SWTA

runs = {
    'H-UNet-HPCA': list(Path(EXP_ROOT + '/experiment=trend/hunet_base-hpca/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-HPCA-T': list(Path(EXP_ROOT + '/experiment=trend/hunet_base-hpca_t/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=trend/hunet_base-swta/inv_temp-{}/regime-1.0'.format(INV_TEMP_TREND)).glob('run-*')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=trend/hunet_base-swta_t/inv_temp-{}/regime-1.0'.format(INV_TEMP_TREND)).glob('run-*')),
}

In [67]:
# Collect predictions scanning runs
predictions = pd.concat([collect_one(k, r, 'preds_from_last.csv', image_size=IMAGE_SIZE) for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_one(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [68]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

  mean_value = np.nanmean(values)


Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,50.0,1.0,6.53,3.38,,
1,H-UNet-BASE-SWTA-T,0,50.0,1.0,7.71,4.01,287.52,91.05
2,H-UNet-HPCA,0,1.0,1.0,3.34,1.7,,
3,H-UNet-HPCA-T,0,1.0,1.0,3.25,1.65,,


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])
  lower_bound = _a * scale + loc
  upper_bound = _b * scale + loc
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA,50.0,1.0,6.53,,3.38,,,,,
H-UNet-BASE-SWTA-T,50.0,1.0,7.71,,4.01,,287.52,,91.05,
H-UNet-HPCA,1.0,1.0,3.34,,1.7,,,,,
H-UNet-HPCA-T,1.0,1.0,3.25,,1.65,,,,,


<h2>Evaluation - Data regime variations</h2>

<p>Evaluate Hebbian models over the datasets, by varying the quantity of training data; only fine-tuned models are considered since during pre-training we can consider the whole dataset; only best temperature values for SWTA are considered.</p>

In [70]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['0.02', '0.05', '0.1', '0.2', '1.0']       # regimes to be considered
IMAGE_SIZE = 480
INV_TEMP_TREND=1          # to be set accordingly, used by SWTA

runs = {
    'UNet': list(Path(EXP_ROOT + '/experiment=trend/').glob('unet_base')),
    #'UNet-Pseudolabeling': list(Path(EXP_ROOT + '/experiment=trend').glob('unet')),
    #'H-UNet-HPCA-FT': list(Path(EXP_ROOT + '/experiment=trend/').glob('hunet_base-hpca_ft')),
    #'H-UNet-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=trend/').glob('hunet_base-hpca_t_ft')),
    #'H-UNet-Pseudolabeling-HPCA-FT': list(Path(EXP_ROOT + '/experiment=trend').glob('hunet-hpca_ft')),
    #'H-UNet-Pseudolabeling-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=trend').glob('hunet-hpca_t_ft')),
}

runs_swta = {
    #'H-UNet-SWTA-FT': list(Path(EXP_ROOT + '/experiment=trend/hunet_base-swta_ft').glob('inv_temp-{}'.format(INV_TEMP_TREND))),
    #'H-UNet-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=trend/hunet_base-swta_t_ft').glob('inv_temp-{}'.format(INV_TEMP_TREND))),
    #'H-UNet-Pseudolabeling-SWTA-FT': list(Path(EXP_ROOT + '/experiment=trend/hunet-swta_ft/inv_temp-{}'.format(INV_TEMP_TREND)).glob('regime-*')),
    #'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=trend/hunet-swta_t_ft/inv_temp-{}'.format(INV_TEMP_TREND)).glob('regime-*')),
}

In [71]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE, ignore_outliers=True) for k, v in runs.items() for r in v], ignore_index=True)
if bool([a for a in runs_swta.values() if a != []]):
    predictions_swta = pd.concat([collect_all_temperature(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE, ignore_outliers=True) for k, v in runs_swta.items() for r in v], ignore_index=True)
    predictions = pd.concat([predictions, predictions_swta])

In [72]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,UNet,0,1.0,0.02,38.59,23.96,16.74,2.42
1,UNet,0,1.0,0.05,38.23,23.71,28.43,4.04
2,UNet,0,1.0,0.1,40.95,25.82,13.66,2.13
3,UNet,0,1.0,0.2,42.9,27.36,14.12,2.17
4,UNet,0,1.0,1.0,50.15,33.5,13.78,1.83
5,UNet,1,1.0,0.02,34.92,21.2,45.17,6.23
6,UNet,1,1.0,0.05,32.14,19.19,46.9,6.76
7,UNet,1,1.0,0.1,38.85,24.18,22.36,3.15
8,UNet,1,1.0,0.2,43.34,27.78,11.95,1.97
9,UNet,1,1.0,1.0,49.58,33.03,14.92,2.03


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
UNet,1.0,0.02,35.353333,1.407369,21.547778,1.038271,42.168889,9.78249,6.084444,1.504435
UNet,1.0,0.05,35.519,1.392105,21.681,1.030263,38.121,4.276814,5.326,0.593534
UNet,1.0,0.1,38.558,1.208072,23.968,0.934096,26.957,6.056765,3.769,0.788407
UNet,1.0,0.2,42.659,0.706153,27.179,0.577785,18.148,2.56662,2.594,0.30004
UNet,1.0,1.0,50.393,0.632294,33.748,0.572224,13.194,1.413341,1.785,0.142164
