In [1]:
import pandas as pd
import math
import numpy as np

%matplotlib inline

from pathlib import Path
from omegaconf import OmegaConf
import scipy.stats as st

In [2]:
# Collect predictions

def check_num_epochs(run):
    if Path(run / "valid_log.csv").is_file():
        cfg = OmegaConf.load(run / '.hydra' / 'config.yaml')
        cfg = OmegaConf.to_container(cfg)
        num_epochs = cfg['optim']['epochs']

        valid_log = pd.read_csv(run / "valid_log.csv", header=None, index_col=0)
        if (len(valid_log.index)-3) < num_epochs:
            print("Wrong number of epochs in run: {}".format(run))
    else:
        print("valid_log.csv not exists in run: {}".format(run))

def check_only_one_tensorboard(run):
    len(list(Path('.').glob('*')))
    if len(list(Path(run / "runs").glob('*'))) > 1:
        print("More than 1 tensorboard folder in run: {}".format(run))

def check_image_size(run, image_size=480):
    cfg = OmegaConf.load(run / '.hydra' / 'config.yaml')
    cfg = OmegaConf.to_container(cfg)
    conf_image_size = cfg['data']['image_size']
    if int(conf_image_size) != image_size:
        print("Different image size in conf. Image size in conf: {}, Image size: {}".format(conf_image_size, image_size))

def collect_one(model_name, run, csv_file, image_size=480):
    check_num_epochs(run)
    check_only_one_tensorboard(run)
    check_image_size(run, image_size=image_size)
    
    cfg = OmegaConf.load(run / '.hydra' / 'config.yaml')
    cfg = OmegaConf.to_container(cfg)

    run_number = cfg['data']['train']['cross_val_bucket_validation_index']
    regime, inv_temp = float(run.parent.parts[-1].rsplit('-', 1)[1]), float(run.parent.parts[-2].rsplit('-', 1)[1])

    csv_path = run / 'test_predictions' / csv_file
    if not csv_path.exists():
        print(f'Skipping not found: {csv_path}')
        return pd.DataFrame()
    
    data = pd.read_csv(csv_path)
    if data.empty:
        print(f'Pred file is empty: {csv_path}')

    data['model'] = model_name
    data['run_number'] = run_number
    data['inv_temp'] = inv_temp
    data['regime'] = regime
    
    return data

def collect_all(model_name, root, csv_file, regimes=['0.01', '0.02', '0.05', '0.1', '0.2', '0.25', '1.0'], image_size=480):
    root = Path(root)

    metrics = []
    for inv_temp in list(root.glob("inv_temp-*")):
        for regime in list(inv_temp.glob("regime-*")):
            if regime.name.rsplit("-", 1)[1] in regimes:
                for run in list(regime.glob("run-*")):
                    metrics.append(collect_one(model_name, run, csv_file, image_size=image_size))
        
    metrics = pd.concat(metrics, ignore_index=True)
    
    return metrics

def collect_all_temperature(model_name, root, csv_file, regimes=['0.01', '0.02', '0.05', '0.1', '0.2', '0.25', '1.0'], image_size=480):
    root = Path(root)

    metrics = []
    for regime in list(root.glob("regime-*")):
        if regime.name.rsplit("-", 1)[1] in regimes:
            for run in list(regime.glob("run-*")):
                metrics.append(collect_one(model_name, run, csv_file, image_size=image_size))
        
    metrics = pd.concat(metrics, ignore_index=True)
    
    return metrics

In [3]:
# Compute metrics for each detected run

def compute_metrics(data, grouping, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance']):
    metrics_dict_names = {
        'Dice': 'dice',
        'Jaccard': 'jaccard',
        'Hausdorff Distance': '95hd',
        'Average Surface Distance': 'asd'
    }

    columns = ['Model', '# Run', 'Inv Temp', 'Regime']
    columns.extend(metric_names)
    metrics = []
    
    data = data.copy().reset_index()
    grouped = data.groupby(grouping)
    
    for model_group, predictions in grouped:
        model_name, run_number, inv_temp, regime = model_group[0], model_group[1], model_group[2], model_group[3]
        
        metric_values = []
        for metric_name in metric_names:
            values = predictions['segm/{}'.format(metrics_dict_names[metric_name])].values
            mean_value = np.nanmean(values)
            if metric_name == 'Dice' or metric_name == 'Jaccard':
                metric_values.append((math.ceil(mean_value*10000)/10000)*100)
            else:
                if not np.isnan(mean_value):
                    metric_values.append(math.ceil(mean_value*100)/100)
                else:
                    metric_values.append(mean_value)
        
        metrics.append([model_name, run_number, inv_temp, regime, *metric_values])
        
    metrics_df = pd.DataFrame(metrics, columns=columns)
    
    return metrics_df

def summarize_metrics(metrics, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance'], confidence_level=0.90):

    def compute_ci(values):
        ci = st.t.interval(confidence_level, len(values)-1, loc=np.mean(values), scale=np.std(values)/(len(values)**0.5))

        return ci

    mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
                
    return mean_metrics

<h1>GlaS Dataset</h1>

<h2>Evaluation - Searching temperature hyperparameter</h2>

<p>Evaluate Hebbian models belonging to SWTA paradigm to search best temperature values (this value is dataset-specific)</p>

In [18]:
# Unsupervised learning models are evaluate over the whole dataset while fine tuned models over a fraction of data

EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['1.0']
IMAGE_SIZE = 480

runs = {
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_t')),
}

In [19]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds_from_last.csv', regimes=REGIMES, image_size=IMAGE_SIZE) for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_all(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [20]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,1.0,1.0,52.31,36.16,41.72,8.55
1,H-UNet-BASE-SWTA,0,5.0,1.0,49.74,33.87,42.4,8.16
2,H-UNet-BASE-SWTA,0,10.0,1.0,45.79,30.24,45.59,8.88
3,H-UNet-BASE-SWTA,0,20.0,1.0,49.81,33.74,38.32,7.2
4,H-UNet-BASE-SWTA,0,50.0,1.0,49.98,34.01,42.11,7.88
5,H-UNet-BASE-SWTA,0,100.0,1.0,49.48,33.46,40.05,7.27
6,H-UNet-BASE-SWTA-T,0,1.0,1.0,45.6,30.22,44.54,9.98
7,H-UNet-BASE-SWTA-T,0,5.0,1.0,48.07,32.26,54.12,10.52
8,H-UNet-BASE-SWTA-T,0,10.0,1.0,49.61,33.6,42.02,8.29
9,H-UNet-BASE-SWTA-T,0,20.0,1.0,50.11,34.03,42.04,7.84


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
  lower_bound = _a * scale + loc
  upper_bound = _b * scale + loc
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Dice,Jaccard,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
H-UNet-BASE-SWTA,1.0,1.0,52.31,,"(nan, nan)",36.16,,"(nan, nan)",41.72,,"(nan, nan)",8.55,,"(nan, nan)"
H-UNet-BASE-SWTA,5.0,1.0,49.74,,"(nan, nan)",33.87,,"(nan, nan)",42.4,,"(nan, nan)",8.16,,"(nan, nan)"
H-UNet-BASE-SWTA,10.0,1.0,45.79,,"(nan, nan)",30.24,,"(nan, nan)",45.59,,"(nan, nan)",8.88,,"(nan, nan)"
H-UNet-BASE-SWTA,20.0,1.0,49.81,,"(nan, nan)",33.74,,"(nan, nan)",38.32,,"(nan, nan)",7.2,,"(nan, nan)"
H-UNet-BASE-SWTA,50.0,1.0,49.98,,"(nan, nan)",34.01,,"(nan, nan)",42.11,,"(nan, nan)",7.88,,"(nan, nan)"
H-UNet-BASE-SWTA,100.0,1.0,49.48,,"(nan, nan)",33.46,,"(nan, nan)",40.05,,"(nan, nan)",7.27,,"(nan, nan)"
H-UNet-BASE-SWTA-T,1.0,1.0,45.6,,"(nan, nan)",30.22,,"(nan, nan)",44.54,,"(nan, nan)",9.98,,"(nan, nan)"
H-UNet-BASE-SWTA-T,5.0,1.0,48.07,,"(nan, nan)",32.26,,"(nan, nan)",54.12,,"(nan, nan)",10.52,,"(nan, nan)"
H-UNet-BASE-SWTA-T,10.0,1.0,49.61,,"(nan, nan)",33.6,,"(nan, nan)",42.02,,"(nan, nan)",8.29,,"(nan, nan)"
H-UNet-BASE-SWTA-T,20.0,1.0,50.11,,"(nan, nan)",34.03,,"(nan, nan)",42.04,,"(nan, nan)",7.84,,"(nan, nan)"


In [21]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['0.01', '0.02', '0.05', '0.1', '0.2']
IMAGE_SIZE = 480

runs = {
    'H-UNet-BASE-SWTA-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_ft')),
    'H-UNet-BASE-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_t_ft')),
    'H-UNet-Pseudolabeling-SWTA-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet-swta_ft')),
    'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet-swta_t_ft')),
}

In [22]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE) for k, v in runs.items() for r in v], ignore_index=True)

In [23]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA-FT,0,1.0,0.2,78.29,65.23,25.62,3.60
1,H-UNet-BASE-SWTA-FT,0,5.0,0.2,79.96,67.77,27.76,3.96
2,H-UNet-BASE-SWTA-FT,0,10.0,0.2,81.71,70.05,23.32,3.45
3,H-UNet-BASE-SWTA-FT,0,20.0,0.2,81.64,69.71,19.47,2.40
4,H-UNet-BASE-SWTA-FT,0,50.0,0.2,81.50,69.69,22.19,3.04
...,...,...,...,...,...,...,...,...
115,H-UNet-BASE-SWTA-T-FT,9,5.0,0.2,80.07,68.01,22.94,3.58
116,H-UNet-BASE-SWTA-T-FT,9,10.0,0.2,79.62,67.11,26.51,3.54
117,H-UNet-BASE-SWTA-T-FT,9,20.0,0.2,80.61,68.63,28.82,3.85
118,H-UNet-BASE-SWTA-T-FT,9,50.0,0.2,79.22,66.88,27.03,3.87


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Dice,Jaccard,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
H-UNet-BASE-SWTA-FT,1.0,0.2,77.89,1.997649,"(76.79142534517122, 78.98857465482875)",64.949,2.580833,"(63.5297125042603, 66.36828749573972)",28.68,3.489909,"(26.76078065864988, 30.599219341350118)",4.102,0.595871,"(3.77431036130307, 4.429689638696931)"
H-UNet-BASE-SWTA-FT,5.0,0.2,79.583,1.966842,"(78.50136690258473, 80.66463309741526)",67.351,2.601664,"(65.92025694139619, 68.78174305860378)",26.779,5.198413,"(23.920216508224204, 29.637783491775803)",3.807,0.716505,"(3.4129698273674687, 4.201030172632532)"
H-UNet-BASE-SWTA-FT,10.0,0.2,79.743,1.593606,"(78.86662227563097, 80.61937772436904)",67.467,2.118732,"(66.30183774464376, 68.63216225535623)",26.086,5.496421,"(23.063331620360085, 29.10866837963992)",3.717,0.72014,"(3.3209708099570974, 4.113029190042903)"
H-UNet-BASE-SWTA-FT,20.0,0.2,80.649,1.280663,"(79.94471977663792, 81.35328022336208)",68.642,1.708799,"(67.70227328943477, 69.58172671056522)",24.566,4.580646,"(22.04694770661536, 27.08505229338464)",3.346,0.760675,"(2.92767896969929, 3.7643210303007097)"
H-UNet-BASE-SWTA-FT,50.0,0.2,80.322,1.563833,"(79.4619953069681, 81.18200469303187)",68.282,2.008149,"(67.17765085441276, 69.38634914558723)",25.608,2.128227,"(24.437615746375602, 26.778384253624406)",3.521,0.304939,"(3.3533037270050436, 3.6886962729949553)"
H-UNet-BASE-SWTA-FT,100.0,0.2,80.417,2.325621,"(79.13806231670831, 81.69593768329169)",68.408,3.034636,"(66.73915065818034, 70.07684934181964)",25.24,4.685384,"(22.663348657538734, 27.81665134246127)",3.545,0.652521,"(3.186156730555101, 3.9038432694448977)"
H-UNet-BASE-SWTA-T-FT,1.0,0.2,78.21,1.914233,"(77.15729828867464, 79.26270171132538)",65.403,2.549466,"(64.00096205510367, 66.80503794489631)",28.162,5.176704,"(25.31515519527983, 31.00884480472017)",4.079,0.975676,"(3.5424428031056947, 4.615557196894306)"
H-UNet-BASE-SWTA-T-FT,5.0,0.2,80.459,1.78652,"(79.47653201208844, 81.4414679879116)",68.528,2.44121,"(67.18549586795967, 69.87050413204031)",25.713,3.538424,"(23.767100671812912, 27.65889932818709)",3.632,0.391828,"(3.4165207131473756, 3.8474792868526246)"
H-UNet-BASE-SWTA-T-FT,10.0,0.2,81.042,1.461079,"(80.23850329481239, 81.84549670518761)",69.245,1.992058,"(68.149499902368, 70.340500097632)",24.659,4.680369,"(22.085106489441475, 27.232893510558522)",3.455,0.584964,"(3.1333082654553177, 3.7766917345446815)"
H-UNet-BASE-SWTA-T-FT,20.0,0.2,81.511,2.092566,"(80.36022709270742, 82.66177290729257)",69.898,2.830684,"(68.34131082693528, 71.45468917306471)",25.0,4.665269,"(22.43441060747837, 27.56558939252163)",3.445,0.671669,"(3.075626606905211, 3.8143733930947876)"


<h2>Evaluation - Hebbian Unsupervised Pretraining</h2>

<p>Evaluate Hebbian models pretrained in an unsupervised way over the datasets; only best temperature values for SWTA are considered.</p>

In [24]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

IMAGE_SIZE = 480
INV_TEMP_GlaS=1          # to be set accordingly, used by SWTA

runs = {
    'H-UNet-HPCA': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-HPCA-T': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca_t/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)).glob('run-*')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta_t/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)).glob('run-*')),
}

In [25]:
# Collect predictions scanning runs
predictions = pd.concat([collect_one(k, r, 'preds_from_last.csv', image_size=IMAGE_SIZE) for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_one(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [26]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

  mean_value = np.nanmean(values)


Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,1.0,1.0,52.31,36.16,41.72,8.55
1,H-UNet-BASE-SWTA-T,0,1.0,1.0,45.6,30.22,44.54,9.98
2,H-UNet-HPCA,0,1.0,1.0,33.72,20.47,,
3,H-UNet-HPCA-T,0,1.0,1.0,34.84,21.3,,


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
  lower_bound = _a * scale + loc
  upper_bound = _b * scale + loc
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Dice,Jaccard,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
H-UNet-BASE-SWTA,1.0,1.0,52.31,,"(nan, nan)",36.16,,"(nan, nan)",41.72,,"(nan, nan)",8.55,,"(nan, nan)"
H-UNet-BASE-SWTA-T,1.0,1.0,45.6,,"(nan, nan)",30.22,,"(nan, nan)",44.54,,"(nan, nan)",9.98,,"(nan, nan)"
H-UNet-HPCA,1.0,1.0,33.72,,"(nan, nan)",20.47,,"(nan, nan)",,,"(nan, nan)",,,"(nan, nan)"
H-UNet-HPCA-T,1.0,1.0,34.84,,"(nan, nan)",21.3,,"(nan, nan)",,,"(nan, nan)",,,"(nan, nan)"


<h2>Evaluation - Data regime variations</h2>

<p>Evaluate Hebbian models over the datasets, by varying the quantity of training data; only fine-tuned models are considered since during pre-training we can consider the whole dataset; only best temperature values for SWTA are considered.</p>

In [27]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['0.01', '0.02', '0.05', '0.1', '0.2']       # regimes to be considered
IMAGE_SIZE = 480
INV_TEMP_GlaS=20          # to be set accordingly, used by SWTA

runs = {
    'UNet': list(Path(EXP_ROOT + '/experiment=glas/').glob('unet_base')),
    'UNet-Pseudolabeling': list(Path(EXP_ROOT + '/experiment=glas').glob('unet')),
    'H-UNet-HPCA-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-hpca_ft')),
    'H-UNet-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-hpca_t_ft')),
    'H-UNet-Pseudolabeling-HPCA-FT': list(Path(EXP_ROOT + '/experiment=glas').glob('hunet-hpca_ft')),
    'H-UNet-Pseudolabeling-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=glas').glob('hunet-hpca_t_ft')),
}

runs_swta = {
    'H-UNet-SWTA-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta_ft').glob('inv_temp-{}'.format(INV_TEMP_GlaS))),
    'H-UNet-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta_t_ft').glob('inv_temp-{}'.format(INV_TEMP_GlaS))),
    'H-UNet-Pseudolabeling-SWTA-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet-swta_ft/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
    'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet-swta_t_ft/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
}

In [28]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE) for k, v in runs.items() for r in v], ignore_index=True)
if bool([a for a in runs_swta.values() if a != []]):
    predictions_swta = pd.concat([collect_all_temperature(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE) for k, v in runs_swta.items() for r in v], ignore_index=True)
    predictions = pd.concat([predictions, predictions_swta])

In [29]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics, confidence_level=0.90)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-HPCA-FT,0,1.0,0.01,67.58,52.73,58.18,8.92
1,H-UNet-HPCA-FT,0,1.0,0.02,67.91,52.48,57.22,7.73
2,H-UNet-HPCA-FT,0,1.0,0.05,68.29,52.91,38.88,5.62
3,H-UNet-HPCA-FT,0,1.0,0.10,75.15,61.12,26.50,4.05
4,H-UNet-HPCA-FT,0,1.0,0.20,78.65,65.73,26.27,3.62
...,...,...,...,...,...,...,...,...
315,UNet-Pseudolabeling,9,1.0,0.01,67.13,52.14,59.80,8.74
316,UNet-Pseudolabeling,9,1.0,0.02,66.40,51.12,61.72,9.10
317,UNet-Pseudolabeling,9,1.0,0.05,65.88,50.97,61.57,9.49
318,UNet-Pseudolabeling,9,1.0,0.10,66.22,50.99,59.10,8.89


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Dice,Jaccard,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
H-UNet-HPCA-FT,1.0,0.01,65.402,2.947944,"(63.7808257647174, 67.02317423528257)",50.573,3.060233,"(48.89007443411823, 52.25592556588175)",54.423,5.868261,"(51.195844327734186, 57.65015567226582)",8.925,1.283686,"(8.219057523300666, 9.630942476699335)"
H-UNet-HPCA-FT,1.0,0.02,66.603,3.171295,"(64.85899741541225, 68.34700258458774)",51.606,3.528186,"(49.665731227319654, 53.546268772680335)",51.731,6.843294,"(47.967640623442215, 55.494359376557775)",8.169,1.065942,"(7.582802412583565, 8.755197587416436)"
H-UNet-HPCA-FT,1.0,0.05,71.776,2.84384,"(70.21207596556175, 73.33992403443824)",57.443,3.66965,"(55.424934889092945, 59.461065110907064)",38.839,6.625334,"(35.19550420936117, 42.48249579063883)",5.823,1.133775,"(5.19949876274309, 6.446501237256911)"
H-UNet-HPCA-FT,1.0,0.1,76.394,2.208781,"(75.17931623990002, 77.60868376009999)",63.169,2.891491,"(61.57887104962521, 64.75912895037479)",32.965,8.215582,"(28.446973258715243, 37.483026741284746)",4.866,1.226804,"(4.19133872949797, 5.54066127050203)"
H-UNet-HPCA-FT,1.0,0.2,79.593,1.425997,"(78.80879587285366, 80.37720412714631)",67.142,1.964755,"(66.06151465829424, 68.22248534170578)",25.142,4.265947,"(22.79601103094982, 27.48798896905018)",3.466,0.615904,"(3.1272935055894875, 3.804706494410512)"
H-UNet-HPCA-T-FT,1.0,0.01,64.482,6.466331,"(60.9259453606129, 68.0380546393871)",49.852,6.638423,"(46.201306188810165, 53.50269381118984)",53.731,6.106298,"(50.372939786542936, 57.08906021345705)",9.0,1.700007,"(8.065108810014472, 9.934891189985528)"
H-UNet-HPCA-T-FT,1.0,0.02,67.391,2.127061,"(66.2212571847747, 68.56074281522528)",52.533,2.284776,"(51.2765243010328, 53.78947569896719)",49.617,5.46305,"(46.61268387301691, 52.62131612698308)",7.781,1.038144,"(7.210089344664822, 8.351910655335175)"
H-UNet-HPCA-T-FT,1.0,0.05,70.056,4.095038,"(67.80399976024543, 72.30800023975459)",55.534,4.717191,"(52.93985671156759, 58.12814328843242)",44.567,8.627029,"(39.82270466322096, 49.31129533677904)",6.712,1.644315,"(5.807735219249708, 7.616264780750289)"
H-UNet-HPCA-T-FT,1.0,0.1,76.17,2.022765,"(75.05761305917726, 77.28238694082275)",62.813,2.617043,"(61.37379958291248, 64.25220041708752)",32.908,7.487005,"(28.790642434427397, 37.025357565572605)",4.749,1.157415,"(4.112498155508077, 5.385501844491923)"
H-UNet-HPCA-T-FT,1.0,0.2,79.974,1.944258,"(78.90478685171041, 81.0432131482896)",67.762,2.697887,"(66.27834040328244, 69.24565959671759)",26.488,5.306534,"(23.569756983942703, 29.406243016057296)",3.581,0.726735,"(3.1813439633848493, 3.9806560366151498)"


<h1>PH2 Dataset</h1>

<h2>Evaluation - Searching temperature hyperparameter</h2>

<p>Evaluate Hebbian models belonging to SWTA paradigm to search best temperature values (this value is dataset-specific)</p>

In [4]:
# Unsupervised learning models are evaluate over the whole dataset while fine tuned models over a fraction of data

EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['1.0']
IMAGE_SIZE = 480

runs = {
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-swta')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-swta_t')),
}

In [5]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds_from_last.csv', regimes=REGIMES, image_size=IMAGE_SIZE) for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_all(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [6]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,1.0,1.0,38.46,24.57,88.37,19.27
1,H-UNet-BASE-SWTA,0,5.0,1.0,48.69,33.11,128.4,17.75
2,H-UNet-BASE-SWTA,0,10.0,1.0,41.5,26.8,128.04,21.26
3,H-UNet-BASE-SWTA,0,20.0,1.0,50.86,35.08,118.39,16.46
4,H-UNet-BASE-SWTA,0,50.0,1.0,51.85,36.25,67.03,10.53
5,H-UNet-BASE-SWTA,0,100.0,1.0,49.54,34.06,128.33,18.33
6,H-UNet-BASE-SWTA-T,0,1.0,1.0,39.18,25.08,142.65,29.84
7,H-UNet-BASE-SWTA-T,0,5.0,1.0,47.29,31.72,128.85,18.26
8,H-UNet-BASE-SWTA-T,0,10.0,1.0,51.08,35.17,118.49,16.77
9,H-UNet-BASE-SWTA-T,0,20.0,1.0,45.4,30.08,128.75,20.37


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
  lower_bound = _a * scale + loc
  upper_bound = _b * scale + loc
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Dice,Jaccard,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
H-UNet-BASE-SWTA,1.0,1.0,38.46,,"(nan, nan)",24.57,,"(nan, nan)",88.37,,"(nan, nan)",19.27,,"(nan, nan)"
H-UNet-BASE-SWTA,5.0,1.0,48.69,,"(nan, nan)",33.11,,"(nan, nan)",128.4,,"(nan, nan)",17.75,,"(nan, nan)"
H-UNet-BASE-SWTA,10.0,1.0,41.5,,"(nan, nan)",26.8,,"(nan, nan)",128.04,,"(nan, nan)",21.26,,"(nan, nan)"
H-UNet-BASE-SWTA,20.0,1.0,50.86,,"(nan, nan)",35.08,,"(nan, nan)",118.39,,"(nan, nan)",16.46,,"(nan, nan)"
H-UNet-BASE-SWTA,50.0,1.0,51.85,,"(nan, nan)",36.25,,"(nan, nan)",67.03,,"(nan, nan)",10.53,,"(nan, nan)"
H-UNet-BASE-SWTA,100.0,1.0,49.54,,"(nan, nan)",34.06,,"(nan, nan)",128.33,,"(nan, nan)",18.33,,"(nan, nan)"
H-UNet-BASE-SWTA-T,1.0,1.0,39.18,,"(nan, nan)",25.08,,"(nan, nan)",142.65,,"(nan, nan)",29.84,,"(nan, nan)"
H-UNet-BASE-SWTA-T,5.0,1.0,47.29,,"(nan, nan)",31.72,,"(nan, nan)",128.85,,"(nan, nan)",18.26,,"(nan, nan)"
H-UNet-BASE-SWTA-T,10.0,1.0,51.08,,"(nan, nan)",35.17,,"(nan, nan)",118.49,,"(nan, nan)",16.77,,"(nan, nan)"
H-UNet-BASE-SWTA-T,20.0,1.0,45.4,,"(nan, nan)",30.08,,"(nan, nan)",128.75,,"(nan, nan)",20.37,,"(nan, nan)"


In [10]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['0.01', '0.02', '0.05', '0.1', '0.2']
IMAGE_SIZE = 480

runs = {
    'H-UNet-BASE-SWTA-FT': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-swta_ft')),
    'H-UNet-BASE-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-swta_t_ft')),
    'H-UNet-Pseudolabeling-SWTA-T': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet-swta_t')),
    'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet-swta_t_ft')),
}

In [None]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE) for k, v in runs.items() for r in v], ignore_index=True)

In [None]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

<h2>Evaluation - Hebbian Unsupervised Pretraining</h2>

<p>Evaluate Hebbian models pretrained in an unsupervised way over the datasets; only best temperature values for SWTA are considered.</p>

In [12]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

IMAGE_SIZE = 480
INV_TEMP_PH2=100          # to be set accordingly, used by SWTA

runs = {
    'H-UNet-HPCA': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-hpca/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-HPCA-T': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-hpca_t/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-swta/inv_temp-{}/regime-1.0'.format(INV_TEMP_PH2)).glob('run-*')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-swta_t/inv_temp-{}/regime-1.0'.format(INV_TEMP_PH2)).glob('run-*')),
}

In [13]:
# Collect predictions scanning runs
predictions = pd.concat([collect_one(k, r, 'preds_from_last.csv', image_size=IMAGE_SIZE) for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_one(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [14]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

  mean_value = np.nanmean(values)


Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,100.0,1.0,49.54,34.06,128.33,18.33
1,H-UNet-BASE-SWTA-T,0,100.0,1.0,53.49,37.81,92.96,12.72
2,H-UNet-HPCA,0,1.0,1.0,21.33,12.19,246.21,86.46
3,H-UNet-HPCA-T,0,1.0,1.0,21.98,12.62,,


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
  lower_bound = _a * scale + loc
  upper_bound = _b * scale + loc
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Dice,Jaccard,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
H-UNet-BASE-SWTA,100.0,1.0,49.54,,"(nan, nan)",34.06,,"(nan, nan)",128.33,,"(nan, nan)",18.33,,"(nan, nan)"
H-UNet-BASE-SWTA-T,100.0,1.0,53.49,,"(nan, nan)",37.81,,"(nan, nan)",92.96,,"(nan, nan)",12.72,,"(nan, nan)"
H-UNet-HPCA,1.0,1.0,21.33,,"(nan, nan)",12.19,,"(nan, nan)",246.21,,"(nan, nan)",86.46,,"(nan, nan)"
H-UNet-HPCA-T,1.0,1.0,21.98,,"(nan, nan)",12.62,,"(nan, nan)",,,"(nan, nan)",,,"(nan, nan)"


<h2>Evaluation - Data regime variations</h2>

<p>Evaluate Hebbian models over the datasets, by varying the quantity of training data; only fine-tuned models are considered since during pre-training we can consider the whole dataset; only best temperature values for SWTA are considered.</p>

In [15]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['0.01', '0.02', '0.05', '0.1', '0.2']       # regimes to be considered
IMAGE_SIZE = 480
INV_TEMP_PH2=20          # to be set accordingly, used by SWTA

runs = {
    'UNet': list(Path(EXP_ROOT + '/experiment=ph2/').glob('unet_base')),
    'UNet-Pseudolabeling': list(Path(EXP_ROOT + '/experiment=ph2').glob('unet')),
    'H-UNet-HPCA-FT': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-hpca_ft')),
    'H-UNet-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-hpca_t_ft')),
    'H-UNet-Pseudolabeling-HPCA-FT': list(Path(EXP_ROOT + '/experiment=ph2').glob('hunet-hpca_ft')),
    'H-UNet-Pseudolabeling-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2').glob('hunet-hpca_t_ft')),
}

runs_swta = {
    'H-UNet-SWTA-FT': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-swta_ft').glob('inv_temp-{}'.format(INV_TEMP_PH2))),
    'H-UNet-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-swta_t_ft').glob('inv_temp-{}'.format(INV_TEMP_PH2))),
    'H-UNet-Pseudolabeling-SWTA-FT': list(Path(EXP_ROOT + '/experiment=ph2/hunet-swta_ft/inv_temp-{}'.format(INV_TEMP_PH2)).glob('regime-*')),
    'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2/hunet-swta_t_ft/inv_temp-{}'.format(INV_TEMP_PH2)).glob('regime-*')),
}

In [16]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE) for k, v in runs.items() for r in v], ignore_index=True)
if bool([a for a in runs_swta.values() if a != []]):
    predictions_swta = pd.concat([collect_all_temperature(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE) for k, v in runs_swta.items() for r in v], ignore_index=True)
    predictions = pd.concat([predictions, predictions_swta])

In [17]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-HPCA-FT,0,1.0,0.01,62.41,49.19,63.41,14.78
1,H-UNet-HPCA-FT,0,1.0,0.02,79.86,68.31,12.28,1.90
2,H-UNet-HPCA-FT,0,1.0,0.05,75.04,61.78,38.92,6.86
3,H-UNet-HPCA-FT,0,1.0,0.10,85.94,76.63,11.83,1.69
4,H-UNet-HPCA-FT,0,1.0,0.20,87.72,78.98,9.28,1.27
...,...,...,...,...,...,...,...,...
295,UNet-Pseudolabeling,9,1.0,0.01,61.79,46.94,69.47,11.93
296,UNet-Pseudolabeling,9,1.0,0.02,69.18,57.83,74.87,11.71
297,UNet-Pseudolabeling,9,1.0,0.05,70.11,57.25,111.20,16.01
298,UNet-Pseudolabeling,9,1.0,0.10,83.72,74.73,20.54,3.75


  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
  mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Dice,Jaccard,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%,Mean,STD,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
H-UNet-HPCA-FT,1.0,0.01,72.647,5.08562,"(69.85024506434183, 75.44375493565818)",60.605,5.39862,"(57.63611613408053, 63.57388386591947)",44.342,14.640274,"(36.29081741213478, 52.39318258786521)",8.257,3.773825,"(6.181645664160064, 10.332354335839934)"
H-UNet-HPCA-FT,1.0,0.02,69.03,10.023519,"(63.51772730190882, 74.54227269809118)",56.269,10.734367,"(50.36580797688917, 62.1721920231108)",50.207,31.880058,"(32.675076277322674, 67.73892372267733)",9.172,5.850682,"(5.954511533070629, 12.389488466929372)"
H-UNet-HPCA-FT,1.0,0.05,77.375,4.425128,"(74.94147198563917, 79.8085280143608)",64.93,5.494179,"(61.908564970573266, 67.95143502942672)",28.723,12.30904,"(21.953841830302558, 35.49215816969743)",4.597,2.069531,"(3.458895039363111, 5.735104960636888)"
H-UNet-HPCA-FT,1.0,0.1,82.307,3.323546,"(80.47926956501774, 84.13473043498223)",71.197,4.672309,"(68.62753919669144, 73.76646080330853)",15.8,7.633443,"(11.60211134820273, 19.997888651797272)",2.231,1.091477,"(1.6307600530744746, 2.831239946925526)"
H-UNet-HPCA-FT,1.0,0.2,86.187,2.229205,"(84.9610844315254, 87.41291556847459)",76.595,3.035253,"(74.92581151100882, 78.26418848899118)",13.323,9.92541,"(7.864680964096131, 18.781319035903863)",1.84,1.250911,"(1.1520817818588562, 2.5279182181411435)"
H-UNet-HPCA-T-FT,1.0,0.01,63.658,9.259468,"(58.56590473492478, 68.75009526507523)",50.358,9.667031,"(45.041772247290815, 55.67422775270919)",59.774,29.560669,"(43.517586530513555, 76.03041346948643)",11.877,7.903759,"(7.530455329721507, 16.22354467027849)"
H-UNet-HPCA-T-FT,1.0,0.02,72.936,6.932417,"(69.12362906318148, 76.7483709368185)",60.677,7.247188,"(56.6915257651701, 64.6624742348299)",38.666,19.640336,"(27.865114046612426, 49.46688595338756)",6.835,4.339714,"(4.44844430202496, 9.221555697975042)"
H-UNet-HPCA-T-FT,1.0,0.05,74.626,2.971592,"(72.9918207910755, 76.2601792089245)",62.247,3.476704,"(60.33504291177728, 64.15895708822272)",31.519,9.194081,"(26.462863493811692, 36.5751365061883)",5.278,1.785079,"(4.296324636715334, 6.259675363284665)"
H-UNet-HPCA-T-FT,1.0,0.1,80.27,4.130297,"(77.99860959646689, 82.54139040353313)",68.556,5.474964,"(65.54513201338793, 71.56686798661207)",17.91,10.878798,"(11.92738044575887, 23.892619554241136)",2.496,1.586353,"(1.6236107893929344, 3.3683892106070665)"
H-UNet-HPCA-T-FT,1.0,0.2,85.736,3.368017,"(83.88381340393674, 87.58818659606324)",75.872,4.731835,"(73.26980363413216, 78.47419636586781)",10.397,7.677937,"(6.1746424726790705, 14.619357527320926)",1.406,1.019348,"(0.8454257928688755, 1.9665742071311247)"
