In [1]:
import pandas as pd
import math
import numpy as np

%matplotlib inline

from pathlib import Path
from omegaconf import OmegaConf

In [2]:
# Collect predictions

def check_num_epochs(run):
    """Print a warning when a run's validation log is missing or looks too short.

    Args:
        run: ``pathlib.Path`` of a run directory. It is expected to contain
            ``valid_log.csv`` and ``.hydra/config.yaml``.

    Returns:
        None. Problems are only reported on stdout; nothing is raised for a
        missing or short log.
    """
    log_path = Path(run / "valid_log.csv")
    if not log_path.is_file():
        print("valid_log.csv not exists in run: {}".format(run))
        return

    # Number of epochs the run was configured for (Hydra config, optim.epochs).
    hydra_cfg = OmegaConf.to_container(OmegaConf.load(run / '.hydra' / 'config.yaml'))
    expected_epochs = hydra_cfg['optim']['epochs']

    # The log is read without a header row, first column as index.
    log_rows = pd.read_csv(log_path, header=None, index_col=0)
    # NOTE(review): 3 rows are discounted before comparing with the configured
    # epochs -- presumably non-epoch rows in the log; confirm against the writer.
    logged_epochs = len(log_rows.index) - 3
    if logged_epochs < expected_epochs:
        print("Wrong number of epochs in run: {}".format(run))

def check_only_one_tensorboard(run):
    """Print a warning when a run's ``runs`` sub-directory has more than one entry.

    Args:
        run: Run directory, as a ``pathlib.Path`` or a string.

    Returns:
        None. The warning is only printed to stdout.
    """
    # Every direct child of <run>/runs is counted (files as well as folders).
    tensorboard_entries = list((Path(run) / "runs").glob('*'))
    if len(tensorboard_entries) > 1:
        print("More than 1 tensorboard folder in run: {}".format(run))

def collect_one(model_name, run, csv_file):
    """Load the test predictions of a single run and tag them with run metadata.

    Args:
        model_name: Label stored in the ``model`` column of the result.
        run: ``pathlib.Path`` of the run directory; its parent directory name
            must end in ``-<regime>`` and its grandparent in ``-<inv_temp>``.
        csv_file: File name looked up inside ``<run>/test_predictions``.

    Returns:
        The predictions as a DataFrame with the extra columns ``model``,
        ``run_number``, ``inv_temp`` and ``regime``; an empty DataFrame (no
        columns) when the CSV file does not exist.
    """
    # Sanity checks only print warnings; they never stop the collection.
    check_num_epochs(run)
    check_only_one_tensorboard(run)

    hydra_cfg = OmegaConf.to_container(OmegaConf.load(run / '.hydra' / 'config.yaml'))
    fold_index = hydra_cfg['data']['train']['cross_val_bucket_validation_index']

    # Numeric suffixes of the enclosing directories: .../inv_temp-<T>/regime-<R>/<run>
    parent_parts = run.parent.parts
    regime = float(parent_parts[-1].rsplit('-', 1)[1])
    inv_temp = float(parent_parts[-2].rsplit('-', 1)[1])

    csv_path = run / 'test_predictions' / csv_file
    if not csv_path.exists():
        print(f'Skipping not found: {csv_path}')
        return pd.DataFrame()

    predictions = pd.read_csv(csv_path)
    if predictions.empty:
        # An empty file is reported but still returned with the metadata columns.
        print(f'Pred file is empty: {csv_path}')

    run_tags = {
        'model': model_name,
        'run_number': fold_index,
        'inv_temp': inv_temp,
        'regime': regime,
    }
    for column, value in run_tags.items():
        predictions[column] = value

    return predictions

def collect_all(model_name, root, csv_file):
    """Collect the predictions of every run found below a model directory.

    The directory tree is expected to look like
    ``<root>/inv_temp-*/regime-*/run-*``.

    Args:
        model_name: Label stored in the ``model`` column of the result.
        root: Model directory (string or ``pathlib.Path``).
        csv_file: Prediction file name, forwarded to ``collect_one``.

    Returns:
        One DataFrame concatenating the predictions of all runs, or an empty
        DataFrame when no run directory is found.
    """
    root = Path(root)

    # Sorted so that the row order does not depend on the file system listing order.
    frames = [
        collect_one(model_name, run, csv_file)
        for inv_temp_dir in sorted(root.glob("inv_temp-*"))
        for regime_dir in sorted(inv_temp_dir.glob("regime-*"))
        for run in sorted(regime_dir.glob("run-*"))
    ]

    # pd.concat raises ValueError on an empty list; mirror collect_one's
    # "nothing found" result instead.
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

def collect_all_regimes(model_name, root, csv_file, regimes=['0.1', '0.2', '0.5', '1.0']):
    """Collect the predictions of every run in one regime directory, if selected.

    Args:
        model_name: Label stored in the ``model`` column of the result.
        root: Regime directory (string or ``pathlib.Path``) whose name ends in
            ``-<regime>`` and which contains ``run-*`` sub-directories.
        csv_file: Prediction file name, forwarded to ``collect_one``.
        regimes: Regime labels (strings, compared with the directory-name
            suffix) to keep. The default list is never modified.

    Returns:
        One DataFrame concatenating the predictions of all runs, or an empty
        DataFrame when the regime is not selected or has no runs.
    """
    root = Path(root)

    # The regime label is the text after the last '-' of the directory name;
    # it is the same for every run, so it is checked once up front.
    _, separator, regime_label = root.name.rpartition("-")
    if not separator or regime_label not in regimes:
        return pd.DataFrame()

    # Sorted so that the row order does not depend on the file system listing order.
    frames = [collect_one(model_name, run, csv_file) for run in sorted(root.glob("run-*"))]

    # pd.concat raises ValueError on an empty list.
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

In [3]:
# Compute metrics for each detected run

def _ceil_to_decimals(value, decimals):
    """Round ``value`` up to ``decimals`` decimal places; NaN/inf are returned unchanged."""
    if not np.isfinite(value):
        # math.ceil cannot convert NaN or infinity to an integer.
        return value
    factor = 10 ** decimals
    return math.ceil(value * factor) / factor


def compute_metrics(data, grouping, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance']):
    """Average the per-sample segmentation metrics of every group of predictions.

    Args:
        data: DataFrame of predictions holding the ``grouping`` columns and one
            ``segm/<key>`` column per requested metric (``segm/dice``,
            ``segm/jaccard``, ``segm/95hd``, ``segm/asd``).
        grouping: Column names to group by. The first four keys are reported as
            model, run number, inverse temperature and regime, in that order.
        metric_names: Display names of the metrics to compute. The default list
            is never modified.

    Returns:
        DataFrame with the columns ``Model``, ``# Run``, ``Inv Temp``,
        ``Regime`` followed by one column per metric, one row per group.
        Dice and Jaccard are NaN-ignoring means rounded *up* to 4 decimals and
        expressed as percentages; the other metrics are rounded *up* to 2
        decimals. A metric whose values are all NaN in a group is reported as NaN.
    """
    metrics_dict_names = {
        'Dice': 'dice',
        'Jaccard': 'jaccard',
        'Hausdorff Distance': '95hd',
        'Average Surface Distance': 'asd'
    }

    columns = ['Model', '# Run', 'Inv Temp', 'Regime', *metric_names]
    rows = []

    # reset_index returns a new frame, so the caller's DataFrame is left untouched.
    grouped = data.reset_index().groupby(grouping)

    for model_group, predictions in grouped:
        model_name, run_number, inv_temp, regime = model_group[:4]

        metric_values = []
        for metric_name in metric_names:
            values = predictions['segm/{}'.format(metrics_dict_names[metric_name])].values
            mean_value = np.nanmean(values)
            if metric_name in ('Dice', 'Jaccard'):
                # Overlap scores are in [0, 1]: keep 4 decimals, then report as a percentage.
                metric_values.append(_ceil_to_decimals(mean_value, 4) * 100)
            else:
                metric_values.append(_ceil_to_decimals(mean_value, 2))

        rows.append([model_name, run_number, inv_temp, regime, *metric_values])

    return pd.DataFrame(rows, columns=columns)

def summarize_metrics(metrics, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance']):
    """Aggregate per-run metrics into mean and standard deviation per configuration.

    Args:
        metrics: DataFrame with the columns ``Model``, ``Inv Temp``, ``Regime``
            and one column per entry of ``metric_names``.
        metric_names: Metric columns to aggregate.

    Returns:
        DataFrame indexed by (Model, Inv Temp, Regime) whose columns are a
        two-level index of (metric, 'mean' | 'std').
    """
    per_configuration = metrics.groupby(['Model', 'Inv Temp', 'Regime'])
    return per_configuration[metric_names].agg(['mean', 'std'])

<h1>Evaluation - Searching temperature hyperparameter</h1>

<p>Evaluate Hebbian models belonging to the SWTA paradigm to find the best temperature value (this value is dataset-specific).</p>

<h2>GlaS Dataset</h2>

In [4]:
# Unsupervised learning models are evaluated over the whole dataset and a fraction of data for backpropagating over BN layers,
# while fine-tuned models are evaluated over a fraction of data

# Root directory of all experiment runs (absolute, machine-specific path).
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation-new/runs"

# Maps a display name to the list of matching model directories under
# experiment=glas; a list is empty when the directory does not exist.
runs = {
    # UNET-based
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet-swta')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet-swta_t')),
}

In [5]:
# Collect predictions scanning runs: one collect_all() call per model directory,
# all results stacked into a single frame.
# Alternative source file (predictions from the last checkpoint), kept for reference:
#predictions = pd.concat([collect_all(k, r, 'preds_from_last.csv') for k, v in runs.items() for r in v], ignore_index=True)
# NOTE: pd.concat raises ValueError if every list in `runs` is empty.
predictions = pd.concat([collect_all(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [6]:
# Computing metrics
# Group keys are the columns collect_one() attaches to every prediction row.
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
# One row per (model, run, inverse temperature, regime) with the mean of each metric.
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

# Mean and std of every metric across runs, per (Model, Inv Temp, Regime).
summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,1.0,0.2,61.25,44.66,41.53,6.24
1,H-UNet-BASE-SWTA,0,5.0,0.2,66.29,50.49,47.84,6.63
2,H-UNet-BASE-SWTA,0,10.0,0.2,67.75,52.02,33.88,4.52
3,H-UNet-BASE-SWTA,0,20.0,0.2,67.52,51.93,43.27,6.08
4,H-UNet-BASE-SWTA,0,50.0,0.2,68.39,52.79,38.51,5.29
5,H-UNet-BASE-SWTA,0,100.0,0.2,68.19,52.35,37.25,4.96
6,H-UNet-BASE-SWTA,1,1.0,0.2,61.83,45.84,46.92,7.65
7,H-UNet-BASE-SWTA,1,5.0,0.2,61.67,45.37,40.8,6.17
8,H-UNet-BASE-SWTA,1,10.0,0.2,63.45,47.62,51.51,8.49
9,H-UNet-BASE-SWTA,1,20.0,0.2,64.6,48.62,37.54,5.75


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA,1.0,0.2,61.686,1.918627,45.368,1.979209,44.224,5.362008,6.72,0.989621
H-UNet-BASE-SWTA,5.0,0.2,63.084,4.519268,47.022,4.84994,44.664,5.215125,6.492,0.916717
H-UNet-BASE-SWTA,10.0,0.2,65.13,3.555643,49.414,3.834538,42.61,6.565771,6.638,1.632703
H-UNet-BASE-SWTA,20.0,0.2,65.868,2.031285,50.142,2.423813,40.632,3.999521,6.014,0.475374
H-UNet-BASE-SWTA,50.0,0.2,67.154,2.043693,51.366,2.337901,35.97,4.032741,4.946,0.606407
H-UNet-BASE-SWTA,100.0,0.2,67.71,1.548967,51.956,1.601571,33.246,9.672649,4.7,1.505473
H-UNet-BASE-SWTA-T,1.0,0.2,63.678,1.476625,47.748,1.797518,52.082,4.773748,7.81,0.834476
H-UNet-BASE-SWTA-T,5.0,0.2,66.172,1.565621,50.212,1.724346,37.722,5.584906,5.252,1.002632
H-UNet-BASE-SWTA-T,10.0,0.2,67.51,1.426131,51.996,1.294712,40.286,12.06013,6.094,2.063645
H-UNet-BASE-SWTA-T,20.0,0.2,68.462,2.173378,52.818,2.460766,29.2,4.747252,4.192,0.690558


In [56]:
# Root directory of all experiment runs (absolute, machine-specific path).
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation-new/runs"

# Fine-tuned SWTA models. Maps a display name to the list of matching model
# directories under experiment=glas; a list is empty when the directory does not exist.
runs = {
    # UNET-based
    'H-UNet-BASE-SWTA-FT-FROM-BEST-DICE': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_ft')),
    'H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_t_ft')),
    # Disabled variants (directories with the `_fromLast` suffix):
    #'H-UNet-BASE-SWTA-FT-FROM-LAST': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_ft_fromLast')),
    #'H-UNet-BASE-SWTA-T-FT-FROM-LAST': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_t_ft_fromLast')),
}

In [57]:
# Collect predictions scanning runs: one collect_all() call per model directory,
# reading `preds.csv` from each run, all results stacked into a single frame.
# NOTE: pd.concat raises ValueError if every list in `runs` is empty.
predictions = pd.concat([collect_all(k, r, 'preds.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [58]:
# Computing metrics
# Group keys are the columns collect_one() attaches to every prediction row.
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
# One row per (model, run, inverse temperature, regime) with the mean of each metric.
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

# Mean and std of every metric across runs, per (Model, Inv Temp, Regime).
# NOTE: std is NaN for a configuration with a single run (sample std needs >= 2 values).
summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA-FT-FROM-BEST-DICE,0,1.0,0.2,72.15,57.26,34.88,4.6
1,H-UNet-BASE-SWTA-FT-FROM-BEST-DICE,0,5.0,0.2,74.83,60.53,27.36,3.81
2,H-UNet-BASE-SWTA-FT-FROM-BEST-DICE,0,100.0,0.2,76.11,62.13,27.99,3.76
3,H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE,0,1.0,0.2,73.37,58.65,24.43,3.29
4,H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE,0,5.0,0.2,73.88,59.25,27.21,3.69
5,H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE,0,10.0,0.2,74.38,59.86,26.4,3.6
6,H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE,0,20.0,0.2,75.69,61.82,29.54,4.28
7,H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE,0,50.0,0.2,79.53,66.81,21.06,2.92
8,H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE,0,100.0,0.2,74.11,59.46,24.73,3.21


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE,1.0,0.2,72.15,,57.26,,34.88,,4.6,
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE,5.0,0.2,74.83,,60.53,,27.36,,3.81,
H-UNet-BASE-SWTA-FT-FROM-BEST-DICE,100.0,0.2,76.11,,62.13,,27.99,,3.76,
H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE,1.0,0.2,73.37,,58.65,,24.43,,3.29,
H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE,5.0,0.2,73.88,,59.25,,27.21,,3.69,
H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE,10.0,0.2,74.38,,59.86,,26.4,,3.6,
H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE,20.0,0.2,75.69,,61.82,,29.54,,4.28,
H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE,50.0,0.2,79.53,,66.81,,21.06,,2.92,
H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE,100.0,0.2,74.11,,59.46,,24.73,,3.21,


<h1>Evaluation - Hebbian Unsupervised Pretraining</h1>

<p>Evaluate Hebbian models pretrained in an unsupervised way over the datasets; only best temperature values for SWTA are considered.</p>

In [7]:
# Root directory of all experiment runs (absolute, machine-specific path).
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation-new/runs"

INV_TEMP_GlaS=100          # inverse temperature selecting the SWTA run directories (inv_temp-<value>); set per dataset
REGIMES=['0.2']            # regime labels kept by collect_all_regimes()

# Maps a display name to the list of `regime-*` directories of that model.
# HPCA entries always read `inv_temp-1`; SWTA entries read `inv_temp-<INV_TEMP_GlaS>`.
# A list is empty when the corresponding directory does not exist.
runs = {
    # UNET-based
    'H-UNet-HPCA': list(Path(EXP_ROOT + '/experiment=glas/hunet-hpca/inv_temp-1').glob('regime-*')),
    'H-UNet-HPCA-T': list(Path(EXP_ROOT + '/experiment=glas/hunet-hpca_t/inv_temp-1').glob('regime-*')),
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=glas/hunet-swta/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=glas/hunet-swta_t/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
}

In [8]:
# Collect predictions scanning runs: one collect_all_regimes() call per regime
# directory, restricted to REGIMES, all results stacked into a single frame.
# Alternative source file (predictions from the last checkpoint), kept for reference:
#predictions = pd.concat([collect_all_regimes(k, r, 'preds_from_last.csv', regimes=REGIMES) for k, v in runs.items() for r in v], ignore_index=True)
# NOTE: pd.concat raises ValueError if every list in `runs` is empty.
predictions = pd.concat([collect_all_regimes(k, r, 'preds_from_best_dice.csv', regimes=REGIMES) for k, v in runs.items() for r in v], ignore_index=True)

In [9]:
# Computing metrics
# Group keys are the columns collect_one() attaches to every prediction row.
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
# One row per (model, run, inverse temperature, regime) with the mean of each metric.
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

# Mean and std of every metric across runs, per (Model, Inv Temp, Regime).
summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,100.0,0.2,68.19,52.35,37.25,4.96
1,H-UNet-BASE-SWTA,1,100.0,0.2,67.04,51.06,28.83,4.13
2,H-UNet-BASE-SWTA,2,100.0,0.2,65.68,50.03,48.0,7.14
3,H-UNet-BASE-SWTA,3,100.0,0.2,67.74,52.02,29.09,4.11
4,H-UNet-BASE-SWTA,4,100.0,0.2,69.9,54.32,23.06,3.16
5,H-UNet-BASE-SWTA-T,0,100.0,0.2,69.49,53.79,27.79,3.65
6,H-UNet-BASE-SWTA-T,1,100.0,0.2,69.13,53.79,37.13,5.31
7,H-UNet-BASE-SWTA-T,2,100.0,0.2,66.53,50.39,25.65,3.47
8,H-UNet-BASE-SWTA-T,3,100.0,0.2,70.45,55.32,34.51,5.02
9,H-UNet-BASE-SWTA-T,4,100.0,0.2,70.53,55.34,28.99,4.1


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA,100.0,0.2,67.71,1.548967,51.956,1.601571,33.246,9.672649,4.7,1.505473
H-UNet-BASE-SWTA-T,100.0,0.2,69.226,1.623724,53.726,2.017605,30.814,4.813801,4.31,0.81997
H-UNet-HPCA,1.0,0.2,48.79,2.747162,32.92,2.664977,61.252,19.444879,10.098,2.838727
H-UNet-HPCA-T,1.0,0.2,48.326,1.664926,32.39,1.535855,51.83,15.945383,8.794,2.214742


<h1>Evaluation - Data regime variations</h1>

<p>Evaluate Hebbian models over the datasets while varying the quantity of training data. Only fine-tuned models are considered, since during pre-training we can consider the whole dataset. Only the best temperature values for SWTA are considered.</p>

<h2>GlaS Dataset</h2>

In [55]:
# Root directory of all experiment runs (absolute, machine-specific path).
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation-new/runs"

REGIMES = ['0.2']       # regimes to be considered
INV_TEMP_GlaS=50          # inverse temperature selecting the SWTA run directories (inv_temp-<value>); set per dataset

# Maps a display name to the list of `regime-*` directories of that model.
# HPCA entries always read `inv_temp-1`; SWTA entries read `inv_temp-<INV_TEMP_GlaS>`.
# NOTE(review): a model whose `inv_temp-<INV_TEMP_GlaS>` directory does not exist
# gets an empty list here and is silently absent from the results below.
# Lines starting with `#` / `##` are disabled model variants kept for reference.
runs = {
    # UNET-based
    ##'UNet': list(Path(EXP_ROOT + '/experiment=glas/unet_base/inv_temp-1').glob('regime-*')),
    #'UNet-256': list(Path(EXP_ROOT + '/experiment=glas/unet_base-256/inv_temp-1').glob('regime-*')),
    #'UNet-Pseudolabeling': list(Path(EXP_ROOT + '/experiment=glas/unet/inv_temp-1').glob('regime-*')),
    'H-UNet-HPCA-FT-FROM-BEST-DICE': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca_ft/inv_temp-1').glob('regime-*')),
    'H-UNet-HPCA-T-FT-FROM-BEST-DICE': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca_t_ft/inv_temp-1').glob('regime-*')),
    ##'H-UNet-HPCA-FT-FROM-LAST': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca_ft_fromLast/inv_temp-1').glob('regime-*')),
    ##'H-UNet-HPCA-T-FT-FROM-LAST': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca_t_ft_fromLast/inv_temp-1').glob('regime-*')),
    #'H-UNet-Pseudolabeling-HPCA-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet-hpca_ft/inv_temp-1').glob('regime-*')),
    #'H-UNet-Pseudolabeling-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet-hpca_t_ft/inv_temp-1').glob('regime-*')),
    'H-UNet-SWTA-FT-FROM-BEST-DICE': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta_ft/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
    'H-UNet-SWTA-T-FT-FROM-BEST-DICE': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta_t_ft/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
    ##'H-UNet-SWTA-FT-FROM-LAST': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta_ft_fromLast/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
    ##'H-UNet-SWTA-T-FT-FROM-LAST': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta_t_ft_fromLast/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
    #'H-UNet-Pseudolabeling-SWTA-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet-swta_ft/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
    #'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet-swta_t_ft/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
}

In [29]:
# Collect predictions scanning runs: one collect_all_regimes() call per regime
# directory, restricted to REGIMES, reading `preds.csv` from each run.
# Runs with a short validation log or a missing prediction file are reported on stdout.
# NOTE: pd.concat raises ValueError if every list in `runs` is empty.
predictions = pd.concat([collect_all_regimes(k, r, 'preds.csv', regimes=REGIMES) for k, v in runs.items() for r in v], ignore_index=True)

Wrong number of epochs in run: /home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation-new/runs/experiment=glas/hunet_base-hpca_ft/inv_temp-1/regime-0.2/run-1
Skipping not found: /home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation-new/runs/experiment=glas/hunet_base-hpca_ft/inv_temp-1/regime-0.2/run-1/test_predictions/preds.csv


In [30]:
# Computing metrics
# Group keys are the columns collect_one() attaches to every prediction row.
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
# One row per (model, run, inverse temperature, regime) with the mean of each metric.
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

# Mean and std of every metric across runs, per (Model, Inv Temp, Regime).
# NOTE: std is NaN for a configuration with a single run (sample std needs >= 2 values).
summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-HPCA-FT-FROM-BEST-DICE,0,1.0,0.2,73.89,59.55,29.2,4.14
1,H-UNet-HPCA-T-FT-FROM-BEST-DICE,0,1.0,0.2,75.59,61.78,32.08,4.7
2,H-UNet-SWTA-T-FT-FROM-BEST-DICE,0,50.0,0.2,79.53,66.81,21.06,2.92


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-HPCA-FT-FROM-BEST-DICE,1.0,0.2,73.89,,59.55,,29.2,,4.14,
H-UNet-HPCA-T-FT-FROM-BEST-DICE,1.0,0.2,75.59,,61.78,,32.08,,4.7,
H-UNet-SWTA-T-FT-FROM-BEST-DICE,50.0,0.2,79.53,,66.81,,21.06,,2.92,
