In [1]:
import pandas as pd
import math
import numpy as np

%matplotlib inline

from pathlib import Path
from omegaconf import OmegaConf

In [2]:
# Collect predictions

def check_num_epochs(run):
    """Print a warning when the validation log of a run is missing or too short.

    Args:
        run: path object of the run folder (must support the ``/`` operator);
            it is expected to contain ``valid_log.csv`` and ``.hydra/config.yaml``.

    Returns:
        None. Problems are only reported on standard output.
    """
    log_file = Path(run / "valid_log.csv")
    if not log_file.is_file():
        print("valid_log.csv not exists in run: {}".format(run))
        return

    # Number of epochs the run was configured to train for.
    config = OmegaConf.to_container(OmegaConf.load(run / '.hydra' / 'config.yaml'))
    expected_epochs = config['optim']['epochs']

    logged = pd.read_csv(log_file, header=None, index_col=0)
    # NOTE(review): three rows of the log are not counted as epochs here —
    # presumably header/summary rows; confirm against the code writing valid_log.csv.
    logged_epochs = len(logged.index) - 3
    if logged_epochs < expected_epochs:
        print("Wrong number of epochs in run: {}".format(run))

def check_only_one_tensorboard(run):
    """Print a warning when a run holds more than one TensorBoard log folder.

    Args:
        run: path object of the run folder (must support the ``/`` operator);
            the entries of its ``runs`` sub-folder are counted.

    Returns:
        None. A message is printed when ``run / "runs"`` has more than one entry.
    """
    # Only the run's own "runs" folder is inspected; the former listing of the
    # current working directory was discarded and has been removed.
    tensorboard_entries = list(Path(run / "runs").glob('*'))
    if len(tensorboard_entries) > 1:
        print("More than 1 tensorboard folder in run: {}".format(run))

def collect_one(model_name, run, csv_file):
    """Load the test predictions of one run and tag them with the run's settings.

    Args:
        model_name: label stored in the ``model`` column of the result.
        run: path object of the run folder; its parent folder names are parsed
            as ``.../inv_temp-<value>/regime-<value>/<run>``.
        csv_file: file name looked up inside ``run / 'test_predictions'``.

    Returns:
        The predictions read from the CSV file with the extra columns ``model``,
        ``run_number``, ``inv_temp`` and ``regime``; an empty DataFrame when the
        CSV file does not exist.
    """
    # Sanity checks only print warnings; they never stop the collection.
    check_num_epochs(run)
    check_only_one_tensorboard(run)

    config = OmegaConf.to_container(OmegaConf.load(run / '.hydra' / 'config.yaml'))
    fold_index = config['data']['train']['cross_val_bucket_validation_index']

    # Both settings are the text after the last '-' of the enclosing folder names.
    regime_dir, inv_temp_dir = run.parent.parts[-1], run.parent.parts[-2]
    regime_value = float(regime_dir.rsplit('-', 1)[1])
    inv_temp_value = float(inv_temp_dir.rsplit('-', 1)[1])

    predictions_file = run / 'test_predictions' / csv_file
    if not predictions_file.exists():
        print(f'Skipping not found: {predictions_file}')
        return pd.DataFrame()

    frame = pd.read_csv(predictions_file)
    if frame.empty:
        # An empty file is reported but still returned (with the tag columns).
        print(f'Pred file is empty: {predictions_file}')

    return frame.assign(
        model=model_name,
        run_number=fold_index,
        inv_temp=inv_temp_value,
        regime=regime_value,
    )

def collect_all(model_name, root, csv_file):
    """Collect the predictions of every run found below a model folder.

    The folder is scanned as ``root/inv_temp-*/regime-*/run-*`` and each run is
    loaded with ``collect_one``.

    Args:
        model_name: label stored in the ``model`` column of the result.
        root: model folder (string or path object).
        csv_file: name of the predictions file inside each run.

    Returns:
        One DataFrame with the predictions of all runs (index renumbered), or an
        empty DataFrame when no run folder matches the expected layout.
    """
    root = Path(root)

    # Sorted traversal keeps the row order independent of the file system.
    frames = [
        collect_one(model_name, run, csv_file)
        for inv_temp_dir in sorted(root.glob("inv_temp-*"))
        for regime_dir in sorted(inv_temp_dir.glob("regime-*"))
        for run in sorted(regime_dir.glob("run-*"))
    ]

    if not frames:
        # pd.concat raises "No objects to concatenate" on an empty list.
        print(f'No runs found under: {root}')
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

def collect_all_regimes(model_name, root, csv_file):
    """Collect the predictions of every ``run-*`` folder directly inside ``root``.

    Args:
        model_name: label stored in the ``model`` column of the result.
        root: folder containing the run folders (string or path object).
        csv_file: name of the predictions file inside each run.

    Returns:
        One DataFrame with the predictions of all runs (index renumbered), or an
        empty DataFrame when ``root`` contains no ``run-*`` folder.
    """
    root = Path(root)

    # Sorted traversal keeps the row order independent of the file system.
    frames = [collect_one(model_name, run, csv_file) for run in sorted(root.glob("run-*"))]

    if not frames:
        # pd.concat raises "No objects to concatenate" on an empty list.
        print(f'No runs found under: {root}')
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

In [3]:
# Compute metrics for each detected run

def compute_metrics(data, grouping, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance']):
    """Average the per-sample segmentation metrics of every group of predictions.

    Args:
        data: DataFrame holding the columns listed in ``grouping`` and one
            ``segm/<key>`` column per requested metric (``segm/dice``,
            ``segm/jaccard``, ``segm/95hd``, ``segm/asd``).
        grouping: four column names; the group keys are reported, in this order,
            as ``Model``, ``# Run``, ``Inv Temp`` and ``Regime``.
        metric_names: display names of the metrics to compute. The default list
            is only read, never modified.

    Returns:
        DataFrame with one row per group and the columns ``Model``, ``# Run``,
        ``Inv Temp``, ``Regime`` followed by ``metric_names``. Dice and Jaccard
        are percentages rounded up to two decimals, the other metrics are rounded
        up to two decimals. A metric without any non-NaN sample in a group is NaN.

    Raises:
        KeyError: when a metric name is unknown or its column is missing in ``data``.
    """
    metrics_dict_names = {
        'Dice': 'dice',
        'Jaccard': 'jaccard',
        'Hausdorff Distance': '95hd',
        'Average Surface Distance': 'asd'
    }

    columns = ['Model', '# Run', 'Inv Temp', 'Regime']
    columns.extend(metric_names)
    metrics = []

    # reset_index already returns a new frame, so the caller's data is untouched.
    data = data.reset_index()
    grouped = data.groupby(grouping)

    for model_group, predictions in grouped:
        model_name, run_number, inv_temp, regime = model_group[0], model_group[1], model_group[2], model_group[3]

        metric_values = []
        for metric_name in metric_names:
            values = predictions['segm/{}'.format(metrics_dict_names[metric_name])].values

            # Without a single valid sample there is nothing to average:
            # np.nanmean would emit a RuntimeWarning and math.ceil(nan) raises.
            if pd.isna(values).all():
                metric_values.append(np.nan)
                continue

            mean_value = np.nanmean(values)
            if metric_name in ('Dice', 'Jaccard'):
                # Overlap scores: round up to 4 decimals, then express as percentage.
                metric_values.append((math.ceil(mean_value*10000)/10000)*100)
            else:
                # Distances: round up to 2 decimals.
                metric_values.append(math.ceil(mean_value*100)/100)

        metrics.append([model_name, run_number, inv_temp, regime, *metric_values])

    metrics_df = pd.DataFrame(metrics, columns=columns)

    return metrics_df

def summarize_metrics(metrics, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance']):
    """Aggregate the per-run metrics into mean and standard deviation.

    Args:
        metrics: DataFrame with the columns ``Model``, ``Inv Temp``, ``Regime``
            and one column per entry of ``metric_names``.
        metric_names: metric columns to aggregate.

    Returns:
        DataFrame indexed by (Model, Inv Temp, Regime) whose columns are the
        (metric, 'mean') and (metric, 'std') pairs.
    """
    per_configuration = metrics.groupby(['Model', 'Inv Temp', 'Regime'])
    return per_configuration[metric_names].agg(['mean', 'std'])

<h1>Evaluation - Searching temperature hyperparameter</h1>

<p>Evaluate the Hebbian models of the SWTA paradigm to find the best temperature value (this value is dataset-specific).</p>

<h2>GlaS Dataset</h2>

In [11]:
# Root folder of the experiment outputs (enable exactly one of the alternatives).
#EXP_ROOT = "./runs"
#EXP_ROOT = "/mnt/Results/hebbian-medical-image-segmentation/runs"
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs"

# Display name -> model folder inside 'experiment=glas'.
# NOTE(review): 'H-UNet-BASE-SWTA-FT' used to be listed twice with the same
# folder; a dict keeps one entry per key, so dropping the repetition does not
# change the result. If the second entry was meant to be a different model
# (e.g. a Pseudolabeling SWTA-FT variant), add it here — confirm.
glas_swta_models = {
    'H-UNet-BASE-SWTA': 'hunet_base-swta',
    'H-UNet-BASE-SWTA-FT': 'hunet_base-swta_ft',
    'H-UNet-BASE-SWTA-T': 'hunet_base-swta_t',
    'H-UNet-BASE-SWTA-T-FT': 'hunet_base-swta_t_ft',
    'H-UNet-Pseudolabeling-SWTA-T': 'hunet-swta_t',
    'H-UNet-Pseudolabeling-SWTA-T-FT': 'hunet-swta_t_ft',
}

# Each value is the list of matching model folders (empty when the folder is absent).
runs = {
    label: list(Path(EXP_ROOT + '/experiment=glas/').glob(folder))
    for label, folder in glas_swta_models.items()
}

In [12]:
# Gather the predictions of every run below each model folder into one table
predictions = pd.concat(
    [
        collect_all(model_label, model_root, 'preds.csv')
        for model_label, model_roots in runs.items()
        for model_root in model_roots
    ],
    ignore_index=True,
)

In [8]:
# Per-run metrics: one row for each (model, run, inverse temperature, regime)
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, grouping=model_grouper)
display(metrics)

# Mean and standard deviation over the runs of each configuration
summary = summarize_metrics(metrics)
display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,1.0,1.0,47.9,32.04,44.52,9.46
1,H-UNet-BASE-SWTA,0,5.0,1.0,47.71,31.81,43.38,8.13
2,H-UNet-BASE-SWTA,0,10.0,1.0,48.03,32.08,43.16,8.01
3,H-UNet-BASE-SWTA,0,20.0,1.0,48.16,32.26,42.35,7.86
4,H-UNet-BASE-SWTA,0,50.0,1.0,47.03,31.28,42.69,8.34
5,H-UNet-BASE-SWTA,0,100.0,1.0,47.14,31.42,42.04,8.07
6,H-UNet-BASE-SWTA-FT,0,1.0,1.0,87.32,78.13,13.17,1.66
7,H-UNet-BASE-SWTA-FT,0,5.0,1.0,87.68,78.8,11.75,1.76
8,H-UNet-BASE-SWTA-FT,0,10.0,1.0,88.23,79.66,9.1,1.64
9,H-UNet-BASE-SWTA-FT,0,20.0,1.0,88.14,79.54,12.74,1.67


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA,1.0,1.0,47.9,,32.04,,44.52,,9.46,
H-UNet-BASE-SWTA,5.0,1.0,47.71,,31.81,,43.38,,8.13,
H-UNet-BASE-SWTA,10.0,1.0,48.03,,32.08,,43.16,,8.01,
H-UNet-BASE-SWTA,20.0,1.0,48.16,,32.26,,42.35,,7.86,
H-UNet-BASE-SWTA,50.0,1.0,47.03,,31.28,,42.69,,8.34,
H-UNet-BASE-SWTA,100.0,1.0,47.14,,31.42,,42.04,,8.07,
H-UNet-BASE-SWTA-FT,1.0,1.0,87.32,,78.13,,13.17,,1.66,
H-UNet-BASE-SWTA-FT,5.0,1.0,87.68,,78.8,,11.75,,1.76,
H-UNet-BASE-SWTA-FT,10.0,1.0,88.23,,79.66,,9.1,,1.64,
H-UNet-BASE-SWTA-FT,20.0,1.0,88.14,,79.54,,12.74,,1.67,


<h1>Evaluation - Hebbian Models</h1>

<p>Evaluate Hebbian models over the datasets; only regime=1.0 and best temperature values for SWTA are considered.</p>

<h2>GlaS Dataset</h2>

In [19]:
# Root folder of the experiment outputs (enable exactly one of the alternatives).
#EXP_ROOT = "./runs"
EXP_ROOT = "/mnt/Workspace/hebbian-skin-cancer-segmentation/runs"
#EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs"

INV_TEMP_GlaS=10          # to be set accordingly, used by SWTA

# Display name -> (model folder inside 'experiment=glas', inverse temperature).
# Commented entries are alternative models that are currently not evaluated.
glas_models = {
    # UNET-based
    'UNet': ('unet_base', 1),
    #'UNet-256': ('unet_base-256', 1),
    'UNet-Pseudolabeling': ('unet', 1),
    'H-UNet-HPCA': ('hunet_base-hpca', 1),
    'H-UNet-HPCA-T': ('hunet_base-hpca_t', 1),
    'H-UNet-HPCA-FT': ('hunet_base-hpca_ft', 1),
    'H-UNet-HPCA-T-FT': ('hunet_base-hpca_t_ft', 1),
    'H-UNet-Pseudolabeling-HPCA-FT': ('hunet-hpca_ft', 1),
    'H-UNet-Pseudolabeling-HPCA-T-FT': ('hunet-hpca_t_ft', 1),
    #'H-UNet-SWTA': ('hunet-swta', INV_TEMP_GlaS),
    #'H-UNet-SWTA-FT': ('hunet-swta_ft', INV_TEMP_GlaS),
    #'H-UNet-SWTA-T': ('hunet-swta_t', INV_TEMP_GlaS),
    #'H-UNet-SWTA-T-FT': ('hunet-swta_t_ft', INV_TEMP_GlaS),
    # FCN32s-based
    #'FCN32s': ('fcn32s_base', 1),
    #'FCN32s-256': ('fcn32s_base-256', 1),
    #'FCN32s-Pseudolabeling': ('fcn32s', 1),
    #'H-FCN32s-SWTA': ('hfcn32s-swta', INV_TEMP_GlaS),
    #'H-FCN32s-SWTA-FT': ('hfcn32s-swta_ft', INV_TEMP_GlaS),
    #'H-FCN32s-SWTA-T': ('hfcn32s-swta_t', INV_TEMP_GlaS),
    #'H-FCN32s-SWTA-T-FT': ('hfcn32s-swta_t_ft', INV_TEMP_GlaS),
    #'H-FCN32s-HPCA': ('hfcn32s-hpca', 1),
    #'H-FCN32s-HPCA-FT': ('hfcn32s-hpca_ft', 1),
    #'H-FCN32s-HPCA-T': ('hfcn32s-hpca_t', 1),
    #'H-FCN32s-HPCA-T-FT': ('hfcn32s-hpca_t_ft', 1),
}

# Only the full-data regime (regime-1.0) is scanned; each value lists the run folders.
runs = {
    label: list(
        Path(EXP_ROOT + '/experiment=glas/{}/inv_temp-{}/regime-1.0'.format(folder, inv_temp)).glob('run*')
    )
    for label, (folder, inv_temp) in glas_models.items()
}

In [20]:
# Gather the predictions of every listed run folder into one table
predictions = pd.concat(
    [
        collect_one(model_label, run_dir, 'preds.csv')
        for model_label, run_dirs in runs.items()
        for run_dir in run_dirs
    ],
    ignore_index=True,
)

In [21]:
# Per-run metrics: one row for each (model, run, inverse temperature, regime)
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, grouping=model_grouper)
display(metrics)

# Mean and standard deviation over the runs of each configuration
summary = summarize_metrics(metrics)
display(summary)

  mean_value = np.nanmean(values)


Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-HPCA,0,1.0,1.0,33.72,20.48,366.27,120.88
1,H-UNet-HPCA-FT,0,1.0,1.0,87.94,79.11,14.74,1.91
2,H-UNet-HPCA-T,0,1.0,1.0,33.71,20.46,,
3,H-UNet-HPCA-T-FT,0,1.0,1.0,88.53,80.09,11.9,1.72
4,H-UNet-Pseudolabeling-HPCA-FT,0,1.0,1.0,87.74,78.83,15.28,1.92
5,H-UNet-Pseudolabeling-HPCA-T-FT,0,1.0,1.0,89.01,80.82,10.43,1.55


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-HPCA,1.0,1.0,33.72,,20.48,,366.27,,120.88,
H-UNet-HPCA-FT,1.0,1.0,87.94,,79.11,,14.74,,1.91,
H-UNet-HPCA-T,1.0,1.0,33.71,,20.46,,,,,
H-UNet-HPCA-T-FT,1.0,1.0,88.53,,80.09,,11.9,,1.72,
H-UNet-Pseudolabeling-HPCA-FT,1.0,1.0,87.74,,78.83,,15.28,,1.92,
H-UNet-Pseudolabeling-HPCA-T-FT,1.0,1.0,89.01,,80.82,,10.43,,1.55,


<h1>Evaluation - Data regime variations</h1>

<p>Evaluate the Hebbian models on each dataset while varying the amount of training data. Only fine-tuned models are considered, since pre-training can use the whole dataset, and only the best temperature value for SWTA is considered.</p>

<h2>GlaS Dataset</h2>

In [22]:
# Root folder of the experiment outputs (enable exactly one of the alternatives).
#EXP_ROOT = "./runs"
EXP_ROOT = "/mnt/Workspace/hebbian-skin-cancer-segmentation/runs"
#EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs"

INV_TEMP_GlaS=10          # to be set accordingly, used by SWTA

# Display name -> model folder inside 'experiment=glas'.
# Commented entries are alternative models that are currently not evaluated.
glas_regime_models = {
    # UNET-based
    'UNet': 'unet_base',
    #'UNet-256': 'unet_base-256',
    'UNet-Pseudolabeling': 'unet',
    'H-UNet-HPCA-FT': 'hunet_base-hpca_ft',
    'H-UNet-HPCA-T-FT': 'hunet_base-hpca_t_ft',
    'H-UNet-Pseudolabeling-HPCA-FT': 'hunet-hpca_ft',
    'H-UNet-Pseudolabeling-HPCA-T-FT': 'hunet-hpca_t_ft',
}

# Each value lists the regime folders found below the model's 'inv_temp-1' folder.
runs = {
    label: list(Path(EXP_ROOT + '/experiment=glas/' + folder + '/inv_temp-1').glob('regime-*'))
    for label, folder in glas_regime_models.items()
}

In [23]:
# Gather the predictions of every run below each regime folder into one table
predictions = pd.concat(
    [
        collect_all_regimes(model_label, regime_dir, 'preds.csv')
        for model_label, regime_dirs in runs.items()
        for regime_dir in regime_dirs
    ],
    ignore_index=True,
)

In [24]:
# Per-run metrics: one row for each (model, run, inverse temperature, regime)
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, grouping=model_grouper)
display(metrics)

# Mean and standard deviation over the runs of each configuration
summary = summarize_metrics(metrics)
display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-HPCA-FT,0,1.0,0.2,72.28,57.81,34.97,5.0
1,H-UNet-HPCA-FT,0,1.0,1.0,87.94,79.11,14.74,1.91
2,H-UNet-HPCA-T-FT,0,1.0,0.2,71.91,57.42,35.42,5.08
3,H-UNet-HPCA-T-FT,0,1.0,1.0,88.53,80.09,11.9,1.72
4,H-UNet-Pseudolabeling-HPCA-FT,0,1.0,0.2,73.15,58.88,30.57,4.41
5,H-UNet-Pseudolabeling-HPCA-FT,0,1.0,1.0,87.74,78.83,15.28,1.92
6,H-UNet-Pseudolabeling-HPCA-T-FT,0,1.0,0.2,76.41,63.11,27.88,3.96
7,H-UNet-Pseudolabeling-HPCA-T-FT,0,1.0,1.0,89.01,80.82,10.43,1.55
8,UNet,0,1.0,0.2,74.08,60.29,30.64,4.24
9,UNet-Pseudolabeling,0,1.0,0.2,80.42,68.42,23.66,3.41


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-HPCA-FT,1.0,0.2,72.28,,57.81,,34.97,,5.0,
H-UNet-HPCA-FT,1.0,1.0,87.94,,79.11,,14.74,,1.91,
H-UNet-HPCA-T-FT,1.0,0.2,71.91,,57.42,,35.42,,5.08,
H-UNet-HPCA-T-FT,1.0,1.0,88.53,,80.09,,11.9,,1.72,
H-UNet-Pseudolabeling-HPCA-FT,1.0,0.2,73.15,,58.88,,30.57,,4.41,
H-UNet-Pseudolabeling-HPCA-FT,1.0,1.0,87.74,,78.83,,15.28,,1.92,
H-UNet-Pseudolabeling-HPCA-T-FT,1.0,0.2,76.41,,63.11,,27.88,,3.96,
H-UNet-Pseudolabeling-HPCA-T-FT,1.0,1.0,89.01,,80.82,,10.43,,1.55,
UNet,1.0,0.2,74.08,,60.29,,30.64,,4.24,
UNet-Pseudolabeling,1.0,0.2,80.42,,68.42,,23.66,,3.41,
