In [1]:
import pandas as pd
import glob
import math

%matplotlib inline

from pathlib import Path
from omegaconf import OmegaConf

In [2]:
# Collect predictions

def check_num_epochs(run):
    if Path(run / "valid_log.csv").is_file():
        cfg = OmegaConf.load(run / '.hydra' / 'config.yaml')
        cfg = OmegaConf.to_container(cfg)
        num_epochs = cfg['optim']['epochs']

        valid_log = pd.read_csv(run / "valid_log.csv", header=None, index_col=0)
        if (len(valid_log.index)-3) < num_epochs:
            print("Wrong number of epochs in run: {}".format(run))
    else:
        print("valid_log.csv not exists in run: {}".format(run))

def check_only_one_tensorboard(run):
    len(list(Path('.').glob('*')))
    if len(list(Path(run / "runs").glob('*'))) > 1:
        print("More than 1 tensorboard folder in run: {}".format(run))

def collect_one(model_name, run, csv_file):
    check_num_epochs(run)
    check_only_one_tensorboard(run)
    
    cfg = OmegaConf.load(run / '.hydra' / 'config.yaml')
    cfg = OmegaConf.to_container(cfg)

    run_number = cfg['data']['train']['cross_val_bucket_validation_index']
    regime, inv_temp = float(run.parent.parts[-1].rsplit('-', 1)[1]), float(run.parent.parts[-2].rsplit('-', 1)[1])

    csv_path = run / 'test_predictions' / csv_file
    if not csv_path.exists():
        print(f'Skipping not found: {csv_path}')
        return pd.DataFrame()
    
    data = pd.read_csv(csv_path)
    if data.empty:
        print(f'Pred file is empty: {csv_path}')

    data['model'] = model_name
    data['run_number'] = run_number
    data['inv_temp'] = inv_temp
    data['regime'] = regime
    
    return data

def collect_all(model_name, root, csv_file):
    root = Path(root)

    metrics = []
    for inv_temp in list(root.glob("inv_temp-*")):
        for regime in list(inv_temp.glob("regime-*")):
            for run in list(regime.glob("run-*")):
                metrics.append(collect_one(model_name, run, csv_file))
        
    metrics = pd.concat(metrics, ignore_index=True)
    
    return metrics

def collect_all_regimes(model_name, root, csv_file):
    root = Path(root)

    metrics = []
    for run in list(root.glob("run-*")):
        metrics.append(collect_one(model_name, run, csv_file))
        
    metrics = pd.concat(metrics, ignore_index=True)
    
    return metrics

In [3]:
# Compute metrics for each detected run

def compute_metrics(data, grouping, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance']):
    metrics_dict_names = {
        'Dice': 'dice',
        'Jaccard': 'jaccard',
        'Hausdorff Distance': '95hd',
        'Average Surface Distance': 'asd'
    }

    columns = ['Model', '# Run', 'Inv Temp', 'Regime']
    columns.extend(metric_names)
    metrics = []
    
    data = data.copy().reset_index()
    grouped = data.groupby(grouping)
    
    for model_group, predictions in grouped:
        model_name, run_number, inv_temp, regime = model_group[0], model_group[1], model_group[2], model_group[3]
        
        metric_values = []
        for metric_name in metric_names:
            values = predictions['segm/{}'.format(metrics_dict_names[metric_name])].values
            mean_value = values.mean()
            if metric_name == 'Dice' or metric_name == 'Jaccard':
                metric_values.append((math.ceil(mean_value*10000)/10000)*100)
            else:
                metric_values.append(math.ceil(mean_value*100)/100)
        
        metrics.append([model_name, run_number, inv_temp, regime, *metric_values])
        
    metrics_df = pd.DataFrame(metrics, columns=columns)
    
    return metrics_df

def summarize_metrics(metrics, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance']):
    mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate(['mean', 'std'])
    
    return mean_metrics

<h1>Evaluation - Searching temperature hyperparameter</h1>

<p>Evaluate Hebbian models belonging to SWTA paradigm to search best temperature values (this value is dataset-specific)</p>

<h2>GlaS Dataset</h2>

In [4]:
EXP_ROOT = "./runs"
#EXP_ROOT = "/mnt/Results/hebbian-medical-image-segmentation/runs"

runs = {
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta')),
    'H-UNet-BASE-SWTA-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_ft')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_t')),
    'H-UNet-BASE-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_t_ft')),
}

In [5]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv') for k, v in runs.items() for r in v], ignore_index=True)

ValueError: No objects to concatenate

In [6]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,5.0,1.0,68.71,53.41,48.13,6.91
1,H-UNet-BASE-SWTA,0,6.0,1.0,70.28,55.04,35.78,5.04
2,H-UNet-BASE-SWTA,0,10.0,1.0,70.88,55.86,39.84,5.32
3,H-UNet-BASE-SWTA,0,12.0,1.0,70.58,55.18,22.02,3.09
4,H-UNet-BASE-SWTA-FT,0,5.0,1.0,86.42,76.8,17.13,2.23
5,H-UNet-BASE-SWTA-FT,0,6.0,1.0,87.24,78.02,13.37,1.94
6,H-UNet-BASE-SWTA-FT,0,10.0,1.0,87.94,79.15,14.16,1.97
7,H-UNet-BASE-SWTA-FT,0,12.0,1.0,87.07,77.8,13.28,1.93
8,H-UNet-BASE-SWTA-T,0,5.0,1.0,68.9,53.46,34.26,4.79
9,H-UNet-BASE-SWTA-T,0,6.0,1.0,70.99,55.83,30.67,3.87


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA,5.0,1.0,68.71,,53.41,,48.13,,6.91,
H-UNet-BASE-SWTA,6.0,1.0,70.28,,55.04,,35.78,,5.04,
H-UNet-BASE-SWTA,10.0,1.0,70.88,,55.86,,39.84,,5.32,
H-UNet-BASE-SWTA,12.0,1.0,70.58,,55.18,,22.02,,3.09,
H-UNet-BASE-SWTA-FT,5.0,1.0,86.42,,76.8,,17.13,,2.23,
H-UNet-BASE-SWTA-FT,6.0,1.0,87.24,,78.02,,13.37,,1.94,
H-UNet-BASE-SWTA-FT,10.0,1.0,87.94,,79.15,,14.16,,1.97,
H-UNet-BASE-SWTA-FT,12.0,1.0,87.07,,77.8,,13.28,,1.93,
H-UNet-BASE-SWTA-T,5.0,1.0,68.9,,53.46,,34.26,,4.79,
H-UNet-BASE-SWTA-T,6.0,1.0,70.99,,55.83,,30.67,,3.87,


<h1>Evaluation - Hebbian Models</h1>

<p>Evaluate Hebbian models over the datasets; only regime=1.0 and best temperature values for SWTA are considered.</p>

<h2>GlaS Dataset</h2>

In [22]:
#EXP_ROOT = "./runs"
#EXP_ROOT = "/mnt/Results/hebbian-medical-image-segmentation/runs"
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/results/hebbian-skin-cancer-segmentation/runs"

INV_TEMP_GlaS=10          # to be set accordingly, used by SWTA

runs = {
    # UNET-based
    'UNet': list(Path(EXP_ROOT + '/experiment=glas/unet_base/inv_temp-1/regime-1.0').glob('run*')),
    'UNet-256': list(Path(EXP_ROOT + '/experiment=glas/unet_base-256/inv_temp-1/regime-1.0').glob('run*')),
    'UNet-Pseudolabeling': list(Path(EXP_ROOT + '/experiment=glas/unet/inv_temp-1/regime-1.0').glob('run*')),
    'H-UNet-HPCA': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca/inv_temp-1/regime-1.0').glob('run*')),
    'H-UNet-HPCA-T': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca_t/inv_temp-1/regime-1.0').glob('run*')),
    'H-UNet-HPCA-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca_ft/inv_temp-1/regime-1.0').glob('run*')),
    'H-UNet-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca_t_ft/inv_temp-1/regime-1.0').glob('run*')),
    #'H-UNet-SWTA': list(Path(EXP_ROOT + '/experiment=glas/hunet-swta/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)).glob('run*')),
    #'H-UNet-SWTA-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet-swta_ft/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)).glob('run*')),
    #'H-UNet-SWTA-T': list(Path(EXP_ROOT + '/experiment=glas/hunet-swta_t/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)).glob('run*')),
    #'H-UNet-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet-swta_t_ft/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)).glob('run*')),
    # FCN32s-based
    'FCN32s': list(Path(EXP_ROOT + '/experiment=glas/fcn32s_base/inv_temp-1/regime-1.0').glob('run*')),
    'FCN32s-256': list(Path(EXP_ROOT + '/experiment=glas/fcn32s_base-256/inv_temp-1/regime-1.0').glob('run*')),
    'FCN32s-Pseudolabeling': list(Path(EXP_ROOT + '/experiment=glas/fcn32s/inv_temp-1/regime-1.0').glob('run*')),
    #'H-FCN32s-SWTA': list(Path(EXP_ROOT + '/experiment=glas/hfcn32s-swta/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)).glob('run*')),
    #'H-FCN32s-SWTA-FT': list(Path(EXP_ROOT + '/experiment=glas/hfcn32s-swta_ft/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)).glob('run*')),
    #'H-FCN32s-SWTA-T': list(Path(EXP_ROOT + '/experiment=glas/hfcn32s-swta_t/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)).glob('run*')),
    #'H-FCN32s-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/hfcn32s-swta_t_ft/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)).glob('run*')),
    #'H-FCN32s-HPCA': list(Path(EXP_ROOT + '/experiment=glas/hfcn32s-hpca/inv_temp-1/regime-1.0').glob('run*')),
    #'H-FCN32s-HPCA-FT': list(Path(EXP_ROOT + '/experiment=glas/hfcn32s-hpca_ft/inv_temp-1/regime-1.0').glob('run*')),
    #'H-FCN32s-HPCA-T': list(Path(EXP_ROOT + '/experiment=glas/hfcn32s-hpca_t/inv_temp-1/regime-1.0').glob('run*')),
    #'H-FCN32s-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/hfcn32s-hpca_t_ft/inv_temp-1/regime-1.0').glob('run*')),
}

In [23]:
# Collect predictions scanning runs
predictions = pd.concat([collect_one(k, r, 'preds.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [24]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,FCN32s,0,1.0,1.0,89.85,82.17,7.76,1.3
1,FCN32s-256,0,1.0,1.0,87.22,77.95,6.87,0.93
2,FCN32s-Pseudolabeling,0,1.0,1.0,89.96,82.38,11.8,1.6
3,H-UNet-HPCA,0,1.0,1.0,55.34,39.23,65.24,10.29
4,H-UNet-HPCA-FT,0,1.0,1.0,88.51,79.94,8.64,1.37
5,H-UNet-HPCA-T,0,1.0,1.0,63.07,48.15,54.19,9.35
6,H-UNet-HPCA-T-FT,0,1.0,1.0,88.1,79.36,13.64,1.71
7,UNet,0,1.0,1.0,89.35,81.24,8.4,1.34
8,UNet-256,0,1.0,1.0,89.24,81.23,4.38,0.72
9,UNet-Pseudolabeling,0,1.0,1.0,89.12,80.94,11.15,1.52


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
FCN32s,1.0,1.0,89.85,,82.17,,7.76,,1.3,
FCN32s-256,1.0,1.0,87.22,,77.95,,6.87,,0.93,
FCN32s-Pseudolabeling,1.0,1.0,89.96,,82.38,,11.8,,1.6,
H-UNet-HPCA,1.0,1.0,55.34,,39.23,,65.24,,10.29,
H-UNet-HPCA-FT,1.0,1.0,88.51,,79.94,,8.64,,1.37,
H-UNet-HPCA-T,1.0,1.0,63.07,,48.15,,54.19,,9.35,
H-UNet-HPCA-T-FT,1.0,1.0,88.1,,79.36,,13.64,,1.71,
UNet,1.0,1.0,89.35,,81.24,,8.4,,1.34,
UNet-256,1.0,1.0,89.24,,81.23,,4.38,,0.72,
UNet-Pseudolabeling,1.0,1.0,89.12,,80.94,,11.15,,1.52,


<h1>Evaluation - Data regime variations</h1>

<p>Evaluate Hebbian models over the datasets, by varying the quantity of training data; only fine-tuned models are considered since during pre-training we can consider the whole dataset; only best temperature values for SWTA are considered.</p>