In [3]:
import pandas as pd
import math
import numpy as np

%matplotlib inline

from pathlib import Path
from omegaconf import OmegaConf
import scipy.stats as st

In [8]:
# Collect predictions

def check_num_epochs(run):
    """Print a warning when a run's validation log looks shorter than configured.

    Args:
        run: ``pathlib.Path`` of a run folder. ``valid_log.csv`` and
            ``.hydra/config.yaml`` are looked up inside it.

    The function only prints diagnostics; it returns ``None`` and never raises
    on a short or missing log.
    """
    log_path = Path(run / "valid_log.csv")

    # Guard clause: without a validation log there is nothing to compare.
    if not log_path.is_file():
        print("valid_log.csv not exists in run: {}".format(run))
        return

    config = OmegaConf.to_container(OmegaConf.load(run / '.hydra' / 'config.yaml'))
    expected_epochs = config['optim']['epochs']

    # The log is read without a header, using its first column as index.
    logged_rows = len(pd.read_csv(log_path, header=None, index_col=0).index)

    # NOTE(review): three rows are discounted before comparing with the epoch
    # count; presumably they are non-epoch rows of the log - confirm.
    if logged_rows - 3 < expected_epochs:
        print("Wrong number of epochs in run: {}".format(run))

def check_only_one_tensorboard(run):
    """Print a warning when the ``runs`` sub-folder of a run has several entries.

    Args:
        run: ``pathlib.Path`` of a run folder; its ``runs`` child is listed.

    Every entry matched by ``*`` is counted. A missing ``runs`` folder yields
    no entries and therefore no warning. Returns ``None``.
    """
    # The previous version also listed the current working directory and threw
    # the result away; that leftover scan has been removed.
    tensorboard_entries = list(Path(run / "runs").glob('*'))
    if len(tensorboard_entries) > 1:
        print("More than 1 tensorboard folder in run: {}".format(run))

def check_image_size(run, image_size=480):
    """Print a warning when the configured image size differs from the expected one.

    Args:
        run: ``pathlib.Path`` of a run folder containing ``.hydra/config.yaml``.
        image_size: expected value of ``data.image_size`` in that config.

    The configured value is converted with ``int`` before the comparison.
    Returns ``None``; a mismatch is only reported, never raised.
    """
    cfg = OmegaConf.to_container(OmegaConf.load(run / '.hydra' / 'config.yaml'))
    conf_image_size = cfg['data']['image_size']

    if int(conf_image_size) != image_size:
        # The run is part of the message (as in the sibling checks) so that a
        # batch of warnings can be traced back to the offending folders.
        print(
            "Different image size in conf. Image size in conf: {}, Image size: {}, in run: {}".format(
                conf_image_size, image_size, run
            )
        )

def collect_one(model_name, run, csv_file, image_size=480):
    """Load the prediction CSV of one run and tag every row with run metadata.

    Args:
        model_name: label stored in the ``model`` column.
        run: ``pathlib.Path`` of the run folder. Its parent folder name must end
            with ``-<regime>`` and its grand-parent folder name with
            ``-<inv_temp>``; both suffixes are parsed with ``float``.
        csv_file: file name looked up inside ``<run>/test_predictions``.
        image_size: expected image size, forwarded to ``check_image_size``.

    Returns:
        The CSV content with the extra columns ``model``, ``run_number``,
        ``inv_temp`` and ``regime``, or an empty ``DataFrame`` when the CSV file
        does not exist.
    """
    # Sanity checks only print diagnostics; they do not stop the collection.
    for sanity_check in (check_num_epochs, check_only_one_tensorboard):
        sanity_check(run)
    check_image_size(run, image_size=image_size)

    config = OmegaConf.to_container(OmegaConf.load(run / '.hydra' / 'config.yaml'))
    fold_index = config['data']['train']['cross_val_bucket_validation_index']

    # Folder layout: .../<name>-<inv_temp>/<name>-<regime>/<run>
    parent_parts = run.parent.parts
    regime = float(parent_parts[-1].rsplit('-', 1)[1])
    inv_temp = float(parent_parts[-2].rsplit('-', 1)[1])

    csv_path = run / 'test_predictions' / csv_file
    if not csv_path.exists():
        print(f'Skipping not found: {csv_path}')
        return pd.DataFrame()

    data = pd.read_csv(csv_path)
    if data.empty:
        # Reported but still returned, tagged with the metadata columns.
        print(f'Pred file is empty: {csv_path}')

    tags = {
        'model': model_name,
        'run_number': fold_index,
        'inv_temp': inv_temp,
        'regime': regime,
    }
    for column, value in tags.items():
        data[column] = value

    return data

def collect_all(model_name, root, csv_file, regimes=['0.01', '0.02', '0.05', '0.1', '0.2', '0.25', '1.0'], image_size=480):
    """Collect the predictions of every run found below a model folder.

    The folder tree ``<root>/inv_temp-*/regime-*/run-*`` is walked and
    ``collect_one`` is called on each run whose regime folder suffix (the text
    after the last ``-``) is listed in ``regimes``.

    Args:
        model_name: label forwarded to ``collect_one``.
        root: model folder (``str`` or ``Path``).
        csv_file: prediction file name forwarded to ``collect_one``.
        regimes: regime suffixes, as strings, to keep. The default list is only
            read, never modified.
        image_size: expected image size forwarded to ``collect_one``.

    Returns:
        One ``DataFrame`` with all collected rows (fresh 0..n-1 index), or an
        empty ``DataFrame`` when no run matches.
    """
    root = Path(root)

    # Folders are visited in sorted order so the row order of the result does
    # not depend on the file-system listing order.
    collected = [
        collect_one(model_name, run, csv_file, image_size=image_size)
        for inv_temp_dir in sorted(root.glob("inv_temp-*"))
        for regime_dir in sorted(inv_temp_dir.glob("regime-*"))
        if regime_dir.name.rsplit("-", 1)[1] in regimes
        for run in sorted(regime_dir.glob("run-*"))
    ]

    # pd.concat raises ValueError on an empty list; mirror collect_one, which
    # returns an empty frame when there is nothing to load.
    if not collected:
        print(f'No runs found under: {root}')
        return pd.DataFrame()

    return pd.concat(collected, ignore_index=True)

def collect_all_regimes(model_name, root, csv_file, regimes=['0.1', '0.2', '0.5', '1.0'], image_size=480):
    """Collect the predictions of every ``run-*`` folder inside one regime folder.

    Args:
        model_name: label forwarded to ``collect_one``.
        root: regime folder (``str`` or ``Path``). The text after the last ``-``
            of its name is compared against ``regimes``.
        csv_file: prediction file name forwarded to ``collect_one``.
        regimes: regime suffixes, as strings, to keep. The default list is only
            read, never modified.
        image_size: expected image size forwarded to ``collect_one``.

    Returns:
        One ``DataFrame`` with all collected rows (fresh 0..n-1 index), or an
        empty ``DataFrame`` when the folder is filtered out or holds no run.
    """
    root = Path(root)

    # The regime belongs to the folder, not to its runs: test it once instead of
    # once per run. A filtered-out folder yields an empty frame rather than the
    # ValueError that pd.concat raises on an empty list.
    if root.name.rsplit("-", 1)[-1] not in regimes:
        return pd.DataFrame()

    collected = [
        collect_one(model_name, run, csv_file, image_size=image_size)
        for run in sorted(root.glob("run-*"))
    ]
    if not collected:
        print(f'No runs found under: {root}')
        return pd.DataFrame()

    return pd.concat(collected, ignore_index=True)

In [5]:
# Compute metrics for each detected run

def compute_metrics(data, grouping, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance']):
    """Average the per-prediction metrics of each group (one row per group).

    Args:
        data: ``DataFrame`` of predictions holding the grouping columns and one
            ``segm/<key>`` column per requested metric
            (``dice``, ``jaccard``, ``95hd``, ``asd``).
        grouping: list of column names to group by. The first four elements of
            each group key are reported as model, run number, inverse
            temperature and regime, in that order.
        metric_names: display names of the metrics to report. The default list
            is only read, never modified.

    Returns:
        ``DataFrame`` with columns ``Model``, ``# Run``, ``Inv Temp``,
        ``Regime`` followed by ``metric_names``. Means ignore NaN entries.
        Dice and Jaccard are reported as percentages rounded up to two
        decimals; the other metrics are rounded up to two decimals. A metric
        with no valid value in a group is reported as NaN.

    NOTE(review): values are rounded *up* (``math.ceil``), not to the nearest
    value; kept as-is so already reported numbers do not change.
    """
    metrics_dict_names = {
        'Dice': 'dice',
        'Jaccard': 'jaccard',
        'Hausdorff Distance': '95hd',
        'Average Surface Distance': 'asd'
    }
    percentage_metrics = ('Dice', 'Jaccard')

    columns = ['Model', '# Run', 'Inv Temp', 'Regime', *metric_names]
    metrics = []

    # reset_index returns a new frame, so the caller's data is left untouched.
    data = data.reset_index()
    grouped = data.groupby(grouping)

    for model_group, predictions in grouped:
        model_name, run_number, inv_temp, regime = model_group[:4]

        metric_values = []
        for metric_name in metric_names:
            column = 'segm/{}'.format(metrics_dict_names[metric_name])
            values = predictions[column].to_numpy(dtype=float)

            # All-NaN groups: report NaN for every metric. Previously this
            # crashed for Dice/Jaccard (math.ceil rejects NaN) and emitted a
            # "Mean of empty slice" RuntimeWarning for the others.
            if np.isnan(values).all():
                metric_values.append(np.nan)
                continue

            mean_value = np.nanmean(values)
            if not np.isfinite(mean_value):
                # math.ceil cannot handle infinities either; pass them through.
                metric_values.append(mean_value)
            elif metric_name in percentage_metrics:
                metric_values.append((math.ceil(mean_value*10000)/10000)*100)
            else:
                metric_values.append(math.ceil(mean_value*100)/100)

        metrics.append([model_name, run_number, inv_temp, regime, *metric_values])

    return pd.DataFrame(metrics, columns=columns)

def summarize_metrics(metrics, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance'], t_student=False):
    """Aggregate per-run metrics into mean and standard deviation per configuration.

    Args:
        metrics: ``DataFrame`` with the columns ``Model``, ``Inv Temp``,
            ``Regime`` and one column per entry of ``metric_names``.
        metric_names: metric columns to aggregate.
        t_student: accepted for compatibility; currently not used.

    Returns:
        ``DataFrame`` indexed by (``Model``, ``Inv Temp``, ``Regime``) whose
        columns are (metric, ``mean``) and (metric, ``std``) pairs.
    """
    per_configuration = metrics.groupby(['Model', 'Inv Temp', 'Regime'])
    return per_configuration[metric_names].agg(['mean', 'std'])

<h1>GlaS Dataset</h1>

<h2>Evaluation - Searching temperature hyperparameter</h2>

<p>Evaluate Hebbian models belonging to the SWTA paradigm to search for the best temperature value (this value is dataset-specific).</p>

In [5]:
# Unsupervised learning models are evaluated over the whole dataset, while fine-tuned models are evaluated over a fraction of the data

EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs"

runs = {
    # UNET-based
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_t')),
}

In [6]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds_from_last.csv') for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_all(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [7]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,1.0,1.0,52.31,36.16,41.72,8.55
1,H-UNet-BASE-SWTA,0,5.0,1.0,49.74,33.87,42.4,8.16
2,H-UNet-BASE-SWTA,0,10.0,1.0,45.79,30.24,45.59,8.88
3,H-UNet-BASE-SWTA,0,20.0,1.0,49.81,33.74,38.32,7.2
4,H-UNet-BASE-SWTA,0,50.0,1.0,49.98,34.01,42.11,7.88
5,H-UNet-BASE-SWTA,0,100.0,1.0,49.48,33.46,40.05,7.27
6,H-UNet-BASE-SWTA-T,0,1.0,1.0,45.6,30.22,44.54,9.98
7,H-UNet-BASE-SWTA-T,0,5.0,1.0,48.07,32.26,54.12,10.52
8,H-UNet-BASE-SWTA-T,0,10.0,1.0,49.61,33.6,42.02,8.29
9,H-UNet-BASE-SWTA-T,0,20.0,1.0,50.11,34.03,42.04,7.84


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA,1.0,1.0,52.31,,36.16,,41.72,,8.55,
H-UNet-BASE-SWTA,5.0,1.0,49.74,,33.87,,42.4,,8.16,
H-UNet-BASE-SWTA,10.0,1.0,45.79,,30.24,,45.59,,8.88,
H-UNet-BASE-SWTA,20.0,1.0,49.81,,33.74,,38.32,,7.2,
H-UNet-BASE-SWTA,50.0,1.0,49.98,,34.01,,42.11,,7.88,
H-UNet-BASE-SWTA,100.0,1.0,49.48,,33.46,,40.05,,7.27,
H-UNet-BASE-SWTA-T,1.0,1.0,45.6,,30.22,,44.54,,9.98,
H-UNet-BASE-SWTA-T,5.0,1.0,48.07,,32.26,,54.12,,10.52,
H-UNet-BASE-SWTA-T,10.0,1.0,49.61,,33.6,,42.02,,8.29,
H-UNet-BASE-SWTA-T,20.0,1.0,50.11,,34.03,,42.04,,7.84,


In [8]:
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs"

runs = {
    # UNET-based
    #'H-UNet-BASE-SWTA-FT-FROM-BEST-DICE': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_ft_fromBestDice')),
    #'H-UNet-BASE-SWTA-T-FT-FROM-BEST-DICE': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_t_ft_fromBestDice')),
    'H-UNet-BASE-SWTA-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_ft')),
    'H-UNet-BASE-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet_base-swta_t_ft')),
    'H-UNet-Pseudolabeling-SWTA-T': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet-swta_t')),
    'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/').glob('hunet-swta_t_ft')),
}

In [9]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [10]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA-FT,0,1.0,0.2,78.29,65.23,25.62,3.60
1,H-UNet-BASE-SWTA-FT,0,5.0,0.2,79.96,67.77,27.76,3.96
2,H-UNet-BASE-SWTA-FT,0,10.0,0.2,81.71,70.05,23.32,3.45
3,H-UNet-BASE-SWTA-FT,0,20.0,0.2,81.64,69.71,19.47,2.40
4,H-UNet-BASE-SWTA-FT,0,50.0,0.2,81.50,69.69,22.19,3.04
...,...,...,...,...,...,...,...,...
115,H-UNet-BASE-SWTA-T-FT,9,5.0,0.2,80.07,68.01,22.94,3.58
116,H-UNet-BASE-SWTA-T-FT,9,10.0,0.2,79.62,67.11,26.51,3.54
117,H-UNet-BASE-SWTA-T-FT,9,20.0,0.2,80.61,68.63,28.82,3.85
118,H-UNet-BASE-SWTA-T-FT,9,50.0,0.2,79.22,66.88,27.03,3.87


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA-FT,1.0,0.2,77.89,1.997649,64.949,2.580833,28.68,3.489909,4.102,0.595871
H-UNet-BASE-SWTA-FT,5.0,0.2,79.583,1.966842,67.351,2.601664,26.779,5.198413,3.807,0.716505
H-UNet-BASE-SWTA-FT,10.0,0.2,79.743,1.593606,67.467,2.118732,26.086,5.496421,3.717,0.72014
H-UNet-BASE-SWTA-FT,20.0,0.2,80.649,1.280663,68.642,1.708799,24.566,4.580646,3.346,0.760675
H-UNet-BASE-SWTA-FT,50.0,0.2,80.322,1.563833,68.282,2.008149,25.608,2.128227,3.521,0.304939
H-UNet-BASE-SWTA-FT,100.0,0.2,80.417,2.325621,68.408,3.034636,25.24,4.685384,3.545,0.652521
H-UNet-BASE-SWTA-T-FT,1.0,0.2,78.21,1.914233,65.403,2.549466,28.162,5.176704,4.079,0.975676
H-UNet-BASE-SWTA-T-FT,5.0,0.2,80.459,1.78652,68.528,2.44121,25.713,3.538424,3.632,0.391828
H-UNet-BASE-SWTA-T-FT,10.0,0.2,81.042,1.461079,69.245,1.992058,24.659,4.680369,3.455,0.584964
H-UNet-BASE-SWTA-T-FT,20.0,0.2,81.511,2.092566,69.898,2.830684,25.0,4.665269,3.445,0.671669


<h2>Evaluation - Hebbian Unsupervised Pretraining</h2>

<p>Evaluate Hebbian models pretrained in an unsupervised way over the datasets; only best temperature values for SWTA are considered.</p>

In [11]:
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs"

INV_TEMP_GlaS=20          # to be set accordingly, used by SWTA

runs = {
    # UNET-based
    'H-UNet-HPCA': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-HPCA-T': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca_t/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)).glob('run-*')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta_t/inv_temp-{}/regime-1.0'.format(INV_TEMP_GlaS)).glob('run-*')),
}

In [12]:
# Collect predictions scanning runs
predictions = pd.concat([collect_one(k, r, 'preds_from_last.csv') for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_one(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [13]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

  mean_value = np.nanmean(values)


Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA,0,20.0,1.0,49.81,33.74,38.32,7.2
1,H-UNet-BASE-SWTA-T,0,20.0,1.0,50.11,34.03,42.04,7.84
2,H-UNet-HPCA,0,1.0,1.0,33.72,20.47,,
3,H-UNet-HPCA-T,0,1.0,1.0,34.84,21.3,,


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA,20.0,1.0,49.81,,33.74,,38.32,,7.2,
H-UNet-BASE-SWTA-T,20.0,1.0,50.11,,34.03,,42.04,,7.84,
H-UNet-HPCA,1.0,1.0,33.72,,20.47,,,,,
H-UNet-HPCA-T,1.0,1.0,34.84,,21.3,,,,,


<h2>Evaluation - Data regime variations</h2>

<p>Evaluate Hebbian models over the datasets, by varying the quantity of training data; only fine-tuned models are considered since during pre-training we can consider the whole dataset; only best temperature values for SWTA are considered.</p>

In [18]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['0.01', '0.02', '0.05', '0.1', '0.2']       # regimes to be considered
IMAGE_SIZE = 480
INV_TEMP_GlaS=20          # to be set accordingly, used by SWTA

runs = {
    # UNET-based
    'UNet': list(Path(EXP_ROOT + '/experiment=glas').glob('unet_base')),
    #'UNet-Pseudolabeling': list(Path(EXP_ROOT + '/experiment=glas/unet/inv_temp-1').glob('regime-*')),
    ##'H-UNet-HPCA-FT-FROM-BEST-DICE': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca_ft_fromBestDice/inv_temp-1').glob('regime-*')),
    ##'H-UNet-HPCA-T-FT-FROM-BEST-DICE': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca_t_ft_fromBestDice/inv_temp-1').glob('regime-*')),
    #'H-UNet-HPCA-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca_ft/inv_temp-1').glob('regime-*')),
    #'H-UNet-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-hpca_t_ft/inv_temp-1').glob('regime-*')),
    #'H-UNet-Pseudolabeling-HPCA-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet-hpca_ft/inv_temp-1').glob('regime-*')),
    #'H-UNet-Pseudolabeling-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet-hpca_t_ft/inv_temp-1').glob('regime-*')),
    ##'H-UNet-SWTA-FT-FROM-BEST-DICE': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta_ft_fromBestDice/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
    ##'H-UNet-SWTA-T-FT-FROM-BEST-DICE': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta_t_ft_fromBestDice/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
    #'H-UNet-SWTA-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta_ft/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
    #'H-UNet-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet_base-swta_t_ft/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
    #'H-UNet-Pseudolabeling-SWTA-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet-swta_ft/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
    #'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=glas/hunet-swta_t_ft/inv_temp-{}'.format(INV_TEMP_GlaS)).glob('regime-*')),
}

In [19]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE) for k, v in runs.items() for r in v], ignore_index=True)

In [20]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,UNet,0,1.0,0.2,82.31,70.79,21.56,2.75
1,UNet,1,1.0,0.2,79.69,67.33,22.69,3.06
2,UNet,2,1.0,0.2,77.99,65.01,25.59,3.2
3,UNet,3,1.0,0.2,80.72,68.6,25.77,3.25
4,UNet,4,1.0,0.2,82.35,70.79,22.47,2.91
5,UNet,5,1.0,0.2,75.64,61.99,37.45,5.45
6,UNet,6,1.0,0.2,78.85,66.22,25.13,3.66
7,UNet,7,1.0,0.2,80.72,68.97,30.89,4.1
8,UNet,8,1.0,0.2,79.55,67.09,22.18,3.25
9,UNet,9,1.0,0.2,78.94,66.35,27.56,3.78


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
UNet,1.0,0.2,79.676,2.017701,67.314,2.674805,26.129,4.898783,3.541,0.786617


<h1>PH2 Dataset</h1>

<h2>Evaluation - Searching temperature hyperparameter</h2>

<p>Evaluate Hebbian models belonging to the SWTA paradigm to search for the best temperature value (this value is dataset-specific).</p>

In [10]:
# Unsupervised learning models are evaluated over the whole dataset, while fine-tuned models are evaluated over a fraction of the data

EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

runs = {
    # UNET-based
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-swta')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-swta_t')),
}

In [11]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds_from_last.csv', regimes=['1.0']) for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_all(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

More than 1 tensorboard folder in run: /home/luca/datino/results/hebbian-medical-image-segmentation/runs/experiment=ph2/hunet_base-swta/inv_temp-1/regime-1.0/run-0
Skipping not found: /home/luca/datino/results/hebbian-medical-image-segmentation/runs/experiment=ph2/hunet_base-swta/inv_temp-1/regime-1.0/run-0/test_predictions/preds_from_last.csv
More than 1 tensorboard folder in run: /home/luca/datino/results/hebbian-medical-image-segmentation/runs/experiment=ph2/hunet_base-swta/inv_temp-10/regime-1.0/run-0
Skipping not found: /home/luca/datino/results/hebbian-medical-image-segmentation/runs/experiment=ph2/hunet_base-swta/inv_temp-10/regime-1.0/run-0/test_predictions/preds_from_last.csv
More than 1 tensorboard folder in run: /home/luca/datino/results/hebbian-medical-image-segmentation/runs/experiment=ph2/hunet_base-swta/inv_temp-100/regime-1.0/run-0
Skipping not found: /home/luca/datino/results/hebbian-medical-image-segmentation/runs/experiment=ph2/hunet_base-swta/inv_temp-100/regime-1.0

In [None]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

In [4]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

runs = {
    # UNET-based
    'H-UNet-BASE-SWTA-FT': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-swta_ft')),
    #'H-UNet-BASE-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet_base-swta_t_ft')),
    #'H-UNet-Pseudolabeling-SWTA-T': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet-swta_t')),
    #'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2/').glob('hunet-swta_t_ft')),
}

In [5]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [6]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-BASE-SWTA-FT,0,1.0,0.01,75.57,63.16,19.17,2.93
1,H-UNet-BASE-SWTA-FT,0,1.0,0.02,75.43,63.36,14.70,2.01
2,H-UNet-BASE-SWTA-FT,0,1.0,0.05,79.87,67.81,9.70,1.26
3,H-UNet-BASE-SWTA-FT,0,1.0,0.10,84.61,74.48,6.90,0.85
4,H-UNet-BASE-SWTA-FT,0,1.0,0.20,88.37,79.72,2.17,0.36
...,...,...,...,...,...,...,...,...
245,H-UNet-BASE-SWTA-FT,9,50.0,0.10,88.49,79.95,3.02,0.40
246,H-UNet-BASE-SWTA-FT,9,50.0,0.20,89.51,81.89,4.06,0.60
247,H-UNet-BASE-SWTA-FT,9,100.0,0.05,84.15,73.80,4.91,0.67
248,H-UNet-BASE-SWTA-FT,9,100.0,0.10,86.82,77.64,4.63,0.63


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-BASE-SWTA-FT,1.0,0.01,74.179,4.972635,62.108,5.52232,17.414,5.258574,3.0,1.211941
H-UNet-BASE-SWTA-FT,1.0,0.02,78.688,5.13088,67.328,6.122891,11.925,7.275678,1.931,1.382722
H-UNet-BASE-SWTA-FT,1.0,0.05,82.505,3.613014,71.528,4.804789,6.579,3.649409,0.98,0.669228
H-UNet-BASE-SWTA-FT,1.0,0.1,86.587,2.337882,77.099,3.438664,3.844,1.647673,0.52,0.20221
H-UNet-BASE-SWTA-FT,1.0,0.2,88.62,1.393756,80.036,2.137138,2.498,0.487141,0.352,0.066299
H-UNet-BASE-SWTA-FT,5.0,0.01,73.64,5.600869,61.67,6.469745,16.812,8.138955,2.793,1.625669
H-UNet-BASE-SWTA-FT,5.0,0.02,76.497,3.134837,64.407,4.085177,13.386,4.802745,2.096,0.817927
H-UNet-BASE-SWTA-FT,5.0,0.05,79.99,3.040618,68.46,3.96477,8.204,4.112034,1.31,0.754763
H-UNet-BASE-SWTA-FT,5.0,0.1,85.889,1.7042,76.304,2.274434,5.22,2.697097,0.811,0.504743
H-UNet-BASE-SWTA-FT,5.0,0.2,88.838,1.005936,80.421,1.433321,2.55,0.890668,0.368,0.144284


<h2>Evaluation - Hebbian Unsupervised Pretraining</h2>

<p>Evaluate Hebbian models pretrained in an unsupervised way over the datasets; only best temperature values for SWTA are considered.</p>

In [39]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

INV_TEMP_PH2=20          # to be set accordingly, used by SWTA

runs = {
    # UNET-based
    'H-UNet-HPCA': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-hpca/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-HPCA-T': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-hpca_t/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-swta/inv_temp-{}/regime-1.0'.format(INV_TEMP_PH2)).glob('run-*')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-swta_t/inv_temp-{}/regime-1.0'.format(INV_TEMP_PH2)).glob('run-*')),
}

In [40]:
# Collect predictions scanning runs
predictions = pd.concat([collect_one(k, r, 'preds_from_last.csv') for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_one(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

Skipping not found: /home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs/experiment=ph2/hunet_base-swta/inv_temp-20/regime-1.0/run-0/test_predictions/preds_from_last.csv
Skipping not found: /home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs/experiment=ph2/hunet_base-swta_t/inv_temp-20/regime-1.0/run-0/test_predictions/preds_from_last.csv


In [41]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

  mean_value = np.nanmean(values)


Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-HPCA,0,1.0,1.0,21.89,12.59,104.8,41.46
1,H-UNet-HPCA-T,0,1.0,1.0,21.21,12.13,,


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-HPCA,1.0,1.0,21.89,,12.59,,104.8,,41.46,
H-UNet-HPCA-T,1.0,1.0,21.21,,12.13,,,,,


<h2>Evaluation - Data regime variations</h2>

<p>Evaluate Hebbian models over the datasets, by varying the quantity of training data; only fine-tuned models are considered since during pre-training we can consider the whole dataset; only best temperature values for SWTA are considered.</p>

In [13]:
EXP_ROOT = "/home/luca/datino/results/hebbian-medical-image-segmentation/runs"

REGIMES = ['0.01', '0.02', '0.05', '0.1', '0.2', '0.25']       # regimes to be considered
IMAGE_SIZE = 480
INV_TEMP_PH2=10          # to be set accordingly, used by SWTA

runs = {
    # UNET-based
    'UNet': list(Path(EXP_ROOT + '/experiment=ph2').glob('unet_base')),
    ##'UNet-Pseudolabeling': list(Path(EXP_ROOT + '/experiment=ph2').glob('unet')),
    ##'H-UNet-HPCA-FT': list(Path(EXP_ROOT + '/experiment=ph2').glob('hunet_base-hpca_ft')),
    ##'H-UNet-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2').glob('hunet_base-hpca_t_ft')),
    ##'H-UNet-Pseudolabeling-HPCA-FT': list(Path(EXP_ROOT + '/experiment=ph2').glob('hunet-hpca_ft')),
    ##'H-UNet-Pseudolabeling-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2').glob('hunet-hpca_t_ft')),
    #'H-UNet-SWTA-FT': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-swta_ft/inv_temp-{}'.format(INV_TEMP_PH2)).glob('regime-*')),
    #'H-UNet-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2/hunet_base-swta_t_ft/inv_temp-{}'.format(INV_TEMP_PH2)).glob('regime-*')),
    #'H-UNet-Pseudolabeling-SWTA-FT': list(Path(EXP_ROOT + '/experiment=ph2/hunet-swta_ft/inv_temp-{}'.format(INV_TEMP_PH2)).glob('regime-*')),
    #'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=ph2/hunet-swta_t_ft/inv_temp-{}'.format(INV_TEMP_PH2)).glob('regime-*')),
}

In [12]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv', regimes=REGIMES, image_size=IMAGE_SIZE) for k, v in runs.items() for r in v], ignore_index=True)

Different image size in conf. Image size in conf: 480, Image size: 256
Different image size in conf. Image size in conf: 480, Image size: 256
Different image size in conf. Image size in conf: 480, Image size: 256
Different image size in conf. Image size in conf: 480, Image size: 256
Different image size in conf. Image size in conf: 480, Image size: 256
Different image size in conf. Image size in conf: 480, Image size: 256
Different image size in conf. Image size in conf: 480, Image size: 256
Different image size in conf. Image size in conf: 480, Image size: 256
Different image size in conf. Image size in conf: 480, Image size: 256
Different image size in conf. Image size in conf: 480, Image size: 256
Different image size in conf. Image size in conf: 480, Image size: 256
Different image size in conf. Image size in conf: 480, Image size: 256
Different image size in conf. Image size in conf: 480, Image size: 256
Different image size in conf. Image size in conf: 480, Image size: 256
Differ

In [12]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-UNet-Pseudolabeling-HPCA-T-FT,0,1.0,0.01,58.14,46.18,109.55,20.28
1,H-UNet-Pseudolabeling-HPCA-T-FT,0,1.0,0.02,69.58,58.64,34.29,6.45
2,H-UNet-Pseudolabeling-HPCA-T-FT,0,1.0,0.05,89.1,81.19,4.51,0.66
3,H-UNet-Pseudolabeling-HPCA-T-FT,0,1.0,0.1,86.89,79.07,9.14,1.67
4,H-UNet-Pseudolabeling-HPCA-T-FT,0,1.0,0.2,89.18,81.63,7.69,1.3
5,H-UNet-Pseudolabeling-HPCA-T-FT,0,1.0,0.25,90.6,83.59,6.2,0.8
6,H-UNet-Pseudolabeling-HPCA-T-FT,1,1.0,0.01,69.2,55.3,68.55,11.39
7,H-UNet-Pseudolabeling-HPCA-T-FT,1,1.0,0.02,84.72,75.51,6.77,0.98
8,H-UNet-Pseudolabeling-HPCA-T-FT,1,1.0,0.05,88.87,81.56,3.94,0.63
9,H-UNet-Pseudolabeling-HPCA-T-FT,1,1.0,0.1,89.74,82.17,7.17,0.8


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-UNet-Pseudolabeling-HPCA-T-FT,1.0,0.01,61.618,13.307067,49.796,13.774789,71.141,34.119033,13.184,6.55301
H-UNet-Pseudolabeling-HPCA-T-FT,1.0,0.02,79.876,6.880196,69.569,7.92003,15.256,9.087026,2.534,1.89568
H-UNet-Pseudolabeling-HPCA-T-FT,1.0,0.05,86.555,2.01419,77.999,2.52819,7.715,3.29891,1.158,0.57777
H-UNet-Pseudolabeling-HPCA-T-FT,1.0,0.1,89.122,1.649605,81.765,1.965702,5.822,2.557706,0.93,0.474997
H-UNet-Pseudolabeling-HPCA-T-FT,1.0,0.2,91.413,0.949082,84.589,1.361979,3.649,1.953393,0.508,0.315834
H-UNet-Pseudolabeling-HPCA-T-FT,1.0,0.25,91.933,0.851026,85.51,1.239077,3.385,1.542662,0.469,0.195644


<h1>KvasirSEG Dataset</h1>

<h2>Evaluation - Searching temperature hyperparameter</h2>

<p>Evaluate Hebbian models belonging to the SWTA paradigm to search for the best temperature value (this value is dataset-specific).</p>

In [None]:
# Unsupervised learning models are evaluated over the whole dataset, while fine-tuned models are evaluated over a fraction of the data

EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs"

runs = {
    # UNET-based
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=kvasirSEG/').glob('hunet_base-swta')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=kvasirSEG/').glob('hunet_base-swta_t')),
}

In [None]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds_from_last.csv') for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_all(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [None]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

In [None]:
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs"

runs = {
    # UNET-based
    'H-UNet-BASE-SWTA-FT': list(Path(EXP_ROOT + '/experiment=kvasirSEG/').glob('hunet_base-swta_ft')),
    'H-UNet-BASE-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=kvasirSEG/').glob('hunet_base-swta_t_ft')),
    'H-UNet-Pseudolabeling-SWTA-T': list(Path(EXP_ROOT + '/experiment=kvasirSEG/').glob('hunet-swta_t')),
    'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=kvasirSEG/').glob('hunet-swta_t_ft')),
}

In [None]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'preds.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [None]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

<h2>Evaluation - Hebbian Unsupervised Pretraining</h2>

<p>Evaluate Hebbian models pretrained in an unsupervised way over the datasets; only best temperature values for SWTA are considered.</p>

In [None]:
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs"

INV_TEMP_KVASIR=20          # to be set accordingly, used by SWTA

runs = {
    # UNET-based
    'H-UNet-HPCA': list(Path(EXP_ROOT + '/experiment=kvasirSEG/hunet_base-hpca/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-HPCA-T': list(Path(EXP_ROOT + '/experiment=kvasirSEG/hunet_base-hpca_t/inv_temp-1/regime-1.0').glob('run-*')),
    'H-UNet-BASE-SWTA': list(Path(EXP_ROOT + '/experiment=kvasirSEG/hunet_base-swta/inv_temp-{}/regime-1.0'.format(INV_TEMP_KVASIR)).glob('run-*')),
    'H-UNet-BASE-SWTA-T': list(Path(EXP_ROOT + '/experiment=kvasirSEG/hunet_base-swta_t/inv_temp-{}/regime-1.0'.format(INV_TEMP_KVASIR)).glob('run-*')),
}

In [None]:
# Collect predictions scanning runs
predictions = pd.concat([collect_one(k, r, 'preds_from_last.csv') for k, v in runs.items() for r in v], ignore_index=True)
#predictions = pd.concat([collect_one(k, r, 'preds_from_best_dice.csv') for k, v in runs.items() for r in v], ignore_index=True)

In [None]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)

<h2>Evaluation - Data regime variations</h2>

<p>Evaluate Hebbian models over the datasets, by varying the quantity of training data; only fine-tuned models are considered since during pre-training we can consider the whole dataset; only best temperature values for SWTA are considered.</p>

In [None]:
# Configuration of the KvasirSEG data-regime evaluation: for each model, the
# list of its regime-* folders under the selected inverse temperature.
EXP_ROOT = "/home/luca/dgx-a100/raid/home/lucaciampi/workspace/hebbian-skin-cancer-segmentation/runs"

REGIMES = ['0.01', '0.02', '0.05', '0.1', '0.2', '0.25']       # regimes to be considered
INV_TEMP_KVASIR=10          # to be set accordingly, used by SWTA

runs = {
    # UNET-based
    'UNet': list(Path(EXP_ROOT + '/experiment=kvasirSEG/unet_base/inv_temp-1').glob('regime-*')),
    'UNet-Pseudolabeling': list(Path(EXP_ROOT + '/experiment=kvasirSEG/unet/inv_temp-1').glob('regime-*')),
    'H-UNet-HPCA-FT': list(Path(EXP_ROOT + '/experiment=kvasirSEG/hunet_base-hpca_ft/inv_temp-1').glob('regime-*')),
    'H-UNet-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=kvasirSEG/hunet_base-hpca_t_ft/inv_temp-1').glob('regime-*')),
    'H-UNet-Pseudolabeling-HPCA-FT': list(Path(EXP_ROOT + '/experiment=kvasirSEG/hunet-hpca_ft/inv_temp-1').glob('regime-*')),
    'H-UNet-Pseudolabeling-HPCA-T-FT': list(Path(EXP_ROOT + '/experiment=kvasirSEG/hunet-hpca_t_ft/inv_temp-1').glob('regime-*')),
    # SWTA models use the KvasirSEG temperature defined in this cell (the PH2
    # constant used before is not defined here and belongs to another dataset).
    'H-UNet-SWTA-FT': list(Path(EXP_ROOT + '/experiment=kvasirSEG/hunet_base-swta_ft/inv_temp-{}'.format(INV_TEMP_KVASIR)).glob('regime-*')),
    'H-UNet-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=kvasirSEG/hunet_base-swta_t_ft/inv_temp-{}'.format(INV_TEMP_KVASIR)).glob('regime-*')),
    'H-UNet-Pseudolabeling-SWTA-FT': list(Path(EXP_ROOT + '/experiment=kvasirSEG/hunet-swta_ft/inv_temp-{}'.format(INV_TEMP_KVASIR)).glob('regime-*')),
    'H-UNet-Pseudolabeling-SWTA-T-FT': list(Path(EXP_ROOT + '/experiment=kvasirSEG/hunet-swta_t_ft/inv_temp-{}'.format(INV_TEMP_KVASIR)).glob('regime-*')),
}

In [None]:
# Collect predictions scanning runs.
# `runs` maps each model to its regime-* folders, so each folder is expanded into
# its run-* children with collect_all_regimes (collect_one expects one run-* folder).
# Only the regimes listed in REGIMES are collected.
# NOTE(review): 'preds.csv' is read, as in the GlaS and PH2 data-regime cells - confirm
# this is the intended prediction file for the fine-tuned KvasirSEG models.
predictions = pd.concat(
    [
        collect_all_regimes(k, r, 'preds.csv', regimes=REGIMES)
        for k, v in runs.items()
        for r in v
        if r.name.rsplit('-', 1)[1] in REGIMES
    ],
    ignore_index=True,
)

In [None]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper)

display(metrics)

summary = summarize_metrics(metrics)

display(summary)