In [1]:
import pandas as pd
import glob

%matplotlib inline

from pathlib import Path
from omegaconf import OmegaConf

In [2]:
# Collect predictions

def check_num_epochs(run, num_epochs=100):
    """Warn on stdout when a run's validation log is missing or too short.

    Args:
        run: Run directory; must support the ``/`` path operator
            (e.g. a ``pathlib.Path``).
        num_epochs: Minimum number of rows expected in ``valid_log.csv``.

    Returns:
        None. The function only prints; it never raises for a short or
        missing log.
    """
    log_path = Path(run / "valid_log.csv")

    # Guard clause: nothing to count when the log file is absent.
    if not log_path.is_file():
        print("valid_log.csv not exists in run: {}".format(run))
        return

    # The first CSV column is used as the index, so each remaining row is one logged epoch.
    logged_rows = len(pd.read_csv(log_path, index_col=0))
    if logged_rows < num_epochs:
        print("Wrong number of epochs in run: {}".format(run))

def collect_one(model_name, run, csv_file, num_epochs=100):
    """Load the test predictions of one run and tag them with run metadata.

    The run directory is expected to sit at
    ``.../inv_temp-<value>/regime-<value>/<run>``; both values are parsed
    from the names of those two parent directories.

    Args:
        model_name: Label written to the ``model`` column.
        run: Run directory; must support the ``/`` path operator and expose
            ``.parent`` (e.g. a ``pathlib.Path``).
        csv_file: Name of the predictions file inside ``run / 'test_predictions'``.
        num_epochs: Minimum number of rows expected in the run's
            ``valid_log.csv``; forwarded to ``check_num_epochs``, which only
            prints a warning.

    Returns:
        The predictions as a ``DataFrame`` with the added columns ``model``,
        ``run_number``, ``inv_temp`` and ``regime``. An empty ``DataFrame``
        without columns is returned when the predictions file does not exist.
    """
    check_num_epochs(run, num_epochs=num_epochs)

    cfg = OmegaConf.load(run / '.hydra' / 'config.yaml')
    cfg = OmegaConf.to_container(cfg)

    run_number = cfg['data']['train']['cross_val_bucket_validation_index']

    # Split on the FIRST hyphen only: the prefixes ('regime', 'inv_temp') contain
    # no hyphen, while the value itself may (e.g. '1e-05' or '-1.0'). Splitting
    # from the right would silently turn 'inv_temp-1e-05' into 5.0.
    parent_parts = run.parent.parts
    regime = float(parent_parts[-1].split('-', 1)[1])
    inv_temp = float(parent_parts[-2].split('-', 1)[1])

    csv_path = run / 'test_predictions' / csv_file
    if not csv_path.exists():
        print(f'Skipping not found: {csv_path}')
        return pd.DataFrame()

    data = pd.read_csv(csv_path)
    if data.empty:
        # Only reported; the (empty) frame still receives the metadata columns below.
        print(f'Pred file is empty: {csv_path}')

    data['model'] = model_name
    data['run_number'] = run_number
    data['inv_temp'] = inv_temp
    data['regime'] = regime

    return data

def collect_all(model_name, root, csv_file, num_epochs=100):
    """Collect the predictions of every run found below a model directory.

    Runs are searched with the layout ``root/inv_temp-*/regime-*/run-*`` and
    each one is loaded with ``collect_one``.

    Args:
        model_name: Label written to the ``model`` column of every row.
        root: Directory (``str`` or ``pathlib.Path``) holding the
            ``inv_temp-*`` sub-directories of one model.
        csv_file: Name of the predictions file inside each run's
            ``test_predictions`` directory.
        num_epochs: Forwarded to ``collect_one`` for its validation-log check.

    Returns:
        A single ``DataFrame`` with the rows of all runs (index reset), or an
        empty ``DataFrame`` when no run directory matches the layout.
    """
    root = Path(root)

    # Glob order is filesystem-dependent; sorting keeps the row order reproducible.
    frames = [
        collect_one(model_name, run, csv_file, num_epochs=num_epochs)
        for inv_temp_dir in sorted(root.glob("inv_temp-*"))
        for regime_dir in sorted(inv_temp_dir.glob("regime-*"))
        for run in sorted(regime_dir.glob("run-*"))
    ]

    # pd.concat raises ValueError on an empty list; report and return an empty
    # frame instead, mirroring what collect_one does for a missing file.
    if not frames:
        print(f'No runs found under: {root}')
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

In [3]:
# Compute metrics for each detected run

def compute_metrics(data, grouping):
    """Average the per-sample segmentation scores of each group.

    Args:
        data: Predictions frame containing the ``grouping`` columns plus
            ``segm/dice`` and ``segm/jaccard``. It is copied, not modified.
        grouping: List of four column names whose group keys are reported,
            in order, as model, run number, inverse temperature and regime.

    Returns:
        A ``DataFrame`` with one row per group and the columns
        ``Model``, ``# Run``, ``Inv Temp``, ``Regime``, ``Dice``, ``Jaccard``.
    """
    frame = data.copy().reset_index()

    # .values.mean() is NumPy's mean, so a NaN score propagates into the result.
    rows = [
        [
            keys[0],
            keys[1],
            keys[2],
            keys[3],
            samples['segm/dice'].values.mean(),
            samples['segm/jaccard'].values.mean(),
        ]
        for keys, samples in frame.groupby(grouping)
    ]

    return pd.DataFrame(
        rows,
        columns=['Model', '# Run', 'Inv Temp', 'Regime', 'Dice', 'Jaccard'],
    )

def summarize_metrics(metrics):
    """Summarise per-run scores as mean and standard deviation per configuration.

    Args:
        metrics: Frame with the columns ``Model``, ``Inv Temp``, ``Regime``,
            ``Dice`` and ``Jaccard`` (as produced by ``compute_metrics``).

    Returns:
        A ``DataFrame`` indexed by (``Model``, ``Inv Temp``, ``Regime``) whose
        columns are the ``mean`` and ``std`` of ``Dice`` and ``Jaccard``.
        ``std`` is NaN for a configuration that has a single run.
    """
    config_keys = ['Model', 'Inv Temp', 'Regime']
    score_columns = ['Dice', 'Jaccard']

    per_config_scores = metrics.groupby(config_keys)[score_columns]
    return per_config_scores.agg(['mean', 'std'])

<h1>Evaluation</h1>

<p>Evaluate models over datasets (5-fold cross-validation)</p>

<h2>PH2 Dataset</h2>

In [7]:
# Set ROOT = "./" instead to read results relative to the current working directory.
ROOT = "/mnt/Ext_Data/results/hebbian-medical-image-segmentation"

# Display name of each model -> matching results folder(s) inside the PH2 experiment.
runs = {
    label: list(Path(ROOT + '/runs/experiment=ph2/').glob(folder))
    for label, folder in (
        ('UNet', 'unet'),
        ('H-UNet-SWTA', 'hunet-swta'),
        ('H-UNet-SWTA-FT', 'hunet-swta_ft'),
        ('H-UNet-SWTA-T', 'hunet-swta_t'),
        ('H-UNet-SWTA-T-FT', 'hunet-swta_t_ft'),
    )
}

In [8]:
# Load the 'preds.csv' predictions of every run found for each model into one frame
predictions = pd.concat(
    [
        collect_all(model_name, model_root, 'preds.csv')
        for model_name, model_roots in runs.items()
        for model_root in model_roots
    ],
    ignore_index=True,
)

In [9]:
# One row per (model, run, inv_temp, regime) with the mean Dice and Jaccard of its samples
model_grouper = [
    'model',
    'run_number',
    'inv_temp',
    'regime',
]
metrics = compute_metrics(predictions, model_grouper)
display(metrics)

# Mean and standard deviation across runs for each (model, inv_temp, regime)
summary = summarize_metrics(metrics)
display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard
0,H-UNet-SWTA,0,0.001,1.0,0.347459,0.220506
1,H-UNet-SWTA,0,0.002,1.0,0.40633,0.272467
2,H-UNet-SWTA,0,0.005,1.0,0.376278,0.253494
3,H-UNet-SWTA,0,2.0,1.0,0.785376,0.659664
4,H-UNet-SWTA,0,5.0,1.0,0.794921,0.672899
5,H-UNet-SWTA,0,10.0,1.0,0.807764,0.689072
6,H-UNet-SWTA-FT,0,0.001,1.0,0.761935,0.632993
7,H-UNet-SWTA-FT,0,0.002,1.0,0.817542,0.703456
8,H-UNet-SWTA-FT,0,0.005,1.0,0.816057,0.70295
9,H-UNet-SWTA-FT,0,2.0,1.0,0.857253,0.758608


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
H-UNet-SWTA,0.001,1.0,0.347459,,0.220506,
H-UNet-SWTA,0.002,1.0,0.40633,,0.272467,
H-UNet-SWTA,0.005,1.0,0.376278,,0.253494,
H-UNet-SWTA,2.0,1.0,0.785376,,0.659664,
H-UNet-SWTA,5.0,1.0,0.794921,,0.672899,
H-UNet-SWTA,10.0,1.0,0.807764,,0.689072,
H-UNet-SWTA-FT,0.001,1.0,0.761935,,0.632993,
H-UNet-SWTA-FT,0.002,1.0,0.817542,,0.703456,
H-UNet-SWTA-FT,0.005,1.0,0.816057,,0.70295,
H-UNet-SWTA-FT,2.0,1.0,0.857253,,0.758608,


<h2>ISIC 2016 Dataset</h2>

In [22]:
# Set ROOT = "./" instead to read results relative to the current working directory.
ROOT = "/mnt/Ext_Data/results/hebbian-medical-image-segmentation"

# Display name of each model -> matching results folder(s) inside the ISIC 2016 experiment.
runs = {
    label: list(Path(ROOT + '/runs/experiment=isic2016/').glob(folder))
    for label, folder in (
        ('UNet', 'unet'),
        ('H-UNet-SWTA', 'hunet-swta'),
        ('H-UNet-SWTA-FT', 'hunet-swta_ft'),
        ('H-UNet-SWTA-T', 'hunet-swta_t'),
        ('H-UNet-SWTA-T-FT', 'hunet-swta_t_ft'),
    )
}

In [23]:
# Load the 'preds.csv' predictions of every run found for each model into one frame
predictions = pd.concat(
    [
        collect_all(model_name, model_root, 'preds.csv')
        for model_name, model_roots in runs.items()
        for model_root in model_roots
    ],
    ignore_index=True,
)

In [24]:
# One row per (model, run, inv_temp, regime) with the mean Dice and Jaccard of its samples
model_grouper = [
    'model',
    'run_number',
    'inv_temp',
    'regime',
]
metrics = compute_metrics(predictions, model_grouper)
display(metrics)

# Mean and standard deviation across runs for each (model, inv_temp, regime)
summary = summarize_metrics(metrics)
display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard
0,H-UNet-SWTA,0,0.001,1.0,0.27962,0.171027
1,H-UNet-SWTA,0,0.002,1.0,0.367899,0.237425
2,H-UNet-SWTA,0,0.005,1.0,0.473021,0.334356
3,H-UNet-SWTA,0,2.0,1.0,0.737769,0.609246
4,H-UNet-SWTA,0,5.0,1.0,0.730096,0.597795
5,H-UNet-SWTA,0,10.0,1.0,0.743858,0.62322
6,H-UNet-SWTA-FT,0,0.001,1.0,0.820996,0.713543
7,H-UNet-SWTA-FT,0,0.002,1.0,0.778024,0.657678
8,H-UNet-SWTA-FT,0,0.005,1.0,0.734156,0.600639
9,H-UNet-SWTA-FT,0,2.0,1.0,0.848543,0.749035


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
H-UNet-SWTA,0.001,1.0,0.27962,,0.171027,
H-UNet-SWTA,0.002,1.0,0.367899,,0.237425,
H-UNet-SWTA,0.005,1.0,0.473021,,0.334356,
H-UNet-SWTA,2.0,1.0,0.737769,,0.609246,
H-UNet-SWTA,5.0,1.0,0.730096,,0.597795,
H-UNet-SWTA,10.0,1.0,0.743858,,0.62322,
H-UNet-SWTA-FT,0.001,1.0,0.820996,,0.713543,
H-UNet-SWTA-FT,0.002,1.0,0.778024,,0.657678,
H-UNet-SWTA-FT,0.005,1.0,0.734156,,0.600639,
H-UNet-SWTA-FT,2.0,1.0,0.848543,,0.749035,


<h2>KvasirSEG Dataset</h2>

In [25]:
# Set ROOT = "./" instead to read results relative to the current working directory.
ROOT = "/mnt/Ext_Data/results/hebbian-medical-image-segmentation"

# Display name of each model -> matching results folder(s) inside the KvasirSEG experiment.
runs = {
    label: list(Path(ROOT + '/runs/experiment=kvasirSEG/').glob(folder))
    for label, folder in (
        ('UNet', 'unet'),
        ('H-UNet-SWTA', 'hunet-swta'),
        ('H-UNet-SWTA-FT', 'hunet-swta_ft'),
        ('H-UNet-SWTA-T', 'hunet-swta_t'),
        ('H-UNet-SWTA-T-FT', 'hunet-swta_t_ft'),
    )
}

In [26]:
# Load the 'preds.csv' predictions of every run found for each model into one frame
predictions = pd.concat(
    [
        collect_all(model_name, model_root, 'preds.csv')
        for model_name, model_roots in runs.items()
        for model_root in model_roots
    ],
    ignore_index=True,
)

In [27]:
# One row per (model, run, inv_temp, regime) with the mean Dice and Jaccard of its samples
model_grouper = [
    'model',
    'run_number',
    'inv_temp',
    'regime',
]
metrics = compute_metrics(predictions, model_grouper)
display(metrics)

# Mean and standard deviation across runs for each (model, inv_temp, regime)
summary = summarize_metrics(metrics)
display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard
0,H-UNet-SWTA,0,0.001,1.0,0.26124,0.156116
1,H-UNet-SWTA,0,0.002,1.0,0.277637,0.170643
2,H-UNet-SWTA,0,0.005,1.0,0.264526,0.15861
3,H-UNet-SWTA,0,2.0,1.0,0.406727,0.272606
4,H-UNet-SWTA,0,5.0,1.0,0.409068,0.273103
5,H-UNet-SWTA,0,10.0,1.0,0.411893,0.276746
6,H-UNet-SWTA-FT,0,0.001,1.0,0.40815,0.275266
7,H-UNet-SWTA-FT,0,0.002,1.0,0.380882,0.250689
8,H-UNet-SWTA-FT,0,0.005,1.0,0.390853,0.258718
9,H-UNet-SWTA-FT,0,2.0,1.0,0.735841,0.616942


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
H-UNet-SWTA,0.001,1.0,0.26124,,0.156116,
H-UNet-SWTA,0.002,1.0,0.277637,,0.170643,
H-UNet-SWTA,0.005,1.0,0.264526,,0.15861,
H-UNet-SWTA,2.0,1.0,0.406727,,0.272606,
H-UNet-SWTA,5.0,1.0,0.409068,,0.273103,
H-UNet-SWTA,10.0,1.0,0.411893,,0.276746,
H-UNet-SWTA-FT,0.001,1.0,0.40815,,0.275266,
H-UNet-SWTA-FT,0.002,1.0,0.380882,,0.250689,
H-UNet-SWTA-FT,0.005,1.0,0.390853,,0.258718,
H-UNet-SWTA-FT,2.0,1.0,0.735841,,0.616942,


<h2>Data Science Bowl 2018 Dataset</h2>

In [28]:
# Set ROOT = "./" instead to read results relative to the current working directory.
ROOT = "/mnt/Ext_Data/results/hebbian-medical-image-segmentation"

# Display name of each model -> matching results folder(s) inside the Data Science Bowl 2018 experiment.
runs = {
    label: list(Path(ROOT + '/runs/experiment=datasciencebowl2018/').glob(folder))
    for label, folder in (
        ('UNet', 'unet'),
        ('H-UNet-SWTA', 'hunet-swta'),
        ('H-UNet-SWTA-FT', 'hunet-swta_ft'),
        ('H-UNet-SWTA-T', 'hunet-swta_t'),
        ('H-UNet-SWTA-T-FT', 'hunet-swta_t_ft'),
    )
}

In [29]:
# Load the 'preds.csv' predictions of every run found for each model into one frame
predictions = pd.concat(
    [
        collect_all(model_name, model_root, 'preds.csv')
        for model_name, model_roots in runs.items()
        for model_root in model_roots
    ],
    ignore_index=True,
)

In [30]:
# One row per (model, run, inv_temp, regime) with the mean Dice and Jaccard of its samples
model_grouper = [
    'model',
    'run_number',
    'inv_temp',
    'regime',
]
metrics = compute_metrics(predictions, model_grouper)
display(metrics)

# Mean and standard deviation across runs for each (model, inv_temp, regime)
summary = summarize_metrics(metrics)
display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard
0,H-UNet-SWTA,0,0.001,1.0,0.433182,0.29395
1,H-UNet-SWTA,0,0.002,1.0,0.633607,0.479857
2,H-UNet-SWTA,0,0.005,1.0,0.169068,0.097866
3,H-UNet-SWTA,0,2.0,1.0,0.759079,0.622942
4,H-UNet-SWTA,0,5.0,1.0,0.752148,0.617175
5,H-UNet-SWTA,0,10.0,1.0,0.780533,0.65393
6,H-UNet-SWTA-FT,0,0.001,1.0,0.809131,0.692046
7,H-UNet-SWTA-FT,0,0.002,1.0,0.779566,0.657712
8,H-UNet-SWTA-FT,0,0.005,1.0,0.731529,0.59898
9,H-UNet-SWTA-FT,0,2.0,1.0,0.82414,0.713056


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
H-UNet-SWTA,0.001,1.0,0.433182,,0.29395,
H-UNet-SWTA,0.002,1.0,0.633607,,0.479857,
H-UNet-SWTA,0.005,1.0,0.169068,,0.097866,
H-UNet-SWTA,2.0,1.0,0.759079,,0.622942,
H-UNet-SWTA,5.0,1.0,0.752148,,0.617175,
H-UNet-SWTA,10.0,1.0,0.780533,,0.65393,
H-UNet-SWTA-FT,0.001,1.0,0.809131,,0.692046,
H-UNet-SWTA-FT,0.002,1.0,0.779566,,0.657712,
H-UNet-SWTA-FT,0.005,1.0,0.731529,,0.59898,
H-UNet-SWTA-FT,2.0,1.0,0.82414,,0.713056,


<h2><s>BrainMRI Dataset</s></h2>

In [5]:
# Results are read relative to the current working directory for this dataset.
ROOT = "./"

# Display name of each model -> matching results folder(s) inside the BrainMRI experiment.
# The plain 'UNet' entry ('unet' folder) is disabled for this dataset.
runs = {
    label: list(Path(ROOT + '/runs/experiment=brainMRI/').glob(folder))
    for label, folder in (
        ('H-UNet-SWTA', 'hunet-swta'),
        ('H-UNet-SWTA-FT', 'hunet-swta_ft'),
        ('H-UNet-SWTA-T', 'hunet-swta_t'),
        ('H-UNet-SWTA-T-FT', 'hunet-swta_t_ft'),
    )
}

In [6]:
# Load the 'preds.csv' predictions of every run found for each model into one frame
predictions = pd.concat(
    [
        collect_all(model_name, model_root, 'preds.csv')
        for model_name, model_roots in runs.items()
        for model_root in model_roots
    ],
    ignore_index=True,
)

In [7]:
# One row per (model, run, inv_temp, regime) with the mean Dice and Jaccard of its samples
model_grouper = [
    'model',
    'run_number',
    'inv_temp',
    'regime',
]
metrics = compute_metrics(predictions, model_grouper)
display(metrics)

# Mean and standard deviation across runs for each (model, inv_temp, regime)
summary = summarize_metrics(metrics)
display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard
0,H-UNet-SWTA,0,0.001,1.0,0.111565,0.055784
1,H-UNet-SWTA,0,0.002,1.0,0.046428,0.023943
2,H-UNet-SWTA,0,0.005,1.0,0.042155,0.022117
3,H-UNet-SWTA,0,2.0,1.0,0.290193,0.175805
4,H-UNet-SWTA,0,5.0,1.0,0.344112,0.224951
5,H-UNet-SWTA,0,10.0,1.0,0.410864,0.26234
6,H-UNet-SWTA-FT,0,0.001,1.0,0.494738,0.296692
7,H-UNet-SWTA-FT,0,0.002,1.0,0.505196,0.314623
8,H-UNet-SWTA-FT,0,0.005,1.0,0.414225,0.269342
9,H-UNet-SWTA-FT,0,2.0,1.0,0.779051,0.488952


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
H-UNet-SWTA,0.001,1.0,0.111565,,0.055784,
H-UNet-SWTA,0.002,1.0,0.046428,,0.023943,
H-UNet-SWTA,0.005,1.0,0.042155,,0.022117,
H-UNet-SWTA,2.0,1.0,0.290193,,0.175805,
H-UNet-SWTA,5.0,1.0,0.344112,,0.224951,
H-UNet-SWTA,10.0,1.0,0.410864,,0.26234,
H-UNet-SWTA-FT,0.001,1.0,0.494738,,0.296692,
H-UNet-SWTA-FT,0.002,1.0,0.505196,,0.314623,
H-UNet-SWTA-FT,0.005,1.0,0.414225,,0.269342,
H-UNet-SWTA-FT,2.0,1.0,0.779051,,0.488952,
