In [1]:
import pandas as pd
import math
import numpy as np
import json

from pathlib import Path
import scipy.stats as st

In [2]:
# Collect predictions

def check_num_epochs(run):
    if Path(run / "val_log.csv").is_file():
        with open(run / 'config.json') as f:
            cfg = json.load(f)

        num_epochs = cfg['num_epochs']
        val_iter = cfg['validate_iter']
        num_epochs = int(num_epochs / val_iter)

        valid_log = pd.read_csv(run / "val_log.csv", header=None, index_col=0)
        if (len(valid_log.index)-1) < num_epochs:
            print("Wrong number of epochs in run: {}".format(run))
    else:
        print("val_log.csv not exists in run: {}".format(run))

def check_only_one_tensorboard(run):
    if len(list(Path(run / "runs").glob('*'))) > 1:
        print("More than 1 tensorboard folder in run: {}".format(run))

def collect_one(model_name, run, csv_file):
    check_num_epochs(run)
    check_only_one_tensorboard(run)
    
    with open(run / 'config.json') as f:
        cfg = json.load(f)

    run_number = cfg['seed']
    regime, inv_temp = float(run.parent.parts[-1].rsplit('-', 1)[1]), float(run.parent.parts[-2].rsplit('-', 1)[1])

    csv_path = run / csv_file
    if not csv_path.exists():
        print(f'Skipping not found: {csv_path}')
        return pd.DataFrame()
    
    data = pd.read_csv(csv_path)
    if data.empty:
        print(f'Pred file is empty: {csv_path}')

    data['model'] = model_name
    data['run_number'] = int(run_number)
    data['inv_temp'] = int(inv_temp)
    data['regime'] = int(regime)
    
    return data

def collect_all(model_name, root, csv_file, regimes=['1', '2', '5', '10', '20','100'], ignore_outliers=True):
    root = Path(root)

    metrics = []
    for inv_temp in list(root.glob("inv_temp-*")):
        for regime in list(inv_temp.glob("regime-*")):
            if regime.name.rsplit("-", 1)[1] in regimes:
                for run in list(regime.glob("run-*")):
                    if ignore_outliers and 'outlier' in run.as_posix():
                        continue
                    else:
                        metrics.append(collect_one(model_name, run, csv_file))
        
    metrics = pd.concat(metrics, ignore_index=True)
    
    return metrics

In [3]:
# Compute metrics for each detected run

def compute_metrics(data, grouping, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance']):
    metrics_dict_names = {
        'Dice': 'dice',
        'Jaccard': 'jaccard',
        'Hausdorff Distance': '95hd',
        'Average Surface Distance': 'asd'
    }

    columns = ['Model', '# Run', 'Inv Temp', 'Regime']
    columns.extend(metric_names)
    metrics = []
    
    data = data.copy().reset_index()
    grouped = data.groupby(grouping)
    
    for model_group, predictions in grouped:
        model_name, run_number, inv_temp, regime = model_group[0], int(model_group[1]), int(model_group[2]), int(model_group[3])
        
        metric_values = []
        for metric_name in metric_names:
            values = predictions['segm/{}'.format(metrics_dict_names[metric_name])].values
            mean_value = np.nanmean(values)
            if metric_name == 'Dice' or metric_name == 'Jaccard':
                metric_values.append((math.ceil(mean_value*10000)/10000)*100)
            else:
                if not np.isnan(mean_value):
                    metric_values.append(math.ceil(mean_value*100)/100)
                else:
                    metric_values.append(mean_value)
        
        metrics.append([model_name, run_number, inv_temp, regime, *metric_values])
        
    metrics_df = pd.DataFrame(metrics, columns=columns)
    
    return metrics_df

def summarize_metrics(metrics, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance'], confidence_level=0.90, return_ranges=False):

    def compute_ci(values, return_ranges=False):
        ci = st.t.interval(confidence_level, len(values)-1, loc=np.mean(values), scale=np.std(values)/(len(values)**0.5))

        if return_ranges:
            return ci
        else:
            return (ci[1]-ci[0]) / 2

    #mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', np.mean), ('STD', np.std), ("CI {}%".format(confidence_level), compute_ci)])
    mean_metrics = metrics.groupby(['Model', 'Inv Temp', 'Regime'])[metric_names].aggregate([('Mean', 'mean'), ("CI {}%".format(confidence_level), compute_ci)]) 

    return mean_metrics

<h1>GlaS Dataset</h1>

<h2>Evaluation - Searching temperature hyperparameter</h2>

<p>Evaluate Hebbian models belonging to SWTA paradigm to search best temperature values (this value is dataset-specific)</p>

In [13]:
EXP_ROOT = "./runs"

REGIMES = ['100']

runs = {
    'H-UNet-SWTA-T': list(Path(EXP_ROOT + '/GlaS/hebbian_unsup/').glob('unet_swta_t')),
    'H-UNet-URPC-SWTA-T': list(Path(EXP_ROOT + '/GlaS/hebbian_unsup/').glob('unet_urpc_swta_t')),
    'H-UNet-CCT-SWTA-T': list(Path(EXP_ROOT + '/GlaS/hebbian_unsup/').glob('unet_cct_swta_t')),
}

In [14]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'test.csv', regimes=REGIMES, ignore_outliers=False) for k, v in runs.items() for r in v], ignore_index=True)

In [15]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper, metric_names=['Dice', 'Jaccard'])

display(metrics)

summary = summarize_metrics(metrics, metric_names=['Dice', 'Jaccard'])

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard
0,H-UNet-CCT-SWTA-T,0,1,100,38.71,24.0
1,H-UNet-CCT-SWTA-T,0,5,100,39.73,24.79
2,H-UNet-CCT-SWTA-T,0,10,100,40.49,25.38
3,H-UNet-CCT-SWTA-T,0,20,100,41.47,26.16
4,H-UNet-CCT-SWTA-T,0,50,100,41.68,26.33
5,H-UNet-CCT-SWTA-T,0,75,100,41.73,26.37
6,H-UNet-CCT-SWTA-T,0,100,100,41.75,26.38
7,H-UNet-SWTA-T,0,1,100,37.59,23.14
8,H-UNet-SWTA-T,0,5,100,38.85,24.11
9,H-UNet-SWTA-T,0,10,100,40.69,25.55


  lower_bound = _a * scale + loc
  upper_bound = _b * scale + loc


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,CI 0.9%,Mean,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
H-UNet-CCT-SWTA-T,1,100,38.71,,24.0,
H-UNet-CCT-SWTA-T,5,100,39.73,,24.79,
H-UNet-CCT-SWTA-T,10,100,40.49,,25.38,
H-UNet-CCT-SWTA-T,20,100,41.47,,26.16,
H-UNet-CCT-SWTA-T,50,100,41.68,,26.33,
H-UNet-CCT-SWTA-T,75,100,41.73,,26.37,
H-UNet-CCT-SWTA-T,100,100,41.75,,26.38,
H-UNet-SWTA-T,1,100,37.59,,23.14,
H-UNet-SWTA-T,5,100,38.85,,24.11,
H-UNet-SWTA-T,10,100,40.69,,25.55,


In [4]:
EXP_ROOT = "/home/luca/datino/results/hebbian-bootstraping-semi-supervised-medical-imaging/runs"

REGIMES = ['1', '2', '5', '10', '20']

runs = {
    #'H-UNet-SWTA-T': list(Path(EXP_ROOT + '/GlaS/semi_sup/').glob('h_unet_swta_t')),
    'H-EM-SWTA-T': list(Path(EXP_ROOT + '/GlaS/semi_sup/').glob('h_em_unet_swta_t')),
    #'H-UAMT-SWTA-T': list(Path(EXP_ROOT + '/GlaS/semi_sup/').glob('h_uamt_unet_swta_t')),
    #'H-CPS-SWTA-T': list(Path(EXP_ROOT + '/GlaS/semi_sup/').glob('h_cps_unet_swta_t')),
    #'H-URPC-SWTA-T': list(Path(EXP_ROOT + '/GlaS/semi_sup/').glob('h_urpc_unet_swta_t')),
    #'H-CCT-SWTA-T': list(Path(EXP_ROOT + '/GlaS/semi_sup/').glob('h_cct_unet_swta_t')),
}

In [6]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'test.csv', regimes=REGIMES, ignore_outliers=False) for k, v in runs.items() for r in v], ignore_index=True)

In [7]:
# Computing metrics
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance'])

display(metrics)

summary = summarize_metrics(metrics, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance'], confidence_level=0.90)

display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard,Hausdorff Distance,Average Surface Distance
0,H-EM-SWTA-T,0,1,1,68.59,52.19,38.35,10.62
1,H-EM-SWTA-T,0,1,2,74.34,59.16,24.49,5.77
2,H-EM-SWTA-T,0,1,5,74.13,58.89,19.07,4.55
3,H-EM-SWTA-T,0,1,10,77.50,63.26,18.90,4.79
4,H-EM-SWTA-T,0,1,20,80.17,66.90,18.55,3.99
...,...,...,...,...,...,...,...,...
345,H-EM-SWTA-T,9,100,1,70.19,54.07,29.23,7.94
346,H-EM-SWTA-T,9,100,2,67.77,51.25,48.26,13.15
347,H-EM-SWTA-T,9,100,5,75.84,61.09,18.82,4.88
348,H-EM-SWTA-T,9,100,10,77.85,63.74,22.37,4.99


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard,Hausdorff Distance,Hausdorff Distance,Average Surface Distance,Average Surface Distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%,Mean,CI 0.9%
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
H-EM-SWTA-T,1,1,70.649,1.220874,54.659,1.478809,30.982,3.14425,8.172,1.039686
H-EM-SWTA-T,1,2,71.376,1.843128,55.587,2.277829,28.262,4.696963,7.225,1.440011
H-EM-SWTA-T,1,5,75.703,0.930633,60.93,1.22186,22.206,1.424,5.273,0.391892
H-EM-SWTA-T,1,10,78.672,0.663363,64.858,0.903759,18.914,0.595354,4.316,0.179424
H-EM-SWTA-T,1,20,80.424,0.854865,67.282,1.181045,17.645,1.576727,3.897,0.333828
H-EM-SWTA-T,5,1,70.493,0.968644,54.456,1.170483,34.617,4.097124,9.329,1.363443
H-EM-SWTA-T,5,2,71.214,1.446702,55.352,1.766363,28.712,3.182565,7.521,1.075441
H-EM-SWTA-T,5,5,75.566,1.142967,60.766,1.460275,23.248,2.543913,5.665,0.70026
H-EM-SWTA-T,5,10,78.237,0.883378,64.275,1.182476,20.505,1.071442,4.55,0.255455
H-EM-SWTA-T,5,20,80.638,0.841879,67.578,1.189431,17.896,1.054702,3.91,0.266149


<h2>Evaluation - Weight init comparison</h2>

<p>Evaluate weight init methods (only for baseline)</p>

In [39]:
EXP_ROOT = "./runs"

REGIMES = ['1', '2', '5', '10', '20']

runs = {
    'Kaiming-UNet': list(Path(EXP_ROOT + '/GlaS/semi_sup/').glob('kaiming_unet')),
    'Xavier-UNet': list(Path(EXP_ROOT + '/GlaS/semi_sup/').glob('xavier_unet')),
    'Orthogonal-UNet': list(Path(EXP_ROOT + '/GlaS/semi_sup/').glob('orthogonal_unet')),
}

In [40]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'test.csv', regimes=REGIMES, ignore_outliers=False) for k, v in runs.items() for r in v], ignore_index=True)

In [None]:
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance'])

display(metrics)

summary = summarize_metrics(metrics, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance'], confidence_level=0.90)

display(summary)

<h2>Evaluation - Data regime variations</h2>

<p>Evaluate Hebbian models over the datasets, by varying the quantity of training data; only fine-tuned models are considered since during pre-training we can consider the whole dataset.</p>

In [33]:
EXP_ROOT = "./runs"

REGIMES = ['1', '2', '5', '10', '20']

runs = {
    'UNet': list(Path(EXP_ROOT + '/GlaS/semi_sup/').glob('kaiming_unet')),
    'EM': list(Path(EXP_ROOT + '/GlaS/semi_sup/').glob('em_unet')),
    'UAMT': list(Path(EXP_ROOT + '/GlaS/semi_sup/').glob('uamt_unet')),
    'CPS': list(Path(EXP_ROOT + '/GlaS/semi_sup/').glob('cps_unet')),
    'URPC': list(Path(EXP_ROOT + '/GlaS/semi_sup/').glob('urpc_unet')),
    'CCT': list(Path(EXP_ROOT + '/GlaS/semi_sup/').glob('cct_unet')),
}

In [34]:
# Collect predictions scanning runs
predictions = pd.concat([collect_all(k, r, 'test.csv', regimes=REGIMES, ignore_outliers=False) for k, v in runs.items() for r in v], ignore_index=True)

In [None]:
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(predictions, model_grouper, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance'])

display(metrics)

summary = summarize_metrics(metrics, metric_names=['Dice', 'Jaccard', 'Hausdorff Distance', 'Average Surface Distance'], confidence_level=0.90)

display(summary)