In [9]:
import pandas as pd
import glob

%matplotlib inline

from pathlib import Path
from omegaconf import OmegaConf

In [28]:
# Collect predictions

def check_num_epochs(run, num_epochs=100):
    """Print a warning when a run's validation log is missing or too short.

    Args:
        run: Path-like run directory; ``valid_log.csv`` is looked up inside it.
        num_epochs: Minimum number of rows the log is expected to contain.

    Returns:
        None. Problems are only reported on stdout; nothing is raised here.
    """
    log_file = run / "valid_log.csv"

    # Guard clause: without a log there is nothing to count.
    if not Path(log_file).is_file():
        print("valid_log.csv not exists in run: {}".format(run))
        return

    # The first CSV column is used as the index, so each row is one log entry.
    logged_rows = len(pd.read_csv(log_file, index_col=0))
    if logged_rows < num_epochs:
        print("Wrong number of epochs in run: {}".format(run))

def _dir_value(dirname):
    """Return the float encoded in a ``<name>-<value>`` directory name.

    The name is split on its FIRST hyphen so that values which themselves
    contain a hyphen (``inv_temp-1e-05``, ``inv_temp--1.0``) are parsed whole.
    This relies on the ``<name>`` part being hyphen-free, which holds for the
    ``inv_temp`` and ``regime`` prefixes used by this notebook.
    """
    return float(dirname.split('-', 1)[1])


def collect_one(model_name, run, csv_file, num_epochs=100):
    """Load the test predictions of a single run and tag them with run metadata.

    Args:
        model_name: Label stored in the ``model`` column of the result.
        run: ``pathlib.Path`` of the run directory. Its parent directory name
            is parsed as the regime value and its grandparent directory name
            as the inverse-temperature value (both ``<name>-<value>``).
        csv_file: File name looked up inside ``run / 'test_predictions'``.
        num_epochs: Forwarded to ``check_num_epochs`` (warning only).

    Returns:
        The predictions DataFrame with ``model``, ``run_number``, ``inv_temp``
        and ``regime`` columns added, or an empty DataFrame when the
        predictions file does not exist.
    """
    check_num_epochs(run, num_epochs=num_epochs)

    cfg = OmegaConf.to_container(OmegaConf.load(run / '.hydra' / 'config.yaml'))
    run_number = cfg['data']['train']['cross_val_bucket_validation_index']

    # parts[-1] is the directory holding the run, parts[-2] the one above it.
    parent_parts = run.parent.parts
    regime = _dir_value(parent_parts[-1])
    inv_temp = _dir_value(parent_parts[-2])

    csv_path = run / 'test_predictions' / csv_file
    if not csv_path.exists():
        print(f'Skipping not found: {csv_path}')
        return pd.DataFrame()

    data = pd.read_csv(csv_path)
    if data.empty:
        # Reported but not skipped: the (row-less) frame is still tagged below.
        print(f'Pred file is empty: {csv_path}')

    data['model'] = model_name
    data['run_number'] = run_number
    data['inv_temp'] = inv_temp
    data['regime'] = regime

    return data

def collect_all(model_name, root, csv_file):
    """Gather the predictions of every run found below ``root``.

    Walks ``root/inv_temp-*/regime-*/run-*`` and calls ``collect_one`` on each
    run directory.

    Args:
        model_name: Label forwarded to ``collect_one``.
        root: Directory (str or Path) containing the ``inv_temp-*`` folders.
        csv_file: Predictions file name forwarded to ``collect_one``.

    Returns:
        A DataFrame concatenating all non-empty per-run frames with a fresh
        index, or an empty DataFrame when no run produced any row (instead of
        letting ``pd.concat`` raise on an empty list).
    """
    root = Path(root)

    frames = []
    # sorted() keeps the traversal order independent of the filesystem listing.
    for inv_temp_dir in sorted(root.glob("inv_temp-*")):
        for regime_dir in sorted(inv_temp_dir.glob("regime-*")):
            for run_dir in sorted(regime_dir.glob("run-*")):
                frame = collect_one(model_name, run_dir, csv_file)
                # Skipped or empty runs come back as empty frames; drop them so
                # they do not take part in the concatenation.
                if not frame.empty:
                    frames.append(frame)

    if not frames:
        print(f'No predictions found under: {root}')
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

In [32]:
# Compute metrics for each detected run

def compute_metrics(data, grouping):
    """Average the segmentation scores of every group in ``data``.

    Args:
        data: DataFrame holding ``segm/dice`` and ``segm/jaccard`` columns plus
            the columns named in ``grouping``. It is copied, not modified.
        grouping: Column names to group by. The first four components of each
            group key are reported as Model, # Run, Inv Temp and Regime.

    Returns:
        DataFrame with one row per group and the columns
        ``Model, # Run, Inv Temp, Regime, Dice, Jaccard``.
    """
    frame = data.copy().reset_index()

    rows = [
        [
            key[0],
            key[1],
            key[2],
            key[3],
            # Plain NumPy means over the raw value arrays.
            group['segm/dice'].values.mean(),
            group['segm/jaccard'].values.mean(),
        ]
        for key, group in frame.groupby(grouping)
    ]

    return pd.DataFrame(
        rows,
        columns=['Model', '# Run', 'Inv Temp', 'Regime', 'Dice', 'Jaccard'],
    )

def summarize_metrics(metrics):
    """Aggregate per-run scores into mean and standard deviation.

    Args:
        metrics: DataFrame with ``Model``, ``Inv Temp``, ``Regime``, ``Dice``
            and ``Jaccard`` columns.

    Returns:
        DataFrame indexed by (Model, Inv Temp, Regime) whose columns are the
        ``mean`` and ``std`` of ``Dice`` and ``Jaccard``.
    """
    per_configuration = metrics.groupby(['Model', 'Inv Temp', 'Regime'])
    scores = per_configuration[['Dice', 'Jaccard']]
    return scores.agg(['mean', 'std'])

<h1>Evaluation</h1>

<p>Evaluate each model on every dataset using 5-fold cross-validation.</p>

<h2>PH2 Dataset</h2>

In [34]:
ROOT = "./"

# Map each model label to the matching directories found for the PH2
# experiment. A label whose folder does not exist maps to an empty list.
runs = {
    label: list(Path(ROOT + '/runs/experiment=ph2/').glob(folder))
    for label, folder in (
        # ('UNet', 'unet'),  # left disabled
        ('H-UNet-SWTA', 'hunet-swta'),
        ('H-UNet-SWTA-FT', 'hunet-swta_ft'),
        ('H-UNet-SWTA-T', 'hunet-swta_t'),
        ('H-UNet-SWTA-T-FT', 'hunet-swta_t_ft'),
    )
}

In [35]:
# Scan every discovered model directory and stack the predictions of its runs
predictions = pd.concat(
    [
        collect_all(model_label, model_dir, 'preds.csv')
        for model_label, model_dirs in runs.items()
        for model_dir in model_dirs
    ],
    ignore_index=True,
)

In [36]:
# Per-run scores: one row per (model, run, inverse temperature, regime)
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(data=predictions, grouping=model_grouper)
display(metrics)

# Mean and standard deviation of the per-run scores
summary = summarize_metrics(metrics=metrics)
display(summary)

Unnamed: 0,Model,# Run,Inv Temp,Regime,Dice,Jaccard
0,H-UNet-SWTA,0,0.001,1.0,0.012625,0.006558
1,H-UNet-SWTA,0,0.002,1.0,0.018465,0.009605
2,H-UNet-SWTA,0,0.005,1.0,0.021105,0.011076
3,H-UNet-SWTA,0,2.0,1.0,0.047838,0.026706
4,H-UNet-SWTA,0,5.0,1.0,0.046926,0.026091
5,H-UNet-SWTA,0,10.0,1.0,0.042818,0.023621
6,H-UNet-SWTA-FT,0,0.001,1.0,0.013135,0.00703
7,H-UNet-SWTA-FT,0,0.002,1.0,0.018622,0.009882
8,H-UNet-SWTA-FT,0,0.005,1.0,0.011755,0.006069
9,H-UNet-SWTA-FT,0,2.0,1.0,0.049373,0.027552


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Dice,Dice,Jaccard,Jaccard
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std
Model,Inv Temp,Regime,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
H-UNet-SWTA,0.001,1.0,0.012625,,0.006558,
H-UNet-SWTA,0.002,1.0,0.018465,,0.009605,
H-UNet-SWTA,0.005,1.0,0.021105,,0.011076,
H-UNet-SWTA,2.0,1.0,0.047838,,0.026706,
H-UNet-SWTA,5.0,1.0,0.046926,,0.026091,
H-UNet-SWTA,10.0,1.0,0.042818,,0.023621,
H-UNet-SWTA-FT,0.001,1.0,0.013135,,0.00703,
H-UNet-SWTA-FT,0.002,1.0,0.018622,,0.009882,
H-UNet-SWTA-FT,0.005,1.0,0.011755,,0.006069,
H-UNet-SWTA-FT,2.0,1.0,0.049373,,0.027552,


<h2>ISIC 2016 Dataset</h2>

In [37]:
ROOT = "./"

# Map each model label to the matching directories found for the ISIC 2016
# experiment. A label whose folder does not exist maps to an empty list.
runs = {
    label: list(Path(ROOT + '/runs/experiment=isic2016/').glob(folder))
    for label, folder in (
        # ('UNet', 'unet'),  # left disabled
        ('H-UNet-SWTA', 'hunet-swta'),
        ('H-UNet-SWTA-FT', 'hunet-swta_ft'),
        ('H-UNet-SWTA-T', 'hunet-swta_t'),
        ('H-UNet-SWTA-T-FT', 'hunet-swta_t_ft'),
    )
}

In [None]:
# Scan every discovered model directory and stack the predictions of its runs
predictions = pd.concat(
    [
        collect_all(model_label, model_dir, 'preds.csv')
        for model_label, model_dirs in runs.items()
        for model_dir in model_dirs
    ],
    ignore_index=True,
)

In [None]:
# Per-run scores: one row per (model, run, inverse temperature, regime)
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(data=predictions, grouping=model_grouper)
display(metrics)

# Mean and standard deviation of the per-run scores
summary = summarize_metrics(metrics=metrics)
display(summary)

<h2>KvasirSEG Dataset</h2>

In [None]:
ROOT = "./"

# Map each model label to the matching directories found for the KvasirSEG
# experiment. A label whose folder does not exist maps to an empty list.
runs = {
    label: list(Path(ROOT + '/runs/experiment=kvasirSEG/').glob(folder))
    for label, folder in (
        # ('UNet', 'unet'),  # left disabled
        ('H-UNet-SWTA', 'hunet-swta'),
        ('H-UNet-SWTA-FT', 'hunet-swta_ft'),
        ('H-UNet-SWTA-T', 'hunet-swta_t'),
        ('H-UNet-SWTA-T-FT', 'hunet-swta_t_ft'),
    )
}

In [None]:
# Scan every discovered model directory and stack the predictions of its runs
predictions = pd.concat(
    [
        collect_all(model_label, model_dir, 'preds.csv')
        for model_label, model_dirs in runs.items()
        for model_dir in model_dirs
    ],
    ignore_index=True,
)

In [None]:
# Per-run scores: one row per (model, run, inverse temperature, regime)
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(data=predictions, grouping=model_grouper)
display(metrics)

# Mean and standard deviation of the per-run scores
summary = summarize_metrics(metrics=metrics)
display(summary)

<h2>Data Science Bowl 2018 Dataset</h2>

In [None]:
ROOT = "./"

# Map each model label to the matching directories found for the Data Science
# Bowl 2018 experiment. A label whose folder does not exist maps to an empty list.
runs = {
    label: list(Path(ROOT + '/runs/experiment=datasciencebowl2018/').glob(folder))
    for label, folder in (
        # ('UNet', 'unet'),  # left disabled
        ('H-UNet-SWTA', 'hunet-swta'),
        ('H-UNet-SWTA-FT', 'hunet-swta_ft'),
        ('H-UNet-SWTA-T', 'hunet-swta_t'),
        ('H-UNet-SWTA-T-FT', 'hunet-swta_t_ft'),
    )
}

In [None]:
# Scan every discovered model directory and stack the predictions of its runs
predictions = pd.concat(
    [
        collect_all(model_label, model_dir, 'preds.csv')
        for model_label, model_dirs in runs.items()
        for model_dir in model_dirs
    ],
    ignore_index=True,
)

In [None]:
# Per-run scores: one row per (model, run, inverse temperature, regime)
model_grouper = ['model', 'run_number', 'inv_temp', 'regime']
metrics = compute_metrics(data=predictions, grouping=model_grouper)
display(metrics)

# Mean and standard deviation of the per-run scores
summary = summarize_metrics(metrics=metrics)
display(summary)