In [1]:
import sys
sys.path.append('/storage/vbutoi/projects')
sys.path.append('/storage/vbutoi/libraries')
sys.path.append('/storage/vbutoi/projects/ESE')
sys.path.append('/storage/vbutoi/projects/UniverSeg')

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")

import os 
os.environ['DATAPATH'] = ':'.join((
       '/storage/vbutoi/datasets',
))

# Results loader object does everything
from ionpy.analysis import ResultsLoader
from pathlib import Path
root = Path("/storage/vbutoi/scratch/ESE")
rs = ResultsLoader()

# For using code without restarting.
%load_ext autoreload
%autoreload 
# For using yaml configs.
%load_ext yamlmagic

In [2]:
from ese.experiment.analysis.inference import load_cal_inference_stats

# Inference runs to load. Only one entry is active; the other candidates are
# left commented out so they can be swapped in by hand.
inference_paths = [
    # root / "inference" / "01_07_24_WMH_UncalibratedEnsemble",
    # root / "inference" / "01_07_24_WMH_TempScalingEnsemble",
    # root / "inference" / "01_07_24_WMH_VectorScalingEnsemble",
    # root / "inference" / "01_07_24_WMH_DirichletScalingEnsemble",
    # root / "inference" / "01_07_24_WMH_LTSEnsemble",
    # root / "inference" / "01_08_24_WMH_WholeEnsembleUncalibrated",
    root / "inference" / "01_08_24_WMH_WholeEnsembleTemperatureScaling",
    # root / "inference" / "01_08_24_WMH_WholeEnsembleVectorScaling",
    # root / "inference" / "01_08_24_WMH_WholeEnsembleDirichletScaling",
    # root / "inference" / "01_08_24_WMH_WholeEnsembleLTS",
]

# Ask the loader for the image-level dataframe (load_image_df=True) but not
# for the pixel meters dict (load_pixel_meters_dict=False).
inference_info_dict = load_cal_inference_stats(
    log_dirs=inference_paths, load_image_df=True, load_pixel_meters_dict=False
)

In [3]:
# Pull the run-metadata frame out of the loader's result dict.
metadata_df = inference_info_dict["metadata_df"]

In [4]:
# Preview the first rows of the metadata frame.
metadata_df.head(n=5)

Unnamed: 0,calibration.conf_interval_end,calibration.conf_interval_start,calibration.neighborhood_width,calibration.num_bins,calibration.square_diff,dataloader.batch_size,dataloader.num_workers,dataloader.pin_memory,dataset._class,dataset.annotator,...,log.log_pixel_stats,log.root,log.show_examples,model.checkpoint,model.ensemble,model.ensemble_combine_fn,model.ensemble_pre_softmax,model.pretrained_exp_root,model.pretrained_select_metric,log_set
0,1.0,0.5,3,10,False,1,0,True,ese.experiment.datasets.WMH,observer_o12,...,False,/storage/vbutoi/scratch/ESE/inference/01_08_24...,False,min-val-ece_loss,True,max,False,/storage/vbutoi/scratch/ESE/calibration/01_07_...,val-dice_score,20240108_155620-Q7MA-0e780d105a6d9e43edfc525e5...
0,1.0,0.5,3,10,False,1,0,True,ese.experiment.datasets.WMH,observer_o12,...,False,/storage/vbutoi/scratch/ESE/inference/01_08_24...,False,min-val-ece_loss,True,mean,True,/storage/vbutoi/scratch/ESE/calibration/01_07_...,val-dice_score,20240108_155619-38QK-60e83393c9d8fcad04ac037f2...
0,1.0,0.5,3,10,False,1,0,True,ese.experiment.datasets.WMH,observer_o12,...,False,/storage/vbutoi/scratch/ESE/inference/01_08_24...,False,min-val-ece_loss,True,max,True,/storage/vbutoi/scratch/ESE/calibration/01_07_...,val-dice_score,20240108_155619-N302-d8a764a80f0d535deaf58d58d...
0,1.0,0.5,3,10,False,1,0,True,ese.experiment.datasets.WMH,observer_o12,...,False,/storage/vbutoi/scratch/ESE/inference/01_08_24...,False,min-val-ece_loss,True,mean,False,/storage/vbutoi/scratch/ESE/calibration/01_07_...,val-dice_score,20240108_155619-3CQH-27df19ef55562e800c4f7a69a...


In [5]:
# Pull the image-level statistics frame out of the loader's result dict.
image_info_df = inference_info_dict["image_info_df"]

In [6]:
# Preview the first rows of the image-level statistics frame.
image_info_df.head(n=5)

Unnamed: 0,qual_metric,qual_score,slice_idx,conf_interval_start,conf_interval_end,num_bins,neighborhood_width,square_diff,log_set
0,Dice,,0,0.5,1.0,10,3,False,20240108_155620-Q7MA-0e780d105a6d9e43edfc525e5...
1,Dice,,1,0.5,1.0,10,3,False,20240108_155620-Q7MA-0e780d105a6d9e43edfc525e5...
2,Dice,,2,0.5,1.0,10,3,False,20240108_155620-Q7MA-0e780d105a6d9e43edfc525e5...
3,Dice,,3,0.5,1.0,10,3,False,20240108_155620-Q7MA-0e780d105a6d9e43edfc525e5...
4,Dice,,4,0.5,1.0,10,3,False,20240108_155620-Q7MA-0e780d105a6d9e43edfc525e5...


In [7]:
# g = sns.catplot(
#     image_info_df, 
#     x="calibrator", 
#     y="qual_score", 
#     kind="box",
#     hue="log_set"
# )
# # g.set(xlim=(0, 1), ylim=(0, 1))
# g.tight_layout()
# plt.show()

In [20]:
def gather_log_set_stats(
        stats_df,
        metadata_df,
        target_metric,
        id_vars=('seed',)
        ):
    """Print summary statistics of one quality metric for every log set.

    For each distinct value of ``stats_df['log_set']`` (in order of first
    appearance) one line is printed with the mean, standard deviation, minimum
    and maximum of ``qual_score`` over the rows whose ``qual_metric`` equals
    ``target_metric``. The line is prefixed with the values of the ``id_vars``
    columns looked up in ``metadata_df`` for that log set.

    A log set with no non-missing matching scores is still reported; its
    statistics print as ``nan``.

    Args:
        stats_df: DataFrame with ``log_set``, ``qual_metric`` and
            ``qual_score`` columns.
        metadata_df: DataFrame with a ``log_set`` column and one column per
            entry of ``id_vars``.
        target_metric: Value of ``qual_metric`` to summarise.
        id_vars: Iterable of ``metadata_df`` column names used to label each
            log set. Defaults to ``('seed',)``.

    Returns:
        None. The summary is written to stdout.

    Raises:
        AssertionError: If an id variable does not have exactly one distinct
            value among the metadata rows of a log set (including the case
            where no metadata row matches the log set at all).
    """
    unique_log_sets = stats_df['log_set'].unique()
    # The metric filter does not depend on the log set, so apply it once
    # instead of re-filtering inside the loop.
    metric_df = stats_df[stats_df['qual_metric'] == target_metric]
    for log_set in unique_log_sets:
        scores = metric_df.loc[metric_df['log_set'] == log_set, 'qual_score']
        # Calculate basic statistics of performance.
        log_set_mean = scores.mean()
        log_set_std = scores.std()
        log_set_min = scores.min()
        log_set_max = scores.max()
        # Build the id string from the id_vars.
        id_string = "["
        log_set_metadata_df = metadata_df[metadata_df['log_set'] == log_set]
        for id_var in id_vars:
            id_value = log_set_metadata_df[id_var].unique()
            # Report the actual count: zero matches is as much an error as
            # several, and the old message only described the latter.
            assert len(id_value) == 1, (
                f"Expected exactly one value of id_var '{id_var}' for "
                f"log_set '{log_set}', found {len(id_value)}."
            )
            id_string += f" {id_var}:{id_value[0]},"
        print(f"{id_string} ] {target_metric} : {log_set_mean:.3f} +- {log_set_std:.3f} [{log_set_min:.3f}, {log_set_max:.3f}]")

In [21]:
# Report Dice per log set, labelling each line with its ensemble settings.
gather_log_set_stats(
    stats_df=image_info_df,
    metadata_df=metadata_df,
    target_metric="Dice",
    # id_vars=["experiment.pretrained_seed"],
    id_vars=["model.ensemble_pre_softmax", "model.ensemble_combine_fn"],
)

[ model.ensemble_pre_softmax:False, model.ensemble_combine_fn:max, ] Dice : nan +- nan [nan, nan]
[ model.ensemble_pre_softmax:True, model.ensemble_combine_fn:mean, ] Dice : nan +- nan [nan, nan]
[ model.ensemble_pre_softmax:True, model.ensemble_combine_fn:max, ] Dice : nan +- nan [nan, nan]
[ model.ensemble_pre_softmax:False, model.ensemble_combine_fn:mean, ] Dice : nan +- nan [nan, nan]
