In [1]:
import sys
sys.path.append('/storage/vbutoi/projects')
sys.path.append('/storage/vbutoi/libraries')
sys.path.append('/storage/vbutoi/projects/ESE')
sys.path.append('/storage/vbutoi/projects/UniverSeg')

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")
sns.set_context("talk")

import os 
os.environ['DATAPATH'] = ':'.join((
       '/storage/vbutoi/datasets',
))

# Results loader object does everything
from ionpy.analysis import ResultsLoader
from pathlib import Path
root = Path("/storage/vbutoi/scratch/ESE")
rs = ResultsLoader()

# For using code without restarting.
%load_ext autoreload
%autoreload 2
# For using yaml configs.
%load_ext yamlmagic

In [2]:
%%yaml results_cfg 

log:
    root: /storage/vbutoi/scratch/ESE/inference
    inference_groups: 
        - "03_15_24_RandomCircles_Expanded_VES"
    
log_attributes:
    03_15_24_RandomCircles_Expanded_VES:
        loss_func: "CE"
    03_20_24_RandomCircles_Dice_VES:
        loss_func: "Dice"

calibration:
    metric_cfg_file: "/storage/vbutoi/projects/ESE/ese/experiment/configs/inference/Calibration_Metrics.yaml"

options:
    add_baseline_rows: False  
    load_pixel_meters: False 
    add_dice_loss_rows: True
    drop_nan_metric_rows: True 
    load_groupavg_metrics: False
    remove_shared_columns: False
    equal_rows_per_cfg_assert: True 

<IPython.core.display.Javascript object>

In [3]:
from ese.experiment.analysis.analyze_inf import load_cal_inference_stats

image_info_df = load_cal_inference_stats(
    results_cfg=results_cfg,
    load_cached=True,
)




Finished loading inference stats.
Log amounts: log.root                                                                                                  log_set                                              
/storage/vbutoi/scratch/ESE/inference/03_15_24_RandomCircles_Expanded_VES/Shapes_Ensemble_LTS             20240315_161231-2CDJ-a788359552a51dce87e2d71910abae22    1620
                                                                                                          20240315_161234-6Q5H-5c2893c2d9f05d3f181dd78df4b163ea    1620
                                                                                                          20240315_161237-JJOU-310722af69728f5488a70575c0fa7b37    1620
                                                                                                          20240315_161241-8144-9ee0926fffe111e18061cfe21e8dc475    1620
                                                                                                          20240315_161244

In [4]:
# Sort the image_info_df by method name, so everything appears nicely
image_info_df = image_info_df.sort_values(by=['method_name', 'calibrator'])
# Make sure that the model_class 'Uncalibrated' is first
image_info_df['calibrator'] = image_info_df['calibrator'].astype('category')
image_info_df['calibrator'] = image_info_df['calibrator'].cat.reorder_categories([
    'Uncalibrated',
    'TempScaling', 
    'LTS'
])

In [5]:
# Select only the rows corresponding to group methods
image_info_df = image_info_df[image_info_df['model_type'] == 'group']

In [6]:
image_info_df['method_name'].unique()

array(['Ensemble (mean, probs)'], dtype=object)

In [7]:
image_info_df['method_name'] = image_info_df['method_name'].astype('category')
image_info_df['method_name'] = image_info_df['method_name'].cat.reorder_categories([
    'Average UNet',
    'Ensemble (mean, probs)',
])

image_info_df['split'] = image_info_df['split'].astype('category')
image_info_df['split'] = image_info_df['split'].cat.reorder_categories([
    'val',
    'cal'
])

ValueError: items in new_categories are not the same as in old categories

# Let's looks at the calibration scores of our models.

## ECE Metrics

In [None]:
g = sns.relplot(
    data=image_info_df,
    x="num_ensemble_members",
    y="ECE",
    hue="calibrator",
    style="loss_func",
    col="split",
    kind="line",
    height=8,
)
g.fig.suptitle("ECE by Calibration Method and Calibration Loss Function", fontsize=25)
g.set_xlabels("# Ensemble Members")
g.fig.subplots_adjust(top=0.85)
g.set(ylim=(0.0, 0.03))

In [None]:
def group_by_config(in_df):
    grouped_df = in_df.groupby([
        'ensemble_hash',
        'method_name',
        'loss_func',
        'calibrator',
        'split',
        'num_ensemble_members',
        'image_metric'
    ])
    # Mean over the metric_score columns
    meaned_groups =  grouped_df.agg({'metric_score': 'mean'}).reset_index()
    # Drop the NaN rows
    grouped_cfg = meaned_groups.dropna().reset_index(drop=True)
    return grouped_cfg

In [None]:
image_info_df['image_metric'].unique()

In [None]:
ece_metric_df = group_by_config(image_info_df[image_info_df['image_metric'] == 'Image_ECE'])

g = sns.relplot(
    data=ece_metric_df,
    x="num_ensemble_members",
    y="metric_score",
    hue="calibrator",
    style="loss_func",
    kind="line",
    col="split",
    height=8,
)
# Set the title of the bar plot
g.fig.suptitle("Image-level ECE for Different Calibration Methods and ", fontsize=25)
# Give the title a bit of spacing from the plot
g.fig.subplots_adjust(top=0.85)
g.set(ylim=(0.0, 0.03))
# Set the y axis to be between 0.5 and 1.0
# calibrators_width = num_calibrators - 1
# Change the y axis label to say ECEkjkj
g.set_ylabels("Image-level ECE")
g.set_xlabels("# Ensemble Members")
# Set the x ticks as 2, 4, 8, 12, 16
g.set(xticks=[2, 4, 8, 12, 16])

In [None]:
dice_metric_df = group_by_config(image_info_df[image_info_df['image_metric'] == 'Dice'])

g = sns.relplot(
    data=dice_metric_df,
    x="num_ensemble_members",
    y="metric_score",
    hue="calibrator",
    style="loss_func",
    kind="line",
    col="split",
    height=8,
)
# Set the title of the bar plot
g.fig.suptitle("Dice for Different Calibration Methods", fontsize=25)
# Give the title a bit of spacing from the plot
g.fig.subplots_adjust(top=0.85)
# Change the y axis label
g.set_ylabels("Dice")
g.set_xlabels("# Ensemble Members")
# Set the x ticks as 2, 4, 8, 12, 16
g.set(xticks=[2, 4, 8, 12, 16])

In [None]:
hd_metric_df = group_by_config(image_info_df[image_info_df['image_metric'] == 'HD95'])

g = sns.relplot(
    data=hd_metric_df,
    x="num_ensemble_members",
    y="metric_score",
    hue="calibrator",
    style="loss_func",
    kind="line",
    col="split",
    height=8,
)
# Set the title of the bar plot
g.fig.suptitle("HD95 for Different Calibration Methods", fontsize=25)
# Give the title a bit of spacing from the plot
g.fig.subplots_adjust(top=0.85)
# Change the y axis label
g.set_ylabels("HD95")
g.set_xlabels("# Ensemble Members")
# Set the x ticks as 2, 4, 8, 12, 16
g.set(xticks=[2, 4, 8, 12, 16])

In [None]:
boundary_metric_df = group_by_config(image_info_df[image_info_df['image_metric'] == 'BoundaryIOU'])

g = sns.relplot(
    data=boundary_metric_df,
    x="num_ensemble_members",
    y="metric_score",
    hue="calibrator",
    style="loss_func",
    kind="line",
    col="split",
    height=8,
)
# Set the title of the bar plot
g.fig.suptitle("Boundary IoU for Different Calibration Methods", fontsize=25)
# Give the title a bit of spacing from the plot
g.fig.subplots_adjust(top=0.85)
# Change the y axis label
g.set_ylabels("Boundary IoU")
g.set_xlabels("# Ensemble Members")
# Set the x ticks as 2, 4, 8, 12, 16
g.set(xticks=[2, 4, 8, 12, 16])

In [None]:
boundary_metric_df = group_by_config(image_info_df[image_info_df['image_metric'] == 'Accuracy'])

g = sns.relplot(
    data=boundary_metric_df,
    x="num_ensemble_members",
    y="metric_score",
    hue="calibrator",
    style="loss_func",
    kind="line",
    col="split",
    height=8,
)
# Set the title of the bar plot
g.fig.suptitle("Pixel-Accuracy for Different Calibration Methods", fontsize=25)
# Give the title a bit of spacing from the plot
g.fig.subplots_adjust(top=0.85)
# Change the y axis label
g.set_ylabels("Pixel-Accuracy")
g.set_xlabels("# Ensemble Members")
# Set the x ticks as 2, 4, 8, 12, 16
g.set(xticks=[2, 4, 8, 12, 16])

In [None]:
boundary_metric_df = group_by_config(image_info_df[image_info_df['image_metric'] == 'Recall'])

g = sns.relplot(
    data=boundary_metric_df,
    x="num_ensemble_members",
    y="metric_score",
    hue="calibrator",
    style="loss_func",
    kind="line",
    col="split",
    height=8,
)
# Set the title of the bar plot
g.fig.suptitle("Recall for Different Calibration Methods", fontsize=25)
# Give the title a bit of spacing from the plot
g.fig.subplots_adjust(top=0.85)
# Change the y axis label
g.set_ylabels("Recall")
g.set_xlabels("# Ensemble Members")
# Set the x ticks as 2, 4, 8, 12, 16
g.set(xticks=[2, 4, 8, 12, 16])

In [None]:
boundary_metric_df = group_by_config(image_info_df[image_info_df['image_metric'] == 'Precision'])

g = sns.relplot(
    data=boundary_metric_df,
    x="num_ensemble_members",
    y="metric_score",
    hue="calibrator",
    style="loss_func",
    kind="line",
    col="split",
    height=8,
)
# Set the title of the bar plot
g.fig.suptitle("Precision for Different Calibration Methods", fontsize=25)
g.fig.subplots_adjust(top=0.85)
# Change the y axis label
g.set_ylabels("Precision")
g.set_xlabels("# Ensemble Members")
# Set the x ticks as 2, 4, 8, 12, 16
g.set(xticks=[2, 4, 8, 12, 16])