In [None]:
import sys
sys.path.append('/storage/vbutoi/projects')
sys.path.append('/storage/vbutoi/libraries')
sys.path.append('/storage/vbutoi/projects/ESE')
sys.path.append('/storage/vbutoi/projects/UniverSeg')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style("darkgrid")
sns.set_context("talk")

import os 
os.environ['DATAPATH'] = ':'.join((
       '/storage/vbutoi/datasets',
))

from ese.experiment.analysis.analyze_inf import load_cal_inference_stats
# Results loader object does everything
from ionpy.analysis import ResultsLoader
from pathlib import Path
root = Path("/storage/vbutoi/scratch/ESE")
rs = ResultsLoader()

# For using code without restarting.
%load_ext autoreload
%autoreload 2
# For using yaml configs.
%load_ext yamlmagic

In [None]:
%%yaml results_cfg 

# Location of the inference runs and the run groups to load from that root.
log:
    root: /storage/vbutoi/scratch/ESE/inference
    inference_groups: 
        - '06_19_24_WMH_Hard'
        - '06_19_24_WMH_SVLS'

# Flags passed along with the config to load_cal_inference_stats.
# NOTE(review): their exact effect is defined in the loader, not in this notebook — confirm there before changing.
options:
    add_dice_loss_rows: True
    drop_nan_metric_rows: True 
    remove_shared_columns: False
    equal_rows_per_cfg_assert: False 

In [None]:
# Load the inference statistics described by `results_cfg` (the YAML cell above) into `inference_df`.
# NOTE(review): load_cached=True presumably reuses a previously cached result; if the inference
# groups or options change, confirm that the cache is refreshed.
inference_df = load_cal_inference_stats(
    results_cfg=results_cfg,
    load_cached=True,
)

In [None]:
# For the purpose of this experiment, we only care about a few columns in particular:
exp_columns = [
    "data_id",
    "slice_idx",
    "gt_volume",
    "hard_volume",
    "soft_volume",
    "pretrained_seed", 
    "pretrained_exp_root",
    "split",
    "task"
]
# Take these columns of the inference_df, drop other columns.
raw_exp_df = inference_df[exp_columns]
# For this experiment, we only want the entries from the 'Singapore' task.
# NOTE(review): no filter on 'split' is applied here, so rows from every split are kept — confirm this is intended.
raw_exp_df = raw_exp_df[raw_exp_df['task'] == 'Singapore']
# Finally, drop duplicated rows and reset the index of the df.
exp_df = raw_exp_df.drop_duplicates().reset_index(drop=True) 

In [None]:
# Number of distinct data_id values left in exp_df after the task filter.
len(exp_df['data_id'].unique())

In [None]:
# Sum the three volume columns over all rows that share the same
# (data_id, task, seed, experiment root, split) key. The aggregated columns are
# named with a space instead of an underscore directly in the aggregation.
volume_df = (
    exp_df
    .groupby(["data_id", "task", "pretrained_seed", "pretrained_exp_root", "split"])
    .agg(**{
        "gt volume": ("gt_volume", "sum"),
        "hard volume": ("hard_volume", "sum"),
        "soft volume": ("soft_volume", "sum"),
    })
    .reset_index()
)

In [None]:
# Signed volume error (prediction minus ground truth) for the soft and the hard prediction.
for kind in ('soft', 'hard'):
    volume_df[f'{kind} error'] = (volume_df[f'{kind} volume'] - volume_df['gt volume'])
# Normalized variant: the signed error divided by the ground truth volume.
for kind in ('soft', 'hard'):
    volume_df[f'{kind} relative error'] = volume_df[f'{kind} error'] / volume_df['gt volume']

In [None]:
volume_df

# Library Funcs

In [None]:
def prepare_error_df(raw_df, groupby_keys, value_vars, var_name, value_name):
    """Reshape wide error columns into long format and add an absolute-value column.

    Args:
        raw_df: Source DataFrame; it is copied first and never modified.
        groupby_keys: Columns kept as identifier columns for every melted row.
        value_vars: Columns that get stacked into a single value column.
        var_name: Name of the column holding the original column name of each value.
        value_name: Name of the column holding the stacked values.

    Returns:
        The long-format DataFrame with an extra ``f'absolute {value_name}'``
        column containing ``abs()`` of the value column.
    """
    long_df = raw_df.copy().melt(
        id_vars=groupby_keys,
        value_vars=value_vars,
        var_name=var_name,
        value_name=value_name,
    )
    # Magnitude of the (signed) value, used by the plots and summary tables.
    abs_col = f'absolute {value_name}'
    long_df[abs_col] = long_df[value_name].abs()
    return long_df


def calibrator(pretrained_exp_root):
    """Derive the calibrator name from an experiment root path.

    Returns "SVLS" when the substring "SVLS" occurs in ``pretrained_exp_root``,
    and "Uncalibrated" otherwise.
    """
    return "SVLS" if "SVLS" in pretrained_exp_root else "Uncalibrated"


def process_method_names(input_df, value_name):
    """Filter the melted error frame and label each row with its method name.

    Rows whose calibrator is not "Uncalibrated" and whose ``volume_type`` is
    ``f'hard {value_name}'`` are dropped. Every remaining row gets a
    ``'volume type'`` label:

    * "Uncalibrated <first word of volume_type>" for uncalibrated rows
      (e.g. "Uncalibrated soft", "Uncalibrated hard"),
    * "<calibrator> soft" for all other rows.

    Args:
        input_df: DataFrame with at least the columns ``calibrator``,
            ``volume_type`` and ``data_id``. It is not modified.
        value_name: Suffix of the value column names (e.g. "error"), used to
            recognise the hard-prediction rows.

    Returns:
        A new DataFrame, sorted by ``data_id``, with the added ``'volume type'`` column.
    """
    # Drop all the rows where calibrator != Uncalibrated AND the volume_type is the hard one.
    is_calibrated = input_df['calibrator'] != 'Uncalibrated'
    is_hard = input_df['volume_type'] == f'hard {value_name}'
    # Explicit copy: the column assignment below must not write into a slice of input_df.
    df = input_df[~(is_calibrated & is_hard)].copy()
    # Build both candidate labels column-wise. Unlike a row-wise apply(axis=1),
    # this also works when no rows are left after filtering.
    prediction_kind = df['volume_type'].str.split(" ").str[0]
    uncalibrated_label = "Uncalibrated " + prediction_kind
    calibrated_label = df['calibrator'] + " soft"
    df['volume type'] = uncalibrated_label.where(
        df['calibrator'] == "Uncalibrated",
        calibrated_label,
    )
    # Finally, sort by data_id and return the augmented dataframe.
    return df.sort_values(by="data_id")

# First, let's look at absolute error.

In [None]:
# `augment` is not a stock pandas method; presumably a project-registered DataFrame
# extension that adds a 'calibrator' column computed by `calibrator` — TODO confirm.
volume_df.augment(calibrator)
# Stack the signed soft/hard errors into one long 'error' column (plus 'absolute error').
raw_melted_error_df = prepare_error_df(
    raw_df=volume_df,
    groupby_keys=[
        "calibrator", "data_id", "pretrained_seed",
        "gt volume", "soft volume", "hard volume",
        "split",
    ],
    value_vars=["soft error", "hard error"],
    var_name="volume_type",
    value_name="error",
)
# Drop the rows we do not compare and attach the readable 'volume type' method label.
melted_error_df = process_method_names(input_df=raw_melted_error_df, value_name="error")

In [None]:
melted_error_df['volume type'].unique()

In [None]:
# Fix the order in which the methods appear by making 'volume type' a categorical
# column with an explicit category order.
melted_error_df['volume type'] = (
    melted_error_df['volume type']
    .astype('category')
    .cat.reorder_categories(['Uncalibrated hard', 'Uncalibrated soft', 'SVLS soft'])
)

# One fixed color per method.
custom_palette = dict([
    ('Uncalibrated hard', 'blue'),
    ('Uncalibrated soft', 'limegreen'),
    ('SVLS soft', 'darkorange'),
])

In [None]:
# One point per (data_id, seed, method): absolute volume error, colored by method.
g = sns.catplot(
    melted_error_df,
    x="data_id",
    y="absolute error",
    hue="volume type",
    aspect=3,
    height=7,
    sharex=False,
    sharey=False,
    palette=custom_palette
)
# For each subplot make a dashed reference line at y = 0 (zero error).
for ax in g.axes.flat:
    ax.axhline(0, ls='--', color='red')

# Leave room for the title. Work on the grid's own figure through `g.figure`
# (`g.fig` is the deprecated spelling) instead of pyplot's implicit current figure.
g.figure.subplots_adjust(top=0.85)
g.figure.suptitle('Soft/Hard Volumetric Error', fontsize=30)

# Show the plot
plt.show()

In [None]:
melted_error_df

In [None]:
# First step of the "average within each seed, then across seeds" summary:
# the mean absolute error for every (pretrained_seed, volume type) pair.
error_per_seed = (
    melted_error_df
    .groupby(["pretrained_seed", "volume type"])["absolute error"]
    .mean()
    .reset_index()
)

In [None]:
# Make a table with one row per volume type and one column per pretrained seed.
seed_error_table = error_per_seed.pivot_table(
    values="absolute error",
    index="volume type",
    columns="pretrained_seed",
    aggfunc="mean",
)
# Summarize across seeds. Both statistics are computed from the per-seed columns only:
# adding a 'mean' column to the table first and then calling .std(axis=1) on it would
# count the mean as an extra seed and understate the standard deviation.
method_error_table = pd.DataFrame({
    'mean': seed_error_table.mean(axis=1),
    'stdv': seed_error_table.std(axis=1),
})

In [None]:
method_error_table

# Second, let's look at relative error.

In [None]:
# `augment` is not a stock pandas method; presumably a project-registered DataFrame
# extension that adds a 'calibrator' column computed by `calibrator` — TODO confirm.
volume_df.augment(calibrator)
# Stack the soft/hard relative errors into one long 'relative error' column
# (plus 'absolute relative error').
raw_melted_relative_df = prepare_error_df(
    raw_df=volume_df,
    groupby_keys=[
        "calibrator", "data_id", "pretrained_seed",
        "gt volume", "soft volume", "hard volume",
        "split",
    ],
    value_vars=["soft relative error", "hard relative error"],
    var_name="volume_type",
    value_name="relative error",
)
# Drop the rows we do not compare and attach the readable 'volume type' method label.
melted_relative_df = process_method_names(input_df=raw_melted_relative_df, value_name="relative error")

In [None]:
melted_relative_df['volume type'].unique()

In [None]:
# Fix the order in which the methods appear by making 'volume type' a categorical
# column with an explicit category order.
melted_relative_df['volume type'] = (
    melted_relative_df['volume type']
    .astype('category')
    .cat.reorder_categories(['Uncalibrated hard', 'Uncalibrated soft', 'SVLS soft'])
)

# One fixed color per method.
custom_palette = dict([
    ('Uncalibrated hard', 'blue'),
    ('Uncalibrated soft', 'limegreen'),
    ('SVLS soft', 'darkorange'),
])

In [None]:
# One point per (data_id, seed, method): absolute relative volume error, colored by method.
g = sns.catplot(
    melted_relative_df,
    x="data_id",
    y="absolute relative error",
    hue="volume type",
    aspect=3,
    height=7,
    sharex=False,
    sharey=False,
    palette=custom_palette
)
# For each subplot make a dashed reference line at y = 0 (zero error).
for ax in g.axes.flat:
    ax.axhline(0.0, ls='--', color='red')

# Leave room for the title. Work on the grid's own figure through `g.figure`
# (`g.fig` is the deprecated spelling) instead of pyplot's implicit current figure.
g.figure.subplots_adjust(top=0.85)
g.figure.suptitle('Soft/Hard Volumetric Relative Error', fontsize=30)

# Show the plot
plt.show()

In [None]:
# First step of the "average within each seed, then across seeds" summary:
# the mean absolute relative error for every (pretrained_seed, volume type) pair.
relative_per_seed = (
    melted_relative_df
    .groupby(["pretrained_seed", "volume type"])["absolute relative error"]
    .mean()
    .reset_index()
)

In [None]:
# Make a table with one row per volume type and one column per pretrained seed.
seed_relative_table = relative_per_seed.pivot_table(
    values="absolute relative error",
    index="volume type",
    columns="pretrained_seed",
    aggfunc="mean",
)
# Summarize across seeds. Both statistics are computed from the per-seed columns only:
# adding a 'mean' column to the table first and then calling .std(axis=1) on it would
# count the mean as an extra seed and understate the standard deviation.
method_relative_table = pd.DataFrame({
    'mean': seed_relative_table.mean(axis=1),
    'stdv': seed_relative_table.std(axis=1),
})

In [None]:
method_relative_table

# Finally, we look at the Dice score and Image ECE of all three methods, since we argue that Image ECE is the metric we should care about.

In [None]:
# Get the relevant columns for looking at the Dice score and Image ECE
metric_cols = [
    "data_id",
    "slice_idx",
    "pretrained_seed",
    "pretrained_exp_root",
    "split",
    "task",
    "image_metric",
    "metric_score"
]
# Take these columns of the inference_df, drop other columns. Copy explicitly so the
# in-place `augment` below writes to an independent frame instead of a column slice
# of inference_df (which triggers pandas' SettingWithCopyWarning).
raw_metric_df = inference_df[metric_cols].copy()
# Add the 'calibrator' column. Presumably it is derived from 'pretrained_exp_root'
# (the parameter name of `calibrator`), so this has to happen before that column is
# dropped below — TODO confirm against the `augment` implementation.
raw_metric_df.augment(calibrator)
# For this experiment, we only want the entries from the 'Singapore' task.
# NOTE(review): no filter on 'split' is applied here — confirm this is intended.
raw_metric_df = raw_metric_df[raw_metric_df['task'] == 'Singapore']
raw_metric_df = raw_metric_df.drop(columns=["pretrained_exp_root"])
# Finally, drop duplicated rows and reset the index of the df.
metric_df = raw_metric_df.drop_duplicates().reset_index(drop=True)

In [None]:
metric_df

In [None]:
# Mean metric_score for every (pretrained_seed, image_metric, calibrator) combination.
metric_per_seed = (
    metric_df
    .groupby(["pretrained_seed", "image_metric", "calibrator"])["metric_score"]
    .mean()
    .reset_index()
)
# Make 'calibrator' categorical with an explicit order so the methods appear in that order.
metric_per_seed['calibrator'] = (
    metric_per_seed['calibrator']
    .astype('category')
    .cat.reorder_categories(['Uncalibrated', 'SVLS'])
)

In [None]:
metric_per_seed

In [None]:
# Mean and standard deviation of the per-seed scores for every (calibrator, image_metric)
# pair, pivoted so that each calibrator is one row and each (statistic, metric) pair
# is one column.
result = (
    metric_per_seed
    .groupby(['calibrator', 'image_metric'])['metric_score']
    .agg(mean_metric_score='mean', std_metric_score='std')
    .reset_index()
    .pivot(
        index='calibrator',
        columns='image_metric',
        values=['mean_metric_score', 'std_metric_score'],
    )
)

# Flatten the two-level column index into '<statistic>_<image_metric>' names.
result.columns = ['_'.join(levels).strip() for levels in result.columns.to_flat_index()]
result = result.reset_index()

In [None]:
result

In [None]:
# Keep the columns whose name contains 'calibrator' or 'Dice Loss' (with a space, as matched below).
dice_cols = [col for col in result.columns if 'Dice Loss' in col or 'calibrator' in col]
dice_result = result[dice_cols]

In [None]:
dice_result

In [None]:
# Keep the columns whose name contains 'calibrator' or 'Image_ECE'.
image_ece_cols = [col for col in result.columns if 'Image_ECE' in col or 'calibrator' in col]
ece_result = result[image_ece_cols] 

In [None]:
ece_result