In [None]:
import sys
sys.path.append('/storage/vbutoi/projects')
sys.path.append('/storage/vbutoi/libraries')
sys.path.append('/storage/vbutoi/projects/ESE')
sys.path.append('/storage/vbutoi/projects/UniverSeg')

import os 
import numpy as np
import pandas as pd
import seaborn as sns
from pathlib import Path
import matplotlib.pyplot as plt
# Ionpy imports
from ionpy.analysis import ResultsLoader
# Local imports
from ese.analysis.baselines import fit_posthoc_calibrators, viz_posthoc_calibrators
from ese.analysis.analyze_inf import load_cal_inference_stats
from ese.analysis.analysis_utils.parse_sweep import get_global_optimal_parameter, get_per_subject_optimal_values

# Global seaborn styling for every figure in this notebook:
# dark grid background and the larger "talk" font/line scaling.
sns.set_style("darkgrid")
sns.set_context("talk")
# DATAPATH is built as a ':'-separated list of dataset roots (a single entry for now).
# NOTE(review): presumably consumed by the ionpy / ese data loaders — confirm.
os.environ['DATAPATH'] = ':'.join((
       '/storage/vbutoi/datasets',
))
# Scratch root of the ESE project and a results loader instance.
# Neither `root` nor `rs` is referenced in the cells visible here.
root = Path("/storage/vbutoi/scratch/ESE")
rs = ResultsLoader()

# Reload edited modules automatically so the kernel does not need restarting.
%load_ext autoreload
%autoreload 2
# Enables the `%%yaml <name>` cell magic used below to define config dictionaries.
%load_ext yamlmagic
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
%%yaml results_cfg 

# Config passed as `results_cfg` to load_cal_inference_stats in the next cell.
# Exactly one `root` should be active; the commented-out roots are the other
# benchmark datasets that this same analysis can be pointed at.
log:
    # root: '/storage/vbutoi/scratch/ESE/inference/10_26_24_OCTA_Benchmark'
    # root: '/storage/vbutoi/scratch/ESE/inference/10_26_24_ISLES_Benchmark'
    # root: '/storage/vbutoi/scratch/ESE/inference/10_26_24_Roads_FULLRES_Benchmark'
    root: '/storage/vbutoi/scratch/ESE/inference/10_26_24_WMH_Benchmark'
    # NOTE(review): presumably the sub-group of inference runs under `root` to load
    # (here the temperature sweep) — confirm against load_cal_inference_stats.
    inference_group: 'Sweep_Temperature'

# NOTE(review): loader sanity-check switches; their exact semantics are defined
# in the loader and are not visible from this notebook — confirm there.
options:
    verify_graceful_exit: True
    equal_rows_per_cfg_assert: False 

# Plotting Calls

In [None]:
# Load the inference statistics described by `results_cfg`, asking the loader
# to use its cached copy (load_cached=True).
inference_df = load_cal_inference_stats(results_cfg=results_cfg, load_cached=True)

In [4]:
# Columns needed for the temperature analysis: the score, the model identifier,
# the swept temperature, the volume measurements and the subject/split identifiers.
cols_to_keep = [
    'soft_RAVE', 'experiment_model_dir', 'temperature',
    'hard_volume', 'soft_volume', 'gt_volume',
    'data_id', 'split',
]
# Project onto those columns, drop duplicate rows, renumber the index, and then
# order by split so that the 'cal' rows come before the 'val' rows.
exp_df = (
    inference_df.loc[:, cols_to_keep]
    .drop_duplicates()
    .reset_index(drop=True)
    .sort_values('split', ascending=True)
)

In [5]:
##############################################################################################################
# This cell is quite important: it attaches the base soft volume for each (data_id, experiment_model_dir) pair.
##############################################################################################################
# Temperature on the sweep whose soft volume is used as the "base" (reference) prediction.
# NOTE(review): this is 1.01 rather than 1.0 — presumably the sweep grid value closest to the
# uncalibrated T=1; confirm against the sweep config.
BASE_TEMPERATURE = 1.01
# Compare with a tolerance instead of `==`: the temperatures are floats, and exact equality
# silently selects nothing if the column is float32 or was round-tripped through a cache.
is_base_temperature = np.isclose(exp_df['temperature'], BASE_TEMPERATURE)
bsv = exp_df.loc[is_base_temperature, ['data_id', 'experiment_model_dir', 'soft_volume']].drop_duplicates().reset_index(drop=True)
# Fail loudly here rather than filling `base_soft_volume` with NaN for every row below.
assert not bsv.empty, f"No rows found with temperature == {BASE_TEMPERATURE}; cannot compute base_soft_volume."
# Build a combined key from data_id and experiment_model_dir, on both frames.
bsv['data_id_experiment_model_dir'] = bsv['data_id'] + '_' + bsv['experiment_model_dir']
exp_df['data_id_experiment_model_dir'] = exp_df['data_id'] + '_' + exp_df['experiment_model_dir']
# The individual key columns are no longer needed on the lookup frame.
bsv = bsv.drop(columns=['data_id', 'experiment_model_dir'])
# Dictionary mapping the combined key to its base soft volume.
bsv_dict = dict(zip(bsv['data_id_experiment_model_dir'], bsv['soft_volume']))
# New column on exp_df: the base soft volume looked up through each row's combined key.
# Rows whose key has no base-temperature entry get NaN.
exp_df['base_soft_volume'] = exp_df['data_id_experiment_model_dir'].map(bsv_dict)

In [6]:
# Sweep over `temperature`, scored by `soft_RAVE`, with one result per
# (split, experiment_model_dir) group. The selection rule itself lives in
# get_global_optimal_parameter.
global_opt_temp_df = get_global_optimal_parameter(
    exp_df, sweep_key='temperature', y_key='soft_RAVE', group_keys=['split', 'experiment_model_dir'],
)

In [7]:
# Same sweep (`temperature` scored by `soft_RAVE`, grouped by split and model),
# but resolved per subject. `hard_volume` and `base_soft_volume` are carried
# along via keep_keys, and return_optimal_values=True yields the second
# return value, `opt_temps_df`, alongside `opt_scores`.
opt_scores, opt_temps_df = get_per_subject_optimal_values(
    exp_df, sweep_key='temperature', y_key='soft_RAVE',
    group_keys=['split', 'experiment_model_dir'],
    keep_keys=['hard_volume', 'base_soft_volume'],
    return_optimal_values=True,
)

In [None]:
# For every model directory: fit post-hoc calibrators that predict each subject's
# optimal temperature from its hard volume, and compare them against the baseline
# of a single global temperature chosen on the 'cal' split.
for model_dir in opt_temps_df['experiment_model_dir'].unique():
    # Baseline: the global optimal temperature of this model on the 'cal' split.
    is_cal_row_for_model = (
        (global_opt_temp_df['experiment_model_dir'] == model_dir)
        & (global_opt_temp_df['split'] == 'cal')
    )
    # `.item()` requires exactly one matching row (it raises otherwise), and avoids
    # `float(<Series>)`, which is deprecated for single-element Series in pandas >= 2.1.
    cal_global_opt_temp = float(global_opt_temp_df.loc[is_cal_row_for_model, 'temperature'].item())

    # Get the subset of the temps corresponding to one model.
    model_temps_df = opt_temps_df[opt_temps_df['experiment_model_dir'] == model_dir]

    # Split the data into training ('cal') and validation ('val') sets based on the 'split' column.
    model_df_train = model_temps_df[model_temps_df['split'] == 'cal']
    model_df_val = model_temps_df[model_temps_df['split'] == 'val']

    # Feature (X): hard volume as a single-column 2D array. Target (y): per-subject optimal temperature.
    X_train = model_df_train['hard_volume'].to_numpy().reshape(-1, 1)
    y_train = model_df_train['temperature']
    X_val = model_df_val['hard_volume'].to_numpy().reshape(-1, 1)
    y_val = model_df_val['temperature']

    # Fit the calibrators and get the results.
    results, fitted_models = fit_posthoc_calibrators(
        X_train=X_train,
        y_train=y_train,
        X_val=X_val,
        y_val=y_val,
        global_opt_temp=cal_global_opt_temp
    )

    # Convert results to a DataFrame; transpose so that each calibrator becomes a row.
    results_df = pd.DataFrame(results).T
    # Sort by MSE, ascending, so the best (lowest-error) calibrator comes first.
    results_df = results_df.sort_values('MSE')
    print("Model Performance for model_dir:", model_dir)
    display(results_df)

    # Now we want to see the actual fits on the validation data.
    viz_posthoc_calibrators(
        fitted_models, 
        X_data=X_val, 
        y_data=y_val, 
        global_temp=cal_global_opt_temp
    )