# Calibration Plots and Results

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import ttest_rel, kstest, wilcoxon
import pickle

from calibration.calibrators import *
from validate import calibration_metrics, validation_metrics
from calibration.metrics import *
from utils.plots import *

import scipy.stats as stats
import pandas as pd
import scikit_posthocs as sp
from scipy.stats import wilcoxon

In [2]:
n_bins = 10
mode_metrics = 'quantile'
mode_plot = 'uniform'

## Load data

In [3]:
folder = 'UNIFESP+iCOPE'
model_name = 'VGGNB' # change model here

filename = 'results_MCDP_0.5_30.pkl'

original = f'experiments\\{folder}\\{model_name}\\{filename}'
LS_01 = f'experiments\\{folder}\\{model_name}_LS_01\\{filename}'
LS_03 = f'experiments\\{folder}\\{model_name}_LS_03\\{filename}'
LS_05 = f'experiments\\{folder}\\{model_name}_LS_05\\{filename}'
LINEAR = f'experiments\\{folder}\\{model_name}_LINEAR\\{filename}'
SIGMOID = f'experiments\\{folder}\\{model_name}_SIGMOID\\{filename}'
STEP = f'experiments\\{folder}\\{model_name}_STEP\\{filename}'
HIST = f'experiments\\{folder}\\{model_name}\\results_MCDP_HIST.pkl'
ISOTONIC = f'experiments\\{folder}\\{model_name}\\results_MCDP_ISOTONIC.pkl'
PLATT = f'experiments\\{folder}\\{model_name}\\results_MCDP_PLATT.pkl'
TEMPERATURE = f'experiments\\{folder}\\{model_name}\\results_MCDP_TEMPERATURE.pkl'

models = [LS_01, LS_03, LS_05, LINEAR, SIGMOID, STEP, HIST, ISOTONIC, PLATT, TEMPERATURE]


In [4]:
metrics_dict_original = {'Accuracy': [], 'F1 Score': [], 'Precision': [], 
                'Sensitivity': [], 'Specificity': [], 'AUC': [], 
                'ECE': [], 'MCE': [], 'NLL': [], 'Brier': []}

metrics_cls_aux = {'Accuracy': [], 'F1 Score': [], 'Precision': [], 
            'Sensitivity': [], 'Specificity': [], 'AUC': []}

metrics_calib_aux = {'ECE': [], 'MCE': [], 'NLL': [], 'Brier': []}


with open(original, 'rb') as f:
    results = pickle.load(f)
    
    # Collect metric values for each fold
for fold in results.keys():
    metrics_cls = validation_metrics(results[fold]['preds'], results[fold]['probs'], results[fold]['labels'])
    metrics_calib = calibration_metrics(results[fold]['probs'], results[fold]['labels'], n_bins=n_bins, mode=mode_metrics)

    for metric in metrics_cls_aux.keys():
        metrics_dict_original[metric].append(metrics_cls[metric])

    for metric in metrics_calib_aux.keys():
        metrics_dict_original[metric].append(metrics_calib[metric])

In [5]:
for model in models:

    print(model)
    print()

    metrics_dict = {'Accuracy': [], 'F1 Score': [], 'Precision': [], 
                'Sensitivity': [], 'Specificity': [], 'AUC': [], 
                'ECE': [], 'MCE': [], 'NLL': [], 'Brier': []}
    
    metrics_cls_aux = {'Accuracy': [], 'F1 Score': [], 'Precision': [], 
            'Sensitivity': [], 'Specificity': [], 'AUC': []}

    metrics_calib_aux = {'ECE': [], 'MCE': [], 'NLL': [], 'Brier': []}


    with open(model, 'rb') as f:
        results = pickle.load(f)
        
        # Collect metric values for each fold
    for fold in results.keys():
        metrics_cls = validation_metrics(results[fold]['preds'], results[fold]['probs'], results[fold]['labels'])
        metrics_calib = calibration_metrics(results[fold]['probs'], results[fold]['labels'], n_bins=n_bins, mode=mode_metrics)

        for metric in metrics_cls_aux.keys():
            metrics_dict[metric].append(metrics_cls[metric])

        for metric in metrics_calib_aux.keys():
            metrics_dict[metric].append(metrics_calib[metric])
        

    for metric in metrics_dict.keys():
        
        values_originais = np.asarray(metrics_dict_original[metric])
        values = np.asarray(metrics_dict[metric])

        try:
            stat, p_value = wilcoxon(values_originais, values)
            print(str(round(p_value,3)).replace('.',','))
        except ValueError:
            print(f'####')


        
    print()

            

experiments\UNIFESP+iCOPE\VGGNB_LS_01\results_MCDP_0.5_30.pkl

0,063
0,043
0,173
0,068
0,465
0,208
0,695
0,105
1,0
0,193

experiments\UNIFESP+iCOPE\VGGNB_LS_03\results_MCDP_0.5_30.pkl

0,046
0,075
0,08
0,223
0,109
0,139
0,027
0,322
0,049
0,375

experiments\UNIFESP+iCOPE\VGGNB_LS_05\results_MCDP_0.5_30.pkl

0,08
0,138
0,465
0,197
0,273
0,77
0,002
0,846
0,004
0,014

experiments\UNIFESP+iCOPE\VGGNB_LINEAR\results_MCDP_0.5_30.pkl

0,012
0,015
0,011
0,866
0,028
0,01
0,084
0,695
0,002
0,002

experiments\UNIFESP+iCOPE\VGGNB_SIGMOID\results_MCDP_0.5_30.pkl

0,017
0,11
0,011
0,686
0,011
0,01
0,037
0,625
0,002
0,006

experiments\UNIFESP+iCOPE\VGGNB_STEP\results_MCDP_0.5_30.pkl

0,012
0,084
0,002
0,779
0,008
0,014
0,16
0,275
0,01
0,01

experiments\UNIFESP+iCOPE\VGGNB\results_MCDP_HIST.pkl

0,249
0,31
0,463
1,0
0,5
0,193
1,0
0,922
0,006
0,037

experiments\UNIFESP+iCOPE\VGGNB\results_MCDP_ISOTONIC.pkl

0,109
0,109
0,109
0,317
0,18
0,051
0,695
0,232
0,002
0,006

experiments\UNIFESP+iCOPE\VGGNB\resul

