# Metrics Report

This notebook creates classification and calibration metrics reports, with values and graphs.

In [1]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import pandas as pd
import gc

from validate import validation_metrics, calibration_metrics
from utils.plots import probability_histogram
from calibration.metrics import ECE, calibration_curve

plt.style.use('utils/plotstyle.mplstyle')


In [2]:
def calib_pad_average_arrays(arrays):
    """
    Average 1D arrays of potentially different lengths, index by index.

    Shorter arrays are right-padded with NaN up to the longest length, and the
    mean and standard deviation at each index are computed ignoring the NaNs.

    Parameters
    ----------
    arrays : list of array-like
        Non-empty list of 1D sequences. Values are converted to float so that
        integer inputs can be padded with NaN.

    Returns
    -------
    avg : numpy.ndarray
        Per-index mean over the arrays that have a value at that index.
    std : numpy.ndarray
        Per-index standard deviation, computed over the same values.

    Raises
    ------
    ValueError
        If `arrays` is empty.
    """
    # NaN only exists for floating dtypes, so cast before padding; padding an
    # integer array with NaN would otherwise fail.
    float_arrays = [np.asarray(arr, dtype=float) for arr in arrays]
    if not float_arrays:
        raise ValueError("calib_pad_average_arrays() requires at least one array")

    max_length = max(len(arr) for arr in float_arrays)

    # Start from an all-NaN matrix and copy each array into the head of its row.
    padded = np.full((len(float_arrays), max_length), np.nan)
    for row, arr in zip(padded, float_arrays):
        row[:len(arr)] = arr

    avg = np.nanmean(padded, axis=0)
    std = np.nanstd(padded, axis=0)
    return avg, std

In [3]:
def show_results(results, path_experiments):
    """
    Print per-fold classification and calibration metrics and save the
    fold-averaged calibration curve as ``calib_curve.pdf``.

    For every distinct value in ``results['fold']`` the function computes
    `validation_metrics` and `calibration_metrics`, prints one line per metric
    (accuracy, F1, precision, sensitivity, specificity, AUC, ECE, MCE, NLL,
    Brier, in that order) with the per-fold values rounded to 4 decimals,
    separated by ';' and with '.' replaced by ','. It then averages the
    per-fold calibration curves and writes a figure with the curve on top and
    a histogram of all predicted probabilities below.

    Parameters
    ----------
    results : pandas.DataFrame
        Must provide the columns 'fold', 'preds', 'probs' and 'labels'.
    path_experiments : str
        Directory in which ``calib_curve.pdf`` is written.

    Raises
    ------
    ValueError
        If `results` contains no folds.
    """
    unique_folds = results['fold'].unique()
    if len(unique_folds) == 0:
        raise ValueError("show_results(): `results` contains no folds")

    # Slice every fold once and reuse the slices below, instead of rebuilding
    # the same boolean mask for each metric call.
    fold_frames = [results[results['fold'] == fold] for fold in unique_folds]

    # Compute validation metrics for each fold.
    val_metrics = [validation_metrics(fold_df['preds'], fold_df['probs'], fold_df['labels'])
                   for fold_df in fold_frames]

    accuracy    = np.array([m['Accuracy']    for m in val_metrics])
    f1          = np.array([m['F1 Score']    for m in val_metrics])
    precision   = np.array([m['Precision']   for m in val_metrics])
    sensitivity = np.array([m['Sensitivity'] for m in val_metrics])
    specificity = np.array([m['Specificity'] for m in val_metrics])
    auc         = np.array([m['AUC']         for m in val_metrics])

    # Compute calibration metrics for each fold.
    # The metrics and the plotted curve deliberately use different `mode` values.
    n_bins = 10
    mode_metrics = 'quantile'
    mode_plot    = 'uniform'

    calib_metrics = [calibration_metrics(fold_df['probs'], fold_df['labels'],
                                         n_bins=n_bins, mode=mode_metrics)
                     for fold_df in fold_frames]

    ece    = np.array([m['ECE']    for m in calib_metrics])
    mce    = np.array([m['MCE']    for m in calib_metrics])
    nll    = np.array([m['NLL']    for m in calib_metrics])
    brier  = np.array([m['Brier']  for m in calib_metrics])

    # Build a string summary using join (replacing dots with commas):
    # one line per metric, one ';'-separated value per fold.
    metrics_arrays = [accuracy, f1, precision, sensitivity, specificity, auc, ece, mce, nll, brier]
    lines = [
        ";".join(str(np.round(val, 4)).replace('.', ',') for val in arr)
        for arr in metrics_arrays
    ]
    summary_str = "\n".join(lines)
    print(summary_str)

    # Compute calibration curves for each fold.
    list_prob_true, list_prob_pred, list_probs, ece_list = [], [], [], []
    for fold_df in fold_frames:
        prob_true, prob_pred, _ = calibration_curve(fold_df['probs'], fold_df['labels'],
                                                    n_bins=n_bins, mode=mode_plot)
        list_prob_true.append(prob_true)
        list_prob_pred.append(prob_pred)
        list_probs.extend(fold_df['probs'])
        ece_list.append(ECE(fold_df['probs'], fold_df['labels'],
                            n_bins=n_bins, mode=mode_metrics))

    # Per-fold curves may have different lengths, hence the NaN-padded average.
    average_prob_true, std_prob = calib_pad_average_arrays(list_prob_true)
    average_prob_pred, std_pred = calib_pad_average_arrays(list_prob_pred)
    list_probs = np.array(list_probs)
    average_ece = np.mean(ece_list)
    std_ece = np.std(ece_list)

    # Create figure with a single gridspec call.
    # NOTE(review): explicit left/right/bottom/top margins may not be honoured
    # together with constrained_layout=True — confirm against the saved PDF.
    fig = plt.figure(constrained_layout=True)
    gs = fig.add_gridspec(2, 1, height_ratios=(3, 1),
                          left=0.1, right=0.9, bottom=0.1, top=0.9, hspace=0.05)
    ax_curve = fig.add_subplot(gs[0])
    ax_hist  = fig.add_subplot(gs[1], sharex=ax_curve)

    # Plot calibration curve: dotted diagonal as reference, then the
    # fold-averaged curve with a +/- one standard deviation band clipped to [0, 1].
    ax_curve.plot(np.arange(0, 1.1, 0.1), np.arange(0, 1.1, 0.1),
                  linestyle=":", color='#9e9e9e')
    # NOTE(review): the mean is shown with 4 decimals but the std with 2 — confirm intended.
    ax_curve.plot(average_prob_pred, average_prob_true, marker='o',
                  label=f'ECE = {average_ece:.4f} ± {std_ece:.2f}')
    ax_curve.fill_between(average_prob_pred,
                          np.clip(average_prob_true - std_prob, 0, 1),
                          np.clip(average_prob_true + std_prob, 0, 1),
                          alpha=0.3, edgecolor='None')

    # Plot histogram of predicted probabilities (all folds pooled).
    ax_hist.hist(list_probs, bins=np.linspace(0.0, 1.0, n_bins + 1),
                 edgecolor='black')

    ax_curve.set_ylabel('Fração de Positivos')
    ax_curve.legend(loc='upper left')
    ax_curve.set_ylim([0, 1])
    ax_curve.set_xlim([0, 1])
    # The x axis is shared with the histogram, so hide the duplicate tick labels.
    ax_curve.tick_params(labelbottom=False)

    ax_hist.set_xlabel('Probabilidade Prevista Média')
    ax_hist.set_ylabel('Quantidade')
    ax_hist.set_xlim([0, 1])

    # Save through the figure handle rather than the implicit "current figure".
    fig.savefig(os.path.join(path_experiments, "calib_curve.pdf"))
    plt.close(fig)
    

In [4]:
# Suffixes appended to `model` to build each experiment directory name;
# the empty suffix selects the directory named after `model` alone.
calibrators = ["", "_HIST", "_ISOTONIC", "_LINEAR", "_LS_01", "_LS_03", "_LS_05", "_PLATT", "_SIGMOID", "_STEP", "_TEMP"]


folder = ""
model = 'NCNN_FINAL'  # change model here
model_results_file = 'results.pkl'

for calibrator in calibrators:
    print(f"calibrator: {calibrator}")
    path_experiments = os.path.join('experiments', folder, f"{model}{calibrator}")
    results_path = os.path.join(path_experiments, model_results_file)

    # Only the file load is guarded: a missing results file means the
    # experiment was not run and is skipped, while any error raised later by
    # show_results is allowed to surface instead of being silently dropped.
    try:
        with open(results_path, 'rb') as f:
            # NOTE: pickle.load can execute arbitrary code; only load results
            # files produced by trusted runs.
            raw_results = pickle.load(f)
    except FileNotFoundError:
        print(f"  skipped: results file not found ({results_path})")
        continue

    raw_results.pop('embeddings', None)  # Remove embeddings if present
    results = pd.DataFrame(raw_results)

    show_results(results, path_experiments)
    
    
    

calibrator: 
0,8491
0,8182
0,9
0,75
0,931
0,8477
0,0712
0,1915
0,4301
0,1347
calibrator: _HIST
calibrator: _ISOTONIC
calibrator: _LINEAR
calibrator: _LS_01
calibrator: _LS_03
calibrator: _LS_05
calibrator: _PLATT
calibrator: _SIGMOID
calibrator: _STEP
calibrator: _TEMP


## Classification Metrics Graphs

In [None]:
# `results` is the DataFrame built in the experiment loop, with flat 'probs'
# and 'labels' columns covering all folds. Iterating `results.keys()` would
# yield column names, not folds, so the columns are read directly.
# NOTE(review): `results` holds the last experiment that loaded successfully,
# while `path_experiments` is the last path the loop visited; these can differ
# when some results files are missing — confirm the intended output directory.
probs = np.array(results['probs'].tolist())
labels = np.array(results['labels'].tolist())

probability_histogram(probs, labels, path=path_experiments)