In [1]:
# Install into the running kernel's environment (%pip) and pin the version that
# this notebook was executed with so a fresh environment reproduces it.
%pip install numpy==2.0.2

Collecting numpy
  Downloading numpy-2.0.2-cp39-cp39-win_amd64.whl (15.9 MB)
Installing collected packages: numpy
Successfully installed numpy-2.0.2
Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\Ashish\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip' command.


In [2]:
# Install into the running kernel's environment (%pip) and pin the version that
# this notebook was executed with so a fresh environment reproduces it.
%pip install scikit-learn==1.6.1


Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp39-cp39-win_amd64.whl (11.2 MB)
Collecting scipy>=1.6.0
  Downloading scipy-1.13.1-cp39-cp39-win_amd64.whl (46.2 MB)
Collecting threadpoolctl>=3.1.0
  Using cached threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Collecting joblib>=1.2.0
  Using cached joblib-1.4.2-py3-none-any.whl (301 kB)
Installing collected packages: threadpoolctl, scipy, joblib, scikit-learn
Successfully installed joblib-1.4.2 scikit-learn-1.6.1 scipy-1.13.1 threadpoolctl-3.5.0
Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\Ashish\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip' command.


In [1]:
import numpy as np
from sklearn.metrics import accuracy_score

def compute_ece(data, n_bins=10):
    """
    Compute the Expected Calibration Error (ECE) pooled over all subjects.

    Samples are grouped into ``n_bins`` equal-width confidence bins covering
    (0, 1]. Every non-empty bin contributes ``|accuracy - mean confidence|``
    weighted by the fraction of all samples that fall into it.

    Args:
    - data: dictionary mapping a subject id to a dictionary with
        - 'pred': sequence of predicted class labels,
        - 'truth': sequence of true class labels (same length as 'pred'),
        - 'prob' (optional): per-sample class probabilities with shape
          (len(pred), n_classes); a sample's confidence is its largest
          probability.
      When 'prob' is absent the prediction is treated as one-hot, i.e. its
      confidence is 1.0. If no subject supplies 'prob', the result therefore
      equals ``1 - accuracy``.
    - n_bins: number of bins to use for calibration (default is 10)

    Returns:
    - ece: Expected Calibration Error as a float; 0.0 when ``data`` contains
      no samples. Samples whose confidence lies outside (0, 1] fall into no
      bin and add nothing to the sum.

    Raises:
    - ValueError: if ``n_bins`` is smaller than 1, if a subject's 'pred' and
      'truth' differ in length, or if 'prob' does not have one row per
      prediction.
    """
    if n_bins < 1:
        raise ValueError(f"n_bins must be a positive integer, got {n_bins!r}")

    # Collect per-subject arrays and concatenate once at the end.
    pred_parts = []
    truth_parts = []
    conf_parts = []

    for subject, values in data.items():
        preds = np.asarray(values['pred'])
        truths = np.asarray(values['truth'])

        # A per-subject mismatch would silently misalign the pooled arrays.
        if len(preds) != len(truths):
            raise ValueError(
                f"subject {subject!r}: 'pred' has {len(preds)} entries but "
                f"'truth' has {len(truths)}"
            )
        if len(preds) == 0:
            continue

        prob = values.get('prob')
        if prob is None:
            # No probabilities available: a hard label is a one-hot prediction,
            # whose confidence is 1.0 regardless of how many classes exist.
            conf = np.ones(len(preds), dtype=float)
        else:
            prob = np.asarray(prob, dtype=float)
            if prob.ndim != 2 or prob.shape[0] != len(preds):
                raise ValueError(
                    f"subject {subject!r}: 'prob' must have shape "
                    f"(len(pred), n_classes), got {prob.shape}"
                )
            conf = prob.max(axis=1)

        pred_parts.append(preds)
        truth_parts.append(truths)
        conf_parts.append(conf)

    # Nothing to score: the sum over bins is empty.
    if not pred_parts:
        return 0.0

    all_preds = np.concatenate(pred_parts)
    all_truths = np.concatenate(truth_parts)
    all_conf = np.concatenate(conf_parts)

    # Per-sample correctness; its mean over a bin is that bin's accuracy.
    correct = all_preds == all_truths
    n_samples = len(all_preds)

    # Binning predicted probabilities
    bin_boundaries = np.linspace(0, 1, n_bins + 1)
    ece = 0.0

    for lower, upper in zip(bin_boundaries[:-1], bin_boundaries[1:]):
        # Bins are half-open on the left: (lower, upper].
        bin_mask = (all_conf > lower) & (all_conf <= upper)
        bin_count = bin_mask.sum()

        # Skip empty bins
        if bin_count == 0:
            continue

        bin_accuracy = correct[bin_mask].mean()
        bin_confidence = all_conf[bin_mask].mean()

        # Weight the calibration gap by the share of samples in this bin.
        ece += (bin_count / n_samples) * abs(bin_accuracy - bin_confidence)

    return float(ece)

# Example usage with your provided data dictionary.
# `data` maps a subject id (e.g. '006', 'sub01') to that subject's results:
#   'pred'  - predicted class index for each sample
#   'truth' - true class index for each sample; compute_ece compares it with
#             'pred' position by position
# The labels in this example take the values 0, 1 and 2, and no predicted
# probabilities are stored.
data = {
   '006': {'pred': [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0], 'truth': [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2]}, '007': {'pred': [1, 1, 1, 1, 1, 1, 1, 1], 'truth': [0, 1, 1, 1, 1, 1, 2, 2]}, '009': {'pred': [0, 0, 0, 0], 'truth': [0, 0, 0, 2]}, '010': {'pred': [0, 0, 0, 0], 'truth': [0, 0, 0, 0]}, '011': {'pred': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1], 'truth': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]}, '012': {'pred': [0, 0, 0], 'truth': [0, 0, 2]}, '013': {'pred': [0, 0, 0, 0, 0, 0], 'truth': [0, 0, 0, 0, 0, 0]}, '014': {'pred': [0, 0, 1, 1, 1, 1, 0, 1, 0, 0], 'truth': [0, 0, 1, 1, 1, 1, 1, 1, 1, 2]}, '015': {'pred': [0, 0, 0], 'truth': [0, 0, 2]}, '016': {'pred': [0, 0, 1, 0, 0], 'truth': [0, 0, 1, 2, 2]}, '017': {'pred': [0, 0, 0, 0], 'truth': [0, 0, 0, 2]}, '018': {'pred': [0, 0, 2], 'truth': [0, 0, 2]}, '019': {'pred': [1], 'truth': [1]}, '020': {'pred': [0, 0, 1, 0], 'truth': [0, 0, 1, 1]}, '021': {'pred': [0, 0], 'truth': [0, 0]}, '022': {'pred': [0, 1, 0, 1, 1], 'truth': [0, 0, 0, 1, 1]}, '023': {'pred': [0], 'truth': [0]}, '024': {'pred': [0], 'truth': [0]}, '026': {'pred': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'truth': [0, 0, 0, 0, 0, 0, 0, 0, 0]}, '028': {'pred': [0, 2, 2], 'truth': [0, 2, 2]}, '030': {'pred': [0, 0, 0], 'truth': [0, 0, 0]}, '031': {'pred': [0], 'truth': [0]}, '032': {'pred': [0, 0, 0, 0], 'truth': [0, 0, 0, 0]}, '033': {'pred': [0, 0, 0, 0, 0], 'truth': [0, 0, 0, 0, 1]}, '034': {'pred': [0, 0, 0], 'truth': [0, 0, 0]}, '035': {'pred': [0, 0, 0, 0, 0, 0, 0, 0], 'truth': [0, 0, 0, 0, 0, 0, 0, 2]}, '036': {'pred': [0], 'truth': [0]}, '037': {'pred': [0], 'truth': [0]}, 'sub01': {'pred': [0, 0, 0], 'truth': [0, 0, 1]}, 'sub02': {'pred': [0, 0, 0, 0, 0, 0, 2, 2, 0], 'truth': [0, 0, 0, 0, 0, 1, 2, 2, 2]}, 'sub03': {'pred': [0, 0, 0, 0, 2], 'truth': [0, 0, 0, 0, 2]}, 'sub04': {'pred': [0, 0], 'truth': [0, 0]}, 'sub05': {'pred': [2, 2, 2, 2, 2, 2], 'truth': [1, 2, 2, 2, 2, 2]}, 'sub06': {'pred': [0, 0, 0, 2], 'truth': [0, 1, 
2, 2]}, 'sub07': {'pred': [0, 0, 0, 0, 0], 'truth': [0, 0, 0, 0, 0]},'sub08': {'pred': [0], 'truth': [0]}, 'sub09': {'pred': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1], 'truth': [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]}, 'sub11': {'pred': [0, 0, 0, 0], 'truth': [0, 0, 0, 0]}, 'sub12': {'pred': [0, 0, 0, 2, 0, 2, 0, 2, 2, 2, 0], 'truth': [0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2]}, 'sub13': {'pred': [1, 1], 'truth': [1, 1]}, 'sub14': {'pred': [1, 1, 1], 'truth': [1, 1, 1]}, 'sub15': {'pred': [0, 1, 2], 'truth': [0, 1, 2]}, 'sub16': {'pred': [0, 0, 0], 'truth': [0, 1, 1]}, 'sub17': {'pred': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'truth': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2]}, 'sub19': {'pred': [0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2], 'truth': [0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2]}, 'sub20': {'pred': [0, 0], 'truth': [0, 0]}, 'sub21': {'pred': [0], 'truth': [0]}, 'sub22': {'pred': [0, 0], 'truth': [0, 0]}, 'sub23': {'pred': [0, 0, 0, 0, 0, 0, 0, 1], 'truth': [0, 0, 0, 0, 0, 0, 0, 1]}, 'sub24': {'pred': [0, 0, 0], 'truth': [0, 0, 2]}, 'sub25': {'pred': [0, 0, 0, 2, 0], 'truth': [0, 0, 0, 2, 2]}, 'sub26': {'pred': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'truth': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]}

}

# Pool every subject in `data` and score calibration with ten equal-width
# confidence bins. `data` holds hard labels only, so compute_ece gives each
# prediction a confidence of 1.0 and the value printed here equals 1 - accuracy.
ece = compute_ece(
    data,
    n_bins=10,
)
print(f"Expected Calibration Error (ECE): {ece:.4f}")


Expected Calibration Error (ECE): 0.1942
