# Calibra
### Assess the reliability of your machine learning classifiers!

In [4]:
import pandas as pd 
import numpy as np 

#### Create a synthetic dataset and fit a classifier to it

In [5]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

X, y = make_classification(n_samples=1000, n_features=20, n_informative=2, n_redundant=2, n_classes=2, random_state=42)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Initialize the model
model = RandomForestClassifier(random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions
predictions = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
print(f"Model accuracy: {accuracy:.2f}")


Model accuracy: 0.88


#### Examine the distribution of your model's predictions!

In [6]:
from calibra.utils import bin_probabilities
# from calibra.plotting import get_classwise_bin_weights
from calibra.errors import classwise_ece

bins = bin_probabilities(y_pred=predictions, y_true=y_test, num_bins=20)
# classwise_bin_weights = get_classwise_bin_weights(bins, num_bins=20, num_classes=2, num_samples=1000)


# look at the distribution of predictions for the negative class
overall_classwise_ece, classwise_errors = classwise_ece(y_pred=predictions, y_true=y_test, num_bins=20, return_classwise_errors=True)

In [8]:
print(f'classwise ece: {overall_classwise_ece}')
print(f'classwise errors: {classwise_errors}')

classwise ece: 0.12
classwise errors: [0.12 0.12]
