In [96]:
from ucimlrepo import fetch_ucirepo 
from venn_abers import VennAbersCalibrator
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.isotonic import IsotonicRegression
import matplotlib.pyplot as plt

In [110]:
# fetch dataset 
breast_cancer_wisconsin_diagnostic = fetch_ucirepo(id=17) 
  
# data (as pandas dataframes) 
X = breast_cancer_wisconsin_diagnostic.data.features.values
Y = breast_cancer_wisconsin_diagnostic.data.targets.values.flatten()
Y = (Y == 'M').astype('int')

In [111]:
def U(decision, true_class, **params):
    """
    Calculate utility based on the decision, true class, and given parameters.
    
    Args:
        decision (str): One of "DoNothing", "Biopsy", "ScanAgain".
        true_class (str): "cancer" or "not_cancer".
        params (dict): Parameters defining utilities and costs.
    
    Returns:
        float: Utility value.
    """
    U_TP = params.get("U_TP", 100)  # Utility for true positive
    U_TN = params.get("U_TN", 50)   # Utility for true negative
    C_biopsy = params.get("C_biopsy", 20)
    C_scan = params.get("C_scan", 10)
    C_FN = params.get("C_FN", 200)
    C_FP = params.get("C_FP", 15)
    if decision == "DoNothing":
        if true_class == "M":
            return -C_FN  # False negative cost
        else:
            return U_TN  # True negative utility
    elif decision == "Biopsy":
        if true_class == "M":
            return U_TP - C_biopsy  # True positive utility minus biopsy cost
        else:
            return U_TN - C_biopsy - C_FP  # True negative utility minus biopsy and false positive costs
    elif decision == "ScanAgain":
        return -C_scan  # Cost of scanning again (future probabilities not considered here)
    else:
        raise ValueError("Invalid decision. Choose from 'DoNothing', 'Biopsy', or 'ScanAgain'.")


# IVAP

In [196]:
train_size = 300

r=2024

X_train, X_test, y_train, y_test = train_test_split(X, Y, train_size=train_size, random_state=r)

ivap_rf = RandomForestClassifier(random_state=r)
ivap = VennAbersCalibrator(estimator=ivap_rf, inductive=True, cal_size=0.3, random_state=r)
ivap.fit(X_train, y_train)
P_ivap, P01_ivap = ivap.predict_proba(X_test, p0_p1_output=True)

Utility_ivap = 0
for probs, y_true in zip(P_ivap, y_test):
    utility = {}
    for d in ['DoNothing', 'Biopsy', 'ScanAgain']:
        u = 0
        for p, y in zip(probs, ivap.classes):
            u += p*U(decision=d, true_class=y)
        utility[d] = u
    # Choose decision with highest expected utility
    d = max(utility, key=utility.get)
    # print(d)
    Utility_ivap += U(decision=d, true_class=y_true)

Utility_ivap/len(y_test)

50.0

# Predict_proba

In [197]:
rf = RandomForestClassifier(random_state=r)
rf.fit(X_train, y_train)
P_proba = rf.predict_proba(X_test)

Utility_proba = 0
for probs, y_true in zip(P_proba, y_test):
    utility = {}
    for d in ['DoNothing', 'Biopsy', 'ScanAgain']:
        u = 0
        for p, y in zip(probs, ivap.classes):
            u += p*U(decision=d, true_class=y)
        utility[d] = u
    # Choose decision with highest expected utility
    d = max(utility, key=utility.get)
    # print(d)
    Utility_proba += U(decision=d, true_class=y_true)

Utility_proba/len(y_test)

50.0