In [7]:
import warnings

import numpy as np
import pandas as pd

from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression


RANDOM_SEED = 42
warnings.filterwarnings("ignore")

In [6]:
class UncertaintyRegionExperiment:
    """Cross-validated check of whether "reliable" predictions are more accurate.

    A logistic regression is fitted on each K-fold split. Every test row is
    then labelled "reliable" or "unreliable" depending on whether a band of
    half-width ``k * sqrt(p1 * p0)`` around its positive-class score ``p1``
    excludes the positive rate of the labels (see ``get_confidence``). For
    each requested coverage level, ``k`` is tuned so that roughly that
    fraction of test rows is "reliable", and accuracy / F1 are measured on
    those rows only.

    NOTE(review): inside ``run`` the positive rate is computed from the
    *test-fold* labels, so the reliable/unreliable split uses information
    that would not be available at prediction time.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature columns plus the label column.
    target : str
        Name of the label column in ``data``.
    n_splits : int, default 5
        Number of folds passed to ``KFold``.
    coverage_quantiles : iterable of float, default (0.25, 0.5, 0.75)
        Target fractions of test rows to mark as "reliable". Copied into a
        list on construction so instances never share state.
    random_state : int, default 42
        Seed for both the fold shuffling and the logistic regression.
    """

    def __init__(self, data, target, n_splits=5, coverage_quantiles=(0.25, 0.5, 0.75), random_state=42):
        self.data = data
        self.target = target
        self.n_splits = n_splits
        # Private copy: avoids the shared-mutable-default pitfall and protects
        # against the caller mutating their own list afterwards.
        self.coverage_quantiles = list(coverage_quantiles)
        self.random_state = random_state

    @staticmethod
    def _metric_key(q, metric):
        """Build the result-column name for coverage level ``q``.

        ``round`` is used instead of plain truncation because ``q * 100`` is
        not always exact in floating point (``0.29 * 100`` is ``28.99...``).
        """
        return f"top_{int(round(q * 100))}_{metric}"

    @staticmethod
    def _split_fold(X, y, train_index, test_index):
        """Slice features and labels by *position* for one fold.

        The indices produced by ``KFold.split`` are positional, so both ``X``
        and ``y`` must be taken with ``.iloc``; label-based ``y[index]`` only
        happens to work when the frame has a default 0..n-1 index.

        Returns ``(X_train, X_test, y_train, y_test)``.
        """
        return (
            X.iloc[train_index],
            X.iloc[test_index],
            y.iloc[train_index],
            y.iloc[test_index],
        )

    def _reliable_mask(self, pred, y_true, k):
        """Return a boolean array that is True where a prediction is reliable.

        A row is reliable when the band
        ``pred[:, 1] -/+ k * sqrt(pred[:, 1] * pred[:, 0])`` lies strictly
        above or strictly below ``y_true.mean()``.
        """
        base_rate = y_true.mean()
        half_width = k * np.sqrt(pred[:, 1] * pred[:, 0])

        lower = pred[:, 1] - half_width
        upper = pred[:, 1] + half_width

        return np.logical_or(lower > base_rate, base_rate > upper)

    def get_confidence(self, pred, y_true, k):
        """Label each prediction as "reliable" or "unreliable".

        Parameters
        ----------
        pred : numpy.ndarray
            Two-column score array; column 1 is used as the positive-class
            score and column 0 as its complement.
        y_true : array-like with ``.mean()``
            Labels whose mean is used as the reference rate.
        k : float
            Band-width multiplier; larger values make fewer rows reliable.

        Returns
        -------
        numpy.ndarray of str
            One "reliable" / "unreliable" entry per row of ``pred``.
        """
        return np.where(self._reliable_mask(pred, y_true, k), "reliable", "unreliable")

    def get_closest_k(self, y_pred, y_test, target_size):
        """Find the ``k`` whose reliable fraction is closest to ``target_size``.

        ``k`` is searched on the grid 0.00, 0.01, ..., 2.99; on ties the
        smallest ``k`` wins (first minimum of ``argmin``).
        """
        param_interval = np.arange(0, 3, .01)
        sizes = np.array([
            self._reliable_mask(y_pred, y_test, k).mean()
            for k in param_interval
        ])

        index = np.argmin(np.abs(sizes - target_size))
        return param_interval[index]

    def get_top_n_accuracy(self, test, pred_proba, y_test, n):
        """Score the model on the reliable subset covering about ``n`` of the rows.

        Parameters
        ----------
        test : pandas.DataFrame
            Test-fold features; only used as the frame the helper columns are
            attached to.
        pred_proba : numpy.ndarray
            Two-column class-probability array for the rows of ``test``.
        y_test : pandas.Series or array-like
            True labels for the rows of ``test``.
        n : float
            Target fraction of rows to treat as reliable.

        Returns
        -------
        tuple of float
            ``(accuracy, f1)`` computed on the reliable rows only, using a
            0.5 threshold on the positive-class probability.
        """
        k = self.get_closest_k(pred_proba, y_test, n)
        test = test.assign(
            y_true=y_test,
            pred=pred_proba[:, 1],
            pred_class=(pred_proba[:, 1] >= 0.5).astype("int"),
            confidence=self.get_confidence(pred_proba, y_test, k)
        )

        reliable = test.loc[test.confidence == "reliable"]

        accuracy = accuracy_score(
            y_true=reliable.y_true,
            y_pred=reliable.pred_class
        )
        f1 = f1_score(
            y_true=reliable.y_true,
            y_pred=reliable.pred_class
        )
        return accuracy, f1

    def run(self):
        """Run the K-fold experiment and summarise the metrics.

        Returns
        -------
        pandas.DataFrame
            One row per metric (overall and per coverage level) with the
            ``mean`` and ``std`` of that metric across folds.
        """
        X = self.data.drop(self.target, axis=1)
        y = self.data[self.target]

        kf = KFold(n_splits=self.n_splits, shuffle=True, random_state=self.random_state)

        # A single estimator is reused; each ``fit`` call refits it on the new fold.
        model = LogisticRegression(max_iter=1000, random_state=self.random_state)

        accuracy_dict = {
            'overall_accuracies': [],
            'overall_f1_scores': []
        }
        for q in self.coverage_quantiles:
            accuracy_dict[self._metric_key(q, "accuracies")] = []
            accuracy_dict[self._metric_key(q, "f1_scores")] = []

        for train_index, test_index in kf.split(X):
            X_train, X_test, y_train, y_test = self._split_fold(
                X, y, train_index, test_index
            )

            model.fit(X_train, y_train)
            pred_class = model.predict(X_test)

            accuracy_dict["overall_accuracies"].append(accuracy_score(y_test, pred_class))
            accuracy_dict["overall_f1_scores"].append(f1_score(y_test, pred_class))

            pred_proba = model.predict_proba(X_test)

            for q in self.coverage_quantiles:
                accuracy, f1 = self.get_top_n_accuracy(
                    X_test, pred_proba, y_test, q
                )
                accuracy_dict[self._metric_key(q, "accuracies")].append(accuracy)
                accuracy_dict[self._metric_key(q, "f1_scores")].append(f1)

        return pd.DataFrame(accuracy_dict).describe().T[["mean", "std"]]

## Heart Attack

In [4]:
%%time

# Load the processed heart dataset; "HeartDisease" is the label column.
heart = pd.read_csv("../data/processed/heart.csv")

# Fold count and coverage levels are left at the class defaults.
experiment = UncertaintyRegionExperiment(heart, "HeartDisease", random_state=RANDOM_SEED)
metrics = experiment.run()

# Displayed as the cell output.
metrics

CPU times: user 1.08 s, sys: 6.11 ms, total: 1.09 s
Wall time: 1.09 s


Unnamed: 0,mean,std
overall_accuracies,0.843086,0.042702
overall_f1_scores,0.856424,0.043543
top_25_accuracies,0.930435,0.023814
top_25_f1_scores,0.936837,0.021931
top_50_accuracies,0.913043,0.026625
top_50_f1_scores,0.917957,0.030359
top_75_accuracies,0.898558,0.03725
top_75_f1_scores,0.903392,0.039342


## Australian Credit

In [8]:
%%time

# Load the processed Australian credit dataset; the label column is named "14".
australian = pd.read_csv("../data/processed/australian.csv")

# Fold count and coverage levels are left at the class defaults.
experiment = UncertaintyRegionExperiment(australian, "14", random_state=RANDOM_SEED)
metrics = experiment.run()

# Displayed as the cell output.
metrics

CPU times: user 1.31 s, sys: 6.11 ms, total: 1.32 s
Wall time: 1.32 s


Unnamed: 0,mean,std
overall_accuracies,0.856522,0.02075
overall_f1_scores,0.838537,0.036337
top_25_accuracies,0.964706,0.038348
top_25_f1_scores,0.969487,0.03324
top_50_accuracies,0.941858,0.014498
top_50_f1_scores,0.934287,0.017822
top_75_accuracies,0.923077,0.020397
top_75_f1_scores,0.911614,0.024216


## Diabetes

In [10]:
%%time

# Load the processed diabetes dataset; "Outcome" is the label column.
diabetes = pd.read_csv("../data/processed/diabetes.csv")

# Fold count and coverage levels are left at the class defaults.
experiment = UncertaintyRegionExperiment(diabetes, "Outcome", random_state=RANDOM_SEED)
metrics = experiment.run()

# Displayed as the cell output.
metrics

CPU times: user 734 ms, sys: 3.5 ms, total: 738 ms
Wall time: 737 ms


Unnamed: 0,mean,std
overall_accuracies,0.768262,0.028665
overall_f1_scores,0.635113,0.014662
top_25_accuracies,0.886775,0.066598
top_25_f1_scores,0.863986,0.071316
top_50_accuracies,0.867157,0.031174
top_50_f1_scores,0.822826,0.038623
top_75_accuracies,0.816501,0.032576
top_75_f1_scores,0.740945,0.025254


## QSAR

In [11]:
%%time

# Load the processed QSAR dataset; "Class" is the label column.
qsar = pd.read_csv("../data/processed/qsar.csv")

# Fold count and coverage levels are left at the class defaults.
experiment = UncertaintyRegionExperiment(qsar, "Class", random_state=RANDOM_SEED)
metrics = experiment.run()

# Displayed as the cell output.
metrics

CPU times: user 5.3 s, sys: 867 ms, total: 6.17 s
Wall time: 1.21 s


Unnamed: 0,mean,std
overall_accuracies,0.863507,0.013151
overall_f1_scores,0.793813,0.026149
top_25_accuracies,0.981032,0.017457
top_25_f1_scores,0.941013,0.059232
top_50_accuracies,0.954717,0.031572
top_50_f1_scores,0.924913,0.052802
top_75_accuracies,0.916446,0.017608
top_75_f1_scores,0.882661,0.031929


## Titanic

In [12]:
%%time

# Load the processed Titanic dataset; "Survived" is the label column.
titanic = pd.read_csv("../data/processed/titanic.csv")

# Fold count and coverage levels are left at the class defaults.
experiment = UncertaintyRegionExperiment(titanic, "Survived", random_state=RANDOM_SEED)
metrics = experiment.run()

# Displayed as the cell output.
metrics

CPU times: user 1.09 s, sys: 3.78 ms, total: 1.1 s
Wall time: 1.09 s


Unnamed: 0,mean,std
overall_accuracies,0.797966,0.026468
overall_f1_scores,0.726177,0.046938
top_25_accuracies,0.927548,0.05009
top_25_f1_scores,0.93276,0.048328
top_50_accuracies,0.893254,0.020675
top_50_f1_scores,0.864469,0.039982
top_75_accuracies,0.857962,0.024472
top_75_f1_scores,0.820933,0.041452
