In [1]:
from statsmodels.stats.proportion import proportion_confint
from math import sqrt
from scipy.stats import norm

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix

In [2]:
# Load the breast cancer dataset directly as a (features, labels) pair.
X, y = load_breast_cancer(return_X_y=True)

In [3]:
# Sanity check: features and labels must have the same length.
len(X) == len(y)

True

In [4]:
# Hold out 33% of the samples for evaluation; the fixed random_state keeps
# the split the same on every run.
train, test, train_labels, test_labels = train_test_split(
    X, y, test_size=0.33, random_state=0
)

In [5]:
# Fit a decision tree on the training split; only random_state is set,
# every other hyperparameter is left at its default.
dt = DecisionTreeClassifier(random_state=0)
dt.fit(train, train_labels)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=0, splitter='best')

In [6]:
# Predict labels for the held-out test split.
preds = dt.predict(test)

In [7]:
# For two classes the confusion matrix has true labels as rows and predicted
# labels as columns, so ravel() yields the counts in the order tn, fp, fn, tp.
tn, fp, fn, tp = confusion_matrix(y_true=test_labels, y_pred=preds).ravel()

In [8]:
# Show the counts as a 2x2 grid: tn and fp on the first row, fn and tp on the second.
print(f"{tn}\t{fp}\n{fn}\t{tp}")

63	4
10	111


---

${\displaystyle {\text{Precision}}={\frac {tp}{tp+fp}}}$

${\displaystyle {\text{Recall}}={\frac {tp}{tp+fn}}}$

${\displaystyle {\text{FPR}}={\frac {fp}{fp + tn}}}$

---

In [9]:
# Binomial proportion confidence interval via normal approximation
# More info: https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval

def get_asymptotic_normal_approximation(metric, n, alpha=0.05):
    """Return the normal-approximation confidence interval for a proportion.

    The interval is ``metric +/- z * sqrt(metric * (1 - metric) / n)``, where
    ``z`` is the ``1 - alpha / 2`` quantile of the standard normal
    distribution.

    Args:
        metric: Observed proportion (e.g. precision, recall or FPR); must lie
            in [0, 1].
        n: Number of observations the proportion was computed from; must be
            greater than 0.
        alpha: Significance level; the interval has nominal coverage
            ``1 - alpha``. Must lie strictly between 0 and 1.

    Returns:
        A ``(lower, upper)`` tuple. The bounds are not clipped, so they can
        fall outside [0, 1] when ``metric`` is near 0 or 1 or ``n`` is small.

    Raises:
        ValueError: If ``metric``, ``n`` or ``alpha`` is out of range or NaN.
    """
    # The checks are written as negated comparisons so that NaN inputs (for
    # which every comparison is False) are rejected instead of silently
    # propagating into a (nan, nan) interval.
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha}")
    if not n > 0:
        raise ValueError(f"n must be a positive number of observations, got {n}")
    if not 0 <= metric <= 1:
        raise ValueError(f"metric must be a proportion in [0, 1], got {metric}")

    # Two-sided critical value of the standard normal distribution.
    z = norm.ppf(1 - (alpha / 2))

    # Half-width of the interval: z times the standard error of the proportion.
    interval = z * sqrt((metric * (1 - metric)) / n)

    return metric - interval, metric + interval

In [10]:
# Precision: share of predicted positives that are truly positive.
precision = tp / (fp + tp)
print(f"Precision: {round(precision, 3)}")

# Recall: share of actual positives that were predicted positive.
recall = tp / (fn + tp)
print(f"Recall: {round(recall, 3)}")

# False positive rate: share of actual negatives that were predicted positive.
fpr = fp / (tn + fp)
print(f"FPR: {round(fpr, 3)}")

Precision: 0.965
Recall: 0.917
FPR: 0.06


In [11]:
# Precision: normal-approximation interval at alpha = 0.05 over the tp + fp
# predicted positives, computed by hand and then with statsmodels as a cross-check.

lower, upper = get_asymptotic_normal_approximation(metric=precision, n=tp + fp)
print(f"Manual: lower = {round(lower,3)}, upper = {round(upper, 3)}")

lower, upper = proportion_confint(
    count=tp,
    nobs=tp + fp,
    alpha=0.05,
    method="normal",
)
print(f"statsmodels: lower = {round(lower,3)}, upper = {round(upper, 3)}")

Manual: lower = 0.932, upper = 0.999
statsmodels: lower = 0.932, upper = 0.999


In [12]:
# Recall: normal-approximation interval at alpha = 0.05 over the tp + fn
# actual positives, computed by hand and then with statsmodels as a cross-check.

lower, upper = get_asymptotic_normal_approximation(metric=recall, n=tp + fn)
print(f"Manual: lower = {round(lower,3)}, upper = {round(upper, 3)}")

lower, upper = proportion_confint(
    count=tp,
    nobs=tp + fn,
    alpha=0.05,
    method="normal",
)
print(f"statsmodels: lower = {round(lower,3)}, upper = {round(upper, 3)}")

Manual: lower = 0.868, upper = 0.966
statsmodels: lower = 0.868, upper = 0.966


In [13]:
# FPR: normal-approximation interval at alpha = 0.05 over the fp + tn
# actual negatives, computed by hand and then with statsmodels as a cross-check.

lower, upper = get_asymptotic_normal_approximation(metric=fpr, n=fp + tn)
print(f"Manual: lower = {round(lower,3)}, upper = {round(upper, 3)}")

lower, upper = proportion_confint(
    count=fp,
    nobs=fp + tn,
    alpha=0.05,
    method="normal",
)
print(f"statsmodels: lower = {round(lower,3)}, upper = {round(upper, 3)}")

Manual: lower = 0.003, upper = 0.116
statsmodels: lower = 0.003, upper = 0.116
