In [1]:
from sklearn import datasets, svm, metrics
from sklearn.utils import resample
import numpy as np
import confidence_planner as cp

digits = datasets.load_digits()

# Flatten the images
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))

# Create a classifier: a support vector classifier
clf = svm.SVC(gamma=0.001)

n_iter = 100
accuracies = []

# Run model training and testing n_iter times
for i in range(n_iter):
    # Bootstrap indices
    indices = resample(range(n_samples), n_samples=n_samples)

    X_train = []
    y_train = []
    X_test = []
    y_test = []

    # Append images and labels with bootstrapped indices to lists for training
    for idx in indices:
        X_train.append(data[idx])
        y_train.append(digits.target[idx])

    # Append images and labels that are not in a training subset to lists for testing
    for i in range(n_samples):
        if i not in indices:
            X_test.append(data[i])
            y_test.append(digits.target[i])

    # Learn the digits on the train subset
    clf.fit(X_train, y_train)
    predicted = clf.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, predicted)
    accuracies.append(accuracy)

acc_mean = np.mean(accuracies)
acc_median = np.mean(accuracies)
ci = cp.estimate_confidence_interval(None, accuracies, confidence_level=0.90, method="bootstrap")

print(f"Mean accuracy: {acc_mean}")
print(f"Median accuracy: {acc_median}")
print(f"90% CI: {ci}")

Mean accuracy: 0.9888840389988671
Median accuracy: 0.9888840389988671
90% CI: [0.9831364509756163, 0.9939218937091278]
