In [1]:
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split
import confidence_planner as cp

# Load the breast-cancer data and hold out 30% of it as a test split,
# stratified on the labels; the fixed random_state keeps the split repeatable.
X, y = datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=23,
)

# Fit an SVM on the training split, then score its predictions on the holdout.
clf = svm.SVC(gamma=0.001).fit(X_train, y_train)  # fit() returns the estimator
y_pred = clf.predict(X_test)
acc = metrics.accuracy_score(y_test, y_pred)

# Confidence interval for the holdout accuracy (the holdout size is passed as
# the first argument), plus the sample-size estimate for a 0.05 radius.
n_test = len(y_test)
ci = cp.estimate_confidence_interval(n_test, acc, confidence_level=0.90)
sample = cp.estimate_sample_size(accuracy_radius=0.05, confidence_level=0.90)

# One print call, one line per result.
print(
    f"Holdout accuracy: {acc}",
    f"90% CI: {ci}",
    f"Test samples needed for a 0.05 radius 90% CI: {sample}",
    sep="\n",
)

Holdout accuracy: 0.9532163742690059
90% CI: [0.890815536269955, 1.0]
Test samples needed for a 0.05 radius 90% CI: 271
