In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold, KFold, LeaveOneOut, ShuffleSplit, StratifiedShuffleSplit, RepeatedKFold

import warnings

# Ignore all warnings
warnings.filterwarnings("ignore")

In [2]:
# Load breast cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

In [13]:
X.shape

(569, 30)

In [3]:
# Create a KNN model
knn_model = KNeighborsClassifier(n_neighbors=3)

#### K-Fold Cross-Validation

In [16]:
kf_scores = cross_val_score(knn_model, 
                            X, 
                            y, 
                            cv      = KFold(n_splits=5, shuffle=True, random_state=42), 
                            scoring = 'accuracy')

In [17]:
kf_scores

array([0.92982456, 0.94736842, 0.88596491, 0.92982456, 0.90265487])

In [7]:
print('------------------------ K-Fold Cross-Validation ------------------------')
print(f'Mean Accuracy: {np.mean(kf_scores):.4f}')
print(f'Standard Deviation: {np.std(kf_scores):.4f}\n')

------------------------ K-Fold Cross-Validation ------------------------
Mean Accuracy: 0.9191
Standard Deviation: 0.0219



#### Stratified K-Fold Cross-Validation

In [8]:
skf_scores = cross_val_score(knn_model, 
                             X, 
                             y, 
                             cv     = StratifiedKFold(n_splits=5, shuffle=True, random_state=42), 
                             scoring= 'accuracy')

In [18]:
skf_scores

array([0.92982456, 0.88596491, 0.93859649, 0.92982456, 0.95575221])

In [9]:
print('------------------------ Stratified K-Fold Cross-Validation ------------------------')
print(f'Mean Accuracy: {np.mean(skf_scores):.4f}')
print(f'Standard Deviation: {np.std(skf_scores):.4f}\n')

------------------------ Stratified K-Fold Cross-Validation ------------------------
Mean Accuracy: 0.9280
Standard Deviation: 0.0230



#### Leave-One-Out Cross-Validation

In [19]:
loo_scores = cross_val_score(knn_model, 
                             X, 
                             y, 
                             cv=LeaveOneOut(), 
                             scoring='accuracy')

print('------------------------ Leave-One-Out Cross-Validation ------------------------')
print(f'Mean Accuracy: {np.mean(loo_scores):.4f}\n')

------------------------ Leave-One-Out Cross-Validation ------------------------
Mean Accuracy: 0.9262



#### Shuffle Split Cross-Validation

In [27]:
ss_scores = cross_val_score(knn_model, 
                            X, 
                            y, 
                            cv=ShuffleSplit(n_splits=5, test_size=0.2, random_state=42), 
                            scoring='accuracy')

print(f'Mean Accuracy: {np.mean(ss_scores):.4f}')
print(f'Standard Deviation: {np.std(ss_scores):.4f}\n')

Mean Accuracy: 0.9298
Standard Deviation: 0.0175



#### Stratified Shuffle Split Cross-Validation

In [28]:
sss_scores = cross_val_score(knn_model, 
                             X, 
                             y, 
                             cv=StratifiedShuffleSplit(n_splits=5, test_size=0.2, 
                                                       random_state=42), 
                             scoring='accuracy')

print(f'Mean Accuracy: {np.mean(sss_scores):.4f}')
print(f'Standard Deviation: {np.std(sss_scores):.4f}\n')

Mean Accuracy: 0.9246
Standard Deviation: 0.0172



#### Repeated K-Fold Cross-Validation

In [21]:
rkf_scores = cross_val_score(knn_model, 
                             X, 
                             y, 
                             cv=RepeatedKFold(n_splits=5, n_repeats=2, random_state=42), 
                             scoring='accuracy')

print(f'Mean Accuracy: {np.mean(rkf_scores):.4f}')
print(f'Standard Deviation: {np.std(rkf_scores):.4f}\n')

Mean Accuracy: 0.9244
Standard Deviation: 0.0194



In [22]:
rkf_scores

array([0.92982456, 0.94736842, 0.88596491, 0.92982456, 0.90265487,
       0.92105263, 0.92982456, 0.92982456, 0.95614035, 0.91150442])