# In [4]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import KFold, StratifiedKFold, ShuffleSplit, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Fetch the scikit-learn diabetes dataset and keep its feature matrix
data = load_diabetes()
X = data.data

# Binarise the continuous target: 1 where it lies strictly above the median,
# 0 otherwise, turning the regression task into two-class classification
y = (data.target > np.median(data.target)).astype(int)

# Pipeline that standardizes the features before the logistic regression;
# the same (unfitted) pipeline is handed to every cross-validation scheme
model = make_pipeline(StandardScaler(), LogisticRegression())

# Fold count for the two k-fold schemes and repetition count for Monte Carlo
k, n_splits_mc = 5, 150

# The three splitters under comparison, all seeded with random_state=1:
#   kf  - shuffled k-fold
#   skf - shuffled k-fold that stratifies on y
#   mc  - Monte Carlo CV: repeated random splits holding out 20% each time
kf = KFold(n_splits=k, shuffle=True, random_state=1)
skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=1)
mc = ShuffleSplit(n_splits=n_splits_mc, test_size=0.2, random_state=1)

# Score the pipeline once per splitter, in the order kf -> skf -> mc;
# each result is an array with one score per split
kf_scores, skf_scores, mc_scores = (
    cross_val_score(model, X, y, cv=splitter) for splitter in (kf, skf, mc)
)

# Report mean ± standard deviation of the per-split scores for each scheme
print(f"K-fold CV mean accuracy: {kf_scores.mean():.4f} ± {kf_scores.std():.4f}")
print(f"Stratified K-fold CV mean accuracy: {skf_scores.mean():.4f} ± {skf_scores.std():.4f}")
print(f"Monte Carlo CV mean accuracy: {mc_scores.mean():.4f} ± {mc_scores.std():.4f}")

# Output:
# K-fold CV mean accuracy: 0.7446 ± 0.0535
# Stratified K-fold CV mean accuracy: 0.7397 ± 0.0322
# Monte Carlo CV mean accuracy: 0.7424 ± 0.0402
