In [21]:
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.datasets import load_breast_cancer
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [8]:
# Load scikit-learn's bundled breast-cancer dataset; return_X_y=True yields the
# feature matrix and the label vector directly instead of a Bunch object.
X, y = load_breast_cancer(return_X_y=True)

In [9]:
# Hold out 20% of the samples as a test set. Stratifying on y preserves the
# class proportions in both partitions, and the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
    stratify=y,
    test_size=0.2,
)

In [11]:
# Chain standardization, a 2-component PCA projection and logistic regression
# into one estimator, so every preprocessing step is fitted together with the
# classifier on whatever data `fit` receives.
pipe_lr = make_pipeline(
    StandardScaler(),
    PCA(n_components=2),
    LogisticRegression(random_state=1),
)
pipe_lr.fit(X_train, y_train)

# Predicted labels for the held-out test set, followed by the test accuracy.
y_pred = pipe_lr.predict(X_test)
print(f"Test Accuracy: {pipe_lr.score(X_test, y_test):.3f}")

Test Accuracy: 0.956


In [19]:
# Manual stratified 10-fold cross-validation over the training set.
# `.split()` returns a one-shot generator of (train indices, test indices)
# pairs, so `kfold` is exhausted once the loop below has run.
kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True).split(
    X_train, y_train
)

scores = []
for k, (train_idx, test_idx) in enumerate(kfold):
    # Fit a fresh, unfitted copy of the pipeline for every fold. Refitting
    # `pipe_lr` itself would leave it trained on only the last fold's subset,
    # silently replacing the model fitted on the full training set.
    fold_model = clone(pipe_lr)
    fold_model.fit(X_train[train_idx], y_train[train_idx])
    score = fold_model.score(X_train[test_idx], y_train[test_idx])
    scores.append(score)
    # np.bincount shows how many training samples of each class this fold has.
    print(
        f"Fold: {k+1:2d}, Class dist.: {np.bincount(y_train[train_idx])}, Acc: {score:.3f}"
    )

Fold:  1, Class dist.: [153 256], Acc: 0.978
Fold:  2, Class dist.: [153 256], Acc: 0.935
Fold:  3, Class dist.: [153 256], Acc: 0.957
Fold:  4, Class dist.: [153 256], Acc: 0.935
Fold:  5, Class dist.: [153 256], Acc: 0.913
Fold:  6, Class dist.: [153 257], Acc: 0.956
Fold:  7, Class dist.: [153 257], Acc: 0.933
Fold:  8, Class dist.: [153 257], Acc: 0.956
Fold:  9, Class dist.: [153 257], Acc: 0.933
Fold: 10, Class dist.: [153 257], Acc: 0.978


In [20]:
# Summarize the per-fold accuracies collected in `scores` as mean +/- spread.
# np.std is called with its default ddof=0, i.e. the population standard deviation.
print(f"CV accuracy: {np.mean(scores):.3f} +/- {np.std(scores):.3f}")

CV accuracy: 0.947 +/- 0.020


In [22]:
# Same evaluation through scikit-learn's helper, which clones and fits the
# pipeline once per fold. An integer `cv` would select unshuffled stratified
# folds for a classifier, so the shuffled, seeded splitter is passed explicitly
# to score the same partitions as the manual k-fold loop.
scores = cross_val_score(
    estimator=pipe_lr,
    X=X_train,
    y=y_train,
    cv=StratifiedKFold(n_splits=10, shuffle=True, random_state=1),
    n_jobs=1,
)
print(f"CV accuracy scores: {scores}")

CV accuracy scores: [0.97826087 0.93478261 0.95652174 0.93478261 0.91304348 0.95555556
 0.93333333 0.95555556 0.93333333 0.97777778]


In [23]:
# Summarize the array of fold accuracies returned by cross_val_score.
# np.std is called with its default ddof=0, i.e. the population standard deviation.
print(f"CV accuracy: {np.mean(scores):.3f} +/- {np.std(scores):.3f}")

CV accuracy: 0.947 +/- 0.020
