In [4]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_validate, KFold
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score

# Step 1: Load the breast-cancer dataset shipped with scikit-learn.
# The loader's return value is indexed by key below; X gets one labelled
# column per feature name and y wraps the target array in a Series.
data = load_breast_cancer()
X = pd.DataFrame(data=data["data"], columns=data["feature_names"])
y = pd.Series(data=data["target"])

# Step 2: Chain preprocessing and the classifier into a single estimator.
# Keeping the scaler inside the pipeline means it is fitted together with
# the model on whatever data the pipeline's fit() receives.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('model', LogisticRegression(max_iter=10_000)),
    ],
)

# Step 3: Map each metric label to a scorer object.
# Every metric function is wrapped with make_scorer using its default
# arguments; the labels become the keys of the resulting dict.
scoring = {
    label: make_scorer(metric_fn)
    for label, metric_fn in (
        ('accuracy', accuracy_score),
        ('precision', precision_score),
        ('recall', recall_score),
    )
}

# Step 4: Evaluate the pipeline with shuffled 5-fold cross-validation.
# The fixed random_state keeps the fold assignment reproducible between runs.
cv = KFold(random_state=42, shuffle=True, n_splits=5)
cv_results = cross_validate(
    estimator=pipeline,
    X=X,
    y=y,
    scoring=scoring,
    cv=cv,
)

# Step 5: Tabulate the cross-validation output and print the three test
# metric columns (one row per fold).
cv_df = pd.DataFrame(data=cv_results)
print(cv_df.loc[:, ['test_accuracy', 'test_precision', 'test_recall']])


   test_accuracy  test_precision  test_recall
0       0.973684        0.972222     0.985915
1       0.982456        1.000000     0.974026
2       0.964912        0.958904     0.985915
3       0.991228        0.986111     1.000000
4       0.973451        0.970588     0.985075
