In [7]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.pipeline import Pipeline

In [8]:
# Load the scikit-learn breast cancer dataset: 569 tumor samples described by
# 30 numeric features, each labelled with one of 2 classes (malignant/benign).
data = load_breast_cancer()
X, y = data.data, data.target  # feature matrix and label vector

# Quick sanity check of what was loaded.
print("Feature shape:", X.shape)          # (n_samples, n_features) -> (569, 30)
print("Labels shape:", y.shape)           # one label per sample -> (569,)
print("Class names:", data.target_names)  # label 0 -> 'malignant', label 1 -> 'benign'


Feature shape: (569, 30)
Labels shape: (569,)
Class names: ['malignant' 'benign']


In [3]:
# Two-stage model, bundled so that scaling and classification are always
# fitted together (when cross-validated, the scaler is refit on each
# training fold rather than on the full dataset):
#   'scaler' - StandardScaler: standardizes every feature to zero mean and
#              unit variance.
#   'svc'    - SVC: support vector classifier with an RBF kernel; C and gamma
#              are spelled out explicitly.
# NOTE(review): per the scikit-learn docs, SVC only uses random_state when
# probability=True, so it is presumably a no-op here - confirm before relying
# on it for reproducibility.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC(C=1.0, kernel='rbf', gamma='scale', random_state=42)),
])

In [5]:
# Evaluate the whole pipeline (scaler + SVC) with 5-fold cross-validation
# (cv=5), producing one score per fold.
#
# scoring='f1' means every score is the binary F1 score of the positive
# class, i.e. label 1 ('benign' in this dataset) - NOT accuracy. The summary
# line below is therefore labelled as F1 so the reported number matches the
# metric that was actually computed.
scores = cross_val_score(pipeline, X, y, cv=5, scoring='f1')

print("Cross-validation scores:", scores)
print("Mean F1 score:", scores.mean())
print("Standard Deviation:", scores.std())  # spread of the per-fold F1 scores


Cross-validation scores: [0.9787234  0.96551724 1.         0.97260274 0.9787234 ]
Mean Accuracy: 0.9791133579231952
Standard Deviation: 0.011522139379081702


In [6]:
# Out-of-fold predictions: with 5-fold CV every sample is predicted by a
# pipeline that was fitted on the other folds, giving one prediction per row
# of X.
y_pred_cv = cross_val_predict(estimator=pipeline, X=X, y=y, cv=5)

# Summarise those predictions against the true labels.
# Confusion matrix: rows are true classes, columns are predicted classes,
# in label order (0 = malignant, 1 = benign).
cm = confusion_matrix(y_true=y, y_pred=y_pred_cv)
# Per-class precision / recall / F1 / support, labelled with the class names.
report = classification_report(
    y_true=y,
    y_pred=y_pred_cv,
    target_names=data.target_names,
)

print("Confusion Matrix:\n", cm)
print("Classification Report:\n", report)

Confusion Matrix:
 [[203   9]
 [  6 351]]
Classification Report:
               precision    recall  f1-score   support

   malignant       0.97      0.96      0.96       212
      benign       0.97      0.98      0.98       357

    accuracy                           0.97       569
   macro avg       0.97      0.97      0.97       569
weighted avg       0.97      0.97      0.97       569

