# Hyperparameter Tuning with GridSearchCV and Cross-Validation
A hands-on demonstration using SVM, KNN, and Decision Tree on the Breast Cancer dataset.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay

# Apply seaborn's 'whitegrid' style globally so the plots below share one look.
sns.set(style='whitegrid')

In [None]:
# Load the Breast Cancer dataset: the features become a DataFrame with named
# columns, and the class labels stay as the array supplied by the loader.
data = load_breast_cancer()
y = data.target
feature_columns = data.feature_names
X = pd.DataFrame(data.data, columns=feature_columns)
print("Shape of data:", X.shape)

In [None]:
# Hold out 30% of the samples for the final evaluation. Stratifying on y keeps
# the class proportions the same in both parts, and the fixed seed makes the
# split repeatable.
split_options = {'test_size': 0.3, 'stratify': y, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

n_train, n_test = X_train.shape[0], X_test.shape[0]
print("Training size:", n_train, "| Test size:", n_test)

In [None]:
# SVM and KNN are sensitive to feature scale, so standardization is part of the
# pipeline: during cross-validation the scaler is then fit on the training folds
# only, which keeps validation data from leaking into the scaling statistics.
svm_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC())
])

knn_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier())
])

# The decision tree gets no scaler step. It is still wrapped in a Pipeline so
# that its hyperparameters use the same `<step>__<param>` naming as the others.
# random_state is fixed because the tree permutes features randomly when
# choosing splits; without a seed the tuned model and its scores can change
# from run to run even though the train/test split itself is seeded.
dt_pipe = Pipeline([
    ('dt', DecisionTreeClassifier(random_state=42))
])

In [None]:
# Hyperparameter grids. Keys follow the pipeline convention `<step>__<param>`.

# The linear kernel ignores `gamma`, so crossing gamma with both kernels would
# fit the same linear model three times per C value. Splitting the grid per
# kernel removes those duplicates (12 candidates instead of 18).
# NOTE: when a linear candidate wins, best_params_ has no 'svc__gamma' entry.
svm_params = [
    {
        'svc__kernel': ['linear'],
        'svc__C': [0.1, 1, 10],
    },
    {
        'svc__kernel': ['rbf'],
        'svc__C': [0.1, 1, 10],
        'svc__gamma': ['scale', 0.1, 1],
    },
]

# KNN: neighbourhood size, vote weighting, and distance metric.
knn_params = {
    'knn__n_neighbors': [3, 5, 7],
    'knn__weights': ['uniform', 'distance'],
    'knn__metric': ['euclidean', 'manhattan'],
}

# Decision tree: limit depth and the minimum node size eligible for a split.
dt_params = {
    'dt__max_depth': [3, 5, 7],
    'dt__min_samples_split': [2, 5, 10],
}

In [None]:
# Settings shared by all three searches: 5-fold cross-validation, accuracy as
# the selection metric, and n_jobs=-1 to run the fits in parallel.
_search_kwargs = {'cv': 5, 'scoring': 'accuracy', 'n_jobs': -1}

svm_grid = GridSearchCV(estimator=svm_pipe, param_grid=svm_params, **_search_kwargs)
knn_grid = GridSearchCV(estimator=knn_pipe, param_grid=knn_params, **_search_kwargs)
dt_grid = GridSearchCV(estimator=dt_pipe, param_grid=dt_params, **_search_kwargs)

# Tune on the training split only; the test split stays unseen until evaluation.
svm_grid.fit(X_train, y_train)
knn_grid.fit(X_train, y_train)
dt_grid.fit(X_train, y_train)

In [None]:
# Fitted searches keyed by the display name used in the reports and plots.
models = {'SVM': svm_grid, 'KNN': knn_grid, 'Decision Tree': dt_grid}

# For each tuned model, show the selected hyperparameters and then its
# accuracy and per-class metrics on the held-out test split.
for name, model in models.items():
    print(f"\n{name} Best Parameters:", model.best_params_)

    y_pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {test_accuracy:.4f}")

    report = classification_report(
        y_test, y_pred, target_names=data.target_names
    )
    print(report)

In [None]:
# Draw one confusion matrix per tuned model, evaluated on the test split.
for name, model in models.items():
    print(f"\n{name} Confusion Matrix:")
    cm_display = ConfusionMatrixDisplay.from_estimator(
        model,
        X_test,
        y_test,
        display_labels=data.target_names,
    )
    # Work on the axes the display drew on rather than implicit pyplot state.
    cm_display.ax_.set_title(f"{name} - Confusion Matrix")
    # Turn the grid off for this plot so no grid lines are drawn over the cells.
    cm_display.ax_.grid(False)
    plt.show()