Question 7: Hyperparameter Optimization and Ensemble Methods<br>
Task: Combine GridSearchCV with an ensemble method to tune hyperparameters for boosting on a complex dataset like the Wine dataset.

In [1]:
# 🧰 Import Libraries
import numpy as np
import pandas as pd

from sklearn.datasets import load_wine
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import classification_report, accuracy_score

# 📥 Load Wine Dataset
data = load_wine()
X = data.data
y = data.target

# 🧪 Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# 🌲 Initialize Gradient Boosting Classifier
gbc = GradientBoostingClassifier(random_state=42)

# 🔍 Define Hyperparameter Grid
param_grid = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 4, 5],
    'subsample': [0.8, 1.0],
    'min_samples_split': [2, 5]
}

# 🔄 Setup GridSearchCV
grid_search = GridSearchCV(
    estimator=gbc,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    verbose=2
)

# 🧠 Fit GridSearchCV
grid_search.fit(X_train, y_train)

# 🏆 Best Params & Score
print("✅ Best Hyperparameters:")
print(grid_search.best_params_)
print(f"📈 Best Cross-validation Accuracy: {grid_search.best_score_:.4f}")

# 🧮 Evaluate Best Model on Test Data
best_gbc = grid_search.best_estimator_
y_pred = best_gbc.predict(X_test)

print("\n🔍 Classification Report on Test Set:\n", classification_report(y_test, y_pred))
print(f"✅ Test Set Accuracy: {accuracy_score(y_test, y_pred):.4f}")


Fitting 5 folds for each of 108 candidates, totalling 540 fits
[CV] END learning_rate=0.01, max_depth=3, min_samples_split=2, n_estimators=50, subsample=0.8; total time=   0.3s
[CV] END learning_rate=0.01, max_depth=3, min_samples_split=2, n_estimators=50, subsample=0.8; total time=   0.3s
[CV] END learning_rate=0.01, max_depth=3, min_samples_split=2, n_estimators=50, subsample=0.8; total time=   0.3s
[CV] END learning_rate=0.01, max_depth=3, min_samples_split=2, n_estimators=50, subsample=0.8; total time=   0.3s
[CV] END learning_rate=0.01, max_depth=3, min_samples_split=2, n_estimators=50, subsample=0.8; total time=   0.3s
[CV] END learning_rate=0.01, max_depth=3, min_samples_split=2, n_estimators=50, subsample=1.0; total time=   0.3s
[CV] END learning_rate=0.01, max_depth=3, min_samples_split=2, n_estimators=50, subsample=1.0; total time=   0.3s
[CV] END learning_rate=0.01, max_depth=3, min_samples_split=2, n_estimators=50, subsample=1.0; total time=   0.3s
[CV] END learning_rate=0.