# Gradient Boosting for Classification
In this document, we develop a gradient boosting classifier using scikit-learn to classify samples from the breast cancer dataset.

## Import Libraries

In [1]:
import numpy as np
from sklearn import datasets
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV       # for hyper-parameter tuning

import warnings
# NOTE(review): blanket filter — this silences every warning raised by later
# cells, including any emitted by scikit-learn while fitting. Consider
# narrowing it to a specific category once the notebook runs cleanly.
warnings.filterwarnings('ignore')

## Load Dataset

In [2]:
# Load the breast cancer dataset, then pull out the feature matrix (X)
# and the label vector (y) as separate names for the cells below.
dataset = datasets.load_breast_cancer()
X = dataset.data
y = dataset.target

In [3]:
# Hold out 20% of the samples for the final evaluation; the fixed seed keeps
# the split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Fit Model

In [4]:
# Hyper-parameter grid explored exhaustively by the grid search below.
param_grid = {
    'n_estimators'      : [50, 100, 200],           # number of boosting stages (trees are added sequentially)
    'max_depth'         : [None, 10, 50, 100],      # maximum depth of each tree; None (the object, not the string 'None') means no depth limit
    'min_samples_split' : [2, 5, 10],               # minimum number of samples required to split an internal node
    'learning_rate'     : [0.001, 0.01, 0.1],       # shrinkage applied to each tree's contribution
}

# Fixed seed so the boosted model is reproducible across runs.
clf = GradientBoostingClassifier(random_state=42)

grid_search = GridSearchCV(
    estimator  = clf,
    param_grid = param_grid,
    cv         = 5,                                 # number of folds for cross-validation
    scoring    = 'accuracy',
    n_jobs     = -1                                 # number of CPU cores to use (-1 means all cores)
)

# Fit every parameter combination on the training split only; the test split
# stays untouched until the final evaluation.
grid_search.fit(X_train, y_train)
best_parameters = grid_search.best_params_
best_score      = grid_search.best_score_           # mean cross-validated accuracy of the best combination
best_clf        = grid_search.best_estimator_       # best combination refit on the full training split

print("Best Parameters:")
for k, v in best_parameters.items():
    print(f"{k}: {v}")

# best_score_ is averaged over the held-out CV folds, so it is a validation
# score rather than an accuracy measured on the training data.
print(f"Cross-Validation Accuracy: {best_score*100:.2f}")

Best Parameters:
learning_rate: 0.01
max_depth: 10
min_samples_split: 10
n_estimators: 100
Training Accuracy: 94.95


## Testing

In [5]:
# Score the tuned model on the held-out test split: the accuracy is the
# fraction of test labels that the model predicts correctly.
y_pred = best_clf.predict(X_test)
hits = (y_pred == y_test)
accuracy = hits.sum() / len(y_test)
print(f"Accuracy: {accuracy*100:.2f}")

Accuracy: 93.86
