In [1]:
## Gradient Boosting Classification

In [2]:
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import warnings
warnings.filterwarnings('ignore')

In [3]:
x,y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=1)

In [4]:
x

array([[-2.04582165, -0.13791624, -0.08071423, ...,  2.48194524,
         0.74236675,  0.23154789],
       [-0.98726024,  1.30120189,  2.37734888, ...,  0.55445754,
        -0.21892143, -0.37608578],
       [ 0.57335921,  0.09375582,  0.4662521 , ..., -0.6088508 ,
         0.79903499, -0.17121177],
       ...,
       [-0.70737159,  1.07650943,  0.58510456, ..., -1.51337602,
         0.90239871, -0.69230951],
       [-0.20706849,  1.17319848, -1.94478665, ..., -0.32820676,
         1.5711921 ,  1.14877729],
       [-2.16769231, -2.54871672,  2.89359255, ...,  0.71535366,
         0.34329241,  1.07350284]])

In [5]:
y

array([0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0,
       0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [6]:
## Train the mnodel

In [7]:
x_train, x_test, y_train, y_test = train_test_split(x,y, train_size=0.20, random_state=1)

In [8]:
x_train.shape, x_test.shape

((200, 20), (800, 20))

In [9]:
clf = GradientBoostingClassifier()
clf

In [10]:
# Fit the model
clf.fit(x_train, y_train)

In [11]:
# predict the model

y_pred = clf.predict(x_test)
y_pred

array([0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1,
       1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1,
       1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1,

In [12]:
# Evaluate the model

print("Current model perfomance")
print(f"Accuracy {accuracy_score(y_test, y_pred)} ")
print("Classification Report")
print(classification_report(y_test, y_pred))
print("Confusion Matrix")
print(confusion_matrix(y_test, y_pred))

Current model perfomance
Accuracy 0.8175 
Classification Report
              precision    recall  f1-score   support

           0       0.81      0.82      0.82       396
           1       0.82      0.81      0.82       404

    accuracy                           0.82       800
   macro avg       0.82      0.82      0.82       800
weighted avg       0.82      0.82      0.82       800

Confusion Matrix
[[326  70]
 [ 76 328]]


In [13]:
### Hyperparameter Tuning
from sklearn.model_selection import GridSearchCV

param_grid = {
    "n_estimators": [50, 100, 200],
    "learning_rate": [0.01, 0.1, 1.0, 1.5, 2.0]
    
}

In [14]:
gbc = GradientBoostingClassifier()
grid_search = GridSearchCV(estimator= gbc, param_grid=param_grid, cv=5, verbose=3)
grid_search

In [16]:
grid_search.fit(x_train, y_train)

Fitting 5 folds for each of 15 candidates, totalling 75 fits
[CV 1/5] END learning_rate=0.01, n_estimators=50;, score=0.700 total time=   0.0s
[CV 2/5] END learning_rate=0.01, n_estimators=50;, score=0.850 total time=   0.0s
[CV 3/5] END learning_rate=0.01, n_estimators=50;, score=0.775 total time=   0.0s
[CV 4/5] END learning_rate=0.01, n_estimators=50;, score=0.700 total time=   0.0s
[CV 5/5] END learning_rate=0.01, n_estimators=50;, score=0.775 total time=   0.0s
[CV 1/5] END learning_rate=0.01, n_estimators=100;, score=0.700 total time=   0.1s
[CV 2/5] END learning_rate=0.01, n_estimators=100;, score=0.850 total time=   0.1s
[CV 3/5] END learning_rate=0.01, n_estimators=100;, score=0.775 total time=   0.1s
[CV 4/5] END learning_rate=0.01, n_estimators=100;, score=0.750 total time=   0.1s
[CV 5/5] END learning_rate=0.01, n_estimators=100;, score=0.750 total time=   0.1s
[CV 1/5] END learning_rate=0.01, n_estimators=200;, score=0.725 total time=   0.2s
[CV 2/5] END learning_rate=0.01

In [18]:
# Gradient Boosting Regressior

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

import warnings
warnings.filterwarnings('ignore')

In [19]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.1, random_state=1)

In [21]:
x_train.shape, x_test.shape

((900, 20), (100, 20))

In [22]:
regressor = GradientBoostingRegressor()
regressor.fit(x_train, y_train)

In [24]:
y_pred = regressor.predict(x_test)
# Evaluate the model
print("Current model perfomance")
print(f"Accuracy {r2_score(y_test, y_pred)} ")
print(f"mean absolute error:{ mean_absolute_error(y_test, y_pred)}")
print(f"Mean Squared error: {mean_squared_error(y_test, y_pred)}")

Current model perfomance
Accuracy 0.5466319268085189 
mean absolute error:0.21732636846865788
Mean Squared error: 0.1129339870319979


In [None]:
## Model perfomance

best_model = grid_search.best_estimator_
y_pred_tuned = best_model.predict(x_test)


# Evaluate the model
print("Tuned model perfomance")
print(f"Accuracy {r2_score(y_test, y_pred_tuned)} ")
print(f"mean absolute error:{ mean_absolute_error(y_test, y_pred_tuned)}")
print(f"Mean Squared error: {mean_squared_error(y_test, y_pred_tuned)}")

In [25]:
### Hyperparameter Tuning
from sklearn.model_selection import GridSearchCV

param_grid = {
    "n_estimators": [50, 100, 200],
    "learning_rate": [0.01, 0.1, 1.0, 1.5, 2.0]
    
}
gbr = GradientBoostingRegressor()
gbr

In [28]:
grid_search = GridSearchCV(estimator=gbr, param_grid=param_grid, cv=5, verbose=3)
grid_search

In [29]:
grid_search.fit(x_train, y_train)

Fitting 5 folds for each of 15 candidates, totalling 75 fits
[CV 1/5] END learning_rate=0.01, n_estimators=50;, score=0.330 total time=   0.1s
[CV 2/5] END learning_rate=0.01, n_estimators=50;, score=0.367 total time=   0.1s
[CV 3/5] END learning_rate=0.01, n_estimators=50;, score=0.378 total time=   0.1s
[CV 4/5] END learning_rate=0.01, n_estimators=50;, score=0.344 total time=   0.1s
[CV 5/5] END learning_rate=0.01, n_estimators=50;, score=0.344 total time=   0.1s
[CV 1/5] END learning_rate=0.01, n_estimators=100;, score=0.460 total time=   0.4s
[CV 2/5] END learning_rate=0.01, n_estimators=100;, score=0.516 total time=   0.4s
[CV 3/5] END learning_rate=0.01, n_estimators=100;, score=0.526 total time=   0.4s
[CV 4/5] END learning_rate=0.01, n_estimators=100;, score=0.461 total time=   0.4s
[CV 5/5] END learning_rate=0.01, n_estimators=100;, score=0.462 total time=   0.4s
[CV 1/5] END learning_rate=0.01, n_estimators=200;, score=0.539 total time=   0.8s
[CV 2/5] END learning_rate=0.01

In [30]:
grid_search.best_params_

{'learning_rate': 0.1, 'n_estimators': 50}