# Gradient Boosting

In [1]:
import sys

sys.path.insert(1, "..")

from classic.model.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.datasets import load_diabetes, load_digits
from sklearn import ensemble
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split

## 1. GradientBoostingRegressor

In [2]:
# Load the diabetes regression dataset and hold out 30% of the samples for testing.
dataset = load_diabetes()
X, y = dataset["data"], dataset["target"]
# Fix the shuffle seed so the train/test split is identical on every
# Restart & Run All; without it the reported errors drift between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Preview the first two training rows.
X_train[:2]

array([[-0.01277963,  0.05068012, -0.05578531, -0.00222757, -0.02771206,
        -0.02918409,  0.019187  , -0.03949338, -0.01705628,  0.04448548],
       [ 0.09619652, -0.04464164,  0.04013997, -0.05731319,  0.04521344,
         0.06068952, -0.02131102,  0.03615391,  0.01255119,  0.02377494]])

In [3]:
# Fit the project's own GradientBoostingRegressor on the training split and
# report the mean squared error on the held-out split.
gbr = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=2)
gbr.fit(X_train, y_train)
pred = gbr.predict(X_test)
# The label names the model actually evaluated here (gradient boosting, not a
# single decision tree).
print(f"Gradient Boosting Regressor mse = {mean_squared_error(y_test, pred)}")

Decision Tree Regressor mse = 3405.0490380060082


In [4]:
# Reference run: scikit-learn's GradientBoostingRegressor with the same
# n_estimators / learning_rate / max_depth as the custom model.
gbr = ensemble.GradientBoostingRegressor(
    n_estimators=100, learning_rate=0.1, max_depth=2
)
gbr.fit(X_train, y_train)
pred = gbr.predict(X_test)
# The label names the model actually evaluated here (gradient boosting, not a
# single decision tree).
print(
    f"Gradient Boosting Regressor (Sklearn) mse = {mean_squared_error(y_test, pred)}"
)

Decision Tree Regressor (Sklearn) mse = 3567.021574760132


## 2. GradientBoostingClassifier

In [5]:
# Load the digits classification dataset and hold out 30% of the samples for testing.
dataset = load_digits()
X, y = dataset["data"], dataset["target"]
# Guarantee one 64-feature row per sample. scikit-learn documents `data` as
# already flattened from the 8x8 images, so this is expected to be a no-op guard.
X = X.reshape(-1, 64)
# Fix the shuffle seed so the train/test split is identical on every
# Restart & Run All; without it the reported accuracies drift between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Preview the first two training rows.
X_train[:2]

array([[ 0.,  0.,  9.,  9.,  4.,  0.,  0.,  0.,  0.,  0., 15., 15., 14.,
        12.,  0.,  0.,  0.,  3., 10.,  1.,  0., 12.,  5.,  0.,  0.,  5.,
         8.,  0.,  0.,  8.,  6.,  0.,  0.,  8.,  8.,  0.,  0.,  8.,  8.,
         0.,  0.,  5.,  8.,  0.,  0., 10.,  6.,  0.,  0.,  4., 13.,  4.,
         6., 13.,  0.,  0.,  0.,  0.,  6., 16., 14.,  3.,  0.,  0.],
       [ 0.,  0.,  0., 13.,  9.,  0.,  0.,  0.,  0.,  0.,  6., 16.,  2.,
         0.,  0.,  0.,  0.,  0., 12.,  9.,  0.,  2.,  0.,  0.,  0.,  7.,
        15.,  1.,  5., 15.,  1.,  0.,  0., 14., 10.,  4., 11., 12.,  3.,
         0.,  2., 16., 16., 16., 16., 13.,  2.,  0.,  0.,  3.,  4., 11.,
        14.,  0.,  0.,  0.,  0.,  0.,  0., 15.,  4.,  0.,  0.,  0.]])

In [6]:
# Train the project's own GradientBoostingClassifier on the digits split.
gbc = GradientBoostingClassifier(
    max_depth=2,
    learning_rate=0.1,
    n_estimators=200,
    num_classes=10,
)
gbc.fit(X_train, y_train)

# The raw `predict` output is reduced with argmax along axis 1 to get one label
# per sample (presumably per-class scores -- confirm against classic.model.ensemble).
pred = gbc.predict(X_test)
pred_ = pred.argmax(axis=1)

print(f"Accuracy = {accuracy_score(y_test, pred_)}")

Accuracy = 0.8685185185185185


In [7]:
# Reference run: scikit-learn's GradientBoostingClassifier with the same
# n_estimators / learning_rate / max_depth as the custom model.
# `fit` returns the fitted estimator, so construction and training are chained.
gbc = ensemble.GradientBoostingClassifier(
    max_depth=2,
    learning_rate=0.1,
    n_estimators=200,
).fit(X_train, y_train)

# The predictions are passed to accuracy_score as-is (no argmax step in this cell).
pred = gbc.predict(X_test)

print(f"Accuracy (Sklearn) = {accuracy_score(y_test, pred)}")

Accuracy (Sklearn) = 0.9611111111111111
