# Gradient Boosting

In [1]:
from classic.model.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.datasets import load_diabetes, load_digits
from sklearn import ensemble
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split

## 1. GradientBoostingRegressor

In [2]:
# Load the diabetes regression dataset and hold out 30% of the rows for testing.
dataset = load_diabetes()
X, y = dataset["data"], dataset["target"]
# Pin the shuffle seed: without it every Restart & Run All produces a different
# split, so the reported errors are not reproducible or comparable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Show the first two training rows as a quick sanity check.
X_train[:2]

array([[ 0.03081083,  0.05068012,  0.04660684, -0.01599898,  0.02044629,
         0.05066877, -0.0581274 ,  0.07120998,  0.00620674,  0.00720652],
       [-0.00188202, -0.04464164,  0.05415152, -0.0665056 ,  0.07273249,
         0.05661859, -0.04340085,  0.08486339,  0.08449153,  0.04862759]])

In [3]:
# Fit the GradientBoostingRegressor imported from classic.model.ensemble and
# measure its mean squared error on the held-out split.
gbr = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=2)
gbr.fit(X_train, y_train)
pred = gbr.predict(X_test)
# Label the metric with the model that is actually evaluated here, and mark
# which implementation it is so it can be told apart from the sklearn baseline.
print(f"Gradient Boosting Regressor (classic) mse = {mean_squared_error(y_test, pred)}")

Decision Tree Regressor mse = 3722.0576951594817


In [4]:
# Reference run: scikit-learn's GradientBoostingRegressor with the same
# n_estimators / learning_rate / max_depth, scored on the same held-out split.
# random_state is fixed so the baseline number is reproducible across runs.
gbr = ensemble.GradientBoostingRegressor(
    n_estimators=100, learning_rate=0.1, max_depth=2, random_state=42
)
gbr.fit(X_train, y_train)
pred = gbr.predict(X_test)
# Label the metric with the model that is actually evaluated here, and mark
# it as the sklearn implementation.
print(f"Gradient Boosting Regressor (sklearn) mse = {mean_squared_error(y_test, pred)}")

Decision Tree Regressor mse = 3859.001102197477


## 2. GradientBoostingClassifier

In [5]:
# Load the digits classification dataset and hold out 30% of the rows for testing.
dataset = load_digits()
# dataset["data"] is already the flattened matrix with 64 features per image
# (the 8x8 images live under dataset["images"]), so no reshape is needed.
X, y = dataset["data"], dataset["target"]
# Pin the shuffle seed: without it every Restart & Run All produces a different
# split, so the reported accuracies are not reproducible or comparable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Show the first two training rows as a quick sanity check.
X_train[:2]

array([[ 0.,  0.,  2., 10., 16.,  4.,  0.,  0.,  1., 10., 16., 16., 15.,
         4.,  0.,  0.,  0., 16., 16., 10.,  1.,  0.,  0.,  0.,  0., 15.,
        16., 16.,  7.,  0.,  0.,  0.,  0.,  5., 11.,  5., 15.,  2.,  0.,
         0.,  0.,  0.,  0.,  0., 11.,  9.,  0.,  0.,  0.,  0.,  3., 10.,
        16.,  9.,  0.,  0.,  0.,  0.,  2., 16., 15.,  2.,  0.,  0.],
       [ 0.,  0.,  6., 15.,  4.,  0.,  0.,  0.,  0.,  1., 14.,  7.,  0.,
         0.,  0.,  0.,  0.,  4., 15.,  1.,  0.,  0.,  0.,  0.,  0.,  5.,
        11.,  0.,  2.,  1.,  0.,  0.,  0.,  4., 13., 12., 16., 13.,  3.,
         0.,  0.,  1., 16.,  2.,  1.,  8., 10.,  0.,  0.,  0., 12.,  4.,
         0., 11., 12.,  0.,  0.,  0.,  4., 13., 12., 14.,  2.,  0.]])

In [6]:
# Train the GradientBoostingClassifier imported from classic.model.ensemble
# on the digits training split.
gbc = GradientBoostingClassifier(
    num_classes=10,
    n_estimators=200,
    learning_rate=0.1,
    max_depth=2,
)
gbc.fit(X_train, y_train)

# NOTE(review): predict() presumably returns one score per class for each row,
# since the labels are recovered with an argmax over axis 1 — confirm against
# the classic implementation.
pred = gbc.predict(X_test)
pred_ = pred.argmax(axis=1)

# Fraction of held-out samples whose argmax label matches the true label.
test_accuracy = accuracy_score(y_test, pred_)
print(f"Accuracy = {test_accuracy}")

Accuracy = 0.9092592592592592


In [7]:
# Reference run: scikit-learn's GradientBoostingClassifier with the same
# n_estimators / learning_rate / max_depth, scored on the same held-out split.
# random_state is fixed so the baseline accuracy is reproducible across runs.
gbc = ensemble.GradientBoostingClassifier(
    n_estimators=200, learning_rate=0.1, max_depth=2, random_state=42
)
gbc.fit(X_train, y_train)
# The prediction is passed straight to accuracy_score, with no argmax step.
pred = gbc.predict(X_test)
print(f"Accuracy = {accuracy_score(y_test, pred)}")

Accuracy = 0.9574074074074074
