# Understanding Gradient Boosting in Code

##### Imports

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error as MSE
from sklearn import datasets

##### Read & Split Data

In [2]:
# `datasets.load_boston()` was removed in scikit-learn 1.2, so the raw file is
# read from its original source instead (the recipe given in scikit-learn's
# removal notice). This cell therefore needs network access.
# NOTE: scikit-learn dropped this dataset over ethical concerns; it is kept here
# only so the recorded results below stay comparable.
boston_raw = pd.read_csv(
    "http://lib.stat.cmu.edu/datasets/boston", sep=r"\s+", skiprows=22, header=None
)

# Each record spans two physical lines that parse into 11 columns apiece:
# 11 features, then 2 more features followed by the target (rest is NaN padding).
# Folding every pair of lines into one 22-wide row puts the 13 features first
# and the target in column 13.
boston_records = boston_raw.to_numpy().reshape(-1, 22)
boston_data, boston_target = boston_records[:, :13], boston_records[:, 13]

X_train, X_test, y_train, y_test = train_test_split(boston_data, boston_target, random_state=0)

### GBM from Scratch

In [3]:
# 1st weak model: a depth-2 regression tree trained directly on the targets
weak_1 = DecisionTreeRegressor(max_depth=2, random_state=2)
weak_1.fit(X_train, y_train)

# Training-set predictions of the 1st weak model
pred = weak_1.predict(X_train)

In [5]:
# Residuals left over by the 1st weak model.
# The predictions are recomputed from `weak_1` instead of being read from the
# shared `pred` variable, which this and later cells overwrite -- so re-running
# this cell always yields the same residuals.
residual = y_train - weak_1.predict(X_train)

# Build the 2nd weak model on those residuals
weak_2 = DecisionTreeRegressor(max_depth=2, random_state=2).fit(X_train, residual)
pred = weak_2.predict(X_train)

In [6]:
# Residuals left over after the first two weak models.
# Computed from the fitted models rather than as `residual - pred`, which
# subtracted the 2nd model's predictions one more time on every re-run of
# this cell.
residual = y_train - weak_1.predict(X_train) - weak_2.predict(X_train)

# Build the 3rd weak model on those residuals
weak_3 = DecisionTreeRegressor(max_depth=2, random_state=2).fit(X_train, residual)

In [7]:
# Strong model: add up the test-set predictions of the three weak models
pred = sum(weak.predict(X_test) for weak in (weak_1, weak_2, weak_3))

# Root mean squared error (RMSE) on the test set
MSE(y_test, pred) ** 0.5

5.244119681229871

### GBM in sklearn

In [8]:
# Mirror the from-scratch setup: 3 boosting stages of depth-2 trees, with
# learning_rate=1.0 so each stage's contribution is added without shrinkage
gbr = GradientBoostingRegressor(
    n_estimators=3,
    max_depth=2,
    learning_rate=1.0,
    random_state=2,
).fit(X_train, y_train)
pred = gbr.predict(X_test)

# Root mean squared error (RMSE) on the test set
MSE(y_test, pred) ** 0.5

5.244119681229871

# End