## Simple Implementation of Gradient Boosted Decision Tree For Regression

#### For this implementation we use half the squared error as the loss function of the GBDT

$$L(y^{true}, y^{pred}) = \frac{1}{2} (y^{true} - y^{pred})^2 $$

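so that the negative gradient of the loss with respect to the prediction is
$$-\frac{\partial L}{\partial y^{pred}} = y^{true} - y^{pred}$$
This is exactly the residual, and it is the target each new tree is trained on.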

#### For the sake of the example we use scikit-learn's DecisionTreeRegressor as the base tree

In [1]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor

## Implementation

In [2]:
class GradientBoostedDecisionTreeRegressor:
    """Gradient boosted ensemble of regression trees for half squared error.

    The model starts from the mean of the training labels and then adds
    ``n_trees`` trees one after another. Each tree is fitted on the current
    residual ``y - prediction`` and its output is scaled by ``learning_rate``
    before being added to the running prediction.

    Parameters
    ----------
    model_used : callable
        Class (or factory) of the base learner. It is called as
        ``model_used(**model_param)`` and the result must provide
        ``fit(X, y)`` and ``predict(X)``.
    model_param : dict, optional
        Keyword arguments forwarded to ``model_used``. Defaults to no
        arguments.
    learning_rate : float, optional
        Factor applied to every tree's prediction.
    n_trees : int, optional
        Number of trees fitted by ``fit``.
    """

    def __init__(self, model_used, model_param=None, learning_rate=1e-4, n_trees=10):

        # the tree class that will be used
        self.model_used = model_used
        # the tree class parameter; a None default (instead of ``{}``) keeps
        # instances from silently sharing one dict object
        self.model_param = {} if model_param is None else model_param

        # learning rate for our GBDT
        self.learning_rate = learning_rate

        # number of trees in our model
        self.n_trees = n_trees
        self.trees = []
        self.initial_prediction = None

    def fit(self, X, y, verbose=False):
        """Fit the ensemble on features ``X`` and targets ``y``.

        Any trees from a previous call are discarded, so calling ``fit``
        again retrains the model from scratch.

        Parameters
        ----------
        X : array-like
            Training features, passed unchanged to every tree.
        y : array-like
            Training targets, one value per row of ``X``.
        verbose : bool, optional
            When true, print the mean residual before each tree is fitted.
        """
        # accept plain sequences as well as arrays
        y = np.asarray(y)

        # start from a clean ensemble; without this a second fit would leave
        # the old trees in front of the new ones
        self.trees = []

        # because we're using squared loss divided by 2 our initial prediction will be our label mean
        self.initial_prediction = np.mean(y)

        last_y_pred = np.full(y.shape[0], self.initial_prediction, dtype=np.float64)

        # for every iteration we create new tree and update residual and predicted for training set
        for i in range(self.n_trees):

            # the residual is the true value - our prediction, we want this to be 0
            residual = y - last_y_pred
            if verbose:
                print('iteration num {}, mean residual {}'.format(i+1, np.mean(residual)))
            # we train new tree on residual instead of y
            dt = self.model_used(**self.model_param)
            dt.fit(X, residual)
            self.trees.append(dt)

            last_y_pred = last_y_pred + self.learning_rate * dt.predict(X)

    def predict(self, X):
        """Return the ensemble prediction for every row of ``X``.

        Parameters
        ----------
        X : array-like
            Features to predict on, passed unchanged to every tree.

        Returns
        -------
        numpy.ndarray
            One predicted value per row of ``X``.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.initial_prediction is None:
            raise RuntimeError(
                'this GradientBoostedDecisionTreeRegressor is not fitted yet; '
                'call fit before predict'
            )

        last_y_pred = np.full(np.shape(X)[0], self.initial_prediction, dtype=np.float64)

        # add the scaled contribution of every fitted tree; iterating over the
        # stored trees keeps predict consistent with what fit actually built
        for tree in self.trees:
            last_y_pred = last_y_pred + self.learning_rate * tree.predict(X)
        return last_y_pred

## usage example

In [3]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error


# Load the Boston housing data and shuffle it with a fixed seed so the
# train/test split below is reproducible.
# NOTE(review): load_boston was removed in scikit-learn 1.2 -- this cell needs
# an older release (or a replacement dataset) to run.
boston = datasets.load_boston()
X, y = shuffle(boston.data, boston.target, random_state=13)
X = X.astype(np.float32)

# The first 80% of the shuffled rows are used for training, the rest for testing.
offset = int(X.shape[0] * 0.8)
X_train, X_test = X[:offset], X[offset:]
y_train, y_test = y[:offset], y[offset:]

# Boost 500 depth-3 trees with a learning rate of 0.01.
regressor = GradientBoostedDecisionTreeRegressor(
    model_used=DecisionTreeRegressor,
    model_param={'max_depth':3},
    learning_rate=1e-2,
    n_trees=500,
)
regressor.fit(X_train, y_train, verbose=False)

# Report the mean squared error on both splits.
train_pred = regressor.predict(X_train)
mse_train = mean_squared_error(y_train, train_pred)
test_pred = regressor.predict(X_test)
mse_test = mean_squared_error(y_test, test_pred)
print("Train MSE: %.4f" % mse_train)
print("Test MSE: %.4f" % mse_test)

Train MSE: 3.1952
Test MSE: 8.6603
