## Boosting from Scratch

### 1- Load Libraries

In [1]:
# Core data handling
import pandas as pd
import numpy as np

# Modelling utilities: hold-out splitting and the tree regressor fitted at each boosting stage
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Silence every warning category for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides deprecation warnings raised by the
# libraries above — consider narrowing the filter to specific categories.
import warnings
warnings.filterwarnings('ignore')

### 2- Load dataset

In [2]:
# Boston housing data: one column per feature, with the target appended as PRICE
from sklearn.datasets import load_boston

boston = load_boston()
bos = pd.DataFrame(boston.data, columns=boston.feature_names)
bos['PRICE'] = boston.target

# Preview the first rows
bos.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


### 3- Split dataset into train/test set

In [3]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    bos.drop(columns="PRICE"),
    bos["PRICE"],
    test_size=0.25,
    random_state=10,
)

# Dimensions of the training features: (rows, columns)
X_train.shape

(379, 13)

### 4- Create an ensemble model in three steps

In [4]:
# Boosting by residual fitting: the first tree learns the target, and every
# following tree learns whatever error the trees before it left behind.
# random_state is fixed on each tree so that tie-breaking between equally good
# splits is deterministic and the cell reproduces on a fresh kernel.

# Fit FIRST model on the raw targets
tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=10)
tree_reg1.fit(X_train, y_train)

# Residuals the first tree could not explain
r1 = y_train - tree_reg1.predict(X_train)

# Fit SECOND model on the first tree's residuals
tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=10)
tree_reg2.fit(X_train, r1)

# Residuals still left after the second tree's correction
r2 = r1 - tree_reg2.predict(X_train)

# Fit THIRD model on the remaining residuals
tree_reg3 = DecisionTreeRegressor(max_depth=2, random_state=10)
tree_reg3.fit(X_train, r2)

# Ensemble prediction on the training rows: the sum of the three trees' outputs
y_pred = sum(tree.predict(X_train) for tree in (tree_reg1, tree_reg2, tree_reg3))

### 5- Examine all model predictions

In [5]:
# Side-by-side comparison for the first ten training rows: each tree's own
# output, the summed ensemble output, and the true price
predictions = pd.DataFrame({
    'Model_1': tree_reg1.predict(X_train)[:10],
    'Model_2': tree_reg2.predict(X_train)[:10],
    'Model_3': tree_reg3.predict(X_train)[:10],
    'Ensemble': y_pred[:10],
    # Drop the shuffled training index so the prices line up with rows 0-9
    'Actual': y_train.head(10).reset_index(drop=True),
})

# Show the comparison table
predictions

Unnamed: 0,Model_1,Model_2,Model_3,Ensemble,Actual
0,21.420472,-0.988859,0.391106,20.82272,23.8
1,21.420472,-0.988859,0.391106,20.82272,23.1
2,28.151515,2.985454,0.391106,31.528075,33.2
3,28.151515,2.985454,0.391106,31.528075,28.2
4,14.654478,-0.988859,-5.071778,8.593841,8.5
5,28.151515,2.985454,0.391106,31.528075,32.4
6,28.151515,2.985454,0.391106,31.528075,29.6
7,14.654478,-0.988859,0.391106,14.056725,17.1
8,28.151515,-0.988859,0.391106,27.553762,24.2
9,21.420472,-0.988859,0.391106,20.82272,26.4
