In [1]:
# boosting is any ensemble method that combines weak learners to make a strong learner

In [14]:
import numpy as np
from sklearn.datasets import make_moons
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score

In [26]:
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42)

In [27]:
# AdaBoost
# first trains a base classifier and makes predictions on the training set
# sequentially adds predictors to ensemble
# in subsequent iterations more weight is givent to misclassified training instances
# Sciki-learn uses a multiclass version of AdaBoost called SAMME

In [28]:
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=200,
    algorithm="SAMME.R",
    learning_rate=0.5
)

In [29]:
ada_clf.fit(X_train, y_train)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1),
                   learning_rate=0.5, n_estimators=200)

In [30]:
ada_pred = ada_clf.predict(X_val)

In [31]:
accuracy_score(y_val, ada_pred)

0.896

In [32]:
# Gradient Boosting
# Instead of tweaking weights, it fits the new predictor to residual error of previous predictor
# works well with regression 
# training rate determines the contribution of each tree
# lower training rate tends to generalise well but needs more trees
# if there are too many trees you can overfit the data - use early stopping to find optimal number of trees

In [33]:
gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=120)

In [34]:
gbrt.fit(X_train, y_train)

GradientBoostingRegressor(max_depth=2, n_estimators=120)

In [35]:
errors = [mean_squared_error(y_val, y_pred)
         for y_pred in gbrt.staged_predict(X_val)]
bst_n_estimators = np.argmin(errors) + 1

In [36]:
bst_n_estimators

75

In [37]:
# After training on 120 trees, it has been found that the optimal number of trees is 75 (lowest error)

In [38]:
# create a gradient boosting ensemble with optimum trees
gbrt_best = GradientBoostingRegressor(max_depth=2, n_estimators=bst_n_estimators)
gbrt_best.fit(X_train, y_train)

GradientBoostingRegressor(max_depth=2, n_estimators=75)

In [40]:
gbrt_pred = gbrt_best.predict(X_val)

In [41]:
accuracy_score(y_val, gbrt_pred)

ValueError: Classification metrics can't handle a mix of binary and continuous targets