In [1]:
# a hard voting classifier predicts the class that receives the most votes from the individual classifiers
# an ensemble of weak learners can become a strong learner

In [6]:
# we can train a voting classifier with 3 different models
from sklearn.datasets import make_moons
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Three different model families to combine in the voting ensemble.
log_clf = LogisticRegression()
# VotingClassifier needs estimator *instances*; the bare class (no parentheses)
# cannot be cloned/fitted, so the forest must be instantiated here.
rnd_clf = RandomForestClassifier()
svm_clf = SVC()

# Hard voting: the ensemble predicts the class chosen by the majority of the
# three classifiers.
voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting='hard')

# Two-moons toy dataset (100 noisy samples); X_train and y are reused by the
# cells further down.
X_train, y = make_moons(n_samples=100, noise=.15)
voting_clf.fit(X_train, y)

In [7]:
# soft voting predicts the class with the highest class probability, averaged over all individual classifiers
# bagging trains the same algorithm on different random subsets of the data, sampled with replacement
# pasting is the same, but the subsets are sampled without replacement
# ensemble will generally have similar bias but lower variance than the individual classifiers


In [9]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Bagging ensemble of 500 decision trees; every tree is trained on 100
# instances sampled with replacement (bootstrap=True).
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    oob_score=True,  # request the out-of-bag score read on the last line
    n_jobs=-1,
)
bag_clf.fit(X_train, y)
# Last expression of the cell: display the out-of-bag score.
bag_clf.oob_score_

0.94

In [10]:
# random forest is basically the same as above
from sklearn.ensemble import RandomForestClassifier

# Unlike a plain decision tree, which searches all features, each split in a
# random-forest tree only considers a random subset of the features.
# Here: 500 trees, each limited to at most 16 leaf nodes.
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,
)
rnd_clf.fit(X_train, y)

RandomForestClassifier(max_leaf_nodes=16, n_estimators=500, n_jobs=-1)

In [11]:
# there are also extremely randomized trees (Extra-Trees), which use random
# thresholds for each feature rather than searching for the best possible threshold
from sklearn.ensemble import ExtraTreesClassifier


In [14]:
# we can also use random forests to measure feature importance
from sklearn.datasets import load_iris

# Fit a 500-tree forest on the iris data, then print every feature name next
# to the importance score the fitted forest assigned to it.
# Random forests can therefore be very useful for feature selection.
iris = load_iris()
rnd_clf = RandomForestClassifier(n_jobs=-1, n_estimators=500)
rnd_clf.fit(iris.data, iris.target)

for feature_name, importance in zip(iris.feature_names, rnd_clf.feature_importances_):
    print(feature_name, importance)

sepal length (cm) 0.09852074111801488
sepal width (cm) 0.026008648500031442
petal length (cm) 0.42474168354337527
petal width (cm) 0.4507289268385784


In [15]:
# boosting is combining several weak learners into a strong learner
# train predictors sequentially, each trying to correct its predecessor

# adaboost is to pay more attention to the training data the predecessor underfitted
# the weights of the misclassified instances are increased

from sklearn.ensemble import AdaBoostClassifier

# AdaBoost over 200 decision stumps (depth-1 trees) with learning_rate=0.5.
# `algorithm` is deliberately left at the library default: 'SAMME.R' was
# already the default on the older scikit-learn releases this notebook was
# run with, while newer releases deprecate and then reject that value, so
# passing it explicitly only made the cell break after an upgrade.
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=200,
    learning_rate=.5,
)
ada_clf.fit(X_train, y)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1),
                   learning_rate=0.5, n_estimators=200)

In [16]:
# gradient boosting tries to fit new predictor to residual errors made by previous predictor

from sklearn.tree import DecisionTreeRegressor

# Stage 1 of the hand-rolled gradient boosting: a depth-2 regression tree
# fit directly on the targets.
tree_reg1 = DecisionTreeRegressor(max_depth=2).fit(X_train, y)
tree_reg1


DecisionTreeRegressor(max_depth=2)

In [17]:
# Stage 2: fit another depth-2 tree on the residual errors left by the first
# tree (targets minus its predictions).
y2 = y - tree_reg1.predict(X_train)
tree_reg2 = DecisionTreeRegressor(max_depth=2).fit(X_train, y2)
tree_reg2

DecisionTreeRegressor(max_depth=2)

In [18]:
# Stage 3: fit a third depth-2 tree on what the second tree still got wrong
# (its residuals).
y3 = y2 - tree_reg2.predict(X_train)
tree_reg3 = DecisionTreeRegressor(max_depth=2).fit(X_train, y3)
tree_reg3

DecisionTreeRegressor(max_depth=2)

In [20]:
# or scikit
from sklearn.ensemble import GradientBoostingRegressor
# Alias for the training features; the later cells refer to them as X.
X = X_train

# Same setup as the three manual stages above, done by scikit-learn:
# 3 trees of depth 2 with learning_rate=1.0.
gbrt = GradientBoostingRegressor(
    n_estimators=3,
    max_depth=2,
    learning_rate=1.0,
).fit(X, y)
gbrt

GradientBoostingRegressor(learning_rate=1.0, max_depth=2, n_estimators=3)

In [21]:
# to find the optimal number of trees you can use early stopping, e.g. by tracking the validation error with staged_predict()
# GradientBoostingRegressor also has a `subsample` hyperparameter: the fraction of training instances used to train each tree
# setting subsample below 1.0 trades higher bias for lower variance; this is called Stochastic Gradient Boosting

In [23]:
# xgboost is extreme gradient boosting and is extremely fast, scalable and portable
# often important component of winning entries in ml competitions
# supports early stopping out of the box once it is given a validation set to monitor
import xgboost

# XGBoost regressor with all-default hyperparameters, fit on the same data.
xgb_reg = xgboost.XGBRegressor().fit(X, y)
xgb_reg


XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.300000012,
             max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=100, n_jobs=4,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosity=None)

In [None]:
# stacking trains a model (a blender / meta-learner) to combine the predictors' outputs instead of using a fixed voting rule
