In [1]:
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.datasets import make_classification

# Load Dataset from `make_classification`

In [2]:
# Synthesize a 750-sample, 20-feature classification problem (seeded for repeatability).
X, y = make_classification(
    n_samples=750,
    n_features=20,
    random_state=42,
)
# Sanity check: row count followed by the first five entries of each array.
print(X.shape[0], X[:5])
print(y.shape[0], y[:5])

750 [[ 0.65497558 -1.18403056 -1.79310645 -1.96850421 -0.85649824 -1.59632309
   0.26929475 -0.99386267 -0.93217686 -1.82213703  0.42019154  0.03471375
  -0.39989906 -1.77417397 -0.12372895 -0.38281719  0.09767038  0.81079865
  -1.26449841 -0.39767969]
 [-0.60696345 -0.55173785  0.5433396   0.44594778  0.9669174   0.95599979
  -0.76310635 -1.35825699  0.99316486  1.29621716 -0.09216581  0.9431933
   2.30167485  1.14794806 -0.07247772 -0.86052387 -0.7230731   1.33511267
   0.30955565 -0.15650383]
 [ 0.35834451 -0.47020334 -0.99435507 -0.59127801  0.72099695  0.25371139
  -0.97858839 -1.49350978 -2.38153115 -0.90270652 -2.12394758 -1.22767996
   0.61233221 -0.21498032 -0.56539297 -0.92145102 -0.54518624 -1.19655793
   2.92727009  0.24313466]
 [ 0.10589445  1.51115493 -1.33031363  0.68631846  1.61721257 -1.75625076
  -0.01208895  1.97644058 -1.04806973 -2.01529399 -1.56424161 -0.40703631
  -1.45117569 -1.9563403   0.92165011 -0.18912039  0.10435594  0.58441298
  -0.89878405 -0.12754912]
 

# Train/Test Split

In [4]:
# Hold out a test set; stratifying on y keeps the class balance equal in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

# Tuning AdaBoostClassifier

In [5]:
# Grid-search AdaBoost over decision-tree base learners.
# Both the base tree and the booster are seeded so the search is repeatable
# under Restart & Run All.
# NOTE(review): scikit-learn 1.2 renamed `base_estimator` to `estimator` and the
# old name was removed in 1.4; on those versions switch to `estimator=` and the
# 'estimator__max_depth' grid key.
ada = AdaBoostClassifier(
    base_estimator=DecisionTreeClassifier(random_state=42),
    random_state=42,
)
ada_params = {
    'n_estimators': [50, 100],  # maximum number of estimators at which boosting is terminated
    'base_estimator__max_depth': [1, 2],  # maximum depth of each base decision tree
    'learning_rate': [.9, 1.],  # weight applied to each learner; 1.0 is the default, not an upper bound
}
# Stored as `ada_gs` (parallel to `gb_gs`) because the voting section rebinds `gs`,
# which would otherwise discard this search's results.
ada_gs = GridSearchCV(ada, param_grid=ada_params, cv=3)
ada_gs.fit(X_train, y_train)
print(ada_gs.best_score_)
ada_gs.best_params_

0.9234554556832405


{'base_estimator__max_depth': 2, 'learning_rate': 0.9, 'n_estimators': 100}

# GradientBoostingClassifier

In [8]:
gboost = GradientBoostingClassifier()
gboost_params = {
    'max_depth': [2,3,4],
    'n_estimators': [100, 125, 150],
    'learning_rate': [.08, .1, .12]
}

gb_gs = GridSearchCV(gboost, param_grid=gboost_params, cv=3)
%time
gb_gs.fit(X_train, y_train)
print(gb_gs.best_score_)
gb_gs.best_params_

Wall time: 0 ns
0.9341601243979216


{'learning_rate': 0.1, 'max_depth': 2, 'n_estimators': 100}

# VotingClassifier

In [11]:
vote = VotingClassifier([
    ('tree', DecisionTreeClassifier()),
    ('ada', AdaBoostClassifier()),
    ('gb', GradientBoostingClassifier())
])
vote_params = {
    'ada__n_estimators': [50,75],
    'gb__n_estimators': [100,125],
    'tree__max_depth': [None, 5]
}


gs = GridSearchCV(vote, param_grid=vote_params, cv=3)
%time
gs.fit(X_train, y_train)
print(gs.best_score_)
gs.best_params_

Wall time: 0 ns
0.9270394811696439


{'ada__n_estimators': 75, 'gb__n_estimators': 100, 'tree__max_depth': None}