In [28]:
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.datasets import make_classification

# Load Dataset from `make_classification`

In [4]:
X, y = make_classification(
    n_samples=10000,
    n_features=300
)

In [11]:
X.shape

(10000, 300)

In [12]:
y.shape

(10000,)

# Train/Test Split

In [13]:
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y
)

# Tuning AdaBoostClassifier

Will take a classifier as a base

In [20]:
ada = AdaBoostClassifier(
    base_estimator=DecisionTreeClassifier()
)

ada_params = {
    'base_estimator__max_depth': [1, 3, 5],
    'n_estimators': [50, 100] # number of trees
}

ada_gs = GridSearchCV(
    estimator=ada,
    param_grid=ada_params
)

In [21]:
ada_gs.fit(X_train, y_train)

GridSearchCV(estimator=AdaBoostClassifier(base_estimator=DecisionTreeClassifier()),
             param_grid={'base_estimator__max_depth': [1, 3, 5],
                         'n_estimators': [50, 100]})

In [30]:
print(ada_gs.score(X_train, y_train))
print(ada_gs.score(X_test, y_test))

0.8952
0.8616


In [26]:
ada_gs.best_params_

{'base_estimator__max_depth': 1, 'n_estimators': 50}

In [15]:
ada.get_params()

{'algorithm': 'SAMME.R',
 'base_estimator__ccp_alpha': 0.0,
 'base_estimator__class_weight': None,
 'base_estimator__criterion': 'gini',
 'base_estimator__max_depth': None,
 'base_estimator__max_features': None,
 'base_estimator__max_leaf_nodes': None,
 'base_estimator__min_impurity_decrease': 0.0,
 'base_estimator__min_impurity_split': None,
 'base_estimator__min_samples_leaf': 1,
 'base_estimator__min_samples_split': 2,
 'base_estimator__min_weight_fraction_leaf': 0.0,
 'base_estimator__random_state': None,
 'base_estimator__splitter': 'best',
 'base_estimator': DecisionTreeClassifier(),
 'learning_rate': 1.0,
 'n_estimators': 50,
 'random_state': None}

In [29]:

bag = BaggingClassifier(
    base_estimator=DecisionTreeClassifier()
)

bag_params = {
    'base_estimator__max_depth': [1, 3, 5],
    'n_estimators': [50, 100]
}

bag_gs = GridSearchCV(
    estimator=bag,
    param_grid=bag_params
)

bag_gs.fit(X_train, y_train)
print(f'Bag Train: {bag_gs.score(X_train, y_train)}')
print(f'Bag Score: {bag_gs.score(X_test, y_test)}')

0.9078666666666667
0.8944


A root with just one level is called a Stump!

                    ROOT
                   /    \
               Branch  Branch

# GradientBoostingClassifier

# VotingClassifier