In [2]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier

import warnings
warnings.simplefilter('ignore')

In [3]:
pokemon_df = pd.read_excel("Pokemon.xlsx")
X = pokemon_df[['HP','Attack','Defense','Sp. Atk','Sp. Def','Speed','Generation']]
y = pokemon_df['Legendary']
pokemon_df.head()

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=4)

In [5]:
# Instantiate the base model
clf_dt = DecisionTreeClassifier(max_depth = 4)

In [6]:
# Build and train the Bagging classifier
clf_bag = BaggingClassifier(
  base_estimator = clf_dt,
  n_estimators = 21,
  oob_score = True,
  random_state = 500)
clf_bag.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=1.0, n_estimators=21, n_jobs=None, oob_score=True,
         random_state=500, verbose=0, warm_start=False)

In [7]:
# Predict the labels of the test set
pred = clf_bag.predict(X_test)

In [8]:
# Print the out-of-bag score
print('OOB-Score: {:.3f}'.format(clf_bag.oob_score_))

OOB-Score: 0.923


In [9]:
# Evaluate the performance on the test set to compare
pred = clf_bag.predict(X_test)
print('Accuracy: {:.3f}'.format(accuracy_score(y_test, pred)))

Accuracy: 0.944


### GridSearch on Bagging

In [10]:
from sklearn.model_selection import GridSearchCV

In [11]:
from sklearn.svm import SVC
parameters = {
    'n_estimators': (1, 2, 3, 4, 5, 6, 7),
    'base_estimator__C': (1, 2)}

GridSearchCV(BaggingClassifier(SVC()), parameters, scoring="roc_auc").fit(X_train, y_train)

GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=BaggingClassifier(base_estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False,
         random_state=None, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_estimators': (1, 2, 3, 4, 5, 6, 7), 'base_estimator__C': (1, 2)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='roc_auc', verbose=0)