In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [7]:
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()

In [9]:
data.keys()

['target_names', 'data', 'target', 'DESCR', 'feature_names']

In [10]:
print data['DESCR']

Breast Cancer Wisconsin (Diagnostic) Database

Notes
-----
Data Set Characteristics:
    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry 
        - fractal dimension ("coastline approximation" - 1)
        
        The mean, standard error, and "worst" or largest (mean of the three
        largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 3 is Mean Radius, field
        13 is Radius SE, field 23 is Worst Radius.
 

In [11]:
df = pd.DataFrame(data['data'], columns = data['feature_names'])
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [12]:
df.columns

Index([u'mean radius', u'mean texture', u'mean perimeter', u'mean area',
       u'mean smoothness', u'mean compactness', u'mean concavity',
       u'mean concave points', u'mean symmetry', u'mean fractal dimension',
       u'radius error', u'texture error', u'perimeter error', u'area error',
       u'smoothness error', u'compactness error', u'concavity error',
       u'concave points error', u'symmetry error', u'fractal dimension error',
       u'worst radius', u'worst texture', u'worst perimeter', u'worst area',
       u'worst smoothness', u'worst compactness', u'worst concavity',
       u'worst concave points', u'worst symmetry', u'worst fractal dimension'],
      dtype='object')

In [13]:
y = pd.DataFrame(data['target'], columns=['malignant'])

In [14]:
y.head()
y.describe()

Unnamed: 0,malignant
count,569.0
mean,0.627417
std,0.483918
min,0.0
25%,0.0
50%,1.0
75%,1.0
max,1.0


In [15]:
from sklearn.cross_validation import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier

In [16]:
cross_val_score?

In [20]:
decision_tree = DecisionTreeClassifier()
cv_decision_tree = cross_val_score(decision_tree, df, y['malignant'].values,cv=5)

In [29]:
# scores are what happens within the folds. 
cv_decision_tree

array([ 0.90434783,  0.92173913,  0.91150442,  0.96460177,  0.91150442])

In [30]:
print 'Decison Tree mean accuracy %s' % (np.mean(cv_decision_tree))

Decison Tree mean accuracy 0.922739515198


In [38]:
bagging = BaggingClassifier(decision_tree)

In [40]:
cv_bagging_classifier = cross_val_score(bagging,
                                       df.values,
                                       y['malignant'],
                                       cv=5)

In [41]:
cv_bagging_classifier

array([ 0.91304348,  0.93043478,  0.97345133,  0.92920354,  0.97345133])

In [43]:
# made a smaller set of trees 
print 'Bagging a Decision Tree mean accuracy %s' % (np.mean(cv_bagging_classifier))

Bagging a Decision Tree mean accuracy 0.943916891112


In [44]:
bagging.fit(df.values, y['malignant'])

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=1.0, n_estimators=10, n_jobs=1, oob_score=False,
         random_state=None, verbose=0, warm_start=False)

In [45]:
bagging.score(df.values, y['malignant'])

0.99648506151142358

scaled piplines

In [47]:
from sklearn.preprocessing import StandardScaler

In [48]:
standard_scalar = StandardScaler()

In [50]:
# normalizing features. compare differences in numbers across the columns. lets us compare features kind of like 
# categories.
scaled_xs = standard_scalar.fit_transform(df.values)
scaled_xs

array([[ 1.09706398, -2.07333501,  1.26993369, ...,  2.29607613,
         2.75062224,  1.93701461],
       [ 1.82982061, -0.35363241,  1.68595471, ...,  1.0870843 ,
        -0.24388967,  0.28118999],
       [ 1.57988811,  0.45618695,  1.56650313, ...,  1.95500035,
         1.152255  ,  0.20139121],
       ..., 
       [ 0.70228425,  2.0455738 ,  0.67267578, ...,  0.41406869,
        -1.10454895, -0.31840916],
       [ 1.83834103,  2.33645719,  1.98252415, ...,  2.28998549,
         1.91908301,  2.21963528],
       [-1.80840125,  1.22179204, -1.81438851, ..., -1.74506282,
        -0.04813821, -0.75120669]])

In [51]:
decision_tree = DecisionTreeClassifier()
cv_decision_tree_scaled = cross_val_score(decision_tree, scaled_xs,
                                          y['malignant'].values,cv=5)

In [52]:
cv_decision_tree_scaled

array([ 0.90434783,  0.90434783,  0.92035398,  0.9380531 ,  0.90265487])

In [53]:
np.mean(cv_decision_tree_scaled)

0.91395151981531364

In [54]:
cv_bagging_classifier_scaled = cross_val_score(bagging,
                                           scaled_xs,
                                           y['malignant'],
                                           cv=5)

In [55]:
cv_bagging_classifier_scaled

array([ 0.94782609,  0.92173913,  0.96460177,  0.92920354,  0.98230088])

In [56]:
# same because it doesnt care whether data is scaled or not. Can go ahead and scale anyway cause it doesnt hurt
np.mean(cv_bagging_classifier_scaled)

0.94913428241631392

Grid Search

In [64]:
from sklearn.grid_search import GridSearchCV

decision_tree_classifier = DecisionTreeClassifier()
param_grid = {
    'max_depth' : [None, 1,2,3,4,5],
    'min_samples_split' : [2,10,25,50,100]
}

grid_search_dt = GridSearchCV(decision_tree_classifier,
                             param_grid=param_grid,
                                 cv=5,
                             verbose=10)

In [65]:
# what was the best score you found, gridsearch?
grid_search_dt.fit(df.values, y['malignant'].values)

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[CV] min_samples_split=2, max_depth=None .............................
[CV] .... min_samples_split=2, max_depth=None, score=0.913043 -   0.0s
[CV] min_samples_split=2, max_depth=None .............................
[CV] .... min_samples_split=2, max_depth=None, score=0.895652 -   0.0s
[CV] min_samples_split=2, max_depth=None .............................
[CV] .... min_samples_split=2, max_depth=None, score=0.929204 -   0.0s
[CV] min_samples_split=2, max_depth=None .............................
[CV] .... min_samples_split=2, max_depth=None, score=0.955752 -   0.0s
[CV] min_samples_split=2, max_depth=None .............................
[CV] .... min_samples_split=2, max_depth=None, score=0.893805 -   0.0s
[CV] min_samples_split=10, max_depth=None ............................
[CV] ... min_samples_split=10, max_depth=None, score=0.913043 -   0.0s
[CV] min_samples_split=10, max_depth=None ............................
[CV] ... min_sa

[Parallel(n_jobs=1)]: Done   1 tasks       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done   4 tasks       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done   7 tasks       | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done  12 tasks       | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done  17 tasks       | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done  24 tasks       | elapsed:    0.2s


[CV] .. min_samples_split=100, max_depth=None, score=0.929204 -   0.0s
[CV] min_samples_split=100, max_depth=None ...........................
[CV] .. min_samples_split=100, max_depth=None, score=0.911504 -   0.0s
[CV] min_samples_split=2, max_depth=1 ................................
[CV] ....... min_samples_split=2, max_depth=1, score=0.878261 -   0.0s
[CV] min_samples_split=2, max_depth=1 ................................
[CV] ....... min_samples_split=2, max_depth=1, score=0.904348 -   0.0s
[CV] min_samples_split=2, max_depth=1 ................................
[CV] ....... min_samples_split=2, max_depth=1, score=0.920354 -   0.0s
[CV] min_samples_split=2, max_depth=1 ................................
[CV] ....... min_samples_split=2, max_depth=1, score=0.902655 -   0.0s
[CV] min_samples_split=2, max_depth=1 ................................
[CV] ....... min_samples_split=2, max_depth=1, score=0.911504 -   0.0s
[CV] min_samples_split=10, max_depth=1 ...............................
[CV] .

[Parallel(n_jobs=1)]: Done  31 tasks       | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done  40 tasks       | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done  60 tasks       | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done  71 tasks       | elapsed:    0.4s


[CV] ...... min_samples_split=50, max_depth=2, score=0.938053 -   0.0s
[CV] min_samples_split=50, max_depth=2 ...............................
[CV] ...... min_samples_split=50, max_depth=2, score=0.929204 -   0.0s
[CV] min_samples_split=50, max_depth=2 ...............................
[CV] ...... min_samples_split=50, max_depth=2, score=0.938053 -   0.0s
[CV] min_samples_split=100, max_depth=2 ..............................
[CV] ..... min_samples_split=100, max_depth=2, score=0.913043 -   0.0s
[CV] min_samples_split=100, max_depth=2 ..............................
[CV] ..... min_samples_split=100, max_depth=2, score=0.921739 -   0.0s
[CV] min_samples_split=100, max_depth=2 ..............................
[CV] ..... min_samples_split=100, max_depth=2, score=0.938053 -   0.0s
[CV] min_samples_split=100, max_depth=2 ..............................
[CV] ..... min_samples_split=100, max_depth=2, score=0.929204 -   0.0s
[CV] min_samples_split=100, max_depth=2 ..............................
[CV] .

[Parallel(n_jobs=1)]: Done  84 tasks       | elapsed:    0.5s
[Parallel(n_jobs=1)]: Done  97 tasks       | elapsed:    0.6s


[CV] ....... min_samples_split=2, max_depth=4, score=0.921739 -   0.0s
[CV] min_samples_split=2, max_depth=4 ................................
[CV] ....... min_samples_split=2, max_depth=4, score=0.939130 -   0.0s
[CV] min_samples_split=2, max_depth=4 ................................
[CV] ....... min_samples_split=2, max_depth=4, score=0.929204 -   0.0s
[CV] min_samples_split=2, max_depth=4 ................................
[CV] ....... min_samples_split=2, max_depth=4, score=0.946903 -   0.0s
[CV] min_samples_split=2, max_depth=4 ................................
[CV] ....... min_samples_split=2, max_depth=4, score=0.902655 -   0.0s
[CV] min_samples_split=10, max_depth=4 ...............................
[CV] ...... min_samples_split=10, max_depth=4, score=0.921739 -   0.0s
[CV] min_samples_split=10, max_depth=4 ...............................
[CV] ...... min_samples_split=10, max_depth=4, score=0.939130 -   0.0s
[CV] min_samples_split=10, max_depth=4 ...............................
[CV] .

[Parallel(n_jobs=1)]: Done 112 tasks       | elapsed:    0.8s
[Parallel(n_jobs=1)]: Done 127 tasks       | elapsed:    0.9s


[CV] ..... min_samples_split=100, max_depth=4, score=0.913043 -   0.0s
[CV] min_samples_split=100, max_depth=4 ..............................
[CV] ..... min_samples_split=100, max_depth=4, score=0.895652 -   0.0s
[CV] min_samples_split=100, max_depth=4 ..............................
[CV] ..... min_samples_split=100, max_depth=4, score=0.920354 -   0.0s
[CV] min_samples_split=100, max_depth=4 ..............................
[CV] ..... min_samples_split=100, max_depth=4, score=0.929204 -   0.0s
[CV] min_samples_split=100, max_depth=4 ..............................
[CV] ..... min_samples_split=100, max_depth=4, score=0.920354 -   0.0s
[CV] min_samples_split=2, max_depth=5 ................................
[CV] ....... min_samples_split=2, max_depth=5, score=0.904348 -   0.0s
[CV] min_samples_split=2, max_depth=5 ................................
[CV] ....... min_samples_split=2, max_depth=5, score=0.939130 -   0.0s
[CV] min_samples_split=2, max_depth=5 ................................
[CV] .

[Parallel(n_jobs=1)]: Done 144 tasks       | elapsed:    1.0s
[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:    1.0s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best'),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'min_samples_split': [2, 10, 25, 50, 100], 'max_depth': [None, 1, 2, 3, 4, 5]},
       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=10)

In [66]:
grid_search_dt.best_score_

0.93145869947275928

In [67]:
grid_search_dt.best_estimator_

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
            min_samples_split=10, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')

In [70]:
bagging_classifier = BaggingClassifier(grid_search_dt.best_estimator_)
param_grid_bagging = {
    'n_estimators' : [4,10,25,50,1000],
    'max_samples' : [0.25, 0.5, 1.0],
    'max_features' : [0.25,0.5,0.75,1.0]
}
grid_search_bagging = GridSearchCV(bagging_classifier,
                             param_grid=param_grid_bagging,
                                 cv=5,
                             verbose=10)

In [71]:
grid_search_bagging.fit(df.values, y['malignant'].values)

Fitting 5 folds for each of 60 candidates, totalling 300 fits
[CV] max_features=0.25, max_samples=0.25, n_estimators=4 .............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=4, score=0.878261 -   0.0s
[CV] max_features=0.25, max_samples=0.25, n_estimators=4 .............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=4, score=0.956522 -   0.0s
[CV] max_features=0.25, max_samples=0.25, n_estimators=4 .............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=4, score=0.938053 -   0.0s
[CV] max_features=0.25, max_samples=0.25, n_estimators=4 .............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=4, score=0.946903 -   0.0s
[CV] max_features=0.25, max_samples=0.25, n_estimators=4 .............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=4, score=0.884956 -   0.0s
[CV] max_features=0.25, max_samples=0.25, n_estimators=10 ............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=10, score=0.939130 -   0.0s
[CV] max_feat

[Parallel(n_jobs=1)]: Done   1 tasks       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done   4 tasks       | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done   7 tasks       | elapsed:    0.2s


[CV]  max_features=0.25, max_samples=0.25, n_estimators=10, score=0.946903 -   0.0s
[CV] max_features=0.25, max_samples=0.25, n_estimators=10 ............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=10, score=0.955752 -   0.0s
[CV] max_features=0.25, max_samples=0.25, n_estimators=25 ............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=25, score=0.921739 -   0.1s
[CV] max_features=0.25, max_samples=0.25, n_estimators=25 ............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=25, score=0.930435 -   0.1s
[CV] max_features=0.25, max_samples=0.25, n_estimators=25 ............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=25, score=0.964602 -   0.1s
[CV] max_features=0.25, max_samples=0.25, n_estimators=25 ............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=25, score=0.964602 -   0.1s
[CV] max_features=0.25, max_samples=0.25, n_estimators=25 ............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=25, score=0.97

[Parallel(n_jobs=1)]: Done  12 tasks       | elapsed:    0.4s


[CV]  max_features=0.25, max_samples=0.25, n_estimators=50, score=0.913043 -   0.1s
[CV] max_features=0.25, max_samples=0.25, n_estimators=50 ............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=50, score=0.947826 -   0.1s
[CV] max_features=0.25, max_samples=0.25, n_estimators=50 ............


[Parallel(n_jobs=1)]: Done  17 tasks       | elapsed:    0.9s


[CV]  max_features=0.25, max_samples=0.25, n_estimators=50, score=0.964602 -   0.1s
[CV] max_features=0.25, max_samples=0.25, n_estimators=50 ............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=50, score=0.946903 -   0.1s
[CV] max_features=0.25, max_samples=0.25, n_estimators=50 ............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=50, score=0.964602 -   0.1s
[CV] max_features=0.25, max_samples=0.25, n_estimators=1000 ..........
[CV]  max_features=0.25, max_samples=0.25, n_estimators=1000, score=0.913043 -   2.3s
[CV] max_features=0.25, max_samples=0.25, n_estimators=1000 ..........
[CV]  max_features=0.25, max_samples=0.25, n_estimators=1000, score=0.939130 -   2.2s
[CV] max_features=0.25, max_samples=0.25, n_estimators=1000 ..........
[CV]  max_features=0.25, max_samples=0.25, n_estimators=1000, score=0.982301 -   2.3s
[CV] max_features=0.25, max_samples=0.25, n_estimators=1000 ..........
[CV]  max_features=0.25, max_samples=0.25, n_estimators=1000, sc

[Parallel(n_jobs=1)]: Done  24 tasks       | elapsed:   10.6s


[CV]  max_features=0.25, max_samples=0.25, n_estimators=1000, score=0.964602 -   2.9s
[CV] max_features=0.25, max_samples=0.5, n_estimators=4 ..............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=4, score=0.878261 -   0.0s
[CV] max_features=0.25, max_samples=0.5, n_estimators=4 ..............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=4, score=0.930435 -   0.0s
[CV] max_features=0.25, max_samples=0.5, n_estimators=4 ..............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=4, score=0.946903 -   0.0s
[CV] max_features=0.25, max_samples=0.5, n_estimators=4 ..............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=4, score=0.946903 -   0.0s
[CV] max_features=0.25, max_samples=0.5, n_estimators=4 ..............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=4, score=0.955752 -   0.0s
[CV] max_features=0.25, max_samples=0.5, n_estimators=10 .............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=10, score=0.913043 -   

[Parallel(n_jobs=1)]: Done  31 tasks       | elapsed:   13.7s


[CV]  max_features=0.25, max_samples=0.5, n_estimators=10, score=0.973451 -   0.0s
[CV] max_features=0.25, max_samples=0.5, n_estimators=25 .............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=25, score=0.947826 -   0.1s
[CV] max_features=0.25, max_samples=0.5, n_estimators=25 .............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=25, score=0.930435 -   0.1s
[CV] max_features=0.25, max_samples=0.5, n_estimators=25 .............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=25, score=0.973451 -   0.1s
[CV] max_features=0.25, max_samples=0.5, n_estimators=25 .............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=25, score=0.955752 -   0.1s
[CV] max_features=0.25, max_samples=0.5, n_estimators=25 .............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=25, score=0.955752 -   0.1s
[CV] max_features=0.25, max_samples=0.5, n_estimators=50 .............


[Parallel(n_jobs=1)]: Done  40 tasks       | elapsed:   14.2s


[CV]  max_features=0.25, max_samples=0.5, n_estimators=50, score=0.921739 -   0.1s
[CV] max_features=0.25, max_samples=0.5, n_estimators=50 .............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=50, score=0.947826 -   0.1s
[CV] max_features=0.25, max_samples=0.5, n_estimators=50 .............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=50, score=0.991150 -   0.1s
[CV] max_features=0.25, max_samples=0.5, n_estimators=50 .............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=50, score=0.964602 -   0.1s
[CV] max_features=0.25, max_samples=0.5, n_estimators=50 .............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=50, score=0.973451 -   0.1s
[CV] max_features=0.25, max_samples=0.5, n_estimators=1000 ...........
[CV]  max_features=0.25, max_samples=0.5, n_estimators=1000, score=0.930435 -   2.4s
[CV] max_features=0.25, max_samples=0.5, n_estimators=1000 ...........
[CV]  max_features=0.25, max_samples=0.5, n_estimators=1000, score=0.93913

[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:   26.2s


[CV]  max_features=0.25, max_samples=0.5, n_estimators=1000, score=0.964602 -   2.5s
[CV] max_features=0.25, max_samples=1.0, n_estimators=4 ..............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=4, score=0.930435 -   0.0s
[CV] max_features=0.25, max_samples=1.0, n_estimators=4 ..............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=4, score=0.913043 -   0.0s
[CV] max_features=0.25, max_samples=1.0, n_estimators=4 ..............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=4, score=0.938053 -   0.0s
[CV] max_features=0.25, max_samples=1.0, n_estimators=4 ..............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=4, score=0.955752 -   0.0s
[CV] max_features=0.25, max_samples=1.0, n_estimators=4 ..............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=4, score=0.911504 -   0.0s
[CV] max_features=0.25, max_samples=1.0, n_estimators=10 .............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=10, score=0.921739 -   0

[Parallel(n_jobs=1)]: Done  60 tasks       | elapsed:   28.9s


[CV]  max_features=0.25, max_samples=1.0, n_estimators=25, score=0.930435 -   0.1s
[CV] max_features=0.25, max_samples=1.0, n_estimators=25 .............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=25, score=0.982301 -   0.1s
[CV] max_features=0.25, max_samples=1.0, n_estimators=25 .............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=25, score=0.964602 -   0.1s
[CV] max_features=0.25, max_samples=1.0, n_estimators=25 .............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=25, score=0.973451 -   0.1s
[CV] max_features=0.25, max_samples=1.0, n_estimators=50 .............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=50, score=0.930435 -   0.1s
[CV] max_features=0.25, max_samples=1.0, n_estimators=50 .............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=50, score=0.939130 -   0.2s
[CV] max_features=0.25, max_samples=1.0, n_estimators=50 .............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=50, score=0.973451 - 

[Parallel(n_jobs=1)]: Done  71 tasks       | elapsed:   32.9s


[CV]  max_features=0.25, max_samples=1.0, n_estimators=1000, score=0.939130 -   2.8s
[CV] max_features=0.25, max_samples=1.0, n_estimators=1000 ...........
[CV]  max_features=0.25, max_samples=1.0, n_estimators=1000, score=0.973451 -   2.8s
[CV] max_features=0.25, max_samples=1.0, n_estimators=1000 ...........
[CV]  max_features=0.25, max_samples=1.0, n_estimators=1000, score=0.955752 -   2.7s
[CV] max_features=0.25, max_samples=1.0, n_estimators=1000 ...........
[CV]  max_features=0.25, max_samples=1.0, n_estimators=1000, score=0.964602 -   2.7s
[CV] max_features=0.5, max_samples=0.25, n_estimators=4 ..............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=4, score=0.860870 -   0.0s
[CV] max_features=0.5, max_samples=0.25, n_estimators=4 ..............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=4, score=0.930435 -   0.0s
[CV] max_features=0.5, max_samples=0.25, n_estimators=4 ..............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=4, score=0.9292

[Parallel(n_jobs=1)]: Done  84 tasks       | elapsed:   44.2s


[CV]  max_features=0.5, max_samples=0.25, n_estimators=10, score=0.973451 -   0.0s
[CV] max_features=0.5, max_samples=0.25, n_estimators=25 .............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=25, score=0.921739 -   0.1s
[CV] max_features=0.5, max_samples=0.25, n_estimators=25 .............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=25, score=0.939130 -   0.1s
[CV] max_features=0.5, max_samples=0.25, n_estimators=25 .............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=25, score=0.973451 -   0.1s
[CV] max_features=0.5, max_samples=0.25, n_estimators=25 .............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=25, score=0.938053 -   0.1s
[CV] max_features=0.5, max_samples=0.25, n_estimators=25 .............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=25, score=0.964602 -   0.1s
[CV] max_features=0.5, max_samples=0.25, n_estimators=50 .............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=50, score=0.947826 - 

[Parallel(n_jobs=1)]: Done  97 tasks       | elapsed:   50.2s


[CV]  max_features=0.5, max_samples=0.25, n_estimators=1000, score=0.982301 -   2.4s
[CV] max_features=0.5, max_samples=0.25, n_estimators=1000 ...........
[CV]  max_features=0.5, max_samples=0.25, n_estimators=1000, score=0.955752 -   2.5s
[CV] max_features=0.5, max_samples=0.25, n_estimators=1000 ...........
[CV]  max_features=0.5, max_samples=0.25, n_estimators=1000, score=0.964602 -   3.6s
[CV] max_features=0.5, max_samples=0.5, n_estimators=4 ...............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=4, score=0.913043 -   0.0s
[CV] max_features=0.5, max_samples=0.5, n_estimators=4 ...............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=4, score=0.921739 -   0.0s
[CV] max_features=0.5, max_samples=0.5, n_estimators=4 ...............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=4, score=0.955752 -   0.0s
[CV] max_features=0.5, max_samples=0.5, n_estimators=4 ...............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=4, score=0.964602 -   

[Parallel(n_jobs=1)]: Done 112 tasks       | elapsed:   59.6s


[CV]  max_features=0.5, max_samples=0.5, n_estimators=25, score=0.964602 -   0.2s
[CV] max_features=0.5, max_samples=0.5, n_estimators=25 ..............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=25, score=0.955752 -   0.2s
[CV] max_features=0.5, max_samples=0.5, n_estimators=25 ..............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=25, score=0.955752 -   0.1s
[CV] max_features=0.5, max_samples=0.5, n_estimators=50 ..............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=50, score=0.939130 -   0.3s
[CV] max_features=0.5, max_samples=0.5, n_estimators=50 ..............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=50, score=0.939130 -   0.3s
[CV] max_features=0.5, max_samples=0.5, n_estimators=50 ..............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=50, score=0.973451 -   0.3s
[CV] max_features=0.5, max_samples=0.5, n_estimators=50 ..............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=50, score=0.955752 -   0.3s


[Parallel(n_jobs=1)]: Done 127 tasks       | elapsed:  1.3min


[CV]  max_features=0.5, max_samples=1.0, n_estimators=10, score=0.964602 -   0.0s
[CV] max_features=0.5, max_samples=1.0, n_estimators=10 ..............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=10, score=0.938053 -   0.0s
[CV] max_features=0.5, max_samples=1.0, n_estimators=10 ..............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=10, score=0.973451 -   0.0s
[CV] max_features=0.5, max_samples=1.0, n_estimators=25 ..............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=25, score=0.939130 -   0.1s
[CV] max_features=0.5, max_samples=1.0, n_estimators=25 ..............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=25, score=0.930435 -   0.1s
[CV] max_features=0.5, max_samples=1.0, n_estimators=25 ..............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=25, score=0.982301 -   0.1s
[CV] max_features=0.5, max_samples=1.0, n_estimators=25 ..............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=25, score=0.973451 -   0.1s


[Parallel(n_jobs=1)]: Done 144 tasks       | elapsed:  1.3min


[CV]  max_features=0.5, max_samples=1.0, n_estimators=50, score=0.964602 -   0.2s
[CV] max_features=0.5, max_samples=1.0, n_estimators=1000 ............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=1000, score=0.939130 -   3.5s
[CV] max_features=0.5, max_samples=1.0, n_estimators=1000 ............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=1000, score=0.947826 -   3.4s
[CV] max_features=0.5, max_samples=1.0, n_estimators=1000 ............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=1000, score=0.982301 -   3.5s
[CV] max_features=0.5, max_samples=1.0, n_estimators=1000 ............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=1000, score=0.964602 -   5.0s
[CV] max_features=0.5, max_samples=1.0, n_estimators=1000 ............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=1000, score=0.964602 -   3.5s
[CV] max_features=0.75, max_samples=0.25, n_estimators=4 .............
[CV]  max_features=0.75, max_samples=0.25, n_estimators=4, score=0.89565

[Parallel(n_jobs=1)]: Done 161 tasks       | elapsed:  1.6min


[CV]  max_features=0.75, max_samples=0.25, n_estimators=25, score=0.930435 -   0.1s
[CV] max_features=0.75, max_samples=0.25, n_estimators=25 ............
[CV]  max_features=0.75, max_samples=0.25, n_estimators=25, score=0.964602 -   0.1s
[CV] max_features=0.75, max_samples=0.25, n_estimators=25 ............
[CV]  max_features=0.75, max_samples=0.25, n_estimators=25, score=0.946903 -   0.1s
[CV] max_features=0.75, max_samples=0.25, n_estimators=25 ............
[CV]  max_features=0.75, max_samples=0.25, n_estimators=25, score=0.973451 -   0.1s
[CV] max_features=0.75, max_samples=0.25, n_estimators=50 ............
[CV]  max_features=0.75, max_samples=0.25, n_estimators=50, score=0.921739 -   0.1s
[CV] max_features=0.75, max_samples=0.25, n_estimators=50 ............
[CV]  max_features=0.75, max_samples=0.25, n_estimators=50, score=0.947826 -   0.1s
[CV] max_features=0.75, max_samples=0.25, n_estimators=50 ............
[CV]  max_features=0.75, max_samples=0.25, n_estimators=50, score=0.96

[Parallel(n_jobs=1)]: Done 180 tasks       | elapsed:  1.9min


[CV]  max_features=0.75, max_samples=0.5, n_estimators=10, score=0.946903 -   0.0s
[CV] max_features=0.75, max_samples=0.5, n_estimators=10 .............
[CV]  max_features=0.75, max_samples=0.5, n_estimators=10, score=0.955752 -   0.0s
[CV] max_features=0.75, max_samples=0.5, n_estimators=10 .............
[CV]  max_features=0.75, max_samples=0.5, n_estimators=10, score=0.973451 -   0.0s
[CV] max_features=0.75, max_samples=0.5, n_estimators=25 .............
[CV]  max_features=0.75, max_samples=0.5, n_estimators=25, score=0.939130 -   0.1s
[CV] max_features=0.75, max_samples=0.5, n_estimators=25 .............
[CV]  max_features=0.75, max_samples=0.5, n_estimators=25, score=0.947826 -   0.2s
[CV] max_features=0.75, max_samples=0.5, n_estimators=25 .............
[CV]  max_features=0.75, max_samples=0.5, n_estimators=25, score=0.982301 -   0.1s
[CV] max_features=0.75, max_samples=0.5, n_estimators=25 .............
[CV]  max_features=0.75, max_samples=0.5, n_estimators=25, score=0.946903 - 

[Parallel(n_jobs=1)]: Done 199 tasks       | elapsed:  2.1min


[CV]  max_features=0.75, max_samples=0.5, n_estimators=1000, score=0.964602 -   3.2s
[CV] max_features=0.75, max_samples=1.0, n_estimators=4 ..............
[CV]  max_features=0.75, max_samples=1.0, n_estimators=4, score=0.939130 -   0.0s
[CV] max_features=0.75, max_samples=1.0, n_estimators=4 ..............
[CV]  max_features=0.75, max_samples=1.0, n_estimators=4, score=0.930435 -   0.0s
[CV] max_features=0.75, max_samples=1.0, n_estimators=4 ..............
[CV]  max_features=0.75, max_samples=1.0, n_estimators=4, score=0.973451 -   0.0s
[CV] max_features=0.75, max_samples=1.0, n_estimators=4 ..............
[CV]  max_features=0.75, max_samples=1.0, n_estimators=4, score=0.929204 -   0.0s
[CV] max_features=0.75, max_samples=1.0, n_estimators=4 ..............
[CV]  max_features=0.75, max_samples=1.0, n_estimators=4, score=0.955752 -   0.0s
[CV] max_features=0.75, max_samples=1.0, n_estimators=10 .............
[CV]  max_features=0.75, max_samples=1.0, n_estimators=10, score=0.913043 -   0

[Parallel(n_jobs=1)]: Done 220 tasks       | elapsed:  2.2min


[CV]  max_features=0.75, max_samples=1.0, n_estimators=1000, score=0.913043 -   4.1s
[CV] max_features=0.75, max_samples=1.0, n_estimators=1000 ...........
[CV]  max_features=0.75, max_samples=1.0, n_estimators=1000, score=0.947826 -   5.7s
[CV] max_features=0.75, max_samples=1.0, n_estimators=1000 ...........
[CV]  max_features=0.75, max_samples=1.0, n_estimators=1000, score=0.982301 -   4.5s
[CV] max_features=0.75, max_samples=1.0, n_estimators=1000 ...........
[CV]  max_features=0.75, max_samples=1.0, n_estimators=1000, score=0.964602 -   4.5s
[CV] max_features=0.75, max_samples=1.0, n_estimators=1000 ...........
[CV]  max_features=0.75, max_samples=1.0, n_estimators=1000, score=0.964602 -   4.3s
[CV] max_features=1.0, max_samples=0.25, n_estimators=4 ..............
[CV]  max_features=1.0, max_samples=0.25, n_estimators=4, score=0.921739 -   0.0s
[CV] max_features=1.0, max_samples=0.25, n_estimators=4 ..............
[CV]  max_features=1.0, max_samples=0.25, n_estimators=4, score=0.9

[Parallel(n_jobs=1)]: Done 241 tasks       | elapsed:  2.6min


[CV]  max_features=1.0, max_samples=0.25, n_estimators=50, score=0.947826 -   0.1s
[CV] max_features=1.0, max_samples=0.25, n_estimators=50 .............
[CV]  max_features=1.0, max_samples=0.25, n_estimators=50, score=0.982301 -   0.1s
[CV] max_features=1.0, max_samples=0.25, n_estimators=50 .............
[CV]  max_features=1.0, max_samples=0.25, n_estimators=50, score=0.973451 -   0.2s
[CV] max_features=1.0, max_samples=0.25, n_estimators=50 .............
[CV]  max_features=1.0, max_samples=0.25, n_estimators=50, score=0.964602 -   0.1s
[CV] max_features=1.0, max_samples=0.25, n_estimators=1000 ...........
[CV]  max_features=1.0, max_samples=0.25, n_estimators=1000, score=0.904348 -   3.3s
[CV] max_features=1.0, max_samples=0.25, n_estimators=1000 ...........
[CV]  max_features=1.0, max_samples=0.25, n_estimators=1000, score=0.947826 -   3.1s
[CV] max_features=1.0, max_samples=0.25, n_estimators=1000 ...........
[CV]  max_features=1.0, max_samples=0.25, n_estimators=1000, score=0.982

[Parallel(n_jobs=1)]: Done 264 tasks       | elapsed:  2.9min


[CV]  max_features=1.0, max_samples=0.5, n_estimators=25, score=0.964602 -   0.1s
[CV] max_features=1.0, max_samples=0.5, n_estimators=50 ..............
[CV]  max_features=1.0, max_samples=0.5, n_estimators=50, score=0.904348 -   0.2s
[CV] max_features=1.0, max_samples=0.5, n_estimators=50 ..............
[CV]  max_features=1.0, max_samples=0.5, n_estimators=50, score=0.947826 -   0.2s
[CV] max_features=1.0, max_samples=0.5, n_estimators=50 ..............
[CV]  max_features=1.0, max_samples=0.5, n_estimators=50, score=0.982301 -   0.2s
[CV] max_features=1.0, max_samples=0.5, n_estimators=50 ..............
[CV]  max_features=1.0, max_samples=0.5, n_estimators=50, score=0.964602 -   0.2s
[CV] max_features=1.0, max_samples=0.5, n_estimators=50 ..............
[CV]  max_features=1.0, max_samples=0.5, n_estimators=50, score=0.982301 -   0.2s
[CV] max_features=1.0, max_samples=0.5, n_estimators=1000 ............
[CV]  max_features=1.0, max_samples=0.5, n_estimators=1000, score=0.913043 -   3.5

[Parallel(n_jobs=1)]: Done 287 tasks       | elapsed:  3.2min


[CV]  max_features=1.0, max_samples=1.0, n_estimators=25, score=0.982301 -   0.1s
[CV] max_features=1.0, max_samples=1.0, n_estimators=25 ..............
[CV]  max_features=1.0, max_samples=1.0, n_estimators=25, score=0.955752 -   0.1s
[CV] max_features=1.0, max_samples=1.0, n_estimators=25 ..............
[CV]  max_features=1.0, max_samples=1.0, n_estimators=25, score=0.973451 -   0.1s
[CV] max_features=1.0, max_samples=1.0, n_estimators=50 ..............
[CV]  max_features=1.0, max_samples=1.0, n_estimators=50, score=0.913043 -   0.2s
[CV] max_features=1.0, max_samples=1.0, n_estimators=50 ..............
[CV]  max_features=1.0, max_samples=1.0, n_estimators=50, score=0.947826 -   0.3s
[CV] max_features=1.0, max_samples=1.0, n_estimators=50 ..............
[CV]  max_features=1.0, max_samples=1.0, n_estimators=50, score=0.982301 -   0.2s
[CV] max_features=1.0, max_samples=1.0, n_estimators=50 ..............
[CV]  max_features=1.0, max_samples=1.0, n_estimators=50, score=0.964602 -   0.2s


[Parallel(n_jobs=1)]: Done 300 out of 300 | elapsed:  3.7min finished


GridSearchCV(cv=5, error_score='raise',
       estimator=BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
            min_samples_split=10, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, sp...n_estimators=10, n_jobs=1, oob_score=False,
         random_state=None, verbose=0, warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'n_estimators': [4, 10, 25, 50, 1000], 'max_samples': [0.25, 0.5, 1.0], 'max_features': [0.25, 0.5, 0.75, 1.0]},
       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=10)

In [72]:
grid_search_bagging.best_score_

0.961335676625659

In [73]:
grid_search_bagging.best_estimator_

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
            min_samples_split=10, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=0.75,
         max_samples=0.5, n_estimators=50, n_jobs=1, oob_score=False,
         random_state=None, verbose=0, warm_start=False)

In [75]:
from sklearn.cross_validation import train_test_split

combined_df = df.join(y)
combined_df

x_train, x_test, y_train, y_test = train_test_split(df, y['malignant'])

In [76]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((426, 30), (143, 30), (426,), (143,))

In [79]:
bagging_classifier = BaggingClassifier(grid_search_dt.best_estimator_)
param_grid_bagging = {
    'n_estimators' : [4,10,25,50,1000],
    'max_samples' : [0.25, 0.5, 1.0],
    'max_features' : [0.25,0.5,0.75,1.0]
}
grid_search_bagging = GridSearchCV(bagging_classifier,
                             param_grid=param_grid_bagging,
                                 cv=5,
                             verbose=10,
                                  n_jobs=-1)

In [80]:
grid_search_bagging.fit(x_train, y_train)

Fitting 5 folds for each of 60 candidates, totalling 300 fits
[CV] max_features=0.25, max_samples=0.25, n_estimators=4 .............
[CV] max_features=0.25, max_samples=0.25, n_estimators=4 .............
[CV] max_features=0.25, max_samples=0.25, n_estimators=4 .............
[CV] max_features=0.25, max_samples=0.25, n_estimators=4 .............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=4, score=0.895349 -   0.0s
[CV] max_features=0.25, max_samples=0.25, n_estimators=4 .............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=4, score=0.941860 -   0.0s
[CV] max_features=0.25, max_samples=0.25, n_estimators=10 ............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=4, score=0.941176 -   0.1s
[CV]  max_features=0.25, max_samples=0.25, n_estimators=4, score=0.905882 -   0.1s
[CV] max_features=0.25, max_samples=0.25, n_estimators=10 ............
[CV] max_features=0.25, max_samples=0.25, n_estimators=10 ............
[CV]  max_features=0.25, max_samples=0

[Parallel(n_jobs=-1)]: Batch computation too fast (0.0999s.) Setting batch_size=4.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    0.3s


[CV] max_features=0.25, max_samples=0.25, n_estimators=25 ............
[CV] max_features=0.25, max_samples=0.25, n_estimators=1000 ..........
[CV]  max_features=0.25, max_samples=0.25, n_estimators=10, score=0.976471 -   0.1s
[CV] max_features=0.25, max_samples=0.25, n_estimators=10 ............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=10, score=0.964286 -   0.0s
[CV]  max_features=0.25, max_samples=0.25, n_estimators=25, score=0.952941 -   0.1s
[CV] max_features=0.25, max_samples=0.25, n_estimators=25 ............
[CV] max_features=0.25, max_samples=0.25, n_estimators=25 ............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=25, score=0.952941 -   0.2s
[CV]  max_features=0.25, max_samples=0.25, n_estimators=25, score=0.918605 -   0.2s
[CV]  max_features=0.25, max_samples=0.25, n_estimators=50, score=0.941860 -   0.4s
[CV] max_features=0.25, max_samples=0.25, n_estimators=50 ............
[CV] max_features=0.25, max_samples=0.25, n_estimators=25 ............

[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    1.2s


[CV]  max_features=0.25, max_samples=0.5, n_estimators=10, score=0.941860 -   0.1s
[CV] max_features=0.25, max_samples=0.5, n_estimators=10 .............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=10, score=0.976744 -   0.1s
[CV] max_features=0.25, max_samples=0.5, n_estimators=10 .............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=10, score=0.905882 -   0.1s
[CV] max_features=0.25, max_samples=0.5, n_estimators=10 .............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=50, score=0.952381 -   0.5s
[CV] max_features=0.25, max_samples=0.5, n_estimators=25 .............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=10, score=0.952941 -   0.1s
[CV] max_features=0.25, max_samples=0.5, n_estimators=10 .............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=10, score=0.940476 -   0.1s
[CV] max_features=0.25, max_samples=0.5, n_estimators=25 .............
[CV]  max_features=0.25, max_samples=0.5, n_estimators=25, score=0.953488 -

[Parallel(n_jobs=-1)]: Batch computation too slow (2.76s.) Setting batch_size=2.


[CV]  max_features=0.25, max_samples=1.0, n_estimators=10, score=0.906977 -   0.0s
[CV] max_features=0.25, max_samples=1.0, n_estimators=10 .............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=10, score=0.953488 -   0.0s
[CV] max_features=0.25, max_samples=1.0, n_estimators=10 .............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=10, score=0.917647 -   0.1s
[CV] max_features=0.25, max_samples=1.0, n_estimators=10 .............


[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed:    7.4s


[CV]  max_features=0.25, max_samples=1.0, n_estimators=10, score=0.952941 -   0.1s
[CV] max_features=0.25, max_samples=1.0, n_estimators=10 .............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=10, score=0.964286 -   0.0s
[CV] max_features=0.25, max_samples=1.0, n_estimators=25 .............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=25, score=0.918605 -   0.2s
[CV] max_features=0.25, max_samples=1.0, n_estimators=25 .............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=25, score=0.965116 -   0.2s
[CV] max_features=0.25, max_samples=1.0, n_estimators=25 .............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=25, score=0.905882 -   0.2s
[CV] max_features=0.25, max_samples=1.0, n_estimators=25 .............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=25, score=0.941176 -   0.2s
[CV] max_features=0.25, max_samples=1.0, n_estimators=25 .............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=25, score=0.964286 - 

[Parallel(n_jobs=-1)]: Batch computation too slow (3.14s.) Setting batch_size=1.


[CV]  max_features=0.25, max_samples=0.25, n_estimators=1000, score=0.953488 -   5.1s
[CV] max_features=0.25, max_samples=0.25, n_estimators=1000 ..........
[CV]  max_features=0.25, max_samples=0.5, n_estimators=1000, score=0.953488 -   4.4s
[CV] max_features=0.25, max_samples=0.5, n_estimators=1000 ...........
[CV]  max_features=0.25, max_samples=0.5, n_estimators=1000, score=0.964286 -   4.2s
[CV] max_features=0.25, max_samples=1.0, n_estimators=4 ..............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=4, score=0.895349 -   0.0s
[CV] max_features=0.25, max_samples=1.0, n_estimators=4 ..............
[CV]  max_features=0.25, max_samples=1.0, n_estimators=4, score=0.965116 -   0.0s
[CV] max_features=0.25, max_samples=1.0, n_estimators=1000 ...........
[CV]  max_features=0.25, max_samples=1.0, n_estimators=1000, score=0.930233 -   4.4s
[CV] max_features=0.25, max_samples=1.0, n_estimators=1000 ...........
[CV]  max_features=0.25, max_samples=0.25, n_estimators=1000, score=0

[Parallel(n_jobs=-1)]: Done  68 tasks      | elapsed:   19.1s


[CV]  max_features=0.5, max_samples=0.25, n_estimators=4, score=0.905882 -   0.0s
[CV] max_features=0.5, max_samples=0.25, n_estimators=4 ..............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=4, score=0.928571 -   0.0s
[CV] max_features=0.5, max_samples=0.25, n_estimators=10 .............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=10, score=0.906977 -   0.0s
[CV] max_features=0.5, max_samples=0.25, n_estimators=10 .............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=10, score=0.953488 -   0.0s
[CV] max_features=0.5, max_samples=0.25, n_estimators=10 .............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=10, score=0.917647 -   0.0s
[CV] max_features=0.5, max_samples=0.25, n_estimators=10 .............
[CV]  max_features=0.25, max_samples=0.25, n_estimators=1000, score=0.952941 -   4.0s
[CV]  max_features=0.5, max_samples=0.25, n_estimators=10, score=0.905882 -   0.0s
[CV] max_features=0.5, max_samples=0.25, n_estimators=10 ......

[Parallel(n_jobs=-1)]: Done  81 tasks      | elapsed:   19.8s


[CV]  max_features=0.5, max_samples=0.25, n_estimators=25, score=0.941176 -   0.1s
[CV] max_features=0.5, max_samples=0.25, n_estimators=25 .............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=25, score=0.941176 -   0.2s
[CV] max_features=0.5, max_samples=0.25, n_estimators=50 .............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=25, score=0.940476 -   0.1s
[CV] max_features=0.5, max_samples=0.25, n_estimators=50 .............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=50, score=0.883721 -   0.3s
[CV] max_features=0.5, max_samples=0.25, n_estimators=50 .............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=50, score=0.965116 -   0.3s
[CV] max_features=0.5, max_samples=0.25, n_estimators=50 .............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=50, score=0.929412 -   0.3s
[CV] max_features=0.5, max_samples=0.25, n_estimators=50 .............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=50, score=0.941176 - 

[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:   20.8s


[CV]  max_features=0.25, max_samples=1.0, n_estimators=1000, score=0.964286 -   5.0s
[CV] max_features=0.5, max_samples=0.25, n_estimators=4 ..............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=4, score=0.895349 -   0.0s
[CV] max_features=0.5, max_samples=0.25, n_estimators=1000 ...........
[CV]  max_features=0.25, max_samples=1.0, n_estimators=1000, score=0.964706 -   5.1s
[CV] max_features=0.5, max_samples=0.25, n_estimators=1000 ...........
[CV]  max_features=0.5, max_samples=0.25, n_estimators=1000, score=0.918605 -   4.3s
[CV] max_features=0.5, max_samples=0.25, n_estimators=1000 ...........
[CV]  max_features=0.5, max_samples=0.25, n_estimators=1000, score=0.953488 -   4.3s
[CV] max_features=0.5, max_samples=0.5, n_estimators=4 ...............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=4, score=0.895349 -   0.0s
[CV] max_features=0.5, max_samples=0.5, n_estimators=4 ...............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=4, score=0.965116

[Parallel(n_jobs=-1)]: Done 103 tasks      | elapsed:   25.8s


[CV] max_features=0.5, max_samples=0.5, n_estimators=10 ..............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=10, score=0.929412 -   0.1s
[CV] max_features=0.5, max_samples=0.5, n_estimators=10 ..............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=10, score=0.952381 -   0.1s
[CV] max_features=0.5, max_samples=0.5, n_estimators=25 ..............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=25, score=0.918605 -   0.1s
[CV] max_features=0.5, max_samples=0.5, n_estimators=25 ..............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=25, score=0.965116 -   0.1s
[CV] max_features=0.5, max_samples=0.5, n_estimators=25 ..............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=25, score=0.941176 -   0.1s
[CV] max_features=0.5, max_samples=0.5, n_estimators=25 ..............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=25, score=0.941176 -   0.1s
[CV] max_features=0.5, max_samples=0.5, n_estimators=25 ..............
[CV]  max_f

[Parallel(n_jobs=-1)]: Done 114 tasks      | elapsed:   27.3s


[CV]  max_features=0.5, max_samples=0.5, n_estimators=50, score=0.952941 -   0.3s
[CV]  max_features=0.5, max_samples=0.5, n_estimators=50, score=0.952941 -   0.3s
[CV] max_features=0.5, max_samples=0.5, n_estimators=50 ..............
[CV] max_features=0.5, max_samples=0.5, n_estimators=1000 ............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=50, score=0.976190 -   0.3s
[CV] max_features=0.5, max_samples=0.5, n_estimators=1000 ............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=1000, score=0.941176 -   5.0s
[CV] max_features=0.5, max_samples=0.5, n_estimators=1000 ............
[CV]  max_features=0.5, max_samples=0.25, n_estimators=1000, score=0.964286 -   4.9s
[CV] max_features=0.5, max_samples=0.5, n_estimators=1000 ............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=1000, score=0.918605 -   4.7s
[CV] max_features=0.5, max_samples=0.5, n_estimators=1000 ............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=1000, score=0.965116

[Parallel(n_jobs=-1)]: Done 127 tasks      | elapsed:   33.1s


[CV]  max_features=0.5, max_samples=1.0, n_estimators=10, score=0.917647 -   0.1s
[CV] max_features=0.5, max_samples=1.0, n_estimators=10 ..............
[CV] max_features=0.5, max_samples=1.0, n_estimators=25 ..............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=10, score=0.964286 -   0.1s
[CV] max_features=0.5, max_samples=1.0, n_estimators=25 ..............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=25, score=0.930233 -   0.2s
[CV] max_features=0.5, max_samples=1.0, n_estimators=25 ..............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=25, score=0.976744 -   0.2s
[CV] max_features=0.5, max_samples=1.0, n_estimators=25 ..............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=25, score=0.929412 -   0.2s
[CV] max_features=0.5, max_samples=1.0, n_estimators=25 ..............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=25, score=0.929412 -   0.2s
[CV] max_features=0.5, max_samples=1.0, n_estimators=50 ..............
[CV]  max_f

[Parallel(n_jobs=-1)]: Done 140 tasks      | elapsed:   34.3s


[CV]  max_features=0.5, max_samples=1.0, n_estimators=50, score=0.952941 -   0.4s
[CV] max_features=0.5, max_samples=1.0, n_estimators=1000 ............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=50, score=0.964286 -   0.3s
[CV] max_features=0.5, max_samples=1.0, n_estimators=1000 ............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=1000, score=0.941176 -   5.2s
[CV] max_features=0.5, max_samples=1.0, n_estimators=1000 ............
[CV]  max_features=0.5, max_samples=0.5, n_estimators=1000, score=0.964286 -   5.1s
[CV] max_features=0.5, max_samples=1.0, n_estimators=1000 ............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=1000, score=0.930233 -   5.3s
[CV] max_features=0.5, max_samples=1.0, n_estimators=1000 ............
[CV]  max_features=0.5, max_samples=1.0, n_estimators=1000, score=0.965116 -   5.3s
[CV] max_features=0.75, max_samples=0.25, n_estimators=4 .............
[CV]  max_features=0.75, max_samples=0.25, n_estimators=4, score=0.906977 

[Parallel(n_jobs=-1)]: Done 155 tasks      | elapsed:   40.9s


[CV]  max_features=0.75, max_samples=0.25, n_estimators=25, score=0.941860 -   0.1s
[CV]  max_features=0.75, max_samples=0.25, n_estimators=25, score=0.953488 -   0.1s
[CV] max_features=0.75, max_samples=0.25, n_estimators=25 ............
[CV] max_features=0.75, max_samples=0.25, n_estimators=25 ............
[CV]  max_features=0.75, max_samples=0.25, n_estimators=25, score=0.941176 -   0.1s
[CV] max_features=0.75, max_samples=0.25, n_estimators=25 ............
[CV]  max_features=0.75, max_samples=0.25, n_estimators=25, score=0.941176 -   0.2s
[CV] max_features=0.75, max_samples=0.25, n_estimators=50 ............
[CV]  max_features=0.75, max_samples=0.25, n_estimators=25, score=0.964286 -   0.1s
[CV] max_features=0.75, max_samples=0.25, n_estimators=50 ............
[CV]  max_features=0.75, max_samples=0.25, n_estimators=50, score=0.906977 -   0.2s
[CV] max_features=0.75, max_samples=0.25, n_estimators=50 ............
[CV]  max_features=0.75, max_samples=0.25, n_estimators=50, score=0.96

[Parallel(n_jobs=-1)]: Done 170 tasks      | elapsed:   45.8s


[CV]  max_features=0.75, max_samples=0.25, n_estimators=1000, score=0.918605 -   4.3s
[CV] max_features=0.75, max_samples=0.25, n_estimators=1000 ..........
[CV]  max_features=0.75, max_samples=0.25, n_estimators=1000, score=0.965116 -   4.3s
[CV] max_features=0.75, max_samples=0.5, n_estimators=4 ..............
[CV]  max_features=0.75, max_samples=0.5, n_estimators=4, score=0.918605 -   0.0s
[CV] max_features=0.75, max_samples=0.5, n_estimators=4 ..............
[CV]  max_features=0.75, max_samples=0.5, n_estimators=4, score=0.965116 -   0.0s
[CV] max_features=0.75, max_samples=0.5, n_estimators=4 ..............
[CV]  max_features=0.75, max_samples=0.5, n_estimators=4, score=0.941176 -   0.0s
[CV] max_features=0.75, max_samples=0.5, n_estimators=4 ..............
[CV]  max_features=0.75, max_samples=0.5, n_estimators=4, score=0.929412 -   0.0s
[CV] max_features=0.75, max_samples=0.5, n_estimators=4 ..............
[CV]  max_features=0.75, max_samples=0.5, n_estimators=4, score=0.964286 -

[Parallel(n_jobs=-1)]: Done 187 tasks      | elapsed:   48.1s


[CV]  max_features=0.75, max_samples=0.5, n_estimators=50, score=0.965116 -   0.3s
[CV]  max_features=0.75, max_samples=0.5, n_estimators=50, score=0.918605 -   0.3s
[CV] max_features=0.75, max_samples=0.5, n_estimators=50 .............
[CV] max_features=0.75, max_samples=0.5, n_estimators=50 .............
[CV]  max_features=0.75, max_samples=0.5, n_estimators=50, score=0.929412 -   0.2s
[CV] max_features=0.75, max_samples=0.5, n_estimators=50 .............
[CV]  max_features=0.75, max_samples=0.5, n_estimators=50, score=0.941176 -   0.3s
[CV] max_features=0.75, max_samples=0.5, n_estimators=1000 ...........
[CV]  max_features=0.75, max_samples=0.5, n_estimators=50, score=0.964286 -   0.3s
[CV] max_features=0.75, max_samples=0.5, n_estimators=1000 ...........
[CV]  max_features=0.75, max_samples=0.25, n_estimators=1000, score=0.941176 -   4.9s
[CV] max_features=0.75, max_samples=0.5, n_estimators=1000 ...........
[CV]  max_features=0.75, max_samples=0.25, n_estimators=1000, score=0.964

[Parallel(n_jobs=-1)]: Done 204 tasks      | elapsed:   54.8s


[CV] max_features=0.75, max_samples=1.0, n_estimators=10 .............
[CV]  max_features=0.75, max_samples=1.0, n_estimators=10, score=0.952381 -   0.1s
[CV] max_features=0.75, max_samples=1.0, n_estimators=25 .............
[CV]  max_features=0.75, max_samples=1.0, n_estimators=25, score=0.918605 -   0.2s
[CV] max_features=0.75, max_samples=1.0, n_estimators=25 .............
[CV]  max_features=0.75, max_samples=1.0, n_estimators=25, score=0.965116 -   0.2s
[CV] max_features=0.75, max_samples=1.0, n_estimators=25 .............
[CV]  max_features=0.75, max_samples=1.0, n_estimators=25, score=0.929412 -   0.2s
[CV] max_features=0.75, max_samples=1.0, n_estimators=25 .............
[CV]  max_features=0.75, max_samples=1.0, n_estimators=25, score=0.929412 -   0.2s
[CV] max_features=0.75, max_samples=1.0, n_estimators=25 .............
[CV]  max_features=0.75, max_samples=1.0, n_estimators=25, score=0.976190 -   0.2s
[CV] max_features=0.75, max_samples=1.0, n_estimators=50 .............
[CV] 

[Parallel(n_jobs=-1)]: Done 223 tasks      | elapsed:  1.1min


[CV]  max_features=1.0, max_samples=0.25, n_estimators=4, score=0.894118 -   0.0s
[CV] max_features=1.0, max_samples=0.25, n_estimators=4 ..............
[CV]  max_features=1.0, max_samples=0.25, n_estimators=4, score=0.952381 -   0.0s
[CV] max_features=1.0, max_samples=0.25, n_estimators=10 .............
[CV]  max_features=0.75, max_samples=1.0, n_estimators=1000, score=0.929412 -   6.2s
[CV]  max_features=1.0, max_samples=0.25, n_estimators=10, score=0.918605 -   0.1s
[CV] max_features=1.0, max_samples=0.25, n_estimators=10 .............
[CV] max_features=1.0, max_samples=0.25, n_estimators=10 .............
[CV]  max_features=1.0, max_samples=0.25, n_estimators=10, score=0.917647 -   0.0s
[CV]  max_features=1.0, max_samples=0.25, n_estimators=10, score=0.965116 -   0.1s
[CV] max_features=1.0, max_samples=0.25, n_estimators=10 .............
[CV] max_features=1.0, max_samples=0.25, n_estimators=10 .............
[CV]  max_features=1.0, max_samples=0.25, n_estimators=10, score=0.952941 - 

[Parallel(n_jobs=-1)]: Done 242 tasks      | elapsed:  1.1min


[CV]  max_features=0.75, max_samples=1.0, n_estimators=1000, score=0.952941 -   6.3s
[CV] max_features=1.0, max_samples=0.25, n_estimators=1000 ...........
[CV]  max_features=1.0, max_samples=0.25, n_estimators=1000, score=0.965116 -   4.3s
[CV] max_features=1.0, max_samples=0.25, n_estimators=1000 ...........
[CV]  max_features=1.0, max_samples=0.25, n_estimators=1000, score=0.918605 -   4.5s
[CV] max_features=1.0, max_samples=0.25, n_estimators=1000 ...........
[CV]  max_features=0.75, max_samples=1.0, n_estimators=1000, score=0.976190 -   6.5s
[CV] max_features=1.0, max_samples=0.5, n_estimators=4 ...............
[CV]  max_features=1.0, max_samples=0.5, n_estimators=4, score=0.918605 -   0.0s
[CV] max_features=1.0, max_samples=0.5, n_estimators=4 ...............
[CV]  max_features=1.0, max_samples=0.5, n_estimators=4, score=0.941860 -   0.0s
[CV] max_features=1.0, max_samples=0.5, n_estimators=4 ...............
[CV]  max_features=1.0, max_samples=0.5, n_estimators=4, score=0.941176 

[Parallel(n_jobs=-1)]: Done 263 tasks      | elapsed:  1.2min


[CV]  max_features=1.0, max_samples=0.5, n_estimators=50, score=0.965116 -   0.3s
[CV] max_features=1.0, max_samples=0.5, n_estimators=50 ..............
[CV]  max_features=1.0, max_samples=0.5, n_estimators=50, score=0.929412 -   0.3s
[CV] max_features=1.0, max_samples=0.5, n_estimators=50 ..............
[CV]  max_features=1.0, max_samples=0.5, n_estimators=50, score=0.941176 -   0.3s
[CV] max_features=1.0, max_samples=0.5, n_estimators=1000 ............
[CV]  max_features=1.0, max_samples=0.5, n_estimators=50, score=0.964286 -   0.3s
[CV] max_features=1.0, max_samples=0.5, n_estimators=1000 ............
[CV]  max_features=1.0, max_samples=0.25, n_estimators=1000, score=0.941176 -   6.0s
[CV] max_features=1.0, max_samples=0.5, n_estimators=1000 ............
[CV]  max_features=1.0, max_samples=0.25, n_estimators=1000, score=0.976190 -   6.1s
[CV] max_features=1.0, max_samples=0.5, n_estimators=1000 ............
[CV]  max_features=1.0, max_samples=0.5, n_estimators=1000, score=0.953488 -

[Parallel(n_jobs=-1)]: Done 284 tasks      | elapsed:  1.4min


[CV]  max_features=1.0, max_samples=1.0, n_estimators=25, score=0.894118 -   0.2s
[CV] max_features=1.0, max_samples=1.0, n_estimators=25 ..............
[CV]  max_features=1.0, max_samples=1.0, n_estimators=25, score=0.929412 -   0.2s
[CV] max_features=1.0, max_samples=1.0, n_estimators=25 ..............
[CV]  max_features=1.0, max_samples=1.0, n_estimators=25, score=0.976190 -   0.3s
[CV] max_features=1.0, max_samples=1.0, n_estimators=50 ..............
[CV]  max_features=1.0, max_samples=1.0, n_estimators=50, score=0.930233 -   0.3s
[CV] max_features=1.0, max_samples=1.0, n_estimators=50 ..............
[CV]  max_features=1.0, max_samples=1.0, n_estimators=50, score=0.941860 -   0.4s
[CV] max_features=1.0, max_samples=1.0, n_estimators=50 ..............
[CV]  max_features=1.0, max_samples=0.5, n_estimators=1000, score=0.941176 -   7.8s
[CV] max_features=1.0, max_samples=1.0, n_estimators=50 ..............
[CV]  max_features=1.0, max_samples=0.5, n_estimators=1000, score=0.941176 -   7

[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:  1.6min finished


GridSearchCV(cv=5, error_score='raise',
       estimator=BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
            min_samples_split=10, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, sp...n_estimators=10, n_jobs=1, oob_score=False,
         random_state=None, verbose=0, warm_start=False),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid={'n_estimators': [4, 10, 25, 50, 1000], 'max_samples': [0.25, 0.5, 1.0], 'max_features': [0.25, 0.5, 0.75, 1.0]},
       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=10)

In [81]:
grid_search_bagging.best_score_

0.95305164319248825

In [82]:
best_estimator = grid_search_bagging.best_estimator_

In [83]:
best_estimator

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
            min_samples_split=10, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=0.25,
         max_samples=1.0, n_estimators=50, n_jobs=1, oob_score=False,
         random_state=None, verbose=0, warm_start=False)

In [84]:
best_estimator.fit(x_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
            min_samples_split=10, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=0.25,
         max_samples=1.0, n_estimators=50, n_jobs=1, oob_score=False,
         random_state=None, verbose=0, warm_start=False)

In [85]:
best_estimator.score(x_train, y_train)

0.99061032863849763

In [86]:
best_estimator.score(x_test,y_test)

0.95104895104895104

In [87]:
predictions = best_estimator.predict(x_test)

In [88]:
from sklearn.metrics import confusion_matrix, classification_report

In [89]:
confusion_matrix(y_test, predictions)

array([[52,  6],
       [ 1, 84]])

In [90]:
print classification_report(y_test, predictions)

             precision    recall  f1-score   support

          0       0.98      0.90      0.94        58
          1       0.93      0.99      0.96        85

avg / total       0.95      0.95      0.95       143



In [91]:
best_estimator_full_fit = best_estimator
best_estimator_full_fit.fit(df.values, y['malignant'].values)
best_estimator_full_fit.score(df.values, y['malignant'].values)

0.98418277680140598

In [92]:
predictions = best_estimator_full_fit.predict(df.values)

In [93]:
confusion_matrix(y['malignant'].values, predictions)

array([[204,   8],
       [  1, 356]])

In [95]:
print classification_report(y['malignant'].values, predictions)

             precision    recall  f1-score   support

          0       1.00      0.96      0.98       212
          1       0.98      1.00      0.99       357

avg / total       0.98      0.98      0.98       569

