In [1]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn import datasets
from model_selection import ClfSwitcher
import pandas as pd

In [3]:
# Notebook display settings: allow up to 2000 rows to be rendered and show
# every float with exactly four decimal places.
pd.options.display.max_rows = 2000
pd.options.display.float_format = '{:.4f}'.format

In [4]:
# Load the Iris dataset from scikit-learn; its `.data` and `.target` attributes
# are the inputs to both grid searches below.
iris = datasets.load_iris()

### Without pipeline

In [5]:
# Search space for GridSearchCV over ClfSwitcher. Each dict is an independent
# sub-grid: the 'estimator' key swaps in one candidate classifier and the
# 'estimator__<name>' keys tune that classifier's own hyperparameters.
#
# Both classifiers are randomised, so each is given a fixed random_state.
# Without it the CV scores change on every run and cannot be compared with
# the pipeline variant of this search further down.
parameters = [
    {
        'estimator': [RandomForestClassifier(random_state=0)],
        'estimator__n_estimators': [150, 200],
        'estimator__max_depth': [2, 3],
    },
    {
        'estimator': [SGDClassifier(random_state=0)],
        'estimator__alpha': [1e-2, 1e-3, 1e-1],
    },
]

In [6]:
# 3-fold grid search over every candidate in `parameters`, with ClfSwitcher as
# the placeholder estimator; runs on 3 workers and logs progress verbosely.
gs = GridSearchCV(
    estimator=ClfSwitcher(),
    param_grid=parameters,
    cv=3,
    n_jobs=3,
    verbose=10,
)
# Last expression of the cell, so the fitted search object is displayed.
gs.fit(iris.data, iris.target)

Fitting 3 folds for each of 7 candidates, totalling 21 fits


[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done   2 tasks      | elapsed:    1.3s
[Parallel(n_jobs=3)]: Done   7 tasks      | elapsed:    1.6s
[Parallel(n_jobs=3)]: Done  12 tasks      | elapsed:    1.7s
[Parallel(n_jobs=3)]: Done  19 out of  21 | elapsed:    1.8s remaining:    0.1s
[Parallel(n_jobs=3)]: Done  21 out of  21 | elapsed:    1.8s finished


GridSearchCV(cv=3, error_score='raise-deprecating',
             estimator=ClfSwitcher(estimator=RandomForestClassifier(bootstrap=True,
                                                                    class_weight=None,
                                                                    criterion='gini',
                                                                    max_depth=None,
                                                                    max_features='auto',
                                                                    max_leaf_nodes=None,
                                                                    min_impurity_decrease=0.0,
                                                                    min_impurity_split=None,
                                                                    min_samples_leaf=1,
                                                                    min_samples_split=2,
                                                                

In [7]:
# Turn the search's cv_results_ mapping into a DataFrame for inspection.
cv_res = pd.DataFrame(data=gs.cv_results_)

In [8]:
# Display the results table: one row per parameter combination tried.
cv_res

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_estimator,param_estimator__max_depth,param_estimator__n_estimators,param_estimator__alpha,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.1391,0.0133,0.0126,0.0043,"(DecisionTreeClassifier(class_weight=None, cri...",2.0,150.0,,{'estimator': (DecisionTreeClassifier(class_we...,0.9608,0.902,0.9792,0.9467,0.0329,4
1,0.1383,0.0031,0.0132,0.0039,"(DecisionTreeClassifier(class_weight=None, cri...",2.0,200.0,,{'estimator': (DecisionTreeClassifier(class_we...,0.9608,0.9216,0.9792,0.9533,0.024,3
2,0.1183,0.0057,0.0117,0.001,"(DecisionTreeClassifier(class_weight=None, cri...",3.0,150.0,,{'estimator': (DecisionTreeClassifier(class_we...,0.9804,0.9216,1.0,0.9667,0.0333,1
3,0.1227,0.0037,0.0153,0.0045,"(DecisionTreeClassifier(class_weight=None, cri...",3.0,200.0,,{'estimator': (DecisionTreeClassifier(class_we...,0.9804,0.9216,0.9792,0.96,0.0276,2
4,0.002,0.0,0.0013,0.0019,"SGDClassifier(alpha=0.0001, average=False, cla...",,,0.01,"{'estimator': SGDClassifier(alpha=0.0001, aver...",0.7059,0.902,0.9375,0.8467,0.1021,5
5,0.002,0.0008,0.0,0.0,"SGDClassifier(alpha=0.0001, average=False, cla...",,,0.001,"{'estimator': SGDClassifier(alpha=0.0001, aver...",0.8824,0.6667,0.9583,0.8333,0.1235,6
6,0.003,0.0008,0.0037,0.0026,"SGDClassifier(alpha=0.0001, average=False, cla...",,,0.1,"{'estimator': SGDClassifier(alpha=0.0001, aver...",0.7451,0.6667,0.6875,0.7,0.0335,7


### With pipeline

In [9]:
# Same search space as the plain ClfSwitcher search above, but every key is
# prefixed with 'clf__' so it is routed to the pipeline step named 'clf'.
#
# Both classifiers are randomised, so each is given a fixed random_state.
# Without it the CV scores change on every run and the pipeline results
# cannot be compared with the non-pipeline ones.
parameters = [
    {
        'clf__estimator': [RandomForestClassifier(random_state=0)],
        'clf__estimator__n_estimators': [150, 200],
        'clf__estimator__max_depth': [2, 3],
    },
    {
        'clf__estimator': [SGDClassifier(random_state=0)],
        'clf__estimator__alpha': [1e-2, 1e-3, 1e-1],
    },
]

In [10]:
# Single-step pipeline: ClfSwitcher is registered under the step name 'clf',
# which is why the grid keys above carry the 'clf__' prefix.
pipeline = Pipeline(steps=[('clf', ClfSwitcher())])

In [11]:
# 3-fold grid search over the pipeline, using the 'clf__'-prefixed grid and
# 3 parallel workers.
gs = GridSearchCV(
    estimator=pipeline,
    param_grid=parameters,
    cv=3,
    n_jobs=3,
)
# Last expression of the cell, so the fitted search object is displayed.
gs.fit(iris.data, iris.target)



GridSearchCV(cv=3, error_score='raise-deprecating',
             estimator=Pipeline(memory=None,
                                steps=[('clf',
                                        ClfSwitcher(estimator=RandomForestClassifier(bootstrap=True,
                                                                                     class_weight=None,
                                                                                     criterion='gini',
                                                                                     max_depth=None,
                                                                                     max_features='auto',
                                                                                     max_leaf_nodes=None,
                                                                                     min_impurity_decrease=0.0,
                                                                                     min_impurity_split=None,
             

In [12]:
# Inspect the pipeline that the search selected as best (per scikit-learn,
# the estimator refit with the top-ranked parameter combination).
gs.best_estimator_

Pipeline(memory=None,
         steps=[('clf',
                 ClfSwitcher(estimator=RandomForestClassifier(bootstrap=True,
                                                              class_weight=None,
                                                              criterion='gini',
                                                              max_depth=2,
                                                              max_features='auto',
                                                              max_leaf_nodes=None,
                                                              min_impurity_decrease=0.0,
                                                              min_impurity_split=None,
                                                              min_samples_leaf=1,
                                                              min_samples_split=2,
                                                              min_weight_fraction_leaf=0.0,
                                         

In [13]:
# Turn the pipeline search's cv_results_ mapping into a DataFrame for inspection.
cv_res = pd.DataFrame(data=gs.cv_results_)

In [14]:
# Display the results table: one row per parameter combination tried.
cv_res

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_clf__estimator,param_clf__estimator__max_depth,param_clf__estimator__n_estimators,param_clf__estimator__alpha,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.0907,0.0066,0.008,0.0,"(DecisionTreeClassifier(class_weight=None, cri...",2.0,150.0,,{'clf__estimator': (DecisionTreeClassifier(cla...,0.9804,0.9216,0.9792,0.96,0.0276,4
1,0.1307,0.0071,0.0123,0.0033,"(DecisionTreeClassifier(class_weight=None, cri...",2.0,200.0,,{'clf__estimator': (DecisionTreeClassifier(cla...,0.9804,0.9412,0.9792,0.9667,0.0183,1
2,0.097,0.0051,0.0077,0.0005,"(DecisionTreeClassifier(class_weight=None, cri...",3.0,150.0,,{'clf__estimator': (DecisionTreeClassifier(cla...,0.9804,0.9216,1.0,0.9667,0.0333,1
3,0.1223,0.0041,0.01,0.0042,"(DecisionTreeClassifier(class_weight=None, cri...",3.0,200.0,,{'clf__estimator': (DecisionTreeClassifier(cla...,0.9804,0.9412,0.9792,0.9667,0.0183,1
4,0.001,0.0,0.0003,0.0005,"SGDClassifier(alpha=0.0001, average=False, cla...",,,0.01,"{'clf__estimator': SGDClassifier(alpha=0.0001,...",0.7843,0.7451,0.9375,0.82,0.0822,7
5,0.001,0.0,0.0007,0.0005,"SGDClassifier(alpha=0.0001, average=False, cla...",,,0.001,"{'clf__estimator': SGDClassifier(alpha=0.0001,...",0.9608,0.902,0.8958,0.92,0.0294,5
6,0.001,0.0,0.0,0.0,"SGDClassifier(alpha=0.0001, average=False, cla...",,,0.1,"{'clf__estimator': SGDClassifier(alpha=0.0001,...",0.9412,0.8039,0.9375,0.8933,0.0642,6
