# Module 4: Hyperparameter Tuning

## GridSearchCV

In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm

# Load the iris data and keep 40% of the samples aside as a test split;
# the fixed random_state makes the split repeatable.
iris = datasets.load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=0
)

In [2]:
from sklearn.tree import DecisionTreeClassifier
# Baseline decision tree built with no constructor arguments, i.e. every
# hyperparameter is left at the library default.
clf = DecisionTreeClassifier()

In [6]:
# Show the hyperparameter names of `clf` together with their current values;
# these names are the keys a search grid can tune.
clf.get_params()

{'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'presort': False,
 'random_state': None,
 'splitter': 'best'}

In [19]:
# Train the baseline tree on the training split and report its score on the
# held-out test split (fit returns the estimator, so the calls can be chained).
clf.fit(X_train, y_train).score(X_test, y_test)

0.95

In [24]:
from sklearn.model_selection import GridSearchCV
# Exhaustive search over 6 depths x 8 split thresholds = 48 candidate
# settings, each evaluated with 5-fold cross-validation on the training split.
param_grid = {
    'max_depth': range(2, 8),
    'min_samples_split': range(2, 10),
}

searcher = GridSearchCV(
    estimator=DecisionTreeClassifier(),
    param_grid=param_grid,
    cv=5,
)
searcher.fit(X_train, y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=DecisionTreeClassifier(class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features=None,
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              presort=False, random_state=None,
                                              splitter='best'),
             iid='warn', n_jobs=None,
             param_grid={'max_depth': range(2, 8),
                         'min_samples_split': range(2, 10)},
             pre_dispatc

In [25]:
# Winning parameter combination, then its mean cross-validated score.
print(searcher.best_params_, searcher.best_score_, sep='\n')

{'max_depth': 5, 'min_samples_split': 2}
0.9666666666666667


In [26]:
# Score the fitted search object on the held-out test split, which was not
# used during the cross-validated search.
searcher.score(X_test, y_test)

0.95

## RandomizedSearchCV

In [27]:
from sklearn.model_selection import RandomizedSearchCV 
# Randomized search samples a fixed number of candidates from the grid instead
# of trying all 48 combinations. n_iter=10 is the library default, spelled out
# here so the search budget is visible.
param_grid = {'max_depth':range(2,8), 'min_samples_split':range(2,10)}
searcher = RandomizedSearchCV(
    DecisionTreeClassifier(),
    param_distributions=param_grid,
    n_iter=10,
    cv=5,
    # Fix the sampling seed so the same candidates are drawn on every run;
    # without it the reported best parameters change between executions.
    random_state=0,
)
searcher.fit(X_train,y_train)



RandomizedSearchCV(cv=5, error_score='raise-deprecating',
                   estimator=DecisionTreeClassifier(class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features=None,
                                                    max_leaf_nodes=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
                                                    presort=False,
                                                    random_state=None,
                                                    splitter='best')

In [30]:
# Best sampled parameter combination, then its mean cross-validated score.
print(searcher.best_params_, searcher.best_score_, sep='\n')

{'min_samples_split': 3, 'max_depth': 7}
0.9555555555555556


In [31]:
# Score the fitted randomized search on the held-out test split.
searcher.score(X_test, y_test)

0.95

## Search on Pipeline Estimator

In [40]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Three chained steps: standardise the features, project them with PCA, then
# classify with a decision tree. Each step name is the prefix used to address
# that step's parameters in a search grid (e.g. 'pca__n_components').
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('tree', DecisionTreeClassifier()),
])

In [41]:
from sklearn.model_selection import GridSearchCV
# Search jointly over the PCA dimensionality and the tree's hyperparameters.
# Keys follow the '<step name>__<parameter>' form, matching the step names
# given to the pipeline. 4 x 6 x 8 = 192 candidate settings, 5-fold CV each.
param_grid = {
    'pca__n_components': [1, 2, 3, 4],
    'tree__max_depth': range(2, 8),
    'tree__min_samples_split': range(2, 10),
}

clf = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=5)
clf.fit(X_train, y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=Pipeline(memory=None,
                                steps=[('scaler',
                                        StandardScaler(copy=True,
                                                       with_mean=True,
                                                       with_std=True)),
                                       ('pca',
                                        PCA(copy=True, iterated_power='auto',
                                            n_components=None,
                                            random_state=None,
                                            svd_solver='auto', tol=0.0,
                                            whiten=False)),
                                       ('tree',
                                        DecisionTreeClassifier(class_weight=None,
                                                               criterion='gini',
                                                  

In [42]:
# Best pipeline configuration found, then its mean cross-validated score.
print(clf.best_params_, clf.best_score_, sep='\n')

{'pca__n_components': 4, 'tree__max_depth': 5, 'tree__min_samples_split': 2}
0.9555555555555556


In [43]:
# Score the tuned pipeline search (`clf`) on the held-out test split. The
# previous call used `searcher`, which is the RandomizedSearchCV fitted on the
# bare decision tree, so the pipeline itself was never evaluated.
clf.score(X_test, y_test)

0.95