# GridSearchCV

## Imports

In [8]:
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import GridSearchCV, train_test_split, LeaveOneOut

## Prepearing data

In [9]:
svc = SVC()
tree = DecisionTreeClassifier()
loo = LeaveOneOut()
iris = load_iris()

X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=42)

## GridSearchCV + SVC

In [10]:
params = {'C': [0.001, 0.01, 0.1, 1, 10, 100], 
          'gamma': [0.001, 0.01, 0.1, 1, 10, 100], 
          'kernel': ['linear', 'poly', 'rbf', 'sigmoid']}


grid = GridSearchCV(svc, params, cv=5)

grid.fit(X_train, y_train)

GridSearchCV(cv=5, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100], 'gamma': [0.001, 0.01, 0.1, 1, 10, 100], 'kernel': ['linear', 'poly', 'rbf', 'sigmoid']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [11]:
print('→ Train score:\t{}\n'.format(grid.score(X_train, y_train)))
print('→ Test score:\t{}\n'.format(grid.score(X_test, y_test)))
print('→ Best params:\n{}\n'.format(grid.best_params_))
print('→ Best CV score\t{}\n'.format(grid.best_score_))

→ Train score:	0.9732142857142857

→ Test score:	1.0

→ Best params:
{'C': 1, 'gamma': 0.001, 'kernel': 'linear'}

→ Best CV score	0.9642857142857143



## GridSearchCV + TreeC

In [12]:
tree = DecisionTreeClassifier()
params = {'min_samples_split': [2,3,4,5,6,7,8,9,10], 
          'max_depth': [None, 500, 400, 300, 200, 100, 50, 10], 
          'min_samples_leaf': [1,2,3,4,5]}

grid = GridSearchCV(tree, params, cv=5)

grid.fit(X_train, y_train)

GridSearchCV(cv=5, error_score='raise',
       estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'min_samples_split': [2, 3, 4, 5, 6, 7, 8, 9, 10], 'max_depth': [None, 500, 400, 300, 200, 100, 50, 10], 'min_samples_leaf': [1, 2, 3, 4, 5]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [13]:
print('→ Train score:\t{}\n'.format(grid.score(X_train, y_train)))
print('→ Test score:\t{}\n'.format(grid.score(X_test, y_test)))
print('→ Best params:\n{}\n'.format(grid.best_params_))
print('→ Best CV score\t{}\n'.format(grid.best_score_))

→ Train score:	0.9642857142857143

→ Test score:	1.0

→ Best params:
{'max_depth': None, 'min_samples_leaf': 3, 'min_samples_split': 4}

→ Best CV score	0.9553571428571429



## GridSearchCV + TreeC + LeaveOneOut

In [14]:
params = {'min_samples_split': [2,3,4,5,6,7,8,9,10], 
          'max_depth': [None, 500, 400, 300, 200, 100, 50, 10], 
          'min_samples_leaf': [1,2,3,4,5]}

grid = GridSearchCV(tree, params, cv=loo)

%time grid.fit(X_train, y_train)

Wall time: 40.4 s


GridSearchCV(cv=LeaveOneOut(), error_score='raise',
       estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'min_samples_split': [2, 3, 4, 5, 6, 7, 8, 9, 10], 'max_depth': [None, 500, 400, 300, 200, 100, 50, 10], 'min_samples_leaf': [1, 2, 3, 4, 5]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [15]:
print('→ Train score:\t{}\n'.format(grid.score(X_train, y_train)))
print('→ Test score:\t{}\n'.format(grid.score(X_test, y_test)))
print('→ Best params:\n{}\n'.format(grid.best_params_))
print('→ Best CV score\t{}\n'.format(grid.best_score_))

→ Train score:	0.9642857142857143

→ Test score:	1.0

→ Best params:
{'max_depth': 300, 'min_samples_leaf': 3, 'min_samples_split': 3}

→ Best CV score	0.9642857142857143



![gif](gg.gif)