<a href="https://colab.research.google.com/github/h4r1c0t/WildCodeSchool/blob/master/LiveCoding/%2317_GridSearch_CV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score

In [0]:
X, y = make_classification(n_samples = 1000, n_features = 10, n_informative=4, n_redundant=3, random_state=0)

In [0]:
X.shape

(1000, 10)

In [0]:
y.shape

(1000,)

In [0]:
clf = RandomForestClassifier(random_state=42)
clf.fit(X, y)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=42, verbose=0,
                       warm_start=False)

In [0]:
scores = cross_val_score(clf, X, y, cv=5)

In [0]:
scores

array([0.88 , 0.89 , 0.885, 0.875, 0.88 ])

In [0]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [0]:
clf.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=42, verbose=0,
                       warm_start=False)

In [0]:
scores = cross_val_score(clf, X_train, y_train, cv=5)

In [0]:
scores

array([0.77857143, 0.82142857, 0.86428571, 0.92857143, 0.85      ])

In [0]:
y_pred = clf.predict(X_test)

In [0]:
accuracy_score(y_test, y_pred)

0.85

In [0]:
roc_auc_score(y_test, y_pred)

0.8490926881337839

In [0]:
## GridSearch

In [0]:
clf = RandomForestClassifier()

In [0]:
parameters = {'n_estimators':[100, 200, 300], 'max_depth':[2, 3]}

In [0]:
clf2 = GridSearchCV(clf, parameters, cv = 3, scoring = 'roc_auc', verbose = 3)
clf2.fit(X_train, y_train)

Fitting 3 folds for each of 6 candidates, totalling 18 fits
[CV] max_depth=2, n_estimators=100 ...................................
[CV] ....... max_depth=2, n_estimators=100, score=0.765, total=   0.2s
[CV] max_depth=2, n_estimators=100 ...................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s


[CV] ....... max_depth=2, n_estimators=100, score=0.836, total=   0.2s
[CV] max_depth=2, n_estimators=100 ...................................
[CV] ....... max_depth=2, n_estimators=100, score=0.854, total=   0.2s
[CV] max_depth=2, n_estimators=200 ...................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.3s remaining:    0.0s


[CV] ....... max_depth=2, n_estimators=200, score=0.759, total=   0.3s
[CV] max_depth=2, n_estimators=200 ...................................
[CV] ....... max_depth=2, n_estimators=200, score=0.828, total=   0.3s
[CV] max_depth=2, n_estimators=200 ...................................
[CV] ....... max_depth=2, n_estimators=200, score=0.851, total=   0.3s
[CV] max_depth=2, n_estimators=300 ...................................
[CV] ....... max_depth=2, n_estimators=300, score=0.760, total=   0.4s
[CV] max_depth=2, n_estimators=300 ...................................
[CV] ....... max_depth=2, n_estimators=300, score=0.832, total=   0.5s
[CV] max_depth=2, n_estimators=300 ...................................
[CV] ....... max_depth=2, n_estimators=300, score=0.844, total=   0.4s
[CV] max_depth=3, n_estimators=100 ...................................
[CV] ....... max_depth=3, n_estimators=100, score=0.813, total=   0.2s
[CV] max_depth=3, n_estimators=100 ...................................
[CV] .

[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:    5.7s finished


GridSearchCV(cv=3, error_score=nan,
             estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                              class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              max_samples=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=100, n_jobs=None,
                                              oob_score=False,
                                              rando

In [0]:
clf2.best_score_

0.8678739545191911

In [0]:
clf2.best_params_

{'max_depth': 3, 'n_estimators': 200}