## Cross Validation

In [3]:
from sklearn.model_selection import cross_val_score
from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

In [2]:
# Synthetic classification data: 10000 samples, 10 features, 100 blob centers.
# random_state is pinned so the generated dataset is reproducible.
X, y = make_blobs(
    n_samples=10000,
    n_features=10,
    centers=100,
    random_state=0,
)

In [4]:
# Split the blobs into train and test parts (default split proportions),
# with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [5]:
# Baseline: a single decision tree with no depth limit, scored with
# 5-fold cross-validation on the training split.
clf = DecisionTreeClassifier(
    max_depth=None,
    min_samples_split=2,
    random_state=0,
)
scores = cross_val_score(clf, X_train, y_train, cv=5)

# Per-fold scores on the first line, their mean on the second.
print(scores, scores.mean(), sep="\n")

[0.98066667 0.96666667 0.978      0.96866667 0.978     ]
0.9743999999999999


In [6]:
# Random forest of 10 trees, scored with 5-fold cross-validation.
# NOTE(review): this cell cross-validates on the full X, y, whereas the
# decision-tree cell uses X_train, y_train -- the scores are therefore not
# computed on the same data. Confirm whether that is intended.
clf = RandomForestClassifier(
    n_estimators=10,
    max_depth=None,
    min_samples_split=2,
    random_state=0,
)
scores = cross_val_score(clf, X, y, cv=5)

# Per-fold scores on the first line, their mean on the second.
print(scores, scores.mean(), sep="\n")

[1.     1.     0.9995 1.     0.999 ]
0.9997


In [7]:
# Extra-trees ensemble of 10 trees, scored with 5-fold cross-validation.
# NOTE(review): like the random-forest cell, this uses the full X, y rather
# than the X_train, y_train split used for the decision tree -- confirm
# whether that is intended.
clf = ExtraTreesClassifier(
    n_estimators=10,
    max_depth=None,
    min_samples_split=2,
    random_state=0,
)
scores = cross_val_score(clf, X, y, cv=5)

# Per-fold scores on the first line, their mean on the second.
print(scores, scores.mean(), sep="\n")

[1. 1. 1. 1. 1.]
1.0


## Grid Search

In [8]:
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV

In [9]:
# Load the iris dataset; its .data and .target attributes feed the grid search below.
iris = datasets.load_iris()

In [10]:
# Exhaustive hyperparameter search for an SVC on the iris data.
svc = svm.SVC()

# Search grid (a plain dict): 2 kernels x 2 values of C = 4 candidates.
parameters = {
    'kernel': ('linear', 'rbf'),
    'C': [1, 10],
}

# 5-fold cross-validation per candidate; cv=5 is written out explicitly
# even though the original author notes it is also the default.
clf = GridSearchCV(svc, parameters, cv=5)
clf.fit(iris.data, iris.target)

In [11]:
# Hyperparameter combination that achieved the best mean cross-validated score.
clf.best_params_

{'C': 1, 'kernel': 'linear'}

In [12]:
# Mean cross-validated score of the best parameter combination.
clf.best_score_

0.9800000000000001

In [13]:
# Full search log as a dict of arrays: fit/score timings, the parameter values
# tried, and the per-split test scores for each of the candidates.
clf.cv_results_

{'mean_fit_time': array([0.00080533, 0.00075622, 0.0006793 , 0.00067592]),
 'std_fit_time': array([2.11946656e-04, 1.25231274e-05, 4.35771250e-05, 1.65298199e-05]),
 'mean_score_time': array([0.00047917, 0.00050731, 0.00044065, 0.00046177]),
 'std_score_time': array([5.57113159e-05, 2.21672358e-05, 5.55551102e-06, 2.42505334e-05]),
 'param_C': masked_array(data=[1, 1, 10, 10],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['linear', 'rbf', 'linear', 'rbf'],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'linear'},
  {'C': 1, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'}],
 'split0_test_score': array([0.96666667, 0.96666667, 1.        , 0.96666667]),
 'split1_test_score': array([1.        , 0.96666667, 1.        , 1.        ]),
 'split2_test_score': array([0.96666667, 0.96666667, 0