In [14]:
%matplotlib inline

In [15]:
import numpy as np
from time import time
from scipy.stats import randint as sp_randint
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

In [16]:
# get data
digits = load_digits()  # Source : http://archive.ics.uci.edu/ml/datasets/Pen-Based+Recognition+of+Handwritten+Digits
X, y = digits.data, digits.target

In [17]:
# build a classifier
clf = RandomForestClassifier(n_estimators=50)

In [18]:
# specify parameters and distributions to sample from
param_dist = {"max_depth": [3, None],
              "max_features": sp_randint(1, 11),
              "min_samples_split": sp_randint(2, 11),
              "min_samples_leaf": sp_randint(1, 11),
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

In [19]:
# run randomized search
samples = 10  # number of random samples 
randomCV = RandomizedSearchCV(clf, param_distributions=param_dist, n_iter=samples) #default cv = 3

In [20]:
randomCV.fit(X, y)

 
print(randomCV.best_params_)
#print(randomCV.cv_results_)



{'bootstrap': True, 'criterion': 'gini', 'max_depth': None, 'max_features': 7, 'min_samples_leaf': 1, 'min_samples_split': 3}


In [26]:
randomCV.best_estimator_

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features=7, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=3,
            min_weight_fraction_leaf=0.0, n_estimators=50, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [21]:
# use a full grid over all parameters
param_grid = {"max_depth": [3, None],
              "max_features": [1, 3, 10],
              "min_samples_split": [2, 3, 10],
              "min_samples_leaf": [1, 3, 10],
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

In [22]:
# run grid search
grid_search = GridSearchCV(clf, param_grid=param_grid)
start = time()
grid_search.fit(X, y)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=50, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'max_depth': [3, None], 'max_features': [1, 3, 10], 'min_samples_split': [2, 3, 10], 'min_samples_leaf': [1, 3, 10], 'bootstrap': [True, False], 'criterion': ['gini', 'entropy']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [23]:
grid_search.best_params_

{'bootstrap': False,
 'criterion': 'entropy',
 'max_depth': None,
 'max_features': 3,
 'min_samples_leaf': 1,
 'min_samples_split': 2}

In [24]:
grid_search.cv_results_['mean_test_score']

array([0.81246522, 0.82192543, 0.82136895, 0.80077908, 0.83138564,
       0.8263773 , 0.8196995 , 0.80745687, 0.81469115, 0.83639399,
       0.84752365, 0.85865331, 0.84529772, 0.83861992, 0.84140234,
       0.83639399, 0.83361157, 0.83528102, 0.83361157, 0.81580412,
       0.82749026, 0.82971619, 0.82470785, 0.82860323, 0.83639399,
       0.80411797, 0.82915971, 0.92153589, 0.93489149, 0.92209238,
       0.9115192 , 0.90873678, 0.90539789, 0.88258208, 0.87757373,
       0.89037284, 0.93155259, 0.93989983, 0.92932666, 0.92654424,
       0.92988314, 0.92654424, 0.9015025 , 0.90929327, 0.90428492,
       0.93489149, 0.934335  , 0.92765721, 0.92042293, 0.93210907,
       0.92765721, 0.90873678, 0.90984975, 0.89593767, 0.80467446,
       0.81023929, 0.82025598, 0.82860323, 0.82582081, 0.82081247,
       0.80077908, 0.81357819, 0.82248191, 0.83583751, 0.8230384 ,
       0.83695047, 0.82749026, 0.85030607, 0.84362827, 0.84752365,
       0.83528102, 0.84362827, 0.83973289, 0.81691708, 0.83249

In [25]:
grid_search.best_estimator_

RandomForestClassifier(bootstrap=False, class_weight=None,
            criterion='entropy', max_depth=None, max_features=3,
            max_leaf_nodes=None, min_impurity_decrease=0.0,
            min_impurity_split=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=50, n_jobs=None, oob_score=False,
            random_state=None, verbose=0, warm_start=False)