In [1]:
%matplotlib inline

In [2]:
import numpy as np
from time import time
from scipy.stats import randint as sp_randint
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

In [3]:
# get data
digits = load_digits()  # Source : http://archive.ics.uci.edu/ml/datasets/Pen-Based+Recognition+of+Handwritten+Digits
X, y = digits.data, digits.target

In [4]:
# build a classifier
clf = RandomForestClassifier(n_estimators=50)

In [5]:
# specify parameters and distributions to sample from
param_dist = {"max_depth": [3, None],
              "max_features": sp_randint(1, 11),
              "min_samples_split": sp_randint(2, 11),
              "min_samples_leaf": sp_randint(1, 11),
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

In [6]:
# run randomized search
samples = 10  # number of random samples 
randomCV = RandomizedSearchCV(clf, param_distributions=param_dist, n_iter=samples) #default cv = 3

In [7]:
randomCV.fit(X, y)

 
print(randomCV.best_params_)


{'bootstrap': False, 'criterion': 'gini', 'max_depth': None, 'max_features': 8, 'min_samples_leaf': 2, 'min_samples_split': 2}


In [8]:
# use a full grid over all parameters
param_grid = {"max_depth": [3, None],
              "max_features": [1, 3, 10],
              "min_samples_split": [2, 3, 10],
              "min_samples_leaf": [1, 3, 10],
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

In [9]:
# run grid search
grid_search = GridSearchCV(clf, param_grid=param_grid)
start = time()
grid_search.fit(X, y)

GridSearchCV(estimator=RandomForestClassifier(n_estimators=50),
             param_grid={'bootstrap': [True, False],
                         'criterion': ['gini', 'entropy'],
                         'max_depth': [3, None], 'max_features': [1, 3, 10],
                         'min_samples_leaf': [1, 3, 10],
                         'min_samples_split': [2, 3, 10]})

In [10]:
grid_search.best_params_

{'bootstrap': False,
 'criterion': 'entropy',
 'max_depth': None,
 'max_features': 3,
 'min_samples_leaf': 1,
 'min_samples_split': 2}

In [11]:
grid_search.cv_results_['mean_test_score']

array([0.82584803, 0.80917054, 0.81359177, 0.82416435, 0.81248066,
       0.81192201, 0.82417673, 0.80803312, 0.80523677, 0.83084494,
       0.85030486, 0.83419065, 0.84140514, 0.8481074 , 0.82693439,
       0.83751161, 0.85365986, 0.84365367, 0.82195141, 0.83026772,
       0.8241597 , 0.82639431, 0.82082018, 0.82528629, 0.83083101,
       0.81579542, 0.8275147 , 0.92934231, 0.93100588, 0.91877437,
       0.91097957, 0.90040854, 0.90372795, 0.87646704, 0.87981894,
       0.87648561, 0.93157536, 0.93601052, 0.9343485 , 0.93155989,
       0.92767409, 0.91710152, 0.90374961, 0.89983751, 0.92098886,
       0.93156763, 0.93546271, 0.92100279, 0.92378366, 0.92267255,
       0.92044723, 0.90207985, 0.90097184, 0.91208604, 0.79523832,
       0.81525998, 0.81693439, 0.82247601, 0.82026308, 0.80246828,
       0.79967502, 0.82080625, 0.82473692, 0.84418446, 0.83862426,
       0.83364129, 0.83973383, 0.84697926, 0.83140204, 0.84697772,
       0.8464268 , 0.84140978, 0.84139585, 0.83473692, 0.82137

In [12]:
grid_search.best_estimator_

RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=3,
                       n_estimators=50)