In [1]:
import warnings

warnings.filterwarnings(action='ignore')

### Preparing the Data

In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

# load data
digits = datasets.load_digits()

# flatten the images
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))

# Split data into train and test subsets
X_train, X_test, y_train, y_test = train_test_split(data, digits.target, test_size=0.25, shuffle=False)

### Defining the Search Space

In [3]:
from sklearn.ensemble import RandomForestClassifier

# random forest classifier object
rfc = RandomForestClassifier(random_state=42)

# define sample space
param_grid = {
    'n_estimators': [100,150,200],
    'criterion': ['gini', 'entropy'],
    'min_samples_split': [2, 3, 4],
    'min_samples_leaf': [1, 2, 3, 4, 5],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth': [5, 6, 7]
    }

### Grid Search

In [4]:
import time
from sklearn.model_selection import GridSearchCV
import pandas as pd

# create grid search object
gs = GridSearchCV(estimator=rfc,
                  param_grid=param_grid,
                  scoring='f1_micro',
                  cv=5,
                  n_jobs=-1,
                  verbose=2)

# perform hyperparameter tuning (while timing the process)
time_start = time.time()
gs.fit(X_train, y_train)
time_grid = time.time() - time_start

# store result in a data frame 
values_grid = [810, gs.best_index_+1, gs.best_score_, time_grid]
columns = ['Number of iterations', 'Iteration Number of Optimal Hyperparamters', 'Score', 'Time Elapsed (s)']
results_grid = pd.DataFrame([values_grid], columns = columns)

Fitting 5 folds for each of 810 candidates, totalling 4050 fits


### Random Search

In [16]:
from sklearn.model_selection import RandomizedSearchCV

# create a random search object
rs = RandomizedSearchCV(estimator=rfc,
                  param_distributions=param_grid,
                  scoring='f1_micro',
                  cv=5,
                  n_jobs=-1,
                  verbose=2,
                  random_state=43,
                  n_iter=100)

# perform hyperparamter tuning (while timing the process)
time_start = time.time()
rs.fit(X_train, y_train)
time_random = time.time() - time_start

# store result in a data frame 
values_grid = [[100, rs.best_index_+1, rs.best_score_, time_random]]
results_random = pd.DataFrame(values_grid, columns = columns)


Fitting 5 folds for each of 100 candidates, totalling 500 fits


### Bayesian Search

In [6]:
import optuna 
from optuna.samplers import TPESampler
from sklearn.model_selection import cross_val_score

def objective(trial):
    """return the f1-score"""

    # search space
    n_estimators =  trial.suggest_int('n_estimators', low=100, high=200, step=50)
    criterion = trial.suggest_categorical('criterion', ['gini', 'entropy'])
    min_samples_split = trial.suggest_int('min_samples_split', low=2, high=4, step=1)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', low=1, high=5, step=1)
    max_depth = trial.suggest_int('max_depth', low=5, high=7, step=1)
    max_features = trial.suggest_categorical('max_features', ['auto', 'sqrt','log2'])

    # random forest classifier object
    rfc = RandomForestClassifier(n_estimators=n_estimators, 
                                                  criterion=criterion,
                                                  min_samples_split=min_samples_split,
                                                  min_samples_leaf=min_samples_leaf,
                                                  max_depth=max_depth,
                                                  max_features=max_features,
                                                  random_state=42)
    score =  cross_val_score(estimator=rfc, 
                             X=X_train, 
                             y=y_train, 
                             scoring='f1_micro',
                             cv=5,
                             n_jobs=-1).mean()
    
    return score

# create a study (aim to maximize score)
study = optuna.create_study(sampler=TPESampler(), direction='maximize')

# perform hyperparamter tuning (while timing the process)
time_start = time.time()
study.optimize(objective, n_trials=100)
time_bayesian = time.time() - time_start

# store result in a data frame 
values_bayesian = [100, study.best_trial.number, study.best_trial.value, time_bayesian]
results_bayesian = pd.DataFrame([values_bayesian], columns = columns)

[32m[I 2022-05-03 22:56:50,803][0m A new study created in memory with name: no-name-41077707-1195-463e-83ae-c061196fb313[0m
[32m[I 2022-05-03 22:56:51,096][0m Trial 0 finished with value: 0.9109238606636376 and parameters: {'n_estimators': 100, 'criterion': 'entropy', 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_depth': 6, 'max_features': 'log2'}. Best is trial 0 with value: 0.9109238606636376.[0m
[32m[I 2022-05-03 22:56:51,715][0m Trial 1 finished with value: 0.909436871816054 and parameters: {'n_estimators': 200, 'criterion': 'entropy', 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_depth': 5, 'max_features': 'log2'}. Best is trial 0 with value: 0.9109238606636376.[0m
[32m[I 2022-05-03 22:56:52,082][0m Trial 2 finished with value: 0.9168718160539722 and parameters: {'n_estimators': 100, 'criterion': 'entropy', 'min_samples_split': 4, 'min_samples_leaf': 3, 'max_depth': 6, 'max_features': 'auto'}. Best is trial 2 with value: 0.9168718160539722.[0m
[32m[I 2022-

[32m[I 2022-05-03 22:57:11,019][0m Trial 29 finished with value: 0.9161145532149249 and parameters: {'n_estimators': 100, 'criterion': 'entropy', 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_depth': 6, 'max_features': 'auto'}. Best is trial 25 with value: 0.9280104639955941.[0m
[32m[I 2022-05-03 22:57:11,614][0m Trial 30 finished with value: 0.9272669695718022 and parameters: {'n_estimators': 150, 'criterion': 'entropy', 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_depth': 7, 'max_features': 'auto'}. Best is trial 25 with value: 0.9280104639955941.[0m
[32m[I 2022-05-03 22:57:12,143][0m Trial 31 finished with value: 0.9272669695718022 and parameters: {'n_estimators': 150, 'criterion': 'entropy', 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_depth': 7, 'max_features': 'auto'}. Best is trial 25 with value: 0.9280104639955941.[0m
[32m[I 2022-05-03 22:57:12,695][0m Trial 32 finished with value: 0.9228115103951534 and parameters: {'n_estimators': 150, 'criterio

[32m[I 2022-05-03 22:57:29,446][0m Trial 58 finished with value: 0.9294864381109734 and parameters: {'n_estimators': 150, 'criterion': 'gini', 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_depth': 7, 'max_features': 'auto'}. Best is trial 47 with value: 0.9354261324521549.[0m
[32m[I 2022-05-03 22:57:30,009][0m Trial 59 finished with value: 0.9354261324521549 and parameters: {'n_estimators': 150, 'criterion': 'entropy', 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_depth': 7, 'max_features': 'auto'}. Best is trial 47 with value: 0.9354261324521549.[0m
[32m[I 2022-05-03 22:57:30,503][0m Trial 60 finished with value: 0.9072118959107808 and parameters: {'n_estimators': 150, 'criterion': 'entropy', 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_depth': 5, 'max_features': 'auto'}. Best is trial 47 with value: 0.9354261324521549.[0m
[32m[I 2022-05-03 22:57:31,146][0m Trial 61 finished with value: 0.9354261324521549 and parameters: {'n_estimators': 150, 'criterion':

[32m[I 2022-05-03 22:57:48,353][0m Trial 87 finished with value: 0.9354261324521549 and parameters: {'n_estimators': 150, 'criterion': 'entropy', 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_depth': 7, 'max_features': 'auto'}. Best is trial 47 with value: 0.9354261324521549.[0m
[32m[I 2022-05-03 22:57:48,813][0m Trial 88 finished with value: 0.9294864381109734 and parameters: {'n_estimators': 150, 'criterion': 'gini', 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_depth': 7, 'max_features': 'auto'}. Best is trial 47 with value: 0.9354261324521549.[0m
[32m[I 2022-05-03 22:57:49,378][0m Trial 89 finished with value: 0.9354261324521549 and parameters: {'n_estimators': 150, 'criterion': 'entropy', 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_depth': 7, 'max_features': 'auto'}. Best is trial 47 with value: 0.9354261324521549.[0m
[32m[I 2022-05-03 22:57:49,928][0m Trial 90 finished with value: 0.9272614622057 and parameters: {'n_estimators': 150, 'criterion': 'e

### Comparing the 3 Methods

In [17]:
# store all results in a single data frame
df = results_grid.append(results_random).append(results_bayesian)
df.index = ['Grid Search', 'Random Search', 'Bayesian Optimization']
df

Unnamed: 0,Number of iterations,Iteration Number of Optimal Hyperparamters,Score,Time Elapsed (s)
Grid Search,810,680,0.935426,265.657518
Random Search,100,25,0.934685,31.886001
Bayesian Optimization,100,47,0.935426,64.596206
