# Hyperparameter Optimization with GridSearchCV

## Setup

In [1]:
import numpy as np
import pandas as pd

from sklearn import metrics
from sklearn import ensemble
from sklearn import model_selection

## Data read

In [2]:
df = pd.read_csv('../data/raw/train.csv')

In [10]:
X = df.drop(columns = ['price_range']).values.copy()
y = df.price_range.values

## Random forest classifier

In [11]:
classifier = ensemble.RandomForestClassifier(n_jobs = -1)
parameters_grid = {
    'n_estimators' : [100, 200, 300, 400],
    'max_depth' : [1, 3, 5, 7],
    'criterion' : ['gini', 'entropy']
}

In [12]:
model = model_selection.GridSearchCV(
    estimator = classifier,
    param_grid = parameters_grid,
    scoring = 'accuracy',
    verbose = 10,
    n_jobs = 1,
    cv = 5,
)

model.fit(X, y)

Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV 1/5; 1/32] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 1/5; 1/32] END criterion=gini, max_depth=1, n_estimators=100;, score=0.603 total time=   5.5s
[CV 2/5; 1/32] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 2/5; 1/32] END criterion=gini, max_depth=1, n_estimators=100;, score=0.557 total time=   0.0s
[CV 3/5; 1/32] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 3/5; 1/32] END criterion=gini, max_depth=1, n_estimators=100;, score=0.652 total time=   0.0s
[CV 4/5; 1/32] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 4/5; 1/32] END criterion=gini, max_depth=1, n_estimators=100;, score=0.535 total time=   0.0s
[CV 5/5; 1/32] START criterion=gini, max_depth=1, n_estimators=100..............
[CV 5/5; 1/32] END criterion=gini, max_depth=1, n_estimators=100;, score=0.580 total time=   0.0s
[CV 1/5; 2/32] START criterion=gini, max_de

GridSearchCV(cv=5, estimator=RandomForestClassifier(n_jobs=-1), n_jobs=1,
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [1, 3, 5, 7],
                         'n_estimators': [100, 200, 300, 400]},
             scoring='accuracy', verbose=10)

In [13]:
print(model.best_score_)
print(model.best_estimator_.get_params())

0.8705
{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'entropy', 'max_depth': 7, 'max_features': 'auto', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 200, 'n_jobs': -1, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}
