## Load toy data and create evaluation function

In [1]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn import datasets

# Toy regression problem: keep the dataset bunch around and expose its
# feature matrix / target vector under the short names used by later cells.
# NOTE(review): `load_boston` is deprecated/removed in newer scikit-learn
# releases -- confirm the pinned version before re-running this notebook.
boston_dataset = datasets.load_boston()
X = boston_dataset.data
y = boston_dataset.target

def evaluate_model(learner, X, y, num_folds):
    """Print the mean cross-validated MSE of ``learner`` on ``(X, y)``.

    The rows are split with ``KFold(n_splits=num_folds)``. For every fold the
    learner is refit on the training rows and scored on the held-out rows, so
    ``learner`` is left fitted on the last fold's training data.

    Parameters
    ----------
    learner : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    X : 2-D array supporting ``X[indices, :]`` row selection.
    y : array of targets indexable by the same row indices as ``X``.
    num_folds : int
        Number of folds; also the divisor used for the average.

    Returns
    -------
    None
        The averaged score is written to stdout as ``MSE: <value>``.
    """
    mse = 0
    for train_ind, val_ind in KFold(n_splits=num_folds).split(X, y):
        learner.fit(X[train_ind, :], y[train_ind])
        predictions = learner.predict(X[val_ind, :])
        # scikit-learn metrics are declared as metric(y_true, y_pred). MSE is
        # symmetric so the value is unaffected, but keeping the documented
        # order means an asymmetric metric can be substituted here safely.
        mse += mean_squared_error(y[val_ind], predictions)

    print("MSE:", mse/num_folds)

## Evaluate the model with default hyperparameters

In [2]:
# Baseline: forest with library-default hyperparameters, scored with 5-fold CV.
learner = RandomForestRegressor(n_jobs=-1, random_state=0)
evaluate_model(learner, X, y, num_folds=5)

MSE: 22.5738170763


## Make an educated guess about hyperparameters and try again

Well, that guess turns out to be not so well-educated: the MSE is worse than with the default hyperparameters :)

In [3]:
# Hand-picked hyperparameters, kept separate from the fixed seed / parallelism
# settings so the guess itself is easy to spot.
guessed_params = {
    "n_estimators": 20,
    "max_depth": 8,
    "min_impurity_decrease": 2,
    "criterion": "mse",
}
learner = RandomForestRegressor(random_state=0, n_jobs=-1, **guessed_params)
evaluate_model(learner, X, y, num_folds=5)

MSE: 27.2470565399


## Optimize hyperparameters using genetic-hyperopt

Use your initial guess about the hyperparameter values as priors for the search.

In [4]:
from param import ContinuousParam, CategoricalParam, ConstantParam
from genetic_hyperopt import GeneticHyperopt

# Search over RandomForestRegressor settings, minimising mean squared error.
optimizer = GeneticHyperopt(RandomForestRegressor, X, y, mean_squared_error, maximize=False)

# Tunable numeric hyperparameters.
# NOTE(review): the min_impurity_decrease prior below is 0.02, whereas the
# hand-made guess in the previous cell used 2 -- confirm which was intended.
n_estimators_param = ContinuousParam("n_estimators", 20, 4, min_limit=5, max_limit=100, is_int=True)
max_depth_param = ContinuousParam("max_depth", 8, 2, min_limit=3, max_limit=20, is_int=True)
min_impurity_param = ContinuousParam("min_impurity_decrease", 0.02, 0.05, min_limit=0, is_int=False)

# Tunable categorical hyperparameters.
criterion_param = CategoricalParam("criterion", ["mse", "friedman_mse"], [0.6, 0.4])
max_features_param = CategoricalParam("max_features", ["auto", "sqrt", "log2"])

# Settings held fixed for every candidate.
random_state_param = ConstantParam("random_state", 0)
n_jobs_param = ConstantParam("n_jobs", -1)

# Register the parameters one by one, in the same order as before.
registration_order = (
    random_state_param,
    n_jobs_param,
    max_features_param,
    criterion_param,
    n_estimators_param,
    max_depth_param,
    min_impurity_param,
)
for hyper_param in registration_order:
    optimizer.add_param(hyper_param)

best_params, best_score = optimizer.evolve()

Generation 0
Calculating fitness...
Best individual: {'n_jobs': -1, 'max_depth': 7, 'n_estimators': 24, 'max_features': 'auto', 'random_state': 0, 'min_impurity_decrease': 0.01690347756058859, 'criterion': 'mse'}
Best score: 21.5197981536
Population mean: 24.3662687443
Generating children...
---
Generation 1
Calculating fitness...
Best individual: {'n_jobs': -1, 'max_depth': 8, 'n_estimators': 24, 'max_features': 'auto', 'random_state': 0, 'min_impurity_decrease': 0.008451738780294295, 'criterion': 'mse'}
Best score: 21.0891637084
Population mean: 23.2920095669
Generating children...
---
Generation 2
Calculating fitness...
Best individual: {'n_jobs': -1, 'max_depth': 10, 'n_estimators': 24, 'max_features': 'auto', 'random_state': 0, 'min_impurity_decrease': 0.01690347756058859, 'criterion': 'mse'}
Best score: 20.9701834726
Population mean: 21.8665854789
Generating children...
---
Generation 3
Calculating fitness...
Best individual: {'n_jobs': -1, 'max_depth': 10, 'n_estimators': 24, 'm