In [1]:
import california_housing_data

housing = california_housing_data.load_data()
train_set, test_set = california_housing_data.split_train_test(housing, test_ratio=0.2)

train_data, train_labels = california_housing_data.split_sample_and_label(train_set)
train_data = california_housing_data.preprocess(train_data)

test_data, test_labels = california_housing_data.split_sample_and_label(test_set)
test_data = california_housing_data.preprocess(test_data)

In [2]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

forest_reg = RandomForestRegressor(n_estimators=10)

param_grid = [
    {'n_estimators': [3, 10, 30],
     'max_features': [2, 4, 6, 8]},
    {'bootstrap': [False],
     'n_estimators': [3, 10],
     'max_features': [2, 3, 4]},
]

# Grid search cross-validation
grid_search = GridSearchCV(estimator=forest_reg,
                           param_grid=param_grid,
                           cv=5,
                           scoring='neg_mean_squared_error',
                           return_train_score=True)

grid_search.fit(train_data, train_labels)

GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid=[{'n_estimators': [3, 10, 30], 'max_features': [2, 4, 6, 8]}, {'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='neg_mean_squared_error', verbose=0)

In [3]:
grid_search.best_params_

{'max_features': 8, 'n_estimators': 30}

In [4]:
grid_search.best_estimator_

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features=8, max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=30, n_jobs=None, oob_score=False,
           random_state=None, verbose=0, warm_start=False)

In [7]:
import numpy as np

cross_validation_result = grid_search.cv_results_
for mean_score, params in zip(cross_validation_result['mean_test_score'],
                              cross_validation_result['params']):
    print(np.sqrt(np.abs(mean_score)), params)

64870.11906549322 {'max_features': 2, 'n_estimators': 3}
55424.51098847784 {'max_features': 2, 'n_estimators': 10}
52719.382279896425 {'max_features': 2, 'n_estimators': 30}
61141.819360847956 {'max_features': 4, 'n_estimators': 3}
52681.23972949057 {'max_features': 4, 'n_estimators': 10}
50424.78686184351 {'max_features': 4, 'n_estimators': 30}
58332.48024548647 {'max_features': 6, 'n_estimators': 3}
52016.56736323846 {'max_features': 6, 'n_estimators': 10}
50012.03214788614 {'max_features': 6, 'n_estimators': 30}
58919.65538540382 {'max_features': 8, 'n_estimators': 3}
52122.13143477026 {'max_features': 8, 'n_estimators': 10}
49939.61461966787 {'max_features': 8, 'n_estimators': 30}
61554.437834594406 {'bootstrap': False, 'max_features': 2, 'n_estimators': 3}
55250.8674319347 {'bootstrap': False, 'max_features': 2, 'n_estimators': 10}
60284.202536107354 {'bootstrap': False, 'max_features': 3, 'n_estimators': 3}
52928.86655233073 {'bootstrap': False, 'max_features': 3, 'n_estimators':