## Hyperparameter Tuning

In [1]:
import sys
# Add the parent directory to the module search path so that the `src` package
# imported by this notebook can be resolved. The path is relative, so it is
# interpreted against the kernel's working directory
# (presumably the notebook's own folder inside the project — TODO confirm).
sys.path.append("..")

In [2]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from src.load import load_diamonds
from src.splitters import split_train_test, split_X_y
from src.preprocessors import PREPROCESSORS

In [3]:
# Load the full dataset and keep only the training partition; the second
# value returned by split_train_test is discarded.
diamonds, _ = split_train_test(load_diamonds())
# Separate the training data into X_train and y_train.
X_train, y_train = split_X_y(diamonds)

In [4]:
# Preprocess the training set: look up the "adder_all_ord_enc" preprocessor,
# fit it on X_train and transform X_train in one step.
preprocessor = PREPROCESSORS["adder_all_ord_enc"]
X_prep = preprocessor.fit_transform(X_train)

In [5]:
# Hyperparameter grid explored by the search: every combination of the values
# below is evaluated (3 x 3 x 3 = 27 candidates).
param_grid = dict(
    n_estimators=[100, 500, 1000],
    max_features=[0.5, 0.8, 1.0],
    max_depth=[10, 30, 100],
)

In [6]:
# Set up the grid search.

# Fixed seed so the random forest (and therefore every CV score) is
# reproducible across kernel restarts.
RANDOM_STATE = 42

# Cap the parallelism. With n_jobs=-1 (one worker per core) the workers were
# killed by the operating system (SIGKILL), most likely for exhausting memory:
# every concurrent job fits its own forest, so peak memory grows with the
# number of jobs running at once. Raise this only if the machine has the RAM.
N_JOBS = 2

grid_search = GridSearchCV(
    RandomForestRegressor(random_state=RANDOM_STATE),
    param_grid,
    cv=5,
    # Scores are *negated* RMSE, so higher (closer to zero) is better.
    scoring="neg_root_mean_squared_error",
    return_train_score=True,
    n_jobs=N_JOBS,
    # Do not queue more jobs than there are workers (the default is
    # 2 * n_jobs), which keeps the memory footprint bounded.
    pre_dispatch=N_JOBS,
)

In [7]:
# Run the search: cross-validate every parameter combination on the
# preprocessed training data.
grid_search.fit(X=X_prep, y=y_train)

exception calling callback for <Future at 0x7fae1bf510d0 state=finished raised TerminatedWorkerError>
Traceback (most recent call last):
  File "/home/gontz/miniconda3/envs/ih/lib/python3.8/site-packages/joblib/externals/loky/_base.py", line 625, in _invoke_callbacks
    callback(self)
  File "/home/gontz/miniconda3/envs/ih/lib/python3.8/site-packages/joblib/parallel.py", line 347, in __call__
    self.parallel.dispatch_next()
  File "/home/gontz/miniconda3/envs/ih/lib/python3.8/site-packages/joblib/parallel.py", line 780, in dispatch_next
    if not self.dispatch_one_batch(self._original_iterator):
  File "/home/gontz/miniconda3/envs/ih/lib/python3.8/site-packages/joblib/parallel.py", line 847, in dispatch_one_batch
    self._dispatch(tasks)
  File "/home/gontz/miniconda3/envs/ih/lib/python3.8/site-packages/joblib/parallel.py", line 765, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "/home/gontz

TerminatedWorkerError: A worker process managed by the executor was unexpectedly terminated. This could be caused by a segmentation fault while calling the function or by an excessive memory usage causing the Operating System to kill the worker.

The exit codes of the workers are {SIGKILL(-9)}

In [None]:
# Cross-validation results, ranked from best (lowest RMSE) to worst.
cv_res = grid_search.cv_results_

# Sort on the score alone. Sorting the raw (score, params) tuples would fall
# back to comparing the params dicts whenever two scores tie, and dicts are
# not orderable (TypeError).
score_tups = sorted(
    zip(cv_res["mean_test_score"], cv_res["params"]),
    key=lambda tup: tup[0],
    reverse=True,
)

for score, params in score_tups:
    # The scorer is "neg_root_mean_squared_error", so flip the sign to
    # report a positive RMSE.
    print(f"RMSE: {-score} for params {params}")

In [None]:
# Best hyperparameter combination found by the search.
# best_params_ is an attribute of the GridSearchCV object itself; the refitted
# best_estimator_ (a RandomForestRegressor) does not have it.
grid_search.best_params_