# Dask RF GridSearchCV

In [None]:
from time import time

import numpy as np
from dask.distributed import Client

from wildfires.dask_cx1 import DaskRandomForestRegressor, fit_dask_rf_grid_search_cv

## Establish the Dask Client

In [None]:
# Create a LocalCluster for demonstration purposes: two workers, each
# restricted to a single thread.
client = Client(threads_per_worker=1, n_workers=2)
# Leave the client as the cell's final expression so the notebook displays it.
client

## Set up the grid search parameters and data

In [None]:
# Synthetic data shared by the grid searches: 1000 samples with 10 uniform
# random features. The target is the sum of the first two features plus
# uniform noise, so the remaining 8 features do not enter the target.
np.random.seed(1)  # Fixed seed so that every run draws identical data.
X = np.random.random(size=(1000, 10))
y = X[:, 0] + X[:, 1] + np.random.random(len(X))

# Estimator keyword arguments that stay fixed, i.e. are not part of the
# searched parameter grid.
# NOTE(review): `max_features="auto"` was deprecated in scikit-learn 1.1 and
# removed in 1.3 for forest estimators — confirm it is still accepted by the
# installed version / by DaskRandomForestRegressor.
default_param_dict = dict(
    random_state=1,
    bootstrap=True,
    max_features="auto",
)

## Carry out the grid search using our custom implementation

In [None]:
# Parameter space for the search: only `min_samples_leaf` takes more than one
# value here, and `n_estimators` has a single candidate.
parameters_RF = dict(
    n_estimators=[500],
    max_depth=[6],
    min_samples_split=[2],
    min_samples_leaf=[1, 5, 10],
)

# Time the whole search, from submission until the results are returned.
start = time()
results = fit_dask_rf_grid_search_cv(
    DaskRandomForestRegressor(**default_param_dict),
    X,
    y,
    param_grid=parameters_RF,
    n_splits=5,
    refit=False,
    return_train_score=True,
    local_n_jobs=2,
    client=client,
    verbose=True,
)
elapsed = time() - start

# Report the elapsed time divided by the number of `n_estimators` candidates.
n_estimator_options = len(parameters_RF["n_estimators"])
print(f"Time per n_estimators {elapsed / n_estimator_options:0.1f} s")

In [None]:
# Parameter space for the search: four `n_estimators` candidates combined with
# three `min_samples_leaf` candidates; the other parameters have one value.
parameters_RF = dict(
    n_estimators=[500, 501, 502, 503],
    max_depth=[6],
    min_samples_split=[2],
    min_samples_leaf=[1, 5, 10],
)

# Time the whole search, from submission until the results are returned.
start = time()
results = fit_dask_rf_grid_search_cv(
    DaskRandomForestRegressor(**default_param_dict),
    X,
    y,
    param_grid=parameters_RF,
    n_splits=5,
    refit=False,
    return_train_score=True,
    local_n_jobs=2,
    client=client,
    verbose=True,
)
elapsed = time() - start

# Report the elapsed time divided by the number of `n_estimators` candidates,
# giving an average cost per candidate value.
n_estimator_options = len(parameters_RF["n_estimators"])
print(f"Time per n_estimators {elapsed / n_estimator_options:0.1f} s")