# Try out Ray Tune with scikit-learn & Keras

In [None]:
import random

import numpy as np
from ray import tune
from ray.air import session
from ray.tune.schedulers import AsyncHyperBandScheduler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from tensorflow.keras import callbacks, layers, models, optimizers

from q2_time.model import split_data_by_host
from q2_time.simulate_data import simulate_data

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# Build a simulated dataset and split it into train/test feature and target sets
host_id = "host_id"
target = "age_days"
train_size = 0.8
seed = 12

# simulate_data returns a feature table and a metadata table
# (argument 100 is presumably the number of simulated samples - TODO confirm)
ft, md = simulate_data(100)

# attach the features to the metadata and order rows by host, then by target
data = md.join(ft, how="left").sort_values([host_id, target])
train, test = split_data_by_host(data, host_id, train_size, seed)

# features are the columns of the simulated feature table; target is `age_days`
feature_columns = ft.columns
X_train, X_test = train[feature_columns], test[feature_columns]
y_train, y_test = train[target], test[target]

In [None]:
# Define a training function for RandomForest
def train_rf(config, X_train, y_train, X_test, y_test, seed):
    """Fit a RandomForestRegressor for one Ray Tune trial and report its test RMSE.

    Args:
        config: Hyperparameters sampled by Ray Tune. Must contain
            "n_estimators" and "max_depth"; the optional keys listed in
            `optional_keys` below are forwarded to the model when present.
        X_train: Training features.
        y_train: Training targets.
        X_test: Held-out features used for scoring.
        y_test: Held-out targets used for scoring.
        seed: Random state of the forest, so that a trial is reproducible.

    Reports:
        {"rmse": <root mean squared error on the test set>} via `session.report`.
    """
    # Hyperparameters that a search space may or may not define; forwarding
    # only the ones present keeps smaller configs working unchanged.
    optional_keys = (
        "min_samples_split",
        "min_samples_leaf",
        "max_features",
        "min_impurity_decrease",
        "bootstrap",
    )
    extra_params = {key: config[key] for key in optional_keys if key in config}

    rf = RandomForestRegressor(
        n_estimators=config["n_estimators"],
        max_depth=config["max_depth"],
        # previously `seed` was accepted but never used
        random_state=seed,
        **extra_params,
    )
    rf.fit(X_train, y_train)
    y_pred = rf.predict(X_test)
    score = np.sqrt(mean_squared_error(y_test, y_pred))

    session.report({"rmse": score})


# Define a training function for Keras neural network
def train_nn(config):
    """Train a dense Keras regressor for one Ray Tune trial and report its test RMSE.

    The data is read from the module-level `X_train`, `y_train`, `X_test`
    and `y_test` variables.

    Args:
        config: Hyperparameters sampled by Ray Tune. Must contain
            "n_layers", "learning_rate" and one "n_units_l{i}" entry for
            every i in range(n_layers).

    Reports:
        {"rmse": <root mean squared error on the test set>} via `session.report`.
    """
    n_layers = config["n_layers"]
    model = models.Sequential()
    # one input per feature column
    model.add(layers.Input(shape=(X_train.shape[1],)))

    # hidden layers, each with its own sampled width
    for i in range(n_layers):
        num_hidden = config[f"n_units_l{i}"]
        model.add(layers.Dense(num_hidden, activation="relu"))

    # single linear output unit for regression
    model.add(layers.Dense(1))

    learning_rate = config["learning_rate"]
    optimizer = optimizers.Adam(learning_rate=learning_rate)

    model.compile(optimizer=optimizer, loss="mean_squared_error")

    # stop after 10 epochs without improvement of the monitored quantity
    # (Keras default) and roll back to the best weights seen
    early_stopping = callbacks.EarlyStopping(patience=10, restore_best_weights=True)

    model.fit(
        X_train,
        y_train,
        epochs=100,
        batch_size=32,
        validation_split=0.2,
        callbacks=[early_stopping],
        verbose=0,
    )

    y_pred = model.predict(X_test)
    score = np.sqrt(mean_squared_error(y_test, y_pred))
    # the score is a root mean squared error, so report it as "rmse"
    # (it was previously mislabeled "mse"), matching train_rf
    session.report({"rmse": score})

In [None]:
# Search space from which Ray Tune samples the RandomForest hyperparameters
rf_space = dict(
    n_estimators=tune.randint(100, 1000),
    max_depth=tune.randint(2, 32),
    # float sampled uniformly in log space between the two bounds, quantized
    # to the given increment
    # todo: find equivalent to previous values here! 0.001, 0.01, 0.1
    min_samples_split=tune.qloguniform(1e-4, 1e-1, 5e-5),
    min_samples_leaf=tune.choice([0.00001, 0.0001]),
    max_features=tune.choice([None, "sqrt", "log2", 0.1, 0.2, 0.5, 0.8]),
    min_impurity_decrease=tune.loguniform(1e-4, 1e-2),
    bootstrap=tune.choice([True, False]),
)

# Number of hyperparameter configurations to try
num_trials = 5

# Seed the global random generators used by search algorithms/schedulers
# todo: confirm this is sufficient to make the sampling reproducible
random.seed(seed)
np.random.seed(seed)

# AsyncHyperBand aggressively stops badly performing trials early:
# - grace_period: minimum number of training iterations before a trial may be stopped
# - max_t: trials are stopped once this many iterations have passed
scheduler = AsyncHyperBandScheduler(grace_period=5, max_t=100)

# Bind the datasets and seed to the trainable ...
rf_trainable = tune.with_parameters(
    train_rf,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    seed=seed,
)
# ... and request one CPU per trial
rf_trainable = tune.with_resources(rf_trainable, {"cpu": 1})

# Minimize the "rmse" value reported by the trainable (todo: adjust above)
rf_tune_config = tune.TuneConfig(
    metric="rmse",
    mode="min",
    scheduler=scheduler,
    num_samples=num_trials,
)

# Run the hyperparameter search
analysis_rf = tune.Tuner(
    rf_trainable,
    param_space=rf_space,
    tune_config=rf_tune_config,
)
results_rf = analysis_rf.fit()

In [None]:
# Show the hyperparameter configuration of the best RandomForest trial
best_result_rf = results_rf.get_best_result()
print("Best hyperparameters found were: ", best_result_rf.config)

In [None]:
# Number of results returned by the RandomForest tuning run
# (presumably equal to num_trials - verify)
len(results_rf)

In [None]:
# # Ray Tune configuration for Keras neural network
# nn_space = {
#     "n_layers": tune.randint(1, 3),
#     "learning_rate": tune.loguniform(1e-5, 1e-1),
# }
# for i in range(3):
#     nn_space[f"n_units_l{i}"] = tune.randint(4, 64)

# # Tune Keras neural network hyperparameters
# analysis_nn = tune.Tuner(
#     train_nn,
#     param_space=nn_space,
#     tune_config=tune.TuneConfig(
#         num_samples=num_trials,
#         # which resource to use
#         resources_per_trial=resources_per_trial,
#     ),
#     progress_reporter=reporter,
#     name="nn_tuning",
# )
# results_nn = analysis_nn.fit()