In [None]:
from ray import tune
import os
import shutil
import time
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
from ray.tune import JupyterNotebookReporter

%matplotlib inline
plt.style.use('dark_background')
import plotly.io as pio
pio.renderers.default = 'plotly_mimetype+notebook'

import visualize
DELETE = True  # when True, the last cell removes the tune results directory (`tune_dir`)

This is a general reference notebook that explores hyperparameter tuning with Ray Tune.


# Very simple function to hypertune

Let's start with a basic function:
$$ f(x) = ax^3 + bx^2 + cx $$

In [None]:
import numpy as np

def model(x, config):
    """
    Evaluate the cubic polynomial a*x^3 + b*x^2 + c*x.

    `x` is the input and `config` is a mapping that holds the coefficients
    under the keys "a", "b" and "c". To keep things simple, this model has no
    learnable parameters like our models would usually have: its output is
    fully determined by `x` and `config`.
    """
    a, b, c = config["a"], config["b"], config["c"]
    cubic = a * x**3
    quadratic = b * x**2
    linear = c * x
    return cubic + quadratic + linear

We now want to know: what are optimal values for $a$, $b$ and $c$ such that the
mean is minimized, or maximized? Let's test
some values.

In [None]:
NUM_SAMPLES = 40 # number of trials (sampled configurations) per search
NUM_DATA = 200 # number of observations in our data
MAX_ITER = 15 # every trial is stopped after at most 15 reported iterations
MODE = "max" # we want to maximize the mean score. This can also be "min"

In [None]:
# evaluate the model for two hand-picked configurations on an even grid
x = np.linspace(-2, 2, NUM_DATA)
y1 = model(x, {"a": -2, "b": 2, "c": 2})
y2 = model(x, {"a": 1.2, "b": -3.5, "c": 2})
# draw both curves in the same axes, labelled by variable name
for curve, name in ((y1, "y1"), (y2, "y2")):
    plt.plot(x, curve, label=name)
plt.legend()


In [None]:
# mean output of the two example configurations
y1.mean(), y2.mean()


Now, let's try to hypertune this.

# Data

First, generate `NUM_DATA` (200) datapoints.

In [None]:
# NUM_DATA random datapoints, drawn uniformly from the domain [-2, 2];
# the fixed seed keeps the sample identical between runs
np.random.seed(42)
data = np.random.uniform(low=-2, high=2, size=NUM_DATA)

# Train function and config

In [None]:
from typing import Dict
import ray


def train(config: Dict) -> None:
    """
    Trainable for Ray Tune: score `model` on the data and report the result.

    For each of 100 epochs, the mean of `model(x, config)` over all
    observations in the module-level `data` is computed and reported under the
    key "mean_score". Callers normally end a trial earlier through a stop
    criterion on the number of reported iterations.

    Args:
        config: hyperparameter values for this trial; passed on to `model`.
    """
    np.random.seed(42)
    # The seed is just for didactic purposes, to make the conclusions reproducible.
    # In a real setting, you shouldn't use a seed while exploring hyperparameters!
    # If you really want something deterministic, you need to change the
    # seed every time you run, and store the seed in the settings.

    # Shuffle a copy: `np.random.shuffle(data)` would reorder the shared
    # module-level array in place as a side effect of every call.
    shuffled = np.random.permutation(data)
    for _ in range(100):
        loss = 0.0
        # we run the model on the data
        for x in shuffled:
            score = model(x, config)
            # calculate the loss
            loss += score.mean()

        # and log the loss to ray.tune
        ray.train.report({"mean_score": loss / len(shuffled)})


# search space: a, b and c are each sampled uniformly between -2 and 2
config = {name: tune.uniform(-2, 2) for name in ("a", "b", "c")}


For every epoch we loop through the data: for every observation $x$ we evaluate
the model and accumulate the score in `loss`.
After each epoch the mean score is reported to Ray Tune with `ray.train.report`.
Tune keeps track of the iterations and of the mean score.

Our config defines a uniform distribution for values of a, b and c

In [None]:
# elapsed wall-clock seconds per search strategy, keyed by strategy name
timer = dict()
# best configuration (plus its mean score) per search strategy
best_config = dict()


# Random search

Now, let's do a random search. Tune prints some progress output so we can keep track of the run.
The `tune.run` function runs the hypertuning. Our metric is the value of
`mean_score`, which is what we report inside `train`. We want to maximize this
value, so we tell tune to set `mode` to `"max"`.

We will take `NUM_SAMPLES` (40) samples, and stop training after `MAX_ITER` (15) iterations for every sample.

In [None]:
from pathlib import Path

# directory for the tune results, relative to the current working directory
tune_dir = Path("..", "..", "models", "ray")
# show whether it exists already, and the absolute path it resolves to
tune_dir.exists(), tune_dir.resolve()


In [None]:
# Random search: no search algorithm or scheduler is passed, so the trials are
# sampled from the search space in `config`.
tic = time.time()
analysis = tune.run(
    train,
    config=config,
    metric="mean_score",
    mode=MODE,
    local_dir=str(tune_dir.resolve()),
    num_samples=NUM_SAMPLES,
    stop={"training_iteration": MAX_ITER},
    verbose=1,
)

timer["ray_random"] = time.time() - tic
# Build a new dict instead of writing "mean_score" into the dict returned by
# `get_best_config()`, so the trial's own config keeps only hyperparameters.
best = {
    **analysis.get_best_config(),
    "mean_score": analysis.best_result["mean_score"],
}
best_config["random"] = best


So, we searched the hyperparameter space. The problem is that these spaces can get
pretty big. Imagine you have 10 hyperparameters, and every hyperparameter has 5
possible (relevant) values: you already have $5^{10}$ possible combinations, which is almost 10 million. Even if checking a single configuration took just 1 second, it would take more than 100 days to check them all. This
space can grow out of control pretty fast.

In [None]:
# best configuration of the random search, together with its mean score
best

# Resulting config

Now, let's run the model with the best config.

In [None]:
# Evaluate the model on the data with the best configuration that was found.
# A dedicated name is used so the search space in `config` is not overwritten;
# otherwise re-running the tuning cell above would start from a fixed config.
best_params = analysis.get_best_config()
y = model(data, best_params)
plt.scatter(data, y)


# config is sampled at random

And if we visualize the sampled hyperparameter space, we can clearly see it is
sampled at random.

In [None]:
# list the columns that are available in the results table
plot = analysis.results_df
plot.columns

In [None]:
import plotly.express as px

# one axis per hyperparameter plus one for the score, colored by score
select = ["config/a", "config/b", "config/c", "mean_score"]
plot = analysis.results_df
p = plot.loc[:, select].reset_index()
px.parallel_coordinates(p, color="mean_score")


Note how the mean scores are sort of randomly distributed. This is a direct
effect of random guessing parameters.

# Bayes

Now, we improve the search algorithm with a bayesian optimization.

In [None]:
from ray.tune.search.bayesopt import BayesOptSearch

# `random_search_steps` is the number of initial, purely random trials.
# Setting it to NUM_SAMPLES would make every trial a random one, so the
# Bayesian optimizer would never get to propose a configuration. We spend a
# quarter of the budget on random exploration instead.
bayesopt = BayesOptSearch(random_search_steps=max(1, NUM_SAMPLES // 4))

config = {"a": tune.uniform(-2, 2), "b": tune.uniform(-2, 2), "c": tune.uniform(-2, 2)}

tic = time.time()
analysis = tune.run(
    train,
    config=config,
    metric="mean_score",
    mode=MODE,
    local_dir=str(tune_dir.resolve()),
    num_samples=NUM_SAMPLES,
    stop={"training_iteration": MAX_ITER},
    search_alg=bayesopt,
    verbose=2,
)

timer["ray_bayes"] = time.time() - tic

# Build a new dict instead of writing "mean_score" into the dict returned by
# `get_best_config()`, so the trial's own config keeps only hyperparameters.
best = {
    **analysis.get_best_config(),
    "mean_score": analysis.best_result["mean_score"],
}
best_config["bayes"] = best


In [None]:
# best configuration of the Bayesian search, together with its mean score
best

In [None]:
# compare the elapsed time of the searches so far; `plot_timers` comes from
# the local `visualize` module (not shown here) -- presumably one bar per key
visualize.plot_timers(timer)


In [None]:
# hyperparameters and score of every trial of the Bayesian search
select = ["config/a", "config/b", "config/c", "mean_score"]
plot = analysis.results_df
p = plot.loc[:, select].reset_index()
px.parallel_coordinates(p, color="mean_score")


Not only is this slightly faster, you can also see that some scores are a bit more clustered.
In addition to that, the result is more often a bit
better than random guesses.

In [None]:
import pandas as pd

# one row per search strategy: its best hyperparameters and mean score
pd.DataFrame.from_dict(best_config, orient="index")


# Hyperband

Hyperband aborts runs early. Configs that are unpromising are abandoned before they complete.

In [None]:
from ray.tune.schedulers import AsyncHyperBandScheduler

# The scheduler measures progress in training iterations and is capped at
# MAX_ITER iterations per trial; it decides which trials are stopped early.
scheduler = AsyncHyperBandScheduler(
    time_attr="training_iteration", grace_period=1, reduction_factor=3, max_t=MAX_ITER
)

config = {"a": tune.uniform(-2, 2), "b": tune.uniform(-2, 2), "c": tune.uniform(-2, 2)}

tic = time.time()
analysis = tune.run(
    train,
    config=config,
    metric="mean_score",
    mode=MODE,
    local_dir=str(tune_dir.resolve()),
    num_samples=NUM_SAMPLES,
    stop={"training_iteration": MAX_ITER},
    scheduler=scheduler,
    verbose=2,
)
timer["ray_hyperband"] = time.time() - tic

# Build a new dict instead of writing "mean_score" into the dict returned by
# `get_best_config()`, so the trial's own config keeps only hyperparameters.
best = {
    **analysis.get_best_config(),
    "mean_score": analysis.best_result["mean_score"],
}
best_config["hyperband"] = best


In [None]:
# compare the elapsed time of the searches so far; `plot_timers` comes from
# the local `visualize` module (not shown here) -- presumably one bar per key
visualize.plot_timers(timer)


In [None]:
# hyperparameters and score of every trial of the hyperband search
select = ["config/a", "config/b", "config/c", "mean_score"]
plot = analysis.results_df
p = plot.loc[:, select].reset_index()
px.parallel_coordinates(p, color="mean_score")


In [None]:
# distribution of the reported mean score across the trials
plt.hist(plot["mean_score"])


The result is better, too. You can see that there are more scores towards the maximum.
You can also see that only some (the best) have been run for the maximum amount of iterations.

In [None]:
# Plot the score against the number of iterations every trial actually ran, so
# the early-stopped trials become visible (the row index does not show this).
iterations = analysis.results_df[["training_iteration", "mean_score"]]
plt.scatter(data=iterations, x="training_iteration", y="mean_score")


In [None]:
# one row per search strategy: its best hyperparameters and mean score
pd.DataFrame.from_dict(best_config, orient="index")


# Hyperbayes

In [None]:
from ray.tune.schedulers.hb_bohb import HyperBandForBOHB
from ray.tune.search.bohb import TuneBOHB

# Hyperband scheduler variant that is paired with the BOHB search algorithm
# below; progress is measured in training iterations, capped at MAX_ITER.
bohb_hyperband = HyperBandForBOHB(
    time_attr="training_iteration",
    max_t=MAX_ITER,
    reduction_factor=3,
    stop_last_trials=False,
)


config = {"a": tune.uniform(-2, 2), "b": tune.uniform(-2, 2), "c": tune.uniform(-2, 2)}

bohb_search = TuneBOHB()

tic = time.time()
analysis = tune.run(
    train,
    config=config,
    metric="mean_score",
    mode=MODE,
    local_dir=str(tune_dir.resolve()),
    num_samples=NUM_SAMPLES,
    stop={"training_iteration": MAX_ITER},
    search_alg=bohb_search,
    scheduler=bohb_hyperband,
    verbose=2,
)
timer["ray_hyperbayes"] = time.time() - tic

# Build a new dict instead of writing "mean_score" into the dict returned by
# `get_best_config()`, so the trial's own config keeps only hyperparameters.
best = {
    **analysis.get_best_config(),
    "mean_score": analysis.best_result["mean_score"],
}
best_config["hyperbayes"] = best


In [None]:
# compare the elapsed time of all four searches; `plot_timers` comes from
# the local `visualize` module (not shown here) -- presumably one bar per key
visualize.plot_timers(timer)


In [None]:
# hyperparameters and score of every trial of the BOHB search
select = ["config/a", "config/b", "config/c", "mean_score"]
plot = analysis.results_df
p = plot.loc[:, select].reset_index()
px.parallel_coordinates(p, color="mean_score")


In [None]:
# one row per search strategy: its best hyperparameters and mean score
pd.DataFrame.from_dict(best_config, orient="index")


In [None]:
# score versus the number of iterations every trial has run
p = analysis.results_df.loc[:, ["training_iteration", "mean_score"]]
plt.scatter(x="training_iteration", y="mean_score", data=p)

In [None]:
# Remove the tune results when requested. The existence check keeps this cell
# re-runnable: `shutil.rmtree` raises when the directory is already gone.
if DELETE and tune_dir.exists():
    shutil.rmtree(tune_dir)
