In [None]:
import optuna
import gym
import numpy as np
import random
from q_learning_oop import *
from IPython.display import clear_output
from test_runner import *

# Build the registered FrozenLake environment, then keep the object exposed by
# its `.env` attribute (presumably the env without gym's outer wrapper -- TODO confirm).
registered_env = gym.make("FrozenLake-v0")
frozen_lake_env = registered_env.env

In [None]:
def get_model_params(trial):
    """
    Sample the learning hyperparameters we want to optimize for one trial.

    Every value is drawn with ``trial.suggest_uniform`` from a fixed range,
    in this order: ``alpha``, ``gamma``, ``min_epsilon``, ``epsilon_decay``.

    :param trial: object exposing ``suggest_uniform(name, low, high)``.
    :return: dict mapping each hyperparameter name to its sampled value.
    """
    # (name, lower bound, upper bound) of every tuned hyperparameter.
    search_space = (
        ("alpha", 0.2, 0.3),
        ("gamma", 0.8, 0.999),
        ("min_epsilon", 0.01, 0.1),
        ("epsilon_decay", 0.8, 0.99999),
    )
    sampled = {}
    for name, low, high in search_space:
        sampled[name] = trial.suggest_uniform(name, low, high)
    return sampled

In [None]:
def objective(trial):
    """
    Objective function to be minimized: train and score one agent.

    Builds a ``QLearningAgent`` from the hyperparameters sampled for ``trial``,
    trains it for 5000 episodes of up to 100 steps, tests it for 1000
    episodes and returns ``test_results.bad``.

    A fresh environment is created on every call instead of reusing the
    module-level ``frozen_lake_env``: the study is optimized with
    ``n_jobs=-1``, so several trials run at the same time and would otherwise
    all drive one shared environment object concurrently.

    :param trial: the Optuna trial used to sample hyperparameters.
    :return: ``test_results.bad`` from the test run (lower is better).
    """
    # Private environment for this trial, built the same way as frozen_lake_env.
    trial_env = gym.make("FrozenLake-v0").env
    model_params = get_model_params(trial)
    agent = QLearningAgent(trial_env, epsilon=1, **model_params)
    train_runner = TrainRunner(trial_env, agent)
    train_runner.train(episodes=5000, steps_per_episode=100)
    tester = TestRunner(trial_env, agent)
    test_results = tester.test(1000)
    return test_results.bad  # An objective value linked with the Trial object.

# Disable Optuna's default log handler *before* creating the study, so the
# study-creation message is suppressed along with the per-trial messages.
optuna.logging.disable_default_handler()
study = optuna.create_study()  # Create a new study.
# Invoke optimization of the objective function: 500 trials, with n_jobs=-1
# leaving the number of parallel jobs to Optuna.
study.optimize(objective, n_trials=500, n_jobs=-1)
print(f"best params:\n\t{study.best_params}")
print(f"trials = {len(study.trials)}")
print(f"best value: {study.best_value}")

In [None]:
# Re-train a fresh agent using the best hyperparameters found by the study,
# with the same training budget as a single trial.
best_hyperparams = study.best_params
agent = QLearningAgent(frozen_lake_env, epsilon=1, **best_hyperparams)
train_runner = TrainRunner(frozen_lake_env, agent)
train_runner.train(episodes=5000, steps_per_episode=100)

In [None]:
# Number of evaluation episodes; one constant drives both the test run and the
# report line so the printed count cannot drift from what was actually run.
N_TEST_EPISODES = 1000
tester = TestRunner(frozen_lake_env, agent)
test_results = tester.test(N_TEST_EPISODES)
print(f"Results after {N_TEST_EPISODES} episodes:")
print(f"good = {test_results.good}")
print(f"bad = {test_results.bad}")