In [1]:
import gymnasium as gym
from pogema import GridConfig

from stable_baselines3.common.evaluation import evaluate_policy

%load_ext autoreload
%autoreload 2
%matplotlib inline

# ---------------------------------------------------------------------------
# Notebook-wide experiment settings
# ---------------------------------------------------------------------------
# Side length of the square grid map (8 means an 8x8 map).
GRID_SIZE = 8
# Obstacle density of the generated map.
DENSITY = 0.3
# Observation radius, i.e. the agent's field of view.
OBS_RADIUS = 3
# Time horizon of a single episode.
MAX_EPISODE_STEPS = 128
# Output locations and the model label used by this notebook.
SAVE_PARAMS_PATH = 'saved/tuned_params.yml'
SAVE_METRICS_PATH = 'saved/evaluation_metrics.yml'
MODEL_NAME = 'DQN_C'

# Single-agent Pogema grid. ``seed=None`` gives random obstacles, agent and
# target positions at each reset.
grid_config = GridConfig(
    size=GRID_SIZE,
    density=DENSITY,
    num_agents=1,
    obs_radius=OBS_RADIUS,
    max_episode_steps=MAX_EPISODE_STEPS,
    seed=None,
)

# Environment instance shared by the cells below.
env = gym.make("Pogema-v0", grid_config=grid_config)

  logger.warn(
  logger.warn(


### Optuna Integration

In [2]:
from typing import Any
from typing import Dict

import gymnasium
import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
import torch
import torch.nn as nn


# --- Optuna study budget ---------------------------------------------------
# Maximum number of trials requested from the study.
N_TRIALS = 100
# Start-up trial count handed to both the sampler and the pruner.
N_STARTUP_TRIALS = 5
# Number of evaluation rounds spread over one training run.
N_EVALUATIONS = 2
# Training timesteps per trial.
N_TIMESTEPS = 120_000
# Callback steps between two evaluation rounds.
EVAL_FREQ = N_TIMESTEPS // N_EVALUATIONS
# Episodes played in each evaluation round.
N_EVAL_EPISODES = 3

# Keyword arguments common to every DQN built during the search.
# NOTE: verbose=1 makes SB3 print its rollout tables for every trial, which
# dominates the cell output; lower it to 0 for a quieter run.
DEFAULT_HYPERPARAMS = dict(
    policy="MlpPolicy",
    verbose=1,
    env=env,
)

def sample_dqn_params(trial: optuna.Trial) -> Dict[str, Any]:
    """Draw one set of DQN hyperparameters from ``trial``.

    Only ``learning_rate`` and ``gamma`` are searched at the moment; other DQN
    knobs (batch size, gradient clipping, target update interval, exploration
    schedule) are left at their defaults.

    Note that the Optuna parameter called ``"gamma"`` is sampled on a log scale
    as ``1 - gamma``. The actual discount factor is what gets returned, and it
    is also stored as the ``"gamma"`` user attribute so it shows up in reports.

    :param trial: Optuna trial used to suggest values.
    :return: keyword arguments to merge into the DQN constructor arguments.
    """
    sampled: Dict[str, Any] = {}

    # Log-uniform learning rate.
    sampled["learning_rate"] = trial.suggest_float("learning_rate", 1e-5, 1, log=True)

    # Log-uniform distance of the discount factor from 1.
    one_minus_gamma = trial.suggest_float("gamma", 0.0001, 0.1, log=True)
    sampled["gamma"] = 1.0 - one_minus_gamma

    # Record the real discount factor next to the raw sampled parameter.
    trial.set_user_attr("gamma", sampled["gamma"])

    return sampled


class TrialEvalCallback(EvalCallback):
    """Evaluation callback that feeds its results to an Optuna trial.

    Every ``eval_freq`` calls it runs the parent evaluation, reports the
    resulting mean reward to the trial as the next intermediate value, and
    stops training when the trial's pruner asks for it.

    Attributes:
        trial: the Optuna trial receiving the intermediate values.
        eval_idx: number of evaluations reported so far (used as report step).
        is_pruned: set to True once the pruner requested to stop this trial.
    """

    def __init__(
        self,
        eval_env: gymnasium.Env,
        trial: optuna.Trial,
        n_eval_episodes: int = 5,
        eval_freq: int = 10000,
        deterministic: bool = True,
        verbose: int = 0,
    ):
        # Forward the evaluation settings unchanged to EvalCallback.
        parent_kwargs = {
            "eval_env": eval_env,
            "n_eval_episodes": n_eval_episodes,
            "eval_freq": eval_freq,
            "deterministic": deterministic,
            "verbose": verbose,
        }
        super().__init__(**parent_kwargs)
        self.trial = trial
        self.eval_idx = 0
        self.is_pruned = False

    def _on_step(self) -> bool:
        """Evaluate when due and return False if the trial should be pruned."""
        evaluation_due = self.eval_freq > 0 and self.n_calls % self.eval_freq == 0
        if not evaluation_due:
            return True

        # Run the parent evaluation; it refreshes ``last_mean_reward``.
        super()._on_step()
        self.eval_idx += 1
        self.trial.report(self.last_mean_reward, self.eval_idx)

        if not self.trial.should_prune():
            return True

        # Remember the pruning decision so the objective can raise TrialPruned.
        self.is_pruned = True
        return False


def _make_pogema_env():
    """Build a fresh Pogema environment from the notebook-level ``grid_config``."""
    # Mirrors how the notebook-level ``env`` is created; keep the two in sync.
    return gym.make("Pogema-v0", grid_config=grid_config)


def objective(trial: optuna.Trial) -> float:
    """Train one DQN with sampled hyperparameters and return its evaluation score.

    Each trial gets its own training environment and its own evaluation
    environment. Sharing a single instance between the learner and the
    evaluation callback would let every evaluation reset and step the
    environment the learner is in the middle of an episode on. It would also
    make the ``close()`` calls below shut down an environment that later
    trials still need.

    :param trial: Optuna trial providing the hyperparameters and pruning decision.
    :return: mean reward of the last evaluation, or NaN if training hit an
        ``AssertionError`` (Optuna then treats the trial as failed).
    :raises optuna.exceptions.TrialPruned: if the callback stopped training
        because the pruner requested it.
    """
    kwargs = DEFAULT_HYPERPARAMS.copy()
    # Sample hyperparameters.
    kwargs.update(sample_dqn_params(trial))
    # Override the shared default env with a trial-private training env.
    kwargs["env"] = _make_pogema_env()
    # Create the RL model.
    model = DQN(**kwargs)
    # Separate env used only for evaluation.
    eval_env = Monitor(_make_pogema_env())
    # Create the callback that will periodically evaluate and report the performance.
    eval_callback = TrialEvalCallback(
        eval_env, trial, n_eval_episodes=N_EVAL_EPISODES, eval_freq=EVAL_FREQ, deterministic=True
    )

    nan_encountered = False
    try:
        model.learn(N_TIMESTEPS, callback=eval_callback)
    except AssertionError as e:
        # Sometimes, random hyperparams can generate NaN.
        print(e)
        nan_encountered = True
    finally:
        # Free memory; both envs are owned by this trial, so closing is safe.
        model.env.close()
        eval_env.close()

    # Tell the optimizer that the trial failed.
    if nan_encountered:
        return float("nan")

    if eval_callback.is_pruned:
        raise optuna.exceptions.TrialPruned()

    return eval_callback.last_mean_reward


if __name__ == "__main__":
    # Set pytorch num threads to 1 for faster training.
    torch.set_num_threads(1)

    sampler = TPESampler(n_startup_trials=N_STARTUP_TRIALS)
    # Intended rule: do not prune before 1/3 of the evaluations are done.
    # The integer division yields 0 whenever N_EVALUATIONS < 3, in which case
    # a trial can already be pruned at its first evaluation.
    pruner = MedianPruner(n_startup_trials=N_STARTUP_TRIALS, n_warmup_steps=N_EVALUATIONS // 3)

    study = optuna.create_study(sampler=sampler, pruner=pruner, direction="maximize")
    try:
        # The study stops at N_TRIALS trials or after 600 seconds, whichever
        # comes first.
        study.optimize(objective, n_trials=N_TRIALS, timeout=600)
    except KeyboardInterrupt:
        # Manual interruption is allowed: report whatever has finished so far.
        pass

    print("Number of finished trials: ", len(study.trials))

    # ``study.best_trial`` raises ValueError when no trial has completed
    # (e.g. every trial was pruned/failed or the run was interrupted early).
    try:
        trial = study.best_trial
    except ValueError:
        trial = None
        print("No completed trial available; nothing to report.")

    if trial is not None:
        print("Best trial:")

        print("  Value: ", trial.value)

        print("  Params: ")
        for key, value in trial.params.items():
            print("    {}: {}".format(key, value))

        print("  User attrs:")
        for key, value in trial.user_attrs.items():
            print("    {}: {}".format(key, value))

  from .autonotebook import tqdm as notebook_tqdm
[I 2023-11-05 22:40:02,137] A new study created in memory with name: no-name-740663bd-fca6-4a6d-8bc3-8b8ab211e2c0


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 94       |
|    ep_rew_mean      | 0.75     |
|    exploration_rate | 0.97     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 17798    |
|    time_elapsed     | 0        |
|    total_timesteps  | 376      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 79.1     |
|    ep_rew_mean      | 0.75     |
|    exploration_rate | 0.95     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 18115    |
|    time_elapsed     | 0        |
|    total_timesteps  | 633      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 85.9     |
|    ep_rew_mean      | 0.667 

[I 2023-11-05 22:41:10,089] Trial 0 finished with value: 0.0 and parameters: {'learning_rate': 0.004472404643186033, 'gamma': 0.00016489410508541355}. Best is trial 0 with value: 0.0.


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 36.8     |
|    ep_rew_mean      | 1        |
|    exploration_rate | 0.988    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 12168    |
|    time_elapsed     | 0        |
|    total_timesteps  | 147      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 66.6     |
|    ep_rew_mean      | 0.625    |
|    exploration_rate | 0.958    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 14312    |
|    time_elapsed     | 0        |
|    total_timesteps  | 533      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 81.7     |
|    ep_rew_mean      | 0.5   



----------------------------------
| rollout/            |          |
|    ep_len_mean      | 94.9     |
|    ep_rew_mean      | 0.4      |
|    exploration_rate | 0.699    |
| time/               |          |
|    episodes         | 40       |
|    fps              | 18158    |
|    time_elapsed     | 0        |
|    total_timesteps  | 3797     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 94.5     |
|    ep_rew_mean      | 0.409    |
|    exploration_rate | 0.671    |
| time/               |          |
|    episodes         | 44       |
|    fps              | 18094    |
|    time_elapsed     | 0        |
|    total_timesteps  | 4158     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 92.6     |
|    ep_rew_mean      | 0.438    |
|    exploration_rate | 0.648    |
| time/               |          |
|    episodes       

[I 2023-11-05 22:42:13,367] Trial 1 finished with value: 0.0 and parameters: {'learning_rate': 0.6513443699055571, 'gamma': 0.031135585597910583}. Best is trial 0 with value: 0.0.


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 58.8     |
|    ep_rew_mean      | 0.75     |
|    exploration_rate | 0.981    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 14636    |
|    time_elapsed     | 0        |
|    total_timesteps  | 235      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 76.8     |
|    ep_rew_mean      | 0.625    |
|    exploration_rate | 0.951    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 15240    |
|    time_elapsed     | 0        |
|    total_timesteps  | 614      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 67.2     |
|    ep_rew_mean      | 0.667 



----------------------------------
| rollout/            |          |
|    ep_len_mean      | 86.1     |
|    ep_rew_mean      | 0.477    |
|    exploration_rate | 0.7      |
| time/               |          |
|    episodes         | 44       |
|    fps              | 17508    |
|    time_elapsed     | 0        |
|    total_timesteps  | 3790     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 83.2     |
|    ep_rew_mean      | 0.521    |
|    exploration_rate | 0.684    |
| time/               |          |
|    episodes         | 48       |
|    fps              | 17506    |
|    time_elapsed     | 0        |
|    total_timesteps  | 3995     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 81.7     |
|    ep_rew_mean      | 0.538    |
|    exploration_rate | 0.664    |
| time/               |          |
|    episodes       

[I 2023-11-05 22:43:18,387] Trial 2 finished with value: 0.3333333333333333 and parameters: {'learning_rate': 0.0001427505728460859, 'gamma': 0.0036527688275634334}. Best is trial 2 with value: 0.3333333333333333.


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 124      |
|    ep_rew_mean      | 0.25     |
|    exploration_rate | 0.961    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 19667    |
|    time_elapsed     | 0        |
|    total_timesteps  | 497      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 126      |
|    ep_rew_mean      | 0.125    |
|    exploration_rate | 0.92     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 19372    |
|    time_elapsed     | 0        |
|    total_timesteps  | 1009     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 109      |
|    ep_rew_mean      | 0.25  



----------------------------------
| rollout/            |          |
|    ep_len_mean      | 109      |
|    ep_rew_mean      | 0.361    |
|    exploration_rate | 0.69     |
| time/               |          |
|    episodes         | 36       |
|    fps              | 18983    |
|    time_elapsed     | 0        |
|    total_timesteps  | 3920     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 105      |
|    ep_rew_mean      | 0.375    |
|    exploration_rate | 0.667    |
| time/               |          |
|    episodes         | 40       |
|    fps              | 18980    |
|    time_elapsed     | 0        |
|    total_timesteps  | 4209     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 106      |
|    ep_rew_mean      | 0.386    |
|    exploration_rate | 0.632    |
| time/               |          |
|    episodes       

[I 2023-11-05 22:44:23,088] Trial 3 finished with value: 0.3333333333333333 and parameters: {'learning_rate': 0.00011619368563320167, 'gamma': 0.00028424287408071935}. Best is trial 2 with value: 0.3333333333333333.


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 58       |
|    ep_rew_mean      | 0.75     |
|    exploration_rate | 0.982    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 15854    |
|    time_elapsed     | 0        |
|    total_timesteps  | 232      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 73.8     |
|    ep_rew_mean      | 0.625    |
|    exploration_rate | 0.953    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 16718    |
|    time_elapsed     | 0        |
|    total_timesteps  | 590      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 71.5     |
|    ep_rew_mean      | 0.667 



----------------------------------
| rollout/            |          |
|    ep_len_mean      | 90.8     |
|    ep_rew_mean      | 0.475    |
|    exploration_rate | 0.712    |
| time/               |          |
|    episodes         | 40       |
|    fps              | 19241    |
|    time_elapsed     | 0        |
|    total_timesteps  | 3632     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 91.2     |
|    ep_rew_mean      | 0.477    |
|    exploration_rate | 0.682    |
| time/               |          |
|    episodes         | 44       |
|    fps              | 19155    |
|    time_elapsed     | 0        |
|    total_timesteps  | 4013     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 88.1     |
|    ep_rew_mean      | 0.5      |
|    exploration_rate | 0.665    |
| time/               |          |
|    episodes       

[I 2023-11-05 22:45:25,543] Trial 4 finished with value: 0.0 and parameters: {'learning_rate': 4.273876301526579e-05, 'gamma': 0.03249188822429629}. Best is trial 2 with value: 0.3333333333333333.


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 66.5     |
|    ep_rew_mean      | 0.75     |
|    exploration_rate | 0.979    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 17850    |
|    time_elapsed     | 0        |
|    total_timesteps  | 266      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 67.4     |
|    ep_rew_mean      | 0.625    |
|    exploration_rate | 0.957    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 18537    |
|    time_elapsed     | 0        |
|    total_timesteps  | 539      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 77.2     |
|    ep_rew_mean      | 0.5   



----------------------------------
| rollout/            |          |
|    ep_len_mean      | 87       |
|    ep_rew_mean      | 0.438    |
|    exploration_rate | 0.78     |
| time/               |          |
|    episodes         | 32       |
|    fps              | 15069    |
|    time_elapsed     | 0        |
|    total_timesteps  | 2785     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 91.5     |
|    ep_rew_mean      | 0.417    |
|    exploration_rate | 0.739    |
| time/               |          |
|    episodes         | 36       |
|    fps              | 15087    |
|    time_elapsed     | 0        |
|    total_timesteps  | 3295     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 95.2     |
|    ep_rew_mean      | 0.375    |
|    exploration_rate | 0.699    |
| time/               |          |
|    episodes       

[I 2023-11-05 22:45:36,973] Trial 5 pruned. 


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 114      |
|    ep_rew_mean      | 0.5      |
|    exploration_rate | 0.964    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 17238    |
|    time_elapsed     | 0        |
|    total_timesteps  | 456      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 115      |
|    ep_rew_mean      | 0.375    |
|    exploration_rate | 0.927    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 17933    |
|    time_elapsed     | 0        |
|    total_timesteps  | 921      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 99.9     |
|    ep_rew_mean      | 0.417 



----------------------------------
| rollout/            |          |
|    ep_len_mean      | 87       |
|    ep_rew_mean      | 0.5      |
|    exploration_rate | 0.697    |
| time/               |          |
|    episodes         | 44       |
|    fps              | 17905    |
|    time_elapsed     | 0        |
|    total_timesteps  | 3828     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 88.5     |
|    ep_rew_mean      | 0.5      |
|    exploration_rate | 0.664    |
| time/               |          |
|    episodes         | 48       |
|    fps              | 18037    |
|    time_elapsed     | 0        |
|    total_timesteps  | 4249     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 91.3     |
|    ep_rew_mean      | 0.481    |
|    exploration_rate | 0.624    |
| time/               |          |
|    episodes       

[I 2023-11-05 22:45:48,172] Trial 6 pruned. 


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 87       |
|    ep_rew_mean      | 0.5      |
|    exploration_rate | 0.972    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 18499    |
|    time_elapsed     | 0        |
|    total_timesteps  | 348      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 73       |
|    ep_rew_mean      | 0.75     |
|    exploration_rate | 0.954    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 18619    |
|    time_elapsed     | 0        |
|    total_timesteps  | 584      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 75.7     |
|    ep_rew_mean      | 0.75  

[I 2023-11-05 22:46:49,864] Trial 7 finished with value: 0.3333333333333333 and parameters: {'learning_rate': 0.002142552026450209, 'gamma': 0.0936782517307773}. Best is trial 2 with value: 0.3333333333333333.


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38       |
|    ep_rew_mean      | 0.75     |
|    exploration_rate | 0.988    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 8923     |
|    time_elapsed     | 0        |
|    total_timesteps  | 152      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 62       |
|    ep_rew_mean      | 0.625    |
|    exploration_rate | 0.961    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 13500    |
|    time_elapsed     | 0        |
|    total_timesteps  | 496      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 60.9     |
|    ep_rew_mean      | 0.667 



----------------------------------
| rollout/            |          |
|    ep_len_mean      | 82.2     |
|    ep_rew_mean      | 0.545    |
|    exploration_rate | 0.714    |
| time/               |          |
|    episodes         | 44       |
|    fps              | 17992    |
|    time_elapsed     | 0        |
|    total_timesteps  | 3617     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 83.5     |
|    ep_rew_mean      | 0.521    |
|    exploration_rate | 0.683    |
| time/               |          |
|    episodes         | 48       |
|    fps              | 17794    |
|    time_elapsed     | 0        |
|    total_timesteps  | 4010     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 86       |
|    ep_rew_mean      | 0.519    |
|    exploration_rate | 0.646    |
| time/               |          |
|    episodes       

[I 2023-11-05 22:47:01,288] Trial 8 pruned. 


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 105      |
|    ep_rew_mean      | 0.5      |
|    exploration_rate | 0.967    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 19333    |
|    time_elapsed     | 0        |
|    total_timesteps  | 419      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 72.8     |
|    ep_rew_mean      | 0.625    |
|    exploration_rate | 0.954    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 18448    |
|    time_elapsed     | 0        |
|    total_timesteps  | 582      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 72.8     |
|    ep_rew_mean      | 0.583 



----------------------------------
| rollout/            |          |
|    ep_len_mean      | 82.1     |
|    ep_rew_mean      | 0.477    |
|    exploration_rate | 0.714    |
| time/               |          |
|    episodes         | 44       |
|    fps              | 17802    |
|    time_elapsed     | 0        |
|    total_timesteps  | 3614     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 83.8     |
|    ep_rew_mean      | 0.479    |
|    exploration_rate | 0.681    |
| time/               |          |
|    episodes         | 48       |
|    fps              | 17795    |
|    time_elapsed     | 0        |
|    total_timesteps  | 4024     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 87.1     |
|    ep_rew_mean      | 0.462    |
|    exploration_rate | 0.641    |
| time/               |          |
|    episodes       

[I 2023-11-05 22:48:02,280] Trial 9 finished with value: 0.0 and parameters: {'learning_rate': 0.014523416594828297, 'gamma': 0.009507431641432255}. Best is trial 2 with value: 0.3333333333333333.


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 63.2     |
|    ep_rew_mean      | 0.75     |
|    exploration_rate | 0.98     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 11247    |
|    time_elapsed     | 0        |
|    total_timesteps  | 253      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 64.2     |
|    ep_rew_mean      | 0.75     |
|    exploration_rate | 0.959    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 13046    |
|    time_elapsed     | 0        |
|    total_timesteps  | 514      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 62.8     |
|    ep_rew_mean      | 0.75  



----------------------------------
| rollout/            |          |
|    ep_len_mean      | 85.5     |
|    ep_rew_mean      | 0.477    |
|    exploration_rate | 0.702    |
| time/               |          |
|    episodes         | 44       |
|    fps              | 17832    |
|    time_elapsed     | 0        |
|    total_timesteps  | 3762     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 88.4     |
|    ep_rew_mean      | 0.479    |
|    exploration_rate | 0.664    |
| time/               |          |
|    episodes         | 48       |
|    fps              | 18056    |
|    time_elapsed     | 0        |
|    total_timesteps  | 4242     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 91.4     |
|    ep_rew_mean      | 0.442    |
|    exploration_rate | 0.624    |
| time/               |          |
|    episodes       

[I 2023-11-05 22:48:13,744] Trial 10 pruned. 


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 106      |
|    ep_rew_mean      | 0.5      |
|    exploration_rate | 0.966    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 17817    |
|    time_elapsed     | 0        |
|    total_timesteps  | 424      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 82       |
|    ep_rew_mean      | 0.625    |
|    exploration_rate | 0.948    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 18179    |
|    time_elapsed     | 0        |
|    total_timesteps  | 656      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 77.7     |
|    ep_rew_mean      | 0.583 



----------------------------------
| rollout/            |          |
|    ep_len_mean      | 72.8     |
|    ep_rew_mean      | 0.615    |
|    exploration_rate | 0.7      |
| time/               |          |
|    episodes         | 52       |
|    fps              | 18595    |
|    time_elapsed     | 0        |
|    total_timesteps  | 3786     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 75.2     |
|    ep_rew_mean      | 0.589    |
|    exploration_rate | 0.667    |
| time/               |          |
|    episodes         | 56       |
|    fps              | 18628    |
|    time_elapsed     | 0        |
|    total_timesteps  | 4210     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 77.8     |
|    ep_rew_mean      | 0.567    |
|    exploration_rate | 0.63     |
| time/               |          |
|    episodes       

[I 2023-11-05 22:49:15,845] Trial 11 finished with value: 0.0 and parameters: {'learning_rate': 0.00010562302392289465, 'gamma': 0.00016899290372743368}. Best is trial 2 with value: 0.3333333333333333.


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 82.5     |
|    ep_rew_mean      | 0.75     |
|    exploration_rate | 0.974    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 16667    |
|    time_elapsed     | 0        |
|    total_timesteps  | 330      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 95.1     |
|    ep_rew_mean      | 0.5      |
|    exploration_rate | 0.94     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 18174    |
|    time_elapsed     | 0        |
|    total_timesteps  | 761      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 84.3     |
|    ep_rew_mean      | 0.583 



----------------------------------
| rollout/            |          |
|    ep_len_mean      | 102      |
|    ep_rew_mean      | 0.45     |
|    exploration_rate | 0.678    |
| time/               |          |
|    episodes         | 40       |
|    fps              | 18883    |
|    time_elapsed     | 0        |
|    total_timesteps  | 4063     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 98.6     |
|    ep_rew_mean      | 0.477    |
|    exploration_rate | 0.657    |
| time/               |          |
|    episodes         | 44       |
|    fps              | 18811    |
|    time_elapsed     | 0        |
|    total_timesteps  | 4337     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 97       |
|    ep_rew_mean      | 0.479    |
|    exploration_rate | 0.632    |
| time/               |          |
|    episodes       

[I 2023-11-05 22:49:26,944] Trial 12 pruned. 


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 99.5     |
|    ep_rew_mean      | 0.25     |
|    exploration_rate | 0.968    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 18661    |
|    time_elapsed     | 0        |
|    total_timesteps  | 398      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 72.2     |
|    ep_rew_mean      | 0.5      |
|    exploration_rate | 0.954    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 17463    |
|    time_elapsed     | 0        |
|    total_timesteps  | 578      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 70.7     |
|    ep_rew_mean      | 0.583 



----------------------------------
| rollout/            |          |
|    ep_len_mean      | 84.8     |
|    ep_rew_mean      | 0.5      |
|    exploration_rate | 0.731    |
| time/               |          |
|    episodes         | 40       |
|    fps              | 17449    |
|    time_elapsed     | 0        |
|    total_timesteps  | 3392     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 85.9     |
|    ep_rew_mean      | 0.5      |
|    exploration_rate | 0.701    |
| time/               |          |
|    episodes         | 44       |
|    fps              | 17505    |
|    time_elapsed     | 0        |
|    total_timesteps  | 3780     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 86.8     |
|    ep_rew_mean      | 0.479    |
|    exploration_rate | 0.67     |
| time/               |          |
|    episodes       

[I 2023-11-05 22:49:37,814] Trial 13 pruned. 


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 99.2     |
|    ep_rew_mean      | 0.25     |
|    exploration_rate | 0.969    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 15344    |
|    time_elapsed     | 0        |
|    total_timesteps  | 397      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 97.1     |
|    ep_rew_mean      | 0.375    |
|    exploration_rate | 0.938    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 16608    |
|    time_elapsed     | 0        |
|    total_timesteps  | 777      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 91.4     |
|    ep_rew_mean      | 0.417 



----------------------------------
| rollout/            |          |
|    ep_len_mean      | 85.6     |
|    ep_rew_mean      | 0.455    |
|    exploration_rate | 0.702    |
| time/               |          |
|    episodes         | 44       |
|    fps              | 18587    |
|    time_elapsed     | 0        |
|    total_timesteps  | 3765     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 84.5     |
|    ep_rew_mean      | 0.458    |
|    exploration_rate | 0.679    |
| time/               |          |
|    episodes         | 48       |
|    fps              | 18542    |
|    time_elapsed     | 0        |
|    total_timesteps  | 4054     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 87.8     |
|    ep_rew_mean      | 0.423    |
|    exploration_rate | 0.639    |
| time/               |          |
|    episodes       

[I 2023-11-05 22:49:48,965] Trial 14 pruned. 


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 55.2     |
|    ep_rew_mean      | 0.75     |
|    exploration_rate | 0.983    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 11825    |
|    time_elapsed     | 0        |
|    total_timesteps  | 221      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 60.8     |
|    ep_rew_mean      | 0.625    |
|    exploration_rate | 0.962    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 12677    |
|    time_elapsed     | 0        |
|    total_timesteps  | 486      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 63.8     |
|    ep_rew_mean      | 0.583 

[I 2023-11-05 22:50:51,033] Trial 15 finished with value: 0.0 and parameters: {'learning_rate': 0.00039287493180241206, 'gamma': 0.00010261269962331702}. Best is trial 2 with value: 0.3333333333333333.


Number of finished trials:  16
Best trial:
  Value:  0.3333333333333333
  Params: 
    learning_rate: 0.0001427505728460859
    gamma: 0.0036527688275634334
  User attrs:
    gamma: 0.9963472311724366


### Save tuned hyperparameters

In [3]:
from lib.utils import *

# Drop the environment object from the default hyperparameters so that it is not
# saved as a value in the YAML file. `pop` with a default removes the key whenever
# it is present (independent of the truthiness of the stored value) and is a no-op
# if the key is already gone, so this cell can be re-run safely.
# Note: this mutates DEFAULT_HYPERPARAMS in place; cells below see it without 'env'.
DEFAULT_HYPERPARAMS.pop('env', None)

# `trial` must already exist in the kernel (presumably the best trial selected by
# the tuning cell above -- confirm). `save_model_params` comes from the star import
# of lib.utils; presumably it writes the trial's parameters together with the
# defaults under MODEL_NAME to SAVE_PARAMS_PATH -- verify against lib/utils.
save_model_params(trial, MODEL_NAME, SAVE_PARAMS_PATH, DEFAULT_HYPERPARAMS)