<a href="https://colab.research.google.com/github/i-ganza007/PacMan_Formative/blob/main/CNNPOLICY_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Environment setup: uninstall any preinstalled copies of the RL stack, then
# install the exact pinned versions listed below.
!pip uninstall -y gymnasium stable-baselines3 ale-py shimmy tensorboard protobuf -y
# NOTE(review): this path hard-codes Python 3.11 — confirm it matches the
# interpreter of the runtime this notebook is executed on.
!rm -rf /usr/local/lib/python3.11/dist-packages/gymnasium*
# Remove pip's cache so the install below does not reuse previously cached files.
!rm -rf ~/.cache/pip
!pip cache purge

# Pinned install; --no-cache-dir keeps pip from writing to or reading from its cache.
!pip install --no-cache-dir \
    "protobuf==3.20.3" \
    "tensorboard==2.14.0" \
    "gymnasium[atari,accept-rom-license]==0.29.1" \
    "ale-py==0.8.1" \
    "shimmy==0.2.1" \
    "stable-baselines3==2.3.0" \
    "autorom[accept-rom-license]"

# Sanity check: import gymnasium.vector in a fresh interpreter and list its contents.
!python -c "import gymnasium.vector; print(dir(gymnasium.vector))"

In [None]:
import os
import gc
import torch
import warnings
import numpy as np
import gymnasium as gym
import ale_py
from datetime import datetime
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
warnings.filterwarnings('ignore')
gym.register_envs(ale_py)
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CallbackList
from gymnasium.wrappers import GrayScaleObservation, ResizeObservation, TransformReward

# Release cached CUDA memory and run the garbage collector before setting up a run.
torch.cuda.empty_cache()
gc.collect()

# 1-based index of the hyperparameter set in `configs` to use for this run.
CONFIG_ID = 1

# Candidate hyperparameter sets. Keys:
#   lr             - learning rate passed to DQN
#   gamma          - discount factor passed to DQN
#   batch          - minibatch size passed to DQN
#   epsilon_start  - initial exploration epsilon
#   epsilon_end    - final exploration epsilon
#   epsilon_decay  - fraction of total timesteps over which epsilon is annealed
#                    (passed to DQN as `exploration_fraction`)
#   buffer         - replay buffer size
#   img_size       - side length (pixels) observations are resized to
#   stack          - number of frames to stack; frame stacking is applied only when > 1
#   net_arch       - layer sizes passed to the policy via `policy_kwargs`
configs = [
    {"lr": 2e-4, "gamma": 0.999, "batch": 32,
     "epsilon_start": 1.0, "epsilon_end": 0.05, "epsilon_decay": 0.3,
     "buffer": 20_000, "img_size": 64, "stack": 1, "net_arch": [128, 128]},

    {"lr": 1e-4, "gamma": 0.99, "batch": 32,
     "epsilon_start": 1.0, "epsilon_end": 0.02, "epsilon_decay": 0.25,
     "buffer": 20_000, "img_size": 64, "stack": 1, "net_arch": [128, 128]},

    {"lr": 3e-4, "gamma": 0.999, "batch": 64,
     "epsilon_start": 1.0, "epsilon_end": 0.1, "epsilon_decay": 0.2,
     "buffer": 25_000, "img_size": 64, "stack": 1, "net_arch": [128, 128]},

    {"lr": 2e-4, "gamma": 0.95, "batch": 16,
     "epsilon_start": 1.0, "epsilon_end": 0.01, "epsilon_decay": 0.4,
     "buffer": 15_000, "img_size": 64, "stack": 1, "net_arch": [64, 64]},

    {"lr": 1.5e-4, "gamma": 0.995, "batch": 32,
     "epsilon_start": 1.0, "epsilon_end": 0.05, "epsilon_decay": 0.25,
     "buffer": 20_000, "img_size": 64, "stack": 1, "net_arch": [128, 128]},
]

# Fail loudly on an out-of-range id: without this check CONFIG_ID = 0 would
# silently select the *last* config through negative indexing.
if not 1 <= CONFIG_ID <= len(configs):
    raise ValueError(
        f"CONFIG_ID must be between 1 and {len(configs)}, got {CONFIG_ID}"
    )

cfg = configs[CONFIG_ID - 1]

In [None]:
# Banner: show the selected hyperparameters and the GPU this run will use.
print(f" PACMAN DQN TRAINING - CONFIG {CONFIG_ID}")

for k, v in cfg.items():
    if k == "epsilon_decay":
        # Stored as a fraction; shown as a percentage of the training timesteps.
        print(f"{k:18}: {v:.1%} of timesteps")
    else:
        print(f"{k:18}: {v}")

# Query device details only when CUDA is present, so this cell reports the
# missing GPU instead of raising on a CPU-only runtime.
if torch.cuda.is_available():
    print(f"{'GPU':18}: {torch.cuda.get_device_name(0)}")
    print(f"{'VRAM Total':18}: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
else:
    print(f"{'GPU':18}: not available")


 PACMAN DQN TRAINING - CONFIG 1
lr                : 0.0002
gamma             : 0.999
batch             : 32
epsilon_start     : 1.0
epsilon_end       : 0.05
epsilon_decay     : 30.0% of timesteps
buffer            : 20000
img_size          : 64
stack             : 1
net_arch          : [128, 128]
GPU               : Tesla P100-PCIE-16GB
VRAM Total        : 17.1 GB


In [None]:
def make_base_env(render_mode=None):
    """Create one preprocessed "ALE/Pacman-v5" environment.

    The environment is created with ``frameskip=4``, converted to grayscale,
    resized to ``cfg["img_size"]`` x ``cfg["img_size"]``, and every reward is
    clipped to the range [-1, 1].

    Args:
        render_mode: Forwarded unchanged to ``gym.make`` (``None`` by default).

    Returns:
        The wrapped environment.
    """
    env = gym.make("ALE/Pacman-v5", frameskip=4, render_mode=render_mode)
    env = GrayScaleObservation(env)
    env = ResizeObservation(env, (cfg["img_size"], cfg["img_size"]))
    env = TransformReward(env, lambda r: np.clip(r, -1.0, 1.0))
    return env


def _vectorize(env):
    """Wrap ``env`` in a single-env DummyVecEnv, stacking frames when cfg["stack"] > 1."""
    vec_env = DummyVecEnv([lambda: env])
    if cfg["stack"] > 1:
        vec_env = VecFrameStack(vec_env, n_stack=cfg["stack"])
    return vec_env


# Training env: Monitor writes per-episode statistics to the CSV file below.
train_raw = make_base_env()
train_raw = Monitor(train_raw, filename=f"./logs/episode_logs/cfg{CONFIG_ID}.monitor.csv", allow_early_resets=True)
train_env = _vectorize(train_raw)

# Evaluation env: wrapped in Monitor (no log file) so the SB3 evaluation helpers
# read episode statistics from it instead of warning about a missing Monitor.
# Rewards here are still clipped by make_base_env, so evaluation scores are
# clipped returns rather than raw game scores.
eval_raw = Monitor(make_base_env())
eval_env = _vectorize(eval_raw)

A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


In [None]:
# Build the DQN agent from the selected config.
# NOTE(review): the policy is "MlpPolicy" although the observations are resized
# grayscale frames — confirm an MLP (rather than a CNN policy) is intended.
model = DQN(
    "MlpPolicy",
    train_env,
    # Optimisation settings
    learning_rate=cfg["lr"],
    batch_size=cfg["batch"],
    gamma=cfg["gamma"],
    gradient_steps=1,
    train_freq=4,
    # Replay buffer and target network
    buffer_size=cfg["buffer"],
    learning_starts=5_000,
    target_update_interval=1_000,
    # Epsilon schedule
    exploration_initial_eps=cfg["epsilon_start"],
    exploration_final_eps=cfg["epsilon_end"],
    exploration_fraction=cfg["epsilon_decay"],
    # Network architecture, logging and device
    policy_kwargs={"net_arch": cfg["net_arch"]},
    tensorboard_log="./logs/tensorboard/",
    device="cuda",
    verbose=1,
)

# Report how much CUDA memory is allocated right after model construction.
print(f"Initial VRAM Usage: {torch.cuda.memory_allocated() / 1e9:.2f} GB\n")

Using cuda device
Initial VRAM Usage: 0.00 GB



In [None]:
class MemoryCallback(BaseCallback):
    """Training callback that periodically prints CUDA memory statistics.

    Args:
        check_freq: Print a report whenever the callback's call counter is a
            multiple of this value.
    """

    def __init__(self, check_freq=10_000):
        super().__init__()
        self.check_freq = check_freq

    def _on_step(self):
        """Print allocated/reserved/peak CUDA memory on reporting steps; always return True."""
        if self.n_calls % self.check_freq != 0:
            return True

        # All figures are converted from bytes to GB (1e9 bytes).
        alloc = torch.cuda.memory_allocated() / 1e9
        reserved = torch.cuda.memory_reserved() / 1e9
        peak = torch.cuda.max_memory_allocated() / 1e9
        report = (
            f"Step {self.n_calls:,} | VRAM: {alloc:.2f}G alloc | "
            f"{reserved:.2f}G res | {peak:.2f}G peak"
        )
        print(report)
        return True

# Periodic evaluation during training: every 10,000 callback steps run 5
# episodes on eval_env, log the results and keep the best-scoring model.
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=f"./logs/best_models/cfg{CONFIG_ID}/",
    log_path=f"./logs/eval_logs/cfg{CONFIG_ID}/",
    eval_freq=10_000,
    n_eval_episodes=5,
    deterministic=False,
    render=False,
    verbose=1
)

callback = CallbackList([MemoryCallback(check_freq=10_000), eval_callback])

total_timesteps = 1_500_000
print(f"Starting training for {total_timesteps:,} timesteps...")
print(" Logs:")
print(f" • Episode CSV → ./logs/episode_logs/cfg{CONFIG_ID}.monitor.csv")
print(" • TensorBoard → tensorboard --logdir ./logs/tensorboard/")
print(f" • Eval Results → ./logs/eval_logs/cfg{CONFIG_ID}/")
print(f" • Best Model → ./logs/best_models/cfg{CONFIG_ID}/\n")

try:
    model.learn(
        total_timesteps=total_timesteps,
        tb_log_name=f"cfg{CONFIG_ID}_p100",
        log_interval=1,
        callback=callback,
        progress_bar=True,
        reset_num_timesteps=True
    )

    # Save immediately after training so that a failure in the evaluation or
    # rendering steps below cannot lose the trained weights.
    model.save(f"dqn_pacman_p100_cfg{CONFIG_ID}")

    print("\nEvaluating final policy...")
    # eval_env is built by make_base_env, which clips rewards to [-1, 1], so the
    # reported mean is a clipped return rather than a raw game score.
    mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=15, deterministic=False)

    peak_vram = torch.cuda.max_memory_allocated() / 1e9

    print(" TRAINING COMPLETE")
    print(f" Config ID : {CONFIG_ID}")
    print(f" Mean Reward : {mean_reward:.1f} ± {std_reward:.1f}")
    print(f" Peak VRAM : {peak_vram:.2f} GB")
    print(f" Model Saved : dqn_pacman_p100_cfg{CONFIG_ID}.zip")
    print(" TensorBoard : tensorboard --logdir ./logs/tensorboard/")

    print("\nStarting LIVE RENDER – Close window to stop...")
    render_env = make_base_env(render_mode="human")
    try:
        obs, _ = render_env.reset()
        for _ in range(2000):
            action, _ = model.predict(obs, deterministic=False)
            # predict() returns the action as a NumPy value; hand the
            # environment a plain Python int.
            obs, reward, terminated, truncated, info = render_env.step(int(action))
            render_env.render()
            if terminated or truncated:
                obs, _ = render_env.reset()
    finally:
        # Close the render env even if stepping/rendering raised.
        render_env.close()
finally:
    # Always release GPU memory and close the environments, including when
    # training, evaluation or rendering raised (the error still propagates).
    torch.cuda.empty_cache()
    gc.collect()
    train_env.close()
    eval_env.close()
    print("\nCleanup complete. Ready for next run!\n")

Starting training for 1,500,000 timesteps...
 Logs:
 • Episode CSV → ./logs/episode_logs/cfg1.monitor.csv
 • TensorBoard → tensorboard --logdir ./logs/tensorboard/
 • Eval Results → ./logs/eval_logs/cfg1/
 • Best Model → ./logs/best_models/cfg1/

Logging to ./logs/tensorboard/cfg1_p100_2


Output()

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 300      |
|    ep_rew_mean      | 7        |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes         | 1        |
|    fps              | 685      |
|    time_elapsed     | 0        |
|    total_timesteps  | 300      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 449      |
|    ep_rew_mean      | 17.5     |
|    exploration_rate | 0.998    |
| time/               |          |
|    episodes         | 2        |
|    fps              | 879      |
|    time_elapsed     | 1        |
|    total_timesteps  | 898      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 411      |
|    ep_rew_mean      | 15.7     |
|    exploration_rate | 0.997    |
| time/               |          |
|    episodes       

----------------------------------
| eval/               |          |
|    mean_ep_length   | 449      |
|    mean_reward      | 18.8     |
| rollout/            |          |
|    exploration_rate | 0.979    |
| time/               |          |
|    total_timesteps  | 10000    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.172    |
|    n_updates        | 1249     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 427      |
|    ep_rew_mean      | 14.6     |
|    exploration_rate | 0.978    |
| time/               |          |
|    episodes         | 24       |
|    fps              | 628      |
|    time_elapsed     | 16       |
|    total_timesteps  | 10254    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.115    |
|    n_updates        | 1313     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 437      |
|    ep_rew_mean      | 14.8     |
|    exploration_rate | 0.977    |
| time/               |          |
|    episodes         | 25       |
|    fps              | 627      |
|    time_elapsed     | 17       |
|    total_timesteps  | 10928    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.029    |
|    n_updates      

----------------------------------
| eval/               |          |
|    mean_ep_length   | 442      |
|    mean_reward      | 15       |
| rollout/            |          |
|    exploration_rate | 0.958    |
| time/               |          |
|    total_timesteps  | 20000    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0385   |
|    n_updates        | 3749     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 430      |
|    ep_rew_mean      | 14.1     |
|    exploration_rate | 0.957    |
| time/               |          |
|    episodes         | 47       |
|    fps              | 578      |
|    time_elapsed     | 34       |
|    total_timesteps  | 20226    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0154   |
|    n_updates        | 3806     |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 471      |
|    mean_reward      | 15.2     |
| rollout/            |          |
|    exploration_rate | 0.937    |
| time/               |          |
|    total_timesteps  | 30000    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0435   |
|    n_updates        | 6249     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 437      |
|    ep_rew_mean      | 14.5     |
|    exploration_rate | 0.936    |
| time/               |          |
|    episodes         | 69       |
|    fps              | 561      |
|    time_elapsed     | 53       |
|    total_timesteps  | 30120    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0149   |
|    n_updates        | 6279     |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 413      |
|    mean_reward      | 14.2     |
| rollout/            |          |
|    exploration_rate | 0.916    |
| time/               |          |
|    total_timesteps  | 40000    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.00852  |
|    n_updates        | 8749     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 431      |
|    ep_rew_mean      | 14.2     |
|    exploration_rate | 0.915    |
| time/               |          |
|    episodes         | 93       |
|    fps              | 552      |
|    time_elapsed     | 72       |
|    total_timesteps  | 40064    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0184   |
|    n_updates        | 8765     |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 417      |
|    mean_reward      | 14.4     |
| rollout/            |          |
|    exploration_rate | 0.894    |
| time/               |          |
|    total_timesteps  | 50000    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0731   |
|    n_updates        | 11249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 433      |
|    ep_rew_mean      | 14.6     |
|    exploration_rate | 0.894    |
| time/               |          |
|    episodes         | 116      |
|    fps              | 547      |
|    time_elapsed     | 91       |
|    total_timesteps  | 50099    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0284   |
|    n_updates        | 11274    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 384      |
|    mean_reward      | 14       |
| rollout/            |          |
|    exploration_rate | 0.873    |
| time/               |          |
|    total_timesteps  | 60000    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0301   |
|    n_updates        | 13749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 430      |
|    ep_rew_mean      | 14.9     |
|    exploration_rate | 0.873    |
| time/               |          |
|    episodes         | 140      |
|    fps              | 545      |
|    time_elapsed     | 110      |
|    total_timesteps  | 60083    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0543   |
|    n_updates        | 13770    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 413      |
|    mean_reward      | 15.6     |
| rollout/            |          |
|    exploration_rate | 0.852    |
| time/               |          |
|    total_timesteps  | 70000    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0206   |
|    n_updates        | 16249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 423      |
|    ep_rew_mean      | 14.7     |
|    exploration_rate | 0.852    |
| time/               |          |
|    episodes         | 164      |
|    fps              | 540      |
|    time_elapsed     | 129      |
|    total_timesteps  | 70193    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0346   |
|    n_updates        | 16298    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 425      |
|    mean_reward      | 13.4     |
| rollout/            |          |
|    exploration_rate | 0.831    |
| time/               |          |
|    total_timesteps  | 80000    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0318   |
|    n_updates        | 18749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 428      |
|    ep_rew_mean      | 15       |
|    exploration_rate | 0.83     |
| time/               |          |
|    episodes         | 187      |
|    fps              | 534      |
|    time_elapsed     | 150      |
|    total_timesteps  | 80365    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0289   |
|    n_updates        | 18841    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 401      |
|    mean_reward      | 12.8     |
| rollout/            |          |
|    exploration_rate | 0.81     |
| time/               |          |
|    total_timesteps  | 90000    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0328   |
|    n_updates        | 21249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 422      |
|    ep_rew_mean      | 14.6     |
|    exploration_rate | 0.809    |
| time/               |          |
|    episodes         | 211      |
|    fps              | 531      |
|    time_elapsed     | 169      |
|    total_timesteps  | 90287    |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0421   |
|    n_updates        | 21321    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 483      |
|    mean_reward      | 19.8     |
| rollout/            |          |
|    exploration_rate | 0.789    |
| time/               |          |
|    total_timesteps  | 100000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0148   |
|    n_updates        | 23749    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 426      |
|    ep_rew_mean      | 14.6     |
|    exploration_rate | 0.788    |
| time/               |          |
|    episodes         | 234      |
|    fps              | 527      |
|    time_elapsed     | 190      |
|    total_timesteps  | 100281   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.00744  |
|    n_updates        | 23820    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 426      |
|    ep_rew_mean      | 14.7     |
|    exploration_rate | 0.787    |
| time/               |          |
|    episodes         | 235      |
|    fps              | 527      |
|    time_elapsed     | 190      |
|    total_timesteps  | 100737   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0231   |
|    n_updates      

----------------------------------
| eval/               |          |
|    mean_ep_length   | 441      |
|    mean_reward      | 15.8     |
| rollout/            |          |
|    exploration_rate | 0.768    |
| time/               |          |
|    total_timesteps  | 110000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0448   |
|    n_updates        | 26249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 462      |
|    ep_rew_mean      | 15.8     |
|    exploration_rate | 0.767    |
| time/               |          |
|    episodes         | 250      |
|    fps              | 525      |
|    time_elapsed     | 209      |
|    total_timesteps  | 110345   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0463   |
|    n_updates        | 26336    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 373      |
|    mean_reward      | 14.8     |
| rollout/            |          |
|    exploration_rate | 0.747    |
| time/               |          |
|    total_timesteps  | 120000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0512   |
|    n_updates        | 28749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 464      |
|    ep_rew_mean      | 16.4     |
|    exploration_rate | 0.747    |
| time/               |          |
|    episodes         | 272      |
|    fps              | 524      |
|    time_elapsed     | 228      |
|    total_timesteps  | 120063   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0808   |
|    n_updates        | 28765    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 613      |
|    mean_reward      | 21.8     |
| rollout/            |          |
|    exploration_rate | 0.726    |
| time/               |          |
|    total_timesteps  | 130000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0162   |
|    n_updates        | 31249    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 471      |
|    ep_rew_mean      | 17.4     |
|    exploration_rate | 0.726    |
| time/               |          |
|    episodes         | 293      |
|    fps              | 520      |
|    time_elapsed     | 249      |
|    total_timesteps  | 130021   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0637   |
|    n_updates        | 31255    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 472      |
|    ep_rew_mean      | 17.5     |
|    exploration_rate | 0.725    |
| time/               |          |
|    episodes         | 294      |
|    fps              | 520      |
|    time_elapsed     | 250      |
|    total_timesteps  | 130439   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0195   |
|    n_updates      

----------------------------------
| eval/               |          |
|    mean_ep_length   | 453      |
|    mean_reward      | 14.4     |
| rollout/            |          |
|    exploration_rate | 0.704    |
| time/               |          |
|    total_timesteps  | 140000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0266   |
|    n_updates        | 33749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 480      |
|    ep_rew_mean      | 18.2     |
|    exploration_rate | 0.703    |
| time/               |          |
|    episodes         | 316      |
|    fps              | 519      |
|    time_elapsed     | 270      |
|    total_timesteps  | 140605   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0131   |
|    n_updates        | 33901    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 445      |
|    mean_reward      | 17.4     |
| rollout/            |          |
|    exploration_rate | 0.683    |
| time/               |          |
|    total_timesteps  | 150000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0933   |
|    n_updates        | 36249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 485      |
|    ep_rew_mean      | 18.7     |
|    exploration_rate | 0.683    |
| time/               |          |
|    episodes         | 337      |
|    fps              | 517      |
|    time_elapsed     | 290      |
|    total_timesteps  | 150129   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0979   |
|    n_updates        | 36282    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 403      |
|    mean_reward      | 13.8     |
| rollout/            |          |
|    exploration_rate | 0.662    |
| time/               |          |
|    total_timesteps  | 160000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0627   |
|    n_updates        | 38749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 453      |
|    ep_rew_mean      | 17.7     |
|    exploration_rate | 0.662    |
| time/               |          |
|    episodes         | 360      |
|    fps              | 515      |
|    time_elapsed     | 310      |
|    total_timesteps  | 160291   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.988    |
|    n_updates        | 38822    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 357      |
|    mean_reward      | 10.6     |
| rollout/            |          |
|    exploration_rate | 0.641    |
| time/               |          |
|    total_timesteps  | 170000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0603   |
|    n_updates        | 41249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 451      |
|    ep_rew_mean      | 17.1     |
|    exploration_rate | 0.641    |
| time/               |          |
|    episodes         | 383      |
|    fps              | 514      |
|    time_elapsed     | 331      |
|    total_timesteps  | 170247   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0692   |
|    n_updates        | 41311    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 507      |
|    mean_reward      | 15.4     |
| rollout/            |          |
|    exploration_rate | 0.62     |
| time/               |          |
|    total_timesteps  | 180000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0776   |
|    n_updates        | 43749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 447      |
|    ep_rew_mean      | 16.9     |
|    exploration_rate | 0.62     |
| time/               |          |
|    episodes         | 405      |
|    fps              | 511      |
|    time_elapsed     | 351      |
|    total_timesteps  | 180073   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0522   |
|    n_updates        | 43768    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 470      |
|    mean_reward      | 19.4     |
| rollout/            |          |
|    exploration_rate | 0.599    |
| time/               |          |
|    total_timesteps  | 190000   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 451      |
|    ep_rew_mean      | 17.2     |
|    exploration_rate | 0.598    |
| time/               |          |
|    episodes         | 426      |
|    fps              | 510      |
|    time_elapsed     | 373      |
|    total_timesteps  | 190336   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0698   |
|    n_updates        | 46333    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 452      |
|    ep_rew_mean      | 17.3     |
|    exploration_rat

----------------------------------
| eval/               |          |
|    mean_ep_length   | 536      |
|    mean_reward      | 19.2     |
| rollout/            |          |
|    exploration_rate | 0.578    |
| time/               |          |
|    total_timesteps  | 200000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0401   |
|    n_updates        | 48749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 445      |
|    ep_rew_mean      | 17.3     |
|    exploration_rate | 0.577    |
| time/               |          |
|    episodes         | 449      |
|    fps              | 507      |
|    time_elapsed     | 394      |
|    total_timesteps  | 200186   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0878   |
|    n_updates        | 48796    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 863      |
|    mean_reward      | 23.4     |
| rollout/            |          |
|    exploration_rate | 0.557    |
| time/               |          |
|    total_timesteps  | 210000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0937   |
|    n_updates        | 51249    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 450      |
|    ep_rew_mean      | 18.1     |
|    exploration_rate | 0.556    |
| time/               |          |
|    episodes         | 471      |
|    fps              | 503      |
|    time_elapsed     | 417      |
|    total_timesteps  | 210098   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0584   |
|    n_updates        | 51274    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 450      |
|    ep_rew_mean      | 18.1     |
|    exploration_rate | 0.556    |
| time/               |          |
|    episodes         | 472      |
|    fps              | 503      |
|    time_elapsed     | 417      |
|    total_timesteps  | 210492   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0517   |
|    n_updates      

----------------------------------
| eval/               |          |
|    mean_ep_length   | 6e+03    |
|    mean_reward      | 24.6     |
| rollout/            |          |
|    exploration_rate | 0.536    |
| time/               |          |
|    total_timesteps  | 220000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0295   |
|    n_updates        | 53749    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 460      |
|    ep_rew_mean      | 18.9     |
|    exploration_rate | 0.535    |
| time/               |          |
|    episodes         | 492      |
|    fps              | 469      |
|    time_elapsed     | 469      |
|    total_timesteps  | 220416   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.022    |
|    n_updates        | 53853    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 462      |
|    ep_rew_mean      | 19.1     |
|    exploration_rate | 0.533    |
| time/               |          |
|    episodes         | 493      |
|    fps              | 470      |
|    time_elapsed     | 470      |
|    total_timesteps  | 221024   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.167    |
|    n_updates      

----------------------------------
| eval/               |          |
|    mean_ep_length   | 471      |
|    mean_reward      | 23.8     |
| rollout/            |          |
|    exploration_rate | 0.514    |
| time/               |          |
|    total_timesteps  | 230000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 1.34     |
|    n_updates        | 56249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 464      |
|    ep_rew_mean      | 19.7     |
|    exploration_rate | 0.513    |
| time/               |          |
|    episodes         | 513      |
|    fps              | 469      |
|    time_elapsed     | 490      |
|    total_timesteps  | 230520   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0572   |
|    n_updates        | 56379    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 386      |
|    mean_reward      | 12.2     |
| rollout/            |          |
|    exploration_rate | 0.493    |
| time/               |          |
|    total_timesteps  | 240000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0312   |
|    n_updates        | 58749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 459      |
|    ep_rew_mean      | 19.2     |
|    exploration_rate | 0.492    |
| time/               |          |
|    episodes         | 535      |
|    fps              | 469      |
|    time_elapsed     | 511      |
|    total_timesteps  | 240412   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0606   |
|    n_updates        | 58852    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 555      |
|    mean_reward      | 23.6     |
| rollout/            |          |
|    exploration_rate | 0.472    |
| time/               |          |
|    total_timesteps  | 250000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0312   |
|    n_updates        | 61249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 459      |
|    ep_rew_mean      | 19.4     |
|    exploration_rate | 0.472    |
| time/               |          |
|    episodes         | 558      |
|    fps              | 469      |
|    time_elapsed     | 533      |
|    total_timesteps  | 250130   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0411   |
|    n_updates        | 61282    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 741      |
|    mean_reward      | 19       |
| rollout/            |          |
|    exploration_rate | 0.451    |
| time/               |          |
|    total_timesteps  | 260000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0358   |
|    n_updates        | 63749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 467      |
|    ep_rew_mean      | 20.3     |
|    exploration_rate | 0.45     |
| time/               |          |
|    episodes         | 580      |
|    fps              | 467      |
|    time_elapsed     | 556      |
|    total_timesteps  | 260302   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0192   |
|    n_updates        | 63825    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 528      |
|    mean_reward      | 22       |
| rollout/            |          |
|    exploration_rate | 0.43     |
| time/               |          |
|    total_timesteps  | 270000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0543   |
|    n_updates        | 66249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 447      |
|    ep_rew_mean      | 19.8     |
|    exploration_rate | 0.43     |
| time/               |          |
|    episodes         | 602      |
|    fps              | 466      |
|    time_elapsed     | 578      |
|    total_timesteps  | 270070   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0933   |
|    n_updates        | 66267    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 548      |
|    mean_reward      | 21.2     |
| rollout/            |          |
|    exploration_rate | 0.409    |
| time/               |          |
|    total_timesteps  | 280000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0489   |
|    n_updates        | 68749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 446      |
|    ep_rew_mean      | 19.9     |
|    exploration_rate | 0.408    |
| time/               |          |
|    episodes         | 625      |
|    fps              | 466      |
|    time_elapsed     | 601      |
|    total_timesteps  | 280367   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0435   |
|    n_updates        | 68841    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 739      |
|    mean_reward      | 18.2     |
| rollout/            |          |
|    exploration_rate | 0.388    |
| time/               |          |
|    total_timesteps  | 290000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0579   |
|    n_updates        | 71249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 447      |
|    ep_rew_mean      | 20.4     |
|    exploration_rate | 0.387    |
| time/               |          |
|    episodes         | 647      |
|    fps              | 464      |
|    time_elapsed     | 624      |
|    total_timesteps  | 290229   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0585   |
|    n_updates        | 71307    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 380      |
|    mean_reward      | 11.8     |
| rollout/            |          |
|    exploration_rate | 0.367    |
| time/               |          |
|    total_timesteps  | 300000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.109    |
|    n_updates        | 73749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 456      |
|    ep_rew_mean      | 20.5     |
|    exploration_rate | 0.367    |
| time/               |          |
|    episodes         | 667      |
|    fps              | 464      |
|    time_elapsed     | 646      |
|    total_timesteps  | 300073   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0304   |
|    n_updates        | 73768    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 5.85e+03 |
|    mean_reward      | 18.4     |
| rollout/            |          |
|    exploration_rate | 0.346    |
| time/               |          |
|    total_timesteps  | 310000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0384   |
|    n_updates        | 76249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 455      |
|    ep_rew_mean      | 20.7     |
|    exploration_rate | 0.345    |
| time/               |          |
|    episodes         | 690      |
|    fps              | 442      |
|    time_elapsed     | 701      |
|    total_timesteps  | 310350   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0325   |
|    n_updates        | 76337    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 430      |
|    mean_reward      | 16.4     |
| rollout/            |          |
|    exploration_rate | 0.324    |
| time/               |          |
|    total_timesteps  | 320000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.249    |
|    n_updates        | 78749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 459      |
|    ep_rew_mean      | 21       |
|    exploration_rate | 0.324    |
| time/               |          |
|    episodes         | 711      |
|    fps              | 442      |
|    time_elapsed     | 723      |
|    total_timesteps  | 320070   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0713   |
|    n_updates        | 78767    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 706      |
|    mean_reward      | 19.6     |
| rollout/            |          |
|    exploration_rate | 0.303    |
| time/               |          |
|    total_timesteps  | 330000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.166    |
|    n_updates        | 81249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 455      |
|    ep_rew_mean      | 21.3     |
|    exploration_rate | 0.303    |
| time/               |          |
|    episodes         | 734      |
|    fps              | 441      |
|    time_elapsed     | 747      |
|    total_timesteps  | 330158   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0507   |
|    n_updates        | 81289    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 415      |
|    mean_reward      | 18.4     |
| rollout/            |          |
|    exploration_rate | 0.282    |
| time/               |          |
|    total_timesteps  | 340000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0148   |
|    n_updates        | 83749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 463      |
|    ep_rew_mean      | 21.6     |
|    exploration_rate | 0.282    |
| time/               |          |
|    episodes         | 756      |
|    fps              | 441      |
|    time_elapsed     | 769      |
|    total_timesteps  | 340170   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0777   |
|    n_updates        | 83792    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 416      |
|    mean_reward      | 13.2     |
| rollout/            |          |
|    exploration_rate | 0.261    |
| time/               |          |
|    total_timesteps  | 350000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.199    |
|    n_updates        | 86249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 450      |
|    ep_rew_mean      | 20.8     |
|    exploration_rate | 0.26     |
| time/               |          |
|    episodes         | 779      |
|    fps              | 441      |
|    time_elapsed     | 792      |
|    total_timesteps  | 350338   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0811   |
|    n_updates        | 86334    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 391      |
|    mean_reward      | 10.8     |
| rollout/            |          |
|    exploration_rate | 0.24     |
| time/               |          |
|    total_timesteps  | 360000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0479   |
|    n_updates        | 88749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 442      |
|    ep_rew_mean      | 20.1     |
|    exploration_rate | 0.24     |
| time/               |          |
|    episodes         | 802      |
|    fps              | 442      |
|    time_elapsed     | 814      |
|    total_timesteps  | 360210   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.105    |
|    n_updates        | 88802    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 591      |
|    mean_reward      | 17.8     |
| rollout/            |          |
|    exploration_rate | 0.219    |
| time/               |          |
|    total_timesteps  | 370000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 1.88     |
|    n_updates        | 91249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 437      |
|    ep_rew_mean      | 19.9     |
|    exploration_rate | 0.218    |
| time/               |          |
|    episodes         | 825      |
|    fps              | 441      |
|    time_elapsed     | 838      |
|    total_timesteps  | 370200   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0405   |
|    n_updates        | 91299    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 528      |
|    mean_reward      | 17.6     |
| rollout/            |          |
|    exploration_rate | 0.198    |
| time/               |          |
|    total_timesteps  | 380000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.272    |
|    n_updates        | 93749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 439      |
|    ep_rew_mean      | 19.9     |
|    exploration_rate | 0.198    |
| time/               |          |
|    episodes         | 847      |
|    fps              | 440      |
|    time_elapsed     | 862      |
|    total_timesteps  | 380047   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0546   |
|    n_updates        | 93761    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 494      |
|    mean_reward      | 15.8     |
| rollout/            |          |
|    exploration_rate | 0.177    |
| time/               |          |
|    total_timesteps  | 390000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.264    |
|    n_updates        | 96249    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 442      |
|    ep_rew_mean      | 19.9     |
|    exploration_rate | 0.176    |
| time/               |          |
|    episodes         | 870      |
|    fps              | 440      |
|    time_elapsed     | 886      |
|    total_timesteps  | 390237   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 1.87     |
|    n_updates        | 96309    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 535      |
|    mean_reward      | 14.2     |
| rollout/            |          |
|    exploration_rate | 0.156    |
| time/               |          |
|    total_timesteps  | 400000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.104    |
|    n_updates        | 98749    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 435      |
|    ep_rew_mean      | 19.4     |
|    exploration_rate | 0.155    |
| time/               |          |
|    episodes         | 894      |
|    fps              | 439      |
|    time_elapsed     | 910      |
|    total_timesteps  | 400221   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.449    |
|    n_updates        | 98805    |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 477      |
|    mean_reward      | 16.8     |
| rollout/            |          |
|    exploration_rate | 0.134    |
| time/               |          |
|    total_timesteps  | 410000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 3.99     |
|    n_updates        | 101249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 439      |
|    ep_rew_mean      | 19.5     |
|    exploration_rate | 0.134    |
| time/               |          |
|    episodes         | 916      |
|    fps              | 439      |
|    time_elapsed     | 933      |
|    total_timesteps  | 410157   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.14     |
|    n_updates        | 101289   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 364      |
|    mean_reward      | 9.6      |
| rollout/            |          |
|    exploration_rate | 0.113    |
| time/               |          |
|    total_timesteps  | 420000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0676   |
|    n_updates        | 103749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 438      |
|    ep_rew_mean      | 19.2     |
|    exploration_rate | 0.113    |
| time/               |          |
|    episodes         | 939      |
|    fps              | 438      |
|    time_elapsed     | 956      |
|    total_timesteps  | 420033   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0497   |
|    n_updates        | 103758   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 465      |
|    mean_reward      | 13.4     |
| rollout/            |          |
|    exploration_rate | 0.0922   |
| time/               |          |
|    total_timesteps  | 430000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0299   |
|    n_updates        | 106249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 444      |
|    ep_rew_mean      | 19.9     |
|    exploration_rate | 0.0919   |
| time/               |          |
|    episodes         | 960      |
|    fps              | 438      |
|    time_elapsed     | 981      |
|    total_timesteps  | 430153   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0715   |
|    n_updates        | 106288   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 478      |
|    mean_reward      | 12.6     |
| rollout/            |          |
|    exploration_rate | 0.0711   |
| time/               |          |
|    total_timesteps  | 440000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 2.05     |
|    n_updates        | 108749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 444      |
|    ep_rew_mean      | 19.4     |
|    exploration_rate | 0.0709   |
| time/               |          |
|    episodes         | 983      |
|    fps              | 437      |
|    time_elapsed     | 1005     |
|    total_timesteps  | 440097   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 2.32     |
|    n_updates        | 108774   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 408      |
|    mean_reward      | 5        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 450000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.141    |
|    n_updates        | 111249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 448      |
|    ep_rew_mean      | 19.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1006     |
|    fps              | 437      |
|    time_elapsed     | 1028     |
|    total_timesteps  | 450025   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0363   |
|    n_updates        | 111256   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 466      |
|    mean_reward      | 9.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 460000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.059    |
|    n_updates        | 113749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 435      |
|    ep_rew_mean      | 18       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1030     |
|    fps              | 436      |
|    time_elapsed     | 1053     |
|    total_timesteps  | 460033   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.249    |
|    n_updates        | 113758   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 329      |
|    mean_reward      | 5.4      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 470000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0391   |
|    n_updates        | 116249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 428      |
|    ep_rew_mean      | 17       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1054     |
|    fps              | 436      |
|    time_elapsed     | 1077     |
|    total_timesteps  | 470433   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0268   |
|    n_updates        | 116358   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 441      |
|    mean_reward      | 10.2     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 480000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 2.41     |
|    n_updates        | 118749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 425      |
|    ep_rew_mean      | 16.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1078     |
|    fps              | 436      |
|    time_elapsed     | 1101     |
|    total_timesteps  | 480313   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0285   |
|    n_updates        | 118828   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 506      |
|    mean_reward      | 12.2     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 490000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.422    |
|    n_updates        | 121249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 414      |
|    ep_rew_mean      | 15.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1103     |
|    fps              | 435      |
|    time_elapsed     | 1125     |
|    total_timesteps  | 490099   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0593   |
|    n_updates        | 121274   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 422      |
|    mean_reward      | 9        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 500000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.136    |
|    n_updates        | 123749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 420      |
|    ep_rew_mean      | 15.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1126     |
|    fps              | 435      |
|    time_elapsed     | 1149     |
|    total_timesteps  | 500182   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0174   |
|    n_updates        | 123795   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 436      |
|    mean_reward      | 11.6     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 510000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.215    |
|    n_updates        | 126249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 411      |
|    ep_rew_mean      | 14.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1151     |
|    fps              | 434      |
|    time_elapsed     | 1173     |
|    total_timesteps  | 510282   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0771   |
|    n_updates        | 126320   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 490      |
|    mean_reward      | 8.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 520000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0701   |
|    n_updates        | 128749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 421      |
|    ep_rew_mean      | 14.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1173     |
|    fps              | 434      |
|    time_elapsed     | 1197     |
|    total_timesteps  | 520240   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0245   |
|    n_updates        | 128809   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 436      |
|    mean_reward      | 6.4      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 530000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0274   |
|    n_updates        | 131249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 429      |
|    ep_rew_mean      | 15.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1196     |
|    fps              | 433      |
|    time_elapsed     | 1221     |
|    total_timesteps  | 530338   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.357    |
|    n_updates        | 131334   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 506      |
|    mean_reward      | 8        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 540000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.219    |
|    n_updates        | 133749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 435      |
|    ep_rew_mean      | 16       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1218     |
|    fps              | 433      |
|    time_elapsed     | 1245     |
|    total_timesteps  | 540076   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.15     |
|    n_updates        | 133768   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 462      |
|    mean_reward      | 6.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 550000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.358    |
|    n_updates        | 136249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 435      |
|    ep_rew_mean      | 15.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1242     |
|    fps              | 433      |
|    time_elapsed     | 1269     |
|    total_timesteps  | 550028   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 2.29     |
|    n_updates        | 136256   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 404      |
|    mean_reward      | 5.4      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 560000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0719   |
|    n_updates        | 138749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 429      |
|    ep_rew_mean      | 15.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1266     |
|    fps              | 432      |
|    time_elapsed     | 1293     |
|    total_timesteps  | 560142   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0123   |
|    n_updates        | 138785   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 496      |
|    mean_reward      | 9.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 570000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.105    |
|    n_updates        | 141249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 425      |
|    ep_rew_mean      | 15.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1290     |
|    fps              | 432      |
|    time_elapsed     | 1318     |
|    total_timesteps  | 570186   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 2.33     |
|    n_updates        | 141296   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 416      |
|    mean_reward      | 4.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 580000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.195    |
|    n_updates        | 143749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 419      |
|    ep_rew_mean      | 14.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1314     |
|    fps              | 432      |
|    time_elapsed     | 1342     |
|    total_timesteps  | 580334   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.1      |
|    n_updates        | 143833   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 487      |
|    mean_reward      | 7.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 590000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0911   |
|    n_updates        | 146249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 418      |
|    ep_rew_mean      | 14.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1338     |
|    fps              | 431      |
|    time_elapsed     | 1366     |
|    total_timesteps  | 590272   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.101    |
|    n_updates        | 146317   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 367      |
|    mean_reward      | 8        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 600000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.186    |
|    n_updates        | 148749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 419      |
|    ep_rew_mean      | 15       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1362     |
|    fps              | 431      |
|    time_elapsed     | 1390     |
|    total_timesteps  | 600352   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.101    |
|    n_updates        | 148837   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 420      |
|    mean_reward      | 4.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 610000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.131    |
|    n_updates        | 151249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 416      |
|    ep_rew_mean      | 14.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1386     |
|    fps              | 431      |
|    time_elapsed     | 1414     |
|    total_timesteps  | 610120   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0978   |
|    n_updates        | 151279   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 574      |
|    mean_reward      | 12.8     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 620000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.105    |
|    n_updates        | 153749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 417      |
|    ep_rew_mean      | 15.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1410     |
|    fps              | 430      |
|    time_elapsed     | 1440     |
|    total_timesteps  | 620214   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.143    |
|    n_updates        | 153803   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 430      |
|    mean_reward      | 8        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 630000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0444   |
|    n_updates        | 156249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 419      |
|    ep_rew_mean      | 15.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1433     |
|    fps              | 430      |
|    time_elapsed     | 1464     |
|    total_timesteps  | 630290   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0649   |
|    n_updates        | 156322   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 398      |
|    mean_reward      | 7.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 640000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.104    |
|    n_updates        | 158749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 420      |
|    ep_rew_mean      | 14.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1457     |
|    fps              | 430      |
|    time_elapsed     | 1488     |
|    total_timesteps  | 640230   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.191    |
|    n_updates        | 158807   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 500      |
|    mean_reward      | 14.8     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 650000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 2.47     |
|    n_updates        | 161249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 427      |
|    ep_rew_mean      | 15.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1480     |
|    fps              | 429      |
|    time_elapsed     | 1513     |
|    total_timesteps  | 650439   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0794   |
|    n_updates        | 161359   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 488      |
|    mean_reward      | 9.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 660000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.109    |
|    n_updates        | 163749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 427      |
|    ep_rew_mean      | 14.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1503     |
|    fps              | 429      |
|    time_elapsed     | 1537     |
|    total_timesteps  | 660205   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.106    |
|    n_updates        | 163801   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 432      |
|    mean_reward      | 4        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 670000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0826   |
|    n_updates        | 166249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 422      |
|    ep_rew_mean      | 14       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1527     |
|    fps              | 429      |
|    time_elapsed     | 1561     |
|    total_timesteps  | 670073   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0486   |
|    n_updates        | 166268   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 324      |
|    mean_reward      | 8        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 680000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.243    |
|    n_updates        | 168749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 430      |
|    ep_rew_mean      | 15       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1550     |
|    fps              | 429      |
|    time_elapsed     | 1585     |
|    total_timesteps  | 680255   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 2.55     |
|    n_updates        | 168813   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 470      |
|    mean_reward      | 8.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 690000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.427    |
|    n_updates        | 171249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 426      |
|    ep_rew_mean      | 14.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1573     |
|    fps              | 428      |
|    time_elapsed     | 1609     |
|    total_timesteps  | 690115   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.029    |
|    n_updates        | 171278   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 432      |
|    mean_reward      | 9.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 700000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0491   |
|    n_updates        | 173749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 438      |
|    ep_rew_mean      | 15.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1595     |
|    fps              | 428      |
|    time_elapsed     | 1634     |
|    total_timesteps  | 700333   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.194    |
|    n_updates        | 173833   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 520      |
|    mean_reward      | 13       |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 710000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.031    |
|    n_updates        | 176249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 433      |
|    ep_rew_mean      | 15.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1619     |
|    fps              | 428      |
|    time_elapsed     | 1659     |
|    total_timesteps  | 710339   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.232    |
|    n_updates        | 176334   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 439      |
|    mean_reward      | 9.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 720000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 2.51     |
|    n_updates        | 178749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 438      |
|    ep_rew_mean      | 15.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1642     |
|    fps              | 427      |
|    time_elapsed     | 1682     |
|    total_timesteps  | 720109   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.173    |
|    n_updates        | 178777   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 409      |
|    mean_reward      | 7.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 730000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0766   |
|    n_updates        | 181249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 434      |
|    ep_rew_mean      | 15.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1665     |
|    fps              | 427      |
|    time_elapsed     | 1707     |
|    total_timesteps  | 730083   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0165   |
|    n_updates        | 181270   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 330      |
|    mean_reward      | 6.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 740000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.387    |
|    n_updates        | 183749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 425      |
|    ep_rew_mean      | 14.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1689     |
|    fps              | 427      |
|    time_elapsed     | 1730     |
|    total_timesteps  | 740057   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0842   |
|    n_updates        | 183764   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 446      |
|    mean_reward      | 7.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 750000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0429   |
|    n_updates        | 186249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 425      |
|    ep_rew_mean      | 14.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1713     |
|    fps              | 427      |
|    time_elapsed     | 1755     |
|    total_timesteps  | 750281   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.061    |
|    n_updates        | 186320   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 417      |
|    mean_reward      | 7.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 760000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0354   |
|    n_updates        | 188749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 423      |
|    ep_rew_mean      | 14.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1737     |
|    fps              | 427      |
|    time_elapsed     | 1779     |
|    total_timesteps  | 760301   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.462    |
|    n_updates        | 188825   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 475      |
|    mean_reward      | 9.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 770000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.264    |
|    n_updates        | 191249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 414      |
|    ep_rew_mean      | 14.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1761     |
|    fps              | 426      |
|    time_elapsed     | 1803     |
|    total_timesteps  | 770011   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.338    |
|    n_updates        | 191252   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 456      |
|    mean_reward      | 8.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 780000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.104    |
|    n_updates        | 193749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 425      |
|    ep_rew_mean      | 15.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1785     |
|    fps              | 426      |
|    time_elapsed     | 1828     |
|    total_timesteps  | 780235   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0583   |
|    n_updates        | 193808   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 421      |
|    mean_reward      | 4.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 790000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0863   |
|    n_updates        | 196249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 422      |
|    ep_rew_mean      | 15.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1808     |
|    fps              | 426      |
|    time_elapsed     | 1852     |
|    total_timesteps  | 790137   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0975   |
|    n_updates        | 196284   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 436      |
|    mean_reward      | 12.8     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 800000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0785   |
|    n_updates        | 198749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 420      |
|    ep_rew_mean      | 15.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1832     |
|    fps              | 426      |
|    time_elapsed     | 1877     |
|    total_timesteps  | 800323   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0843   |
|    n_updates        | 198830   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 454      |
|    mean_reward      | 8.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 810000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0627   |
|    n_updates        | 201249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 434      |
|    ep_rew_mean      | 15.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1855     |
|    fps              | 426      |
|    time_elapsed     | 1903     |
|    total_timesteps  | 811035   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.152    |
|    n_updates        | 201508   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 327      |
|    mean_reward      | 5.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 820000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.111    |
|    n_updates        | 203749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 431      |
|    ep_rew_mean      | 14.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1878     |
|    fps              | 426      |
|    time_elapsed     | 1924     |
|    total_timesteps  | 820309   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0699   |
|    n_updates        | 203827   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 452      |
|    mean_reward      | 9.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 830000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0671   |
|    n_updates        | 206249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 440      |
|    ep_rew_mean      | 15.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1900     |
|    fps              | 425      |
|    time_elapsed     | 1949     |
|    total_timesteps  | 830457   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0886   |
|    n_updates        | 206364   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 326      |
|    mean_reward      | 7.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 840000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.399    |
|    n_updates        | 208749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 446      |
|    ep_rew_mean      | 15.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1921     |
|    fps              | 425      |
|    time_elapsed     | 1972     |
|    total_timesteps  | 840163   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.152    |
|    n_updates        | 208790   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 529      |
|    mean_reward      | 11.6     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 850000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0834   |
|    n_updates        | 211249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 446      |
|    ep_rew_mean      | 15.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1944     |
|    fps              | 425      |
|    time_elapsed     | 1997     |
|    total_timesteps  | 850246   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.226    |
|    n_updates        | 211311   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 458      |
|    mean_reward      | 13.4     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 860000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.188    |
|    n_updates        | 213749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 445      |
|    ep_rew_mean      | 16       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1966     |
|    fps              | 425      |
|    time_elapsed     | 2021     |
|    total_timesteps  | 860056   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.589    |
|    n_updates        | 213763   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 321      |
|    mean_reward      | 7        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 870000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0155   |
|    n_updates        | 216249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 446      |
|    ep_rew_mean      | 17.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1990     |
|    fps              | 425      |
|    time_elapsed     | 2044     |
|    total_timesteps  | 870040   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0753   |
|    n_updates        | 216259   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 443      |
|    mean_reward      | 6.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 880000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0313   |
|    n_updates        | 218749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 429      |
|    ep_rew_mean      | 15.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2015     |
|    fps              | 425      |
|    time_elapsed     | 2069     |
|    total_timesteps  | 880270   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0387   |
|    n_updates        | 218817   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 460      |
|    mean_reward      | 6.4      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 890000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0691   |
|    n_updates        | 221249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 425      |
|    ep_rew_mean      | 15.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2038     |
|    fps              | 425      |
|    time_elapsed     | 2094     |
|    total_timesteps  | 890305   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.137    |
|    n_updates        | 221326   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 455      |
|    mean_reward      | 6.4      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 900000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.67     |
|    n_updates        | 223749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 430      |
|    ep_rew_mean      | 15.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2061     |
|    fps              | 424      |
|    time_elapsed     | 2118     |
|    total_timesteps  | 900346   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.467    |
|    n_updates        | 223836   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 454      |
|    mean_reward      | 8.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 910000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.052    |
|    n_updates        | 226249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 427      |
|    ep_rew_mean      | 15.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2084     |
|    fps              | 424      |
|    time_elapsed     | 2142     |
|    total_timesteps  | 910156   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0426   |
|    n_updates        | 226288   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 328      |
|    mean_reward      | 8        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 920000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.324    |
|    n_updates        | 228749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 428      |
|    ep_rew_mean      | 15.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2108     |
|    fps              | 424      |
|    time_elapsed     | 2166     |
|    total_timesteps  | 920366   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.237    |
|    n_updates        | 228841   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 457      |
|    mean_reward      | 11.8     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 930000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0492   |
|    n_updates        | 231249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 430      |
|    ep_rew_mean      | 15.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2131     |
|    fps              | 424      |
|    time_elapsed     | 2190     |
|    total_timesteps  | 930092   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 3.12     |
|    n_updates        | 231272   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 440      |
|    mean_reward      | 6        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 940000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0343   |
|    n_updates        | 233749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 427      |
|    ep_rew_mean      | 14.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2155     |
|    fps              | 424      |
|    time_elapsed     | 2214     |
|    total_timesteps  | 940066   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.144    |
|    n_updates        | 233766   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 452      |
|    mean_reward      | 8.4      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 950000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0478   |
|    n_updates        | 236249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 424      |
|    ep_rew_mean      | 15.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2179     |
|    fps              | 424      |
|    time_elapsed     | 2239     |
|    total_timesteps  | 950642   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0332   |
|    n_updates        | 236410   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 376      |
|    mean_reward      | 6.4      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 960000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0336   |
|    n_updates        | 238749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 436      |
|    ep_rew_mean      | 16.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2199     |
|    fps              | 424      |
|    time_elapsed     | 2262     |
|    total_timesteps  | 960134   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.1      |
|    n_updates        | 238783   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 484      |
|    mean_reward      | 9.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 970000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.436    |
|    n_updates        | 241249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 444      |
|    ep_rew_mean      | 17       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2221     |
|    fps              | 424      |
|    time_elapsed     | 2286     |
|    total_timesteps  | 970058   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0774   |
|    n_updates        | 241264   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 402      |
|    mean_reward      | 7.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 980000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.264    |
|    n_updates        | 243749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 450      |
|    ep_rew_mean      | 17.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2243     |
|    fps              | 424      |
|    time_elapsed     | 2310     |
|    total_timesteps  | 980005   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0596   |
|    n_updates        | 243751   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 412      |
|    mean_reward      | 5.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 990000   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.756    |
|    n_updates        | 246249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 453      |
|    ep_rew_mean      | 17.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2266     |
|    fps              | 424      |
|    time_elapsed     | 2335     |
|    total_timesteps  | 990103   |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0733   |
|    n_updates        | 246275   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 410      |
|    mean_reward      | 8.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1000000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.624    |
|    n_updates        | 248749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 439      |
|    ep_rew_mean      | 16.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2291     |
|    fps              | 423      |
|    time_elapsed     | 2359     |
|    total_timesteps  | 1000055  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0532   |
|    n_updates        | 248763   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 319      |
|    mean_reward      | 6.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1010000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.315    |
|    n_updates        | 251249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 424      |
|    ep_rew_mean      | 15.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2316     |
|    fps              | 423      |
|    time_elapsed     | 2383     |
|    total_timesteps  | 1010415  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0617   |
|    n_updates        | 251353   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 327      |
|    mean_reward      | 6.4      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1020000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0327   |
|    n_updates        | 253749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 414      |
|    ep_rew_mean      | 14.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2340     |
|    fps              | 423      |
|    time_elapsed     | 2405     |
|    total_timesteps  | 1020025  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.00897  |
|    n_updates        | 253756   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 423      |
|    mean_reward      | 9        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1030000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.188    |
|    n_updates        | 256249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 413      |
|    ep_rew_mean      | 14.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2363     |
|    fps              | 423      |
|    time_elapsed     | 2430     |
|    total_timesteps  | 1030189  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0819   |
|    n_updates        | 256297   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 380      |
|    mean_reward      | 7.4      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1040000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.198    |
|    n_updates        | 258749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 421      |
|    ep_rew_mean      | 15       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2386     |
|    fps              | 423      |
|    time_elapsed     | 2453     |
|    total_timesteps  | 1040004  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0837   |
|    n_updates        | 258750   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 413      |
|    mean_reward      | 8.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1050000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.104    |
|    n_updates        | 261249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 421      |
|    ep_rew_mean      | 14.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2411     |
|    fps              | 423      |
|    time_elapsed     | 2478     |
|    total_timesteps  | 1050302  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0583   |
|    n_updates        | 261325   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 470      |
|    mean_reward      | 11.6     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1060000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.769    |
|    n_updates        | 263749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 432      |
|    ep_rew_mean      | 15.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2433     |
|    fps              | 423      |
|    time_elapsed     | 2502     |
|    total_timesteps  | 1060158  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.729    |
|    n_updates        | 263789   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 456      |
|    mean_reward      | 15.2     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1070000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0659   |
|    n_updates        | 266249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 419      |
|    ep_rew_mean      | 14.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2458     |
|    fps              | 423      |
|    time_elapsed     | 2526     |
|    total_timesteps  | 1070080  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0662   |
|    n_updates        | 266269   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 455      |
|    mean_reward      | 7.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1080000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.166    |
|    n_updates        | 268749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 421      |
|    ep_rew_mean      | 14.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2481     |
|    fps              | 423      |
|    time_elapsed     | 2551     |
|    total_timesteps  | 1080260  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.217    |
|    n_updates        | 268814   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 432      |
|    mean_reward      | 6.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1090000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.895    |
|    n_updates        | 271249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 422      |
|    ep_rew_mean      | 14.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2505     |
|    fps              | 423      |
|    time_elapsed     | 2575     |
|    total_timesteps  | 1090331  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.143    |
|    n_updates        | 271332   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 446      |
|    mean_reward      | 9.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1100000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 3.81     |
|    n_updates        | 273749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 422      |
|    ep_rew_mean      | 14.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2528     |
|    fps              | 423      |
|    time_elapsed     | 2600     |
|    total_timesteps  | 1100393  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.047    |
|    n_updates        | 273848   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 457      |
|    mean_reward      | 8.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1110000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.205    |
|    n_updates        | 276249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 432      |
|    ep_rew_mean      | 15       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2550     |
|    fps              | 423      |
|    time_elapsed     | 2623     |
|    total_timesteps  | 1110023  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.334    |
|    n_updates        | 276255   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 414      |
|    mean_reward      | 4.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1120000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.73     |
|    n_updates        | 278749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 431      |
|    ep_rew_mean      | 14.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2574     |
|    fps              | 423      |
|    time_elapsed     | 2648     |
|    total_timesteps  | 1120441  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.165    |
|    n_updates        | 278860   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 485      |
|    mean_reward      | 10.6     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1130000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0415   |
|    n_updates        | 281249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 433      |
|    ep_rew_mean      | 14.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2597     |
|    fps              | 422      |
|    time_elapsed     | 2672     |
|    total_timesteps  | 1130141  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.254    |
|    n_updates        | 281285   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 414      |
|    mean_reward      | 5.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1140000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.781    |
|    n_updates        | 283749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 432      |
|    ep_rew_mean      | 14.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2620     |
|    fps              | 422      |
|    time_elapsed     | 2696     |
|    total_timesteps  | 1140035  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0456   |
|    n_updates        | 283758   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 408      |
|    mean_reward      | 4.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1150000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0805   |
|    n_updates        | 286249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 429      |
|    ep_rew_mean      | 15       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2643     |
|    fps              | 422      |
|    time_elapsed     | 2720     |
|    total_timesteps  | 1150049  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 3.83     |
|    n_updates        | 286262   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 431      |
|    mean_reward      | 5.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1160000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.222    |
|    n_updates        | 288749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 429      |
|    ep_rew_mean      | 15.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2667     |
|    fps              | 422      |
|    time_elapsed     | 2744     |
|    total_timesteps  | 1160287  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.182    |
|    n_updates        | 288821   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 482      |
|    mean_reward      | 13       |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1170000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 3.94     |
|    n_updates        | 291249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 428      |
|    ep_rew_mean      | 15.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2691     |
|    fps              | 422      |
|    time_elapsed     | 2769     |
|    total_timesteps  | 1170355  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.866    |
|    n_updates        | 291338   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 337      |
|    mean_reward      | 6.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1180000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0578   |
|    n_updates        | 293749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 424      |
|    ep_rew_mean      | 15.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2715     |
|    fps              | 422      |
|    time_elapsed     | 2792     |
|    total_timesteps  | 1180219  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.2      |
|    n_updates        | 293804   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 433      |
|    mean_reward      | 7.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1190000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.313    |
|    n_updates        | 296249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 430      |
|    ep_rew_mean      | 15.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2737     |
|    fps              | 422      |
|    time_elapsed     | 2816     |
|    total_timesteps  | 1190221  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.398    |
|    n_updates        | 296305   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 488      |
|    mean_reward      | 13.8     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1200000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0569   |
|    n_updates        | 298749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 426      |
|    ep_rew_mean      | 15.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2761     |
|    fps              | 422      |
|    time_elapsed     | 2841     |
|    total_timesteps  | 1200149  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.218    |
|    n_updates        | 298787   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 417      |
|    mean_reward      | 5.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1210000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0918   |
|    n_updates        | 301249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 423      |
|    ep_rew_mean      | 15.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2785     |
|    fps              | 422      |
|    time_elapsed     | 2865     |
|    total_timesteps  | 1210135  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0961   |
|    n_updates        | 301283   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 313      |
|    mean_reward      | 5.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1220000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0388   |
|    n_updates        | 303749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 432      |
|    ep_rew_mean      | 16.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2807     |
|    fps              | 422      |
|    time_elapsed     | 2888     |
|    total_timesteps  | 1220151  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.865    |
|    n_updates        | 303787   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 357      |
|    mean_reward      | 9        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1230000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.545    |
|    n_updates        | 306249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 423      |
|    ep_rew_mean      | 15.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2831     |
|    fps              | 422      |
|    time_elapsed     | 2912     |
|    total_timesteps  | 1230171  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.634    |
|    n_updates        | 306292   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 432      |
|    mean_reward      | 5.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1240000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0972   |
|    n_updates        | 308749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 426      |
|    ep_rew_mean      | 15.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2855     |
|    fps              | 422      |
|    time_elapsed     | 2936     |
|    total_timesteps  | 1240091  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.067    |
|    n_updates        | 308772   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 494      |
|    mean_reward      | 8        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1250000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.103    |
|    n_updates        | 311249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 438      |
|    ep_rew_mean      | 15.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2876     |
|    fps              | 422      |
|    time_elapsed     | 2961     |
|    total_timesteps  | 1250153  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0675   |
|    n_updates        | 311288   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 320      |
|    mean_reward      | 7.4      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1260000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.902    |
|    n_updates        | 313749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 433      |
|    ep_rew_mean      | 14.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2900     |
|    fps              | 422      |
|    time_elapsed     | 2985     |
|    total_timesteps  | 1260347  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0691   |
|    n_updates        | 313836   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 490      |
|    mean_reward      | 9.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1270000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0764   |
|    n_updates        | 316249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 447      |
|    ep_rew_mean      | 16.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2921     |
|    fps              | 422      |
|    time_elapsed     | 3009     |
|    total_timesteps  | 1270402  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.267    |
|    n_updates        | 316350   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 417      |
|    mean_reward      | 6.4      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1280000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.512    |
|    n_updates        | 318749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 442      |
|    ep_rew_mean      | 15.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2945     |
|    fps              | 422      |
|    time_elapsed     | 3033     |
|    total_timesteps  | 1280308  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.317    |
|    n_updates        | 318826   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 418      |
|    mean_reward      | 6.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1290000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 3.92     |
|    n_updates        | 321249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 428      |
|    ep_rew_mean      | 15.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2969     |
|    fps              | 422      |
|    time_elapsed     | 3057     |
|    total_timesteps  | 1290090  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 3.87     |
|    n_updates        | 321272   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 336      |
|    mean_reward      | 6.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1300000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.129    |
|    n_updates        | 323749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 434      |
|    ep_rew_mean      | 15.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2992     |
|    fps              | 422      |
|    time_elapsed     | 3080     |
|    total_timesteps  | 1300003  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 3.89     |
|    n_updates        | 323750   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 492      |
|    mean_reward      | 10.4     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1310000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0466   |
|    n_updates        | 326249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 426      |
|    ep_rew_mean      | 14.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3016     |
|    fps              | 421      |
|    time_elapsed     | 3105     |
|    total_timesteps  | 1310215  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.155    |
|    n_updates        | 326303   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 470      |
|    mean_reward      | 10       |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1320000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.349    |
|    n_updates        | 328749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 427      |
|    ep_rew_mean      | 15.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3038     |
|    fps              | 421      |
|    time_elapsed     | 3129     |
|    total_timesteps  | 1320151  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.138    |
|    n_updates        | 328787   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 484      |
|    mean_reward      | 7.4      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1330000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 1.13     |
|    n_updates        | 331249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 429      |
|    ep_rew_mean      | 15.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3062     |
|    fps              | 421      |
|    time_elapsed     | 3153     |
|    total_timesteps  | 1330011  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0394   |
|    n_updates        | 331252   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 451      |
|    mean_reward      | 10.6     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1340000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0439   |
|    n_updates        | 333749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 430      |
|    ep_rew_mean      | 15.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3085     |
|    fps              | 421      |
|    time_elapsed     | 3178     |
|    total_timesteps  | 1340125  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.151    |
|    n_updates        | 333781   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 432      |
|    mean_reward      | 9.6      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1350000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.15     |
|    n_updates        | 336249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 430      |
|    ep_rew_mean      | 15.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3109     |
|    fps              | 421      |
|    time_elapsed     | 3202     |
|    total_timesteps  | 1350191  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 4.05     |
|    n_updates        | 336297   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 448      |
|    mean_reward      | 12.6     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1360000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0679   |
|    n_updates        | 338749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 426      |
|    ep_rew_mean      | 14.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3132     |
|    fps              | 421      |
|    time_elapsed     | 3226     |
|    total_timesteps  | 1360241  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.112    |
|    n_updates        | 338810   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 468      |
|    mean_reward      | 12.8     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1370000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.144    |
|    n_updates        | 341249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 430      |
|    ep_rew_mean      | 15.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3155     |
|    fps              | 421      |
|    time_elapsed     | 3251     |
|    total_timesteps  | 1370359  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.189    |
|    n_updates        | 341339   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 316      |
|    mean_reward      | 6.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1380000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.404    |
|    n_updates        | 343749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 437      |
|    ep_rew_mean      | 15.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3177     |
|    fps              | 421      |
|    time_elapsed     | 3274     |
|    total_timesteps  | 1380231  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.025    |
|    n_updates        | 343807   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 431      |
|    mean_reward      | 8.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1390000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.209    |
|    n_updates        | 346249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 433      |
|    ep_rew_mean      | 15.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3201     |
|    fps              | 421      |
|    time_elapsed     | 3298     |
|    total_timesteps  | 1390301  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0421   |
|    n_updates        | 346325   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 480      |
|    mean_reward      | 9        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1400000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.067    |
|    n_updates        | 348749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 430      |
|    ep_rew_mean      | 15.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3224     |
|    fps              | 421      |
|    time_elapsed     | 3321     |
|    total_timesteps  | 1400073  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.091    |
|    n_updates        | 348768   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 434      |
|    mean_reward      | 10.2     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1410000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0791   |
|    n_updates        | 351249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 435      |
|    ep_rew_mean      | 16.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3247     |
|    fps              | 421      |
|    time_elapsed     | 3345     |
|    total_timesteps  | 1410178  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0477   |
|    n_updates        | 351294   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 401      |
|    mean_reward      | 12.6     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1420000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.225    |
|    n_updates        | 353749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 421      |
|    ep_rew_mean      | 14.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3272     |
|    fps              | 421      |
|    time_elapsed     | 3369     |
|    total_timesteps  | 1420348  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0312   |
|    n_updates        | 353836   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 498      |
|    mean_reward      | 7.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1430000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.313    |
|    n_updates        | 356249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 423      |
|    ep_rew_mean      | 14.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3296     |
|    fps              | 421      |
|    time_elapsed     | 3394     |
|    total_timesteps  | 1430352  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0863   |
|    n_updates        | 356337   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 318      |
|    mean_reward      | 7        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1440000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0869   |
|    n_updates        | 358749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 418      |
|    ep_rew_mean      | 13.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3321     |
|    fps              | 421      |
|    time_elapsed     | 3417     |
|    total_timesteps  | 1440436  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.186    |
|    n_updates        | 358858   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 436      |
|    mean_reward      | 8.2      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1450000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.291    |
|    n_updates        | 361249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 421      |
|    ep_rew_mean      | 13.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3343     |
|    fps              | 421      |
|    time_elapsed     | 3441     |
|    total_timesteps  | 1450362  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.082    |
|    n_updates        | 361340   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 474      |
|    mean_reward      | 12.6     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1460000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.163    |
|    n_updates        | 363749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 428      |
|    ep_rew_mean      | 14.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3365     |
|    fps              | 421      |
|    time_elapsed     | 3465     |
|    total_timesteps  | 1460350  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.116    |
|    n_updates        | 363837   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 563      |
|    mean_reward      | 15.2     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1470000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 1.03     |
|    n_updates        | 366249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 426      |
|    ep_rew_mean      | 14.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3389     |
|    fps              | 421      |
|    time_elapsed     | 3489     |
|    total_timesteps  | 1470080  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0414   |
|    n_updates        | 366269   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 325      |
|    mean_reward      | 7        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1480000  |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 437      |
|    ep_rew_mean      | 15.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3412     |
|    fps              | 421      |
|    time_elapsed     | 3513     |
|    total_timesteps  | 1480432  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.102    |
|    n_updates        | 368857   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 439      |
|    ep_rew_mean      | 15.5     |
|    exploration_rat

----------------------------------
| eval/               |          |
|    mean_ep_length   | 423      |
|    mean_reward      | 6.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1490000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.093    |
|    n_updates        | 371249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 438      |
|    ep_rew_mean      | 15       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3435     |
|    fps              | 421      |
|    time_elapsed     | 3536     |
|    total_timesteps  | 1490376  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 0.0463   |
|    n_updates        | 371343   |
----------------------------------
--------------------

----------------------------------
| eval/               |          |
|    mean_ep_length   | 462      |
|    mean_reward      | 7        |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 1500000  |
| train/              |          |
|    learning_rate    | 0.0002   |
|    loss             | 1.11     |
|    n_updates        | 373749   |
----------------------------------



Evaluating final policy...
 TRAINING COMPLETE
 Config ID : 1
 Mean Reward : 9.0 ± 4.5
 Peak VRAM : 0.03 GB
 Model Saved : dqn_pacman_p100_cfg1.zip
 TensorBoard : tensorboard --logdir ./logs/tensorboard/

Starting LIVE RENDER – Close window to stop...


RuntimeError: Failed to initialize SDL