In [1]:
from rl_env_sarl import SARLDraftEnv
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.env_checker import check_env



In [2]:
dqn_params = {
    'policy': 'MlpPolicy',
    'policy_kwargs': dict(net_arch=[128, 128]),
    'learning_rate': 3e-4,
    'buffer_size': 10000,
    'learning_starts': 1000,
    'batch_size': 128,
    'tau': 5e-3,
    'gamma': 0.99,
    'train_freq': 4,
    'gradient_steps': 1,
    'target_update_interval': 1, # the default is much higher but i think since tau is so low it's okay
    'exploration_fraction': 0.1,
    'exploration_initial_eps': 1.0,
    # 'exploration_initial_eps': 0.05,
    'exploration_final_eps': 0.05,
    'max_grad_norm': 10,  # much more conservative than what i did before
    'verbose': 1,
    'stats_window_size': 100
}

In [3]:
check_env(SARLDraftEnv())

In [4]:
# env = SARLDraftEnv()
# model = DQN(env=env, **dqn_params)
# model.learn(total_timesteps=100000)

In [5]:
import os
import json
import numpy as np
from datetime import datetime
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback, EvalCallback
from stable_baselines3.common.logger import configure

# Custom callback for monitoring and saving during training
class TrainingMonitorCallback(BaseCallback):
    def __init__(self, log_interval: int, save_interval: int, draft_save_interval: int, log_dir: str, run_id: str, verbose=1):
        super(TrainingMonitorCallback, self).__init__(verbose)
        self.log_interval = log_interval
        self.save_interval = save_interval
        self.draft_save_interval = draft_save_interval
        self.log_dir = log_dir
        self.run_id = run_id
        self.best_mean_reward = -np.inf
        self.rewards_history = []
        self.losses_history = []
        self.draft_history = []
        self.episode_counter = 0

        self.model_save_path = os.path.join(log_dir, f'best_model_{run_id}')
        self.rewards_losses_path = os.path.join(log_dir, f'rewards_losses_{run_id}.json')
        self.drafts_save_path = os.path.join(log_dir, f'draft_history_{run_id}.json')

    def _init_callback(self) -> None:
        os.makedirs(self.log_dir, exist_ok=True)

    def _on_step(self) -> bool:
        # Log rewards
        current_reward = self.locals['rewards']
        self.rewards_history.append(current_reward)

        # Log losses if TD errors are available
        if 'td_errors' in self.locals:
            current_loss = np.mean(np.abs(self.locals['td_errors']))
            self.losses_history.append(current_loss)

        # Periodic logging and saving
        if self.n_calls % self.log_interval == 0:
            mean_reward = np.mean(self.rewards_history[-self.log_interval:])
            self.logger.record('mean_reward', mean_reward)

            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                self.model.save(self.model_save_path)
                if self.verbose > 0:
                    print(f"New best mean reward: {self.best_mean_reward}. Model saved to {self.model_save_path}")

            # Convert rewards and losses to lists before saving
            rewards_list = [float(reward) for reward in self.rewards_history]
            losses_list = [float(loss) for loss in self.losses_history]

            # Save rewards and losses to a file
            with open(self.rewards_losses_path, 'w') as f:
                json.dump({'rewards': rewards_list, 'losses': losses_list}, f)

        # Check if the episode has terminated
        if self.locals['dones'][0]:
            
            if self.episode_counter % self.draft_save_interval == 0:
                draft = self.locals['infos'][0]['draft']
                self.draft_history.append(draft)
                with open(self.drafts_save_path, 'w') as f:
                    json.dump(self.draft_history, f, indent=4)
                
            self.episode_counter += 1

        return True

# Define your environment
env = SARLDraftEnv()

# Generate a unique run ID
run_id = datetime.now().strftime("%Y%m%d-%H%M%S")

# Choose significant parameters to include in the log directory name
param_str = f"DQN_lr_{dqn_params['learning_rate']}_gamma_{dqn_params['gamma']}"
log_dir = f"./logs/{param_str}_{run_id}/"
os.makedirs(log_dir, exist_ok=True)

# Save dqn_params to a JSON file
params_save_path = os.path.join(log_dir, f'dqn_params_{run_id}.json')
with open(params_save_path, 'w') as f:
    json.dump(dqn_params, f, indent=4)

# Configure the logger
new_logger = configure(log_dir, ["stdout", "csv", "tensorboard"])

# # Reload the model from a saved checkpoint
# model_path = 'logs/DQN_lr_0.001_gamma_0.99_20240826-133826/best_model.zip'
# model = DQN.load(model_path, env=env, tensorboard_log=log_dir)
# model.set_logger(new_logger)

# Define the model and attach the logger
model = DQN(env=env, **dqn_params, tensorboard_log=log_dir)
model.set_logger(new_logger)


# Log dqn_params to TensorBoard/CSV
for key, value in dqn_params.items():
    model.logger.record(f'params/{key}', value)

# Initialize and use the custom callback
monitor_callback = TrainingMonitorCallback(log_interval=1000, save_interval=50, draft_save_interval=1000, log_dir=log_dir, run_id=run_id)

# Create a checkpoint callback to save the model periodically
checkpoint_callback = CheckpointCallback(save_freq=1000, save_path=log_dir, name_prefix='rl_model')

# Create an evaluation callback to log evaluations during training
eval_callback = EvalCallback(env, best_model_save_path=log_dir, log_path=log_dir, eval_freq=5000, deterministic=True, render=False)

# Combine all callbacks
callback = [checkpoint_callback, eval_callback, monitor_callback]

# Train the model
model.learn(total_timesteps=5e5, callback=callback)

Logging to ./logs/DQN_lr_0.0003_gamma_0.99_20240826-200734/
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------------------------------
| params/                    |                          |
|    batch_size              | 128                      |
|    buffer_size             | 10000                    |
|    exploration_final_eps   | 0.05                     |
|    exploration_fraction    | 0.1                      |
|    exploration_initial_eps | 1                        |
|    gamma                   | 0.99                     |
|    gradient_steps          | 1                        |
|    learning_rate           | 0.0003                   |
|    learning_starts         | 1000                     |
|    max_grad_norm           | 10                       |
|    policy                  | MlpPolicy                |
|    policy_kwargs           | {'net_arch': [128, 128]} |
|    stats_window_size       | 100

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | -0.03517894 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | -0.543      |
|    exploration_rate | 0.981       |
| time/               |             |
|    episodes         | 68          |
|    fps              | 14          |
|    time_elapsed     | 70          |
|    total_timesteps  | 1020        |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.0321      |
|    n_updates        | 4           |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | -0.554   |
|    exploration_rate | 0.979    |
| time/               |          |
|    episodes         | 72       |
|    fps              | 14       |
|    time_elapsed     | 74       |
|    total_timesteps  | 1080     |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.036475558 |
| rollout/            |              |
|    ep_len_mean      | 15           |
|    ep_rew_mean      | -0.546       |
|    exploration_rate | 0.961        |
| time/               |              |
|    episodes         | 136          |
|    fps              | 14           |
|    time_elapsed     | 141          |
|    total_timesteps  | 2040         |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.0051       |
|    n_updates        | 259          |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | -0.519   |
|    exploration_rate | 0.96     |
| time/               |          |
|    episodes         | 140      |
|    fps              | 14       |
|    time_elapsed     | 145      |
|    total_timesteps  | 2100     |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | -0.495   |
|    exploration_rate | 0.942    |
| time/               |          |
|    episodes         | 204      |
|    fps              | 14       |
|    time_elapsed     | 212      |
|    total_timesteps  | 3060     |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00684  |
|    n_updates        | 514      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | -0.468   |
|    exploration_rate | 0.941    |
| time/               |          |
|    episodes         | 208      |
|    fps              | 14       |
|    time_elapsed     | 217      |
|    total_timesteps  | 3120     |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00271  |
|    n_updates      

  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.027045583 |
| rollout/            |              |
|    ep_len_mean      | 15           |
|    ep_rew_mean      | -0.412       |
|    exploration_rate | 0.924        |
| time/               |              |
|    episodes         | 268          |
|    fps              | 14           |
|    time_elapsed     | 279          |
|    total_timesteps  | 4020         |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.00465      |
|    n_updates        | 754          |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | -0.397   |
|    exploration_rate | 0.922    |
| time/               |          |
|    episodes         | 272      |
|    fps              | 14       |
|    time_elapsed     | 284      |
|    total_timesteps  | 4080     |
| train/              |   



Eval num_timesteps=5000, episode_reward=-0.81 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | -0.806   |
| rollout/            |          |
|    exploration_rate | 0.905    |
| time/               |          |
|    total_timesteps  | 5000     |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.013    |
|    n_updates        | 999      |
----------------------------------
New best mean reward!
New best mean reward: -0.020423036068677902. Model saved to ./logs/DQN_lr_0.0003_gamma_0.99_20240826-200734/best_model_20240826-200734


  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.020423036 |
| rollout/            |              |
|    ep_len_mean      | 15.1         |
|    ep_rew_mean      | -0.359       |
|    exploration_rate | 0.904        |
| time/               |              |
|    episodes         | 336          |
|    fps              | 14           |
|    time_elapsed     | 356          |
|    total_timesteps  | 5045         |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.0177       |
|    n_updates        | 1011         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | -0.344   |
|    exploration_rate | 0.903    |
| time/               |          |
|    episodes         | 340      |
|    fps              | 14       |
|    time_elapsed     | 360      |
|    total_timesteps  | 5105     |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.019330177 |
| rollout/            |              |
|    ep_len_mean      | 15.1         |
|    ep_rew_mean      | -0.314       |
|    exploration_rate | 0.886        |
| time/               |              |
|    episodes         | 400          |
|    fps              | 14           |
|    time_elapsed     | 421          |
|    total_timesteps  | 6005         |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.0146       |
|    n_updates        | 1251         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | -0.316   |
|    exploration_rate | 0.885    |
| time/               |          |
|    episodes         | 404      |
|    fps              | 14       |
|    time_elapsed     | 425      |
|    total_timesteps  | 6065     |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.020615436 |
| rollout/            |              |
|    ep_len_mean      | 15           |
|    ep_rew_mean      | -0.292       |
|    exploration_rate | 0.867        |
| time/               |              |
|    episodes         | 468          |
|    fps              | 14           |
|    time_elapsed     | 493          |
|    total_timesteps  | 7025         |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.00706      |
|    n_updates        | 1506         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | -0.263   |
|    exploration_rate | 0.865    |
| time/               |          |
|    episodes         | 472      |
|    fps              | 14       |
|    time_elapsed     | 498      |
|    total_timesteps  | 7085     |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.010852284 |
| rollout/            |              |
|    ep_len_mean      | 15           |
|    ep_rew_mean      | -0.206       |
|    exploration_rate | 0.847        |
| time/               |              |
|    episodes         | 536          |
|    fps              | 14           |
|    time_elapsed     | 565          |
|    total_timesteps  | 8045         |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.00992      |
|    n_updates        | 1761         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | -0.163   |
|    exploration_rate | 0.846    |
| time/               |          |
|    episodes         | 540      |
|    fps              | 14       |
|    time_elapsed     | 569      |
|    total_timesteps  | 8105     |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


---------------------------------------
| mean_reward         | -0.0004361372 |
| rollout/            |               |
|    ep_len_mean      | 15            |
|    ep_rew_mean      | -0.0544       |
|    exploration_rate | 0.829         |
| time/               |               |
|    episodes         | 600           |
|    fps              | 14            |
|    time_elapsed     | 631           |
|    total_timesteps  | 9005          |
| train/              |               |
|    learning_rate    | 0.0003        |
|    loss             | 0.0125        |
|    n_updates        | 2001          |
---------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | -0.0543  |
|    exploration_rate | 0.828    |
| time/               |          |
|    episodes         | 604      |
|    fps              | 14       |
|    time_elapsed     | 635      |
|    total_timesteps  | 9065     |
| train/  



Eval num_timesteps=10000, episode_reward=-0.39 +/- 0.58
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | -0.386   |
| rollout/            |          |
|    exploration_rate | 0.81     |
| time/               |          |
|    total_timesteps  | 10000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00757  |
|    n_updates        | 2249     |
----------------------------------
New best mean reward!


  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.017432787 |
| rollout/            |              |
|    ep_len_mean      | 15.1         |
|    ep_rew_mean      | -0.194       |
|    exploration_rate | 0.809        |
| time/               |              |
|    episodes         | 668          |
|    fps              | 14           |
|    time_elapsed     | 709          |
|    total_timesteps  | 10030        |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.00541      |
|    n_updates        | 2257         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | -0.21    |
|    exploration_rate | 0.808    |
| time/               |          |
|    episodes         | 672      |
|    fps              | 14       |
|    time_elapsed     | 714      |
|    total_timesteps  | 10090    |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | -0.01792111 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | -0.314      |
|    exploration_rate | 0.79        |
| time/               |             |
|    episodes         | 736         |
|    fps              | 14          |
|    time_elapsed     | 783         |
|    total_timesteps  | 11050       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.0128      |
|    n_updates        | 2512        |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | -0.286   |
|    exploration_rate | 0.789    |
| time/               |          |
|    episodes         | 740      |
|    fps              | 14       |
|    time_elapsed     | 787      |
|    total_timesteps  | 11110    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.007938329 |
| rollout/            |              |
|    ep_len_mean      | 15           |
|    ep_rew_mean      | -0.232       |
|    exploration_rate | 0.772        |
| time/               |              |
|    episodes         | 800          |
|    fps              | 14           |
|    time_elapsed     | 850          |
|    total_timesteps  | 12010        |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.0119       |
|    n_updates        | 2752         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | -0.217   |
|    exploration_rate | 0.771    |
| time/               |          |
|    episodes         | 804      |
|    fps              | 14       |
|    time_elapsed     | 854      |
|    total_timesteps  | 12070    |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.007196204 |
| rollout/            |              |
|    ep_len_mean      | 15           |
|    ep_rew_mean      | -0.134       |
|    exploration_rate | 0.752        |
| time/               |              |
|    episodes         | 868          |
|    fps              | 14           |
|    time_elapsed     | 920          |
|    total_timesteps  | 13030        |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.00899      |
|    n_updates        | 3007         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | -0.163   |
|    exploration_rate | 0.751    |
| time/               |          |
|    episodes         | 872      |
|    fps              | 14       |
|    time_elapsed     | 925      |
|    total_timesteps  | 13090    |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.015863981 |
| rollout/            |              |
|    ep_len_mean      | 15           |
|    ep_rew_mean      | -0.193       |
|    exploration_rate | 0.733        |
| time/               |              |
|    episodes         | 936          |
|    fps              | 14           |
|    time_elapsed     | 990          |
|    total_timesteps  | 14050        |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.0152       |
|    n_updates        | 3262         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | -0.192   |
|    exploration_rate | 0.732    |
| time/               |          |
|    episodes         | 940      |
|    fps              | 14       |
|    time_elapsed     | 995      |
|    total_timesteps  | 14110    |
| train/              |   



Eval num_timesteps=15000, episode_reward=-0.75 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | -0.748   |
| rollout/            |          |
|    exploration_rate | 0.715    |
| time/               |          |
|    total_timesteps  | 15000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.0157   |
|    n_updates        | 3499     |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.024265716 |
| rollout/            |              |
|    ep_len_mean      | 15.1         |
|    ep_rew_mean      | -0.263       |
|    exploration_rate | 0.715        |
| time/               |              |
|    episodes         | 1000         |
|    fps              | 14           |
|    time_elapsed     | 1062         |
|    total_timesteps  | 15015        |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.0167       |
|    n_updates        | 3503         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | -0.306   |
|    exploration_rate | 0.714    |
| time/               |          |
|    episodes         | 1004     |
|    fps              | 14       |
|    time_elapsed     | 1067     |
|    total_timesteps  | 15075    |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.011216681 |
| rollout/            |              |
|    ep_len_mean      | 15.1         |
|    ep_rew_mean      | -0.231       |
|    exploration_rate | 0.695        |
| time/               |              |
|    episodes         | 1068         |
|    fps              | 14           |
|    time_elapsed     | 1133         |
|    total_timesteps  | 16035        |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.015        |
|    n_updates        | 3758         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | -0.244   |
|    exploration_rate | 0.694    |
| time/               |          |
|    episodes         | 1072     |
|    fps              | 14       |
|    time_elapsed     | 1137     |
|    total_timesteps  | 16095    |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.013918903 |
| rollout/            |              |
|    ep_len_mean      | 15           |
|    ep_rew_mean      | -0.191       |
|    exploration_rate | 0.676        |
| time/               |              |
|    episodes         | 1136         |
|    fps              | 14           |
|    time_elapsed     | 1203         |
|    total_timesteps  | 17055        |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.00644      |
|    n_updates        | 4013         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | -0.205   |
|    exploration_rate | 0.675    |
| time/               |          |
|    episodes         | 1140     |
|    fps              | 14       |
|    time_elapsed     | 1207     |
|    total_timesteps  | 17115    |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.012736162 |
| rollout/            |              |
|    ep_len_mean      | 15           |
|    ep_rew_mean      | -0.192       |
|    exploration_rate | 0.658        |
| time/               |              |
|    episodes         | 1200         |
|    fps              | 14           |
|    time_elapsed     | 1269         |
|    total_timesteps  | 18015        |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.0131       |
|    n_updates        | 4253         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | -0.192   |
|    exploration_rate | 0.657    |
| time/               |          |
|    episodes         | 1204     |
|    fps              | 14       |
|    time_elapsed     | 1273     |
|    total_timesteps  | 18075    |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | -0.00312901 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | -0.0636     |
|    exploration_rate | 0.638       |
| time/               |             |
|    episodes         | 1268        |
|    fps              | 14          |
|    time_elapsed     | 1339        |
|    total_timesteps  | 19035       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.0112      |
|    n_updates        | 4508        |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | -0.0924  |
|    exploration_rate | 0.637    |
| time/               |          |
|    episodes         | 1272     |
|    fps              | 14       |
|    time_elapsed     | 1343     |
|    total_timesteps  | 19095    |
| train/              |          |
|    le



Eval num_timesteps=20000, episode_reward=-0.40 +/- 0.54
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | -0.404   |
| rollout/            |          |
|    exploration_rate | 0.62     |
| time/               |          |
|    total_timesteps  | 20000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.0083   |
|    n_updates        | 4749     |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.012319697 |
| rollout/            |              |
|    ep_len_mean      | 15.1         |
|    ep_rew_mean      | -0.156       |
|    exploration_rate | 0.619        |
| time/               |              |
|    episodes         | 1336         |
|    fps              | 14           |
|    time_elapsed     | 1415         |
|    total_timesteps  | 20060        |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.0113       |
|    n_updates        | 4764         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | -0.171   |
|    exploration_rate | 0.618    |
| time/               |          |
|    episodes         | 1340     |
|    fps              | 14       |
|    time_elapsed     | 1419     |
|    total_timesteps  | 20120    |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | -0.002733535 |
| rollout/            |              |
|    ep_len_mean      | 15.1         |
|    ep_rew_mean      | -0.0576      |
|    exploration_rate | 0.601        |
| time/               |              |
|    episodes         | 1400         |
|    fps              | 14           |
|    time_elapsed     | 1480         |
|    total_timesteps  | 21020        |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.0128       |
|    n_updates        | 5004         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | -0.0284  |
|    exploration_rate | 0.599    |
| time/               |          |
|    episodes         | 1404     |
|    fps              | 14       |
|    time_elapsed     | 1484     |
|    total_timesteps  | 21080    |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | 0.0044588805 |
| rollout/            |              |
|    ep_len_mean      | 15           |
|    ep_rew_mean      | 0.0503       |
|    exploration_rate | 0.581        |
| time/               |              |
|    episodes         | 1468         |
|    fps              | 14           |
|    time_elapsed     | 1546         |
|    total_timesteps  | 22040        |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.0117       |
|    n_updates        | 5259         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.0783   |
|    exploration_rate | 0.58     |
| time/               |          |
|    episodes         | 1472     |
|    fps              | 14       |
|    time_elapsed     | 1550     |
|    total_timesteps  | 22100    |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.242    |
|    exploration_rate | 0.562    |
| time/               |          |
|    episodes         | 1536     |
|    fps              | 14       |
|    time_elapsed     | 1612     |
|    total_timesteps  | 23060    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00592  |
|    n_updates        | 5514     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.287    |
|    exploration_rate | 0.561    |
| time/               |          |
|    episodes         | 1540     |
|    fps              | 14       |
|    time_elapsed     | 1616     |
|    total_timesteps  | 23120    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.0176   |
|    n_updates      

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.007863741 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.192       |
|    exploration_rate | 0.544       |
| time/               |             |
|    episodes         | 1600        |
|    fps              | 14          |
|    time_elapsed     | 1674        |
|    total_timesteps  | 24020       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.0119      |
|    n_updates        | 5754        |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.163    |
|    exploration_rate | 0.542    |
| time/               |          |
|    episodes         | 1604     |
|    fps              | 14       |
|    time_elapsed     | 1678     |
|    total_timesteps  | 24080    |
| train/              |          |
|    le



Eval num_timesteps=25000, episode_reward=0.75 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.747    |
| rollout/            |          |
|    exploration_rate | 0.525    |
| time/               |          |
|    total_timesteps  | 25000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00711  |
|    n_updates        | 5999     |
----------------------------------
New best mean reward!


  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | 0.0074205087 |
| rollout/            |              |
|    ep_len_mean      | 15.1         |
|    ep_rew_mean      | 0.072        |
|    exploration_rate | 0.524        |
| time/               |              |
|    episodes         | 1668         |
|    fps              | 14           |
|    time_elapsed     | 1745         |
|    total_timesteps  | 25045        |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.00897      |
|    n_updates        | 6011         |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.0745   |
|    exploration_rate | 0.523    |
| time/               |          |
|    episodes         | 1672     |
|    fps              | 14       |
|    time_elapsed     | 1749     |
|    total_timesteps  | 25105    |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.024059461 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.317       |
|    exploration_rate | 0.506       |
| time/               |             |
|    episodes         | 1732        |
|    fps              | 14          |
|    time_elapsed     | 1807        |
|    total_timesteps  | 26005       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00661     |
|    n_updates        | 6251        |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.315    |
|    exploration_rate | 0.505    |
| time/               |          |
|    episodes         | 1736     |
|    fps              | 14       |
|    time_elapsed     | 1811     |
|    total_timesteps  | 26065    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.024323609 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.338       |
|    exploration_rate | 0.487       |
| time/               |             |
|    episodes         | 1800        |
|    fps              | 14          |
|    time_elapsed     | 1873        |
|    total_timesteps  | 27025       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.0102      |
|    n_updates        | 6506        |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.324    |
|    exploration_rate | 0.485    |
| time/               |          |
|    episodes         | 1804     |
|    fps              | 14       |
|    time_elapsed     | 1877     |
|    total_timesteps  | 27085    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.023366898 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.349       |
|    exploration_rate | 0.467       |
| time/               |             |
|    episodes         | 1868        |
|    fps              | 14          |
|    time_elapsed     | 1939        |
|    total_timesteps  | 28045       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.0114      |
|    n_updates        | 6761        |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.335    |
|    exploration_rate | 0.466    |
| time/               |          |
|    episodes         | 1872     |
|    fps              | 14       |
|    time_elapsed     | 1943     |
|    total_timesteps  | 28105    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.021004688 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.327       |
|    exploration_rate | 0.449       |
| time/               |             |
|    episodes         | 1932        |
|    fps              | 14          |
|    time_elapsed     | 2001        |
|    total_timesteps  | 29005       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00425     |
|    n_updates        | 7001        |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.326    |
|    exploration_rate | 0.448    |
| time/               |          |
|    episodes         | 1936     |
|    fps              | 14       |
|    time_elapsed     | 2005     |
|    total_timesteps  | 29065    |
| train/              |          |
|    le



Eval num_timesteps=30000, episode_reward=0.78 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.778    |
| rollout/            |          |
|    exploration_rate | 0.43     |
| time/               |          |
|    total_timesteps  | 30000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00506  |
|    n_updates        | 7249     |
----------------------------------
New best mean reward!
New best mean reward: 0.03143855556845665. Model saved to ./logs/DQN_lr_0.0003_gamma_0.99_20240826-200734/best_model_20240826-200734


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.031438556 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.469       |
|    exploration_rate | 0.429       |
| time/               |             |
|    episodes         | 2000        |
|    fps              | 14          |
|    time_elapsed     | 2072        |
|    total_timesteps  | 30030       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.0147      |
|    n_updates        | 7257        |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.482    |
|    exploration_rate | 0.428    |
| time/               |          |
|    episodes         | 2004     |
|    fps              | 14       |
|    time_elapsed     | 2076     |
|    total_timesteps  | 30090    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.03820032 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.544      |
|    exploration_rate | 0.41       |
| time/               |            |
|    episodes         | 2068       |
|    fps              | 14         |
|    time_elapsed     | 2138       |
|    total_timesteps  | 31050      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00459    |
|    n_updates        | 7512       |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.544    |
|    exploration_rate | 0.409    |
| time/               |          |
|    episodes         | 2072     |
|    fps              | 14       |
|    time_elapsed     | 2142     |
|    total_timesteps  | 31110    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.035070334 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.532       |
|    exploration_rate | 0.392       |
| time/               |             |
|    episodes         | 2132        |
|    fps              | 14          |
|    time_elapsed     | 2200        |
|    total_timesteps  | 32010       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00784     |
|    n_updates        | 7752        |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.562    |
|    exploration_rate | 0.391    |
| time/               |          |
|    episodes         | 2136     |
|    fps              | 14       |
|    time_elapsed     | 2204     |
|    total_timesteps  | 32070    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.03401394 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.501      |
|    exploration_rate | 0.372      |
| time/               |            |
|    episodes         | 2200       |
|    fps              | 14         |
|    time_elapsed     | 2266       |
|    total_timesteps  | 33030      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.0131     |
|    n_updates        | 8007       |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.5      |
|    exploration_rate | 0.371    |
| time/               |          |
|    episodes         | 2204     |
|    fps              | 14       |
|    time_elapsed     | 2270     |
|    total_timesteps  | 33090    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.03138002 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.512      |
|    exploration_rate | 0.353      |
| time/               |            |
|    episodes         | 2268       |
|    fps              | 14         |
|    time_elapsed     | 2332       |
|    total_timesteps  | 34050      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.0034     |
|    n_updates        | 8262       |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.513    |
|    exploration_rate | 0.352    |
| time/               |          |
|    episodes         | 2272     |
|    fps              | 14       |
|    time_elapsed     | 2336     |
|    total_timesteps  | 34110    |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=35000, episode_reward=0.77 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.767    |
| rollout/            |          |
|    exploration_rate | 0.335    |
| time/               |          |
|    total_timesteps  | 35000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00146  |
|    n_updates        | 8499     |
----------------------------------
New best mean reward: 0.04193916544318199. Model saved to ./logs/DQN_lr_0.0003_gamma_0.99_20240826-200734/best_model_20240826-200734


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.041939165 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.566       |
|    exploration_rate | 0.335       |
| time/               |             |
|    episodes         | 2332        |
|    fps              | 14          |
|    time_elapsed     | 2404        |
|    total_timesteps  | 35015       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00226     |
|    n_updates        | 8503        |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.579    |
|    exploration_rate | 0.334    |
| time/               |          |
|    episodes         | 2336     |
|    fps              | 14       |
|    time_elapsed     | 2408     |
|    total_timesteps  | 35075    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.039162744 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.581       |
|    exploration_rate | 0.315       |
| time/               |             |
|    episodes         | 2400        |
|    fps              | 14          |
|    time_elapsed     | 2475        |
|    total_timesteps  | 36035       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00253     |
|    n_updates        | 8758        |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.609    |
|    exploration_rate | 0.314    |
| time/               |          |
|    episodes         | 2404     |
|    fps              | 14       |
|    time_elapsed     | 2479     |
|    total_timesteps  | 36095    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04195706 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.652      |
|    exploration_rate | 0.296      |
| time/               |            |
|    episodes         | 2468       |
|    fps              | 14         |
|    time_elapsed     | 2545       |
|    total_timesteps  | 37055      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00677    |
|    n_updates        | 9013       |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.652    |
|    exploration_rate | 0.295    |
| time/               |          |
|    episodes         | 2472     |
|    fps              | 14       |
|    time_elapsed     | 2549     |
|    total_timesteps  | 37115    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04211771 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.624      |
|    exploration_rate | 0.278      |
| time/               |            |
|    episodes         | 2532       |
|    fps              | 14         |
|    time_elapsed     | 2610       |
|    total_timesteps  | 38015      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00127    |
|    n_updates        | 9253       |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.609    |
|    exploration_rate | 0.277    |
| time/               |          |
|    episodes         | 2536     |
|    fps              | 14       |
|    time_elapsed     | 2614     |
|    total_timesteps  | 38075    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.040760428 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.63        |
|    exploration_rate | 0.258       |
| time/               |             |
|    episodes         | 2600        |
|    fps              | 14          |
|    time_elapsed     | 2679        |
|    total_timesteps  | 39035       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00351     |
|    n_updates        | 9508        |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.615    |
|    exploration_rate | 0.257    |
| time/               |          |
|    episodes         | 2604     |
|    fps              | 14       |
|    time_elapsed     | 2683     |
|    total_timesteps  | 39095    |
| train/              |          |
|    le



Eval num_timesteps=40000, episode_reward=0.76 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.759    |
| rollout/            |          |
|    exploration_rate | 0.24     |
| time/               |          |
|    total_timesteps  | 40000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00473  |
|    n_updates        | 9749     |
----------------------------------
New best mean reward: 0.044645410031080246. Model saved to ./logs/DQN_lr_0.0003_gamma_0.99_20240826-200734/best_model_20240826-200734


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04464541 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.652      |
|    exploration_rate | 0.239      |
| time/               |            |
|    episodes         | 2668       |
|    fps              | 14         |
|    time_elapsed     | 2753       |
|    total_timesteps  | 40060      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00494    |
|    n_updates        | 9764       |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.651    |
|    exploration_rate | 0.238    |
| time/               |          |
|    episodes         | 2672     |
|    fps              | 14       |
|    time_elapsed     | 2757     |
|    total_timesteps  | 40120    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04130648 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.67       |
|    exploration_rate | 0.221      |
| time/               |            |
|    episodes         | 2732       |
|    fps              | 14         |
|    time_elapsed     | 2818       |
|    total_timesteps  | 41020      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000632   |
|    n_updates        | 10004      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.671    |
|    exploration_rate | 0.219    |
| time/               |          |
|    episodes         | 2736     |
|    fps              | 14       |
|    time_elapsed     | 2822     |
|    total_timesteps  | 41080    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050193276 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.722       |
|    exploration_rate | 0.201       |
| time/               |             |
|    episodes         | 2800        |
|    fps              | 14          |
|    time_elapsed     | 2887        |
|    total_timesteps  | 42040       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00239     |
|    n_updates        | 10259       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.738    |
|    exploration_rate | 0.2      |
| time/               |          |
|    episodes         | 2804     |
|    fps              | 14       |
|    time_elapsed     | 2891     |
|    total_timesteps  | 42100    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.709    |
|    exploration_rate | 0.182    |
| time/               |          |
|    episodes         | 2868     |
|    fps              | 14       |
|    time_elapsed     | 2957     |
|    total_timesteps  | 43060    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00485  |
|    n_updates        | 10514    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.71     |
|    exploration_rate | 0.181    |
| time/               |          |
|    episodes         | 2872     |
|    fps              | 14       |
|    time_elapsed     | 2961     |
|    total_timesteps  | 43120    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.0013   |
|    n_updates      

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.044435363 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.653       |
|    exploration_rate | 0.164       |
| time/               |             |
|    episodes         | 2932        |
|    fps              | 14          |
|    time_elapsed     | 3022        |
|    total_timesteps  | 44020       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000512    |
|    n_updates        | 10754       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.667    |
|    exploration_rate | 0.162    |
| time/               |          |
|    episodes         | 2936     |
|    fps              | 14       |
|    time_elapsed     | 3026     |
|    total_timesteps  | 44080    |
| train/              |          |
|    le



Eval num_timesteps=45000, episode_reward=0.76 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.764    |
| rollout/            |          |
|    exploration_rate | 0.145    |
| time/               |          |
|    total_timesteps  | 45000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000923 |
|    n_updates        | 10999    |
----------------------------------
New best mean reward: 0.05110739916563034. Model saved to ./logs/DQN_lr_0.0003_gamma_0.99_20240826-200734/best_model_20240826-200734


  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0511074 |
| rollout/            |           |
|    ep_len_mean      | 15.1      |
|    ep_rew_mean      | 0.748     |
|    exploration_rate | 0.144     |
| time/               |           |
|    episodes         | 3000      |
|    fps              | 14        |
|    time_elapsed     | 3096      |
|    total_timesteps  | 45045     |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 0.00637   |
|    n_updates        | 11011     |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.749    |
|    exploration_rate | 0.143    |
| time/               |          |
|    episodes         | 3004     |
|    fps              | 14       |
|    time_elapsed     | 3100     |
|    total_timesteps  | 45105    |
| train/              |          |
|    learning_rate    | 0.0003   |
|   

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05027473 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.762      |
|    exploration_rate | 0.126      |
| time/               |            |
|    episodes         | 3064       |
|    fps              | 14         |
|    time_elapsed     | 3161       |
|    total_timesteps  | 46005      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000254   |
|    n_updates        | 11251      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.762    |
|    exploration_rate | 0.125    |
| time/               |          |
|    episodes         | 3068     |
|    fps              | 14       |
|    time_elapsed     | 3165     |
|    total_timesteps  | 46065    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


----------------------------------
| mean_reward         | 0.051159 |
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.764    |
|    exploration_rate | 0.107    |
| time/               |          |
|    episodes         | 3132     |
|    fps              | 14       |
|    time_elapsed     | 3231     |
|    total_timesteps  | 47025    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000809 |
|    n_updates        | 11506    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.765    |
|    exploration_rate | 0.105    |
| time/               |          |
|    episodes         | 3136     |
|    fps              | 14       |
|    time_elapsed     | 3235     |
|    total_timesteps  | 47085    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss           

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051355366 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.769       |
|    exploration_rate | 0.0871      |
| time/               |             |
|    episodes         | 3200        |
|    fps              | 14          |
|    time_elapsed     | 3300        |
|    total_timesteps  | 48045       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000259    |
|    n_updates        | 11761       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.77     |
|    exploration_rate | 0.086    |
| time/               |          |
|    episodes         | 3204     |
|    fps              | 14       |
|    time_elapsed     | 3304     |
|    total_timesteps  | 48105    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051278137 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.772       |
|    exploration_rate | 0.0689      |
| time/               |             |
|    episodes         | 3264        |
|    fps              | 14          |
|    time_elapsed     | 3365        |
|    total_timesteps  | 49005       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000483    |
|    n_updates        | 12001       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.772    |
|    exploration_rate | 0.0678   |
| time/               |          |
|    episodes         | 3268     |
|    fps              | 14       |
|    time_elapsed     | 3369     |
|    total_timesteps  | 49065    |
| train/              |          |
|    le



Eval num_timesteps=50000, episode_reward=0.78 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.784    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 50000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000352 |
|    n_updates        | 12249    |
----------------------------------
New best mean reward!


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050233223 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.762       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 3332        |
|    fps              | 14          |
|    time_elapsed     | 3440        |
|    total_timesteps  | 50030       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000148    |
|    n_updates        | 12257       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.762    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3336     |
|    fps              | 14       |
|    time_elapsed     | 3444     |
|    total_timesteps  | 50090    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051352937 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.775       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 3400        |
|    fps              | 14          |
|    time_elapsed     | 3509        |
|    total_timesteps  | 51050       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000109    |
|    n_updates        | 12512       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.775    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3404     |
|    fps              | 14       |
|    time_elapsed     | 3513     |
|    total_timesteps  | 51110    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052143294 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.776       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 3464        |
|    fps              | 14          |
|    time_elapsed     | 3574        |
|    total_timesteps  | 52010       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000458    |
|    n_updates        | 12752       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.775    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3468     |
|    fps              | 14       |
|    time_elapsed     | 3578     |
|    total_timesteps  | 52070    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051687106 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.777       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 3532        |
|    fps              | 14          |
|    time_elapsed     | 3643        |
|    total_timesteps  | 53030       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000121    |
|    n_updates        | 13007       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.777    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3536     |
|    fps              | 14       |
|    time_elapsed     | 3647     |
|    total_timesteps  | 53090    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052026648 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.779       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 3600        |
|    fps              | 14          |
|    time_elapsed     | 3712        |
|    total_timesteps  | 54050       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.79e-05    |
|    n_updates        | 13262       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.78     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3604     |
|    fps              | 14       |
|    time_elapsed     | 3716     |
|    total_timesteps  | 54110    |
| train/              |          |
|    le



Eval num_timesteps=55000, episode_reward=0.79 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.785    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 55000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 5.79e-05 |
|    n_updates        | 13499    |
----------------------------------
New best mean reward!
New best mean reward: 0.05239459499716759. Model saved to ./logs/DQN_lr_0.0003_gamma_0.99_20240826-200734/best_model_20240826-200734


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052394595 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.787       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 3664        |
|    fps              | 14          |
|    time_elapsed     | 3786        |
|    total_timesteps  | 55015       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.67e-05    |
|    n_updates        | 13503       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.788    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3668     |
|    fps              | 14       |
|    time_elapsed     | 3790     |
|    total_timesteps  | 55075    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05081217 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.776      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 3732       |
|    fps              | 14         |
|    time_elapsed     | 3856       |
|    total_timesteps  | 56035      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000216   |
|    n_updates        | 13758      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.776    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3736     |
|    fps              | 14       |
|    time_elapsed     | 3860     |
|    total_timesteps  | 56095    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052690096 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.772       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 3800        |
|    fps              | 14          |
|    time_elapsed     | 3928        |
|    total_timesteps  | 57055       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 6.2e-05     |
|    n_updates        | 14013       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.774    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3804     |
|    fps              | 14       |
|    time_elapsed     | 3932     |
|    total_timesteps  | 57115    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05100931 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.771      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 3864       |
|    fps              | 14         |
|    time_elapsed     | 3995       |
|    total_timesteps  | 58015      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 4.11e-05   |
|    n_updates        | 14253      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.772    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3868     |
|    fps              | 14       |
|    time_elapsed     | 3999     |
|    total_timesteps  | 58075    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05184585 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.764      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 3932       |
|    fps              | 14         |
|    time_elapsed     | 4064       |
|    total_timesteps  | 59035      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000166   |
|    n_updates        | 14508      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.764    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3936     |
|    fps              | 14       |
|    time_elapsed     | 4068     |
|    total_timesteps  | 59095    |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=60000, episode_reward=0.82 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.819    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 60000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 7.55e-05 |
|    n_updates        | 14749    |
----------------------------------
New best mean reward!


  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0525729 |
| rollout/            |           |
|    ep_len_mean      | 15.1      |
|    ep_rew_mean      | 0.788     |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 4000      |
|    fps              | 14        |
|    time_elapsed     | 4139      |
|    total_timesteps  | 60060     |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 6.3e-05   |
|    n_updates        | 14764     |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.79     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4004     |
|    fps              | 14       |
|    time_elapsed     | 4143     |
|    total_timesteps  | 60120    |
| train/              |          |
|    learning_rate    | 0.0003   |
|   

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052844293 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.796       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 4064        |
|    fps              | 14          |
|    time_elapsed     | 4204        |
|    total_timesteps  | 61020       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 6.72e-05    |
|    n_updates        | 15004       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.795    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4068     |
|    fps              | 14       |
|    time_elapsed     | 4208     |
|    total_timesteps  | 61080    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05167981 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.78       |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 4132       |
|    fps              | 14         |
|    time_elapsed     | 4275       |
|    total_timesteps  | 62040      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 4.35e-05   |
|    n_updates        | 15259      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.781    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4136     |
|    fps              | 14       |
|    time_elapsed     | 4279     |
|    total_timesteps  | 62100    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.781    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4200     |
|    fps              | 14       |
|    time_elapsed     | 4345     |
|    total_timesteps  | 63060    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 5.59e-05 |
|    n_updates        | 15514    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.766    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4204     |
|    fps              | 14       |
|    time_elapsed     | 4349     |
|    total_timesteps  | 63120    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 6.92e-05 |
|    n_updates      

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.047167864 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.722       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 4264        |
|    fps              | 14          |
|    time_elapsed     | 4411        |
|    total_timesteps  | 64020       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000114    |
|    n_updates        | 15754       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.722    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4268     |
|    fps              | 14       |
|    time_elapsed     | 4415     |
|    total_timesteps  | 64080    |
| train/              |          |
|    le



Eval num_timesteps=65000, episode_reward=0.45 +/- 0.58
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.452    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 65000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000193 |
|    n_updates        | 15999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04548085 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.697      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 4332       |
|    fps              | 14         |
|    time_elapsed     | 4486       |
|    total_timesteps  | 65045      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000284   |
|    n_updates        | 16011      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.683    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4336     |
|    fps              | 14       |
|    time_elapsed     | 4490     |
|    total_timesteps  | 65105    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.032444526 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.542       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 4396        |
|    fps              | 14          |
|    time_elapsed     | 4551        |
|    total_timesteps  | 66005       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00021     |
|    n_updates        | 16251       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.525    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4400     |
|    fps              | 14       |
|    time_elapsed     | 4555     |
|    total_timesteps  | 66065    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.021455398 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.376       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 4464        |
|    fps              | 14          |
|    time_elapsed     | 4620        |
|    total_timesteps  | 67025       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00026     |
|    n_updates        | 16506       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.362    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4468     |
|    fps              | 14       |
|    time_elapsed     | 4625     |
|    total_timesteps  | 67085    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


--------------------------------------
| mean_reward         | 0.0073042857 |
| rollout/            |              |
|    ep_len_mean      | 15           |
|    ep_rew_mean      | 0.187        |
|    exploration_rate | 0.05         |
| time/               |              |
|    episodes         | 4532         |
|    fps              | 14           |
|    time_elapsed     | 4689         |
|    total_timesteps  | 68045        |
| train/              |              |
|    learning_rate    | 0.0003       |
|    loss             | 0.000506     |
|    n_updates        | 16761        |
--------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.174    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4536     |
|    fps              | 14       |
|    time_elapsed     | 4693     |
|    total_timesteps  | 68105    |
| train/              |   

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.014073696 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.2         |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 4596        |
|    fps              | 14          |
|    time_elapsed     | 4756        |
|    total_timesteps  | 69005       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00111     |
|    n_updates        | 17001       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.215    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4600     |
|    fps              | 14       |
|    time_elapsed     | 4760     |
|    total_timesteps  | 69065    |
| train/              |          |
|    le



Eval num_timesteps=70000, episode_reward=0.78 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.777    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 70000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00257  |
|    n_updates        | 17249    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.034148045 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.402       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 4664        |
|    fps              | 14          |
|    time_elapsed     | 4834        |
|    total_timesteps  | 70030       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00429     |
|    n_updates        | 17257       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.433    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4668     |
|    fps              | 14       |
|    time_elapsed     | 4838     |
|    total_timesteps  | 70090    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050384372 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.7         |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 4732        |
|    fps              | 14          |
|    time_elapsed     | 4906        |
|    total_timesteps  | 71050       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00932     |
|    n_updates        | 17512       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.699    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4736     |
|    fps              | 14       |
|    time_elapsed     | 4910     |
|    total_timesteps  | 71110    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04880236 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.739      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 4796       |
|    fps              | 14         |
|    time_elapsed     | 4973       |
|    total_timesteps  | 72010      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00122    |
|    n_updates        | 17752      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.74     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4800     |
|    fps              | 14       |
|    time_elapsed     | 4977     |
|    total_timesteps  | 72070    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05031368 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.744      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 4864       |
|    fps              | 14         |
|    time_elapsed     | 5042       |
|    total_timesteps  | 73030      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00622    |
|    n_updates        | 18007      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.745    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4868     |
|    fps              | 14       |
|    time_elapsed     | 5046     |
|    total_timesteps  | 73090    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05112432 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.749      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 4932       |
|    fps              | 14         |
|    time_elapsed     | 5113       |
|    total_timesteps  | 74050      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00468    |
|    n_updates        | 18262      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.75     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4936     |
|    fps              | 14       |
|    time_elapsed     | 5117     |
|    total_timesteps  | 74110    |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=75000, episode_reward=0.78 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.779    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 75000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00589  |
|    n_updates        | 18499    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050021708 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.756       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 4996        |
|    fps              | 14          |
|    time_elapsed     | 5186        |
|    total_timesteps  | 75015       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00169     |
|    n_updates        | 18503       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.756    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5000     |
|    fps              | 14       |
|    time_elapsed     | 5190     |
|    total_timesteps  | 75075    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049812026 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.757       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 5064        |
|    fps              | 14          |
|    time_elapsed     | 5257        |
|    total_timesteps  | 76035       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00164     |
|    n_updates        | 18758       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.756    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5068     |
|    fps              | 14       |
|    time_elapsed     | 5261     |
|    total_timesteps  | 76095    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051402595 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.77        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 5132        |
|    fps              | 14          |
|    time_elapsed     | 5327        |
|    total_timesteps  | 77055       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00516     |
|    n_updates        | 19013       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.769    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5136     |
|    fps              | 14       |
|    time_elapsed     | 5331     |
|    total_timesteps  | 77115    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051434424 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.771       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 5196        |
|    fps              | 14          |
|    time_elapsed     | 5392        |
|    total_timesteps  | 78015       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00158     |
|    n_updates        | 19253       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.771    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5200     |
|    fps              | 14       |
|    time_elapsed     | 5396     |
|    total_timesteps  | 78075    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051612448 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.775       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 5264        |
|    fps              | 14          |
|    time_elapsed     | 5462        |
|    total_timesteps  | 79035       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00229     |
|    n_updates        | 19508       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.774    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5268     |
|    fps              | 14       |
|    time_elapsed     | 5466     |
|    total_timesteps  | 79095    |
| train/              |          |
|    le



Eval num_timesteps=80000, episode_reward=0.78 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.781    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 80000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000132 |
|    n_updates        | 19749    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04914475 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.753      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 5332       |
|    fps              | 14         |
|    time_elapsed     | 5539       |
|    total_timesteps  | 80060      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000232   |
|    n_updates        | 19764      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.752    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5336     |
|    fps              | 14       |
|    time_elapsed     | 5543     |
|    total_timesteps  | 80120    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05197412 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.769      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 5396       |
|    fps              | 14         |
|    time_elapsed     | 5609       |
|    total_timesteps  | 81020      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 6.99e-05   |
|    n_updates        | 20004      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.77     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5400     |
|    fps              | 14       |
|    time_elapsed     | 5613     |
|    total_timesteps  | 81080    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05250559 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.786      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 5464       |
|    fps              | 14         |
|    time_elapsed     | 5680       |
|    total_timesteps  | 82040      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 9.59e-05   |
|    n_updates        | 20259      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.786    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5468     |
|    fps              | 14       |
|    time_elapsed     | 5684     |
|    total_timesteps  | 82100    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.784    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5532     |
|    fps              | 14       |
|    time_elapsed     | 5751     |
|    total_timesteps  | 83060    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 8.13e-05 |
|    n_updates        | 20514    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.783    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5536     |
|    fps              | 14       |
|    time_elapsed     | 5756     |
|    total_timesteps  | 83120    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000126 |
|    n_updates      

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05202802 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.785      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 5596       |
|    fps              | 14         |
|    time_elapsed     | 5820       |
|    total_timesteps  | 84020      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 3.64e-05   |
|    n_updates        | 20754      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.785    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5600     |
|    fps              | 14       |
|    time_elapsed     | 5824     |
|    total_timesteps  | 84080    |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=85000, episode_reward=0.78 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.785    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 85000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 8.12e-05 |
|    n_updates        | 20999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05244227 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.787      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 5664       |
|    fps              | 14         |
|    time_elapsed     | 5895       |
|    total_timesteps  | 85045      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 6.39e-05   |
|    n_updates        | 21011      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.785    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5668     |
|    fps              | 14       |
|    time_elapsed     | 5900     |
|    total_timesteps  | 85105    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052088108 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.788       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 5728        |
|    fps              | 14          |
|    time_elapsed     | 5963        |
|    total_timesteps  | 86005       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.52e-05    |
|    n_updates        | 21251       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.788    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5732     |
|    fps              | 14       |
|    time_elapsed     | 5968     |
|    total_timesteps  | 86065    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052883524 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.788       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 5796        |
|    fps              | 14          |
|    time_elapsed     | 6037        |
|    total_timesteps  | 87025       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.85e-05    |
|    n_updates        | 21506       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.79     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5800     |
|    fps              | 14       |
|    time_elapsed     | 6042     |
|    total_timesteps  | 87085    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052546848 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.788       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 5864        |
|    fps              | 14          |
|    time_elapsed     | 6110        |
|    total_timesteps  | 88045       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.86e-05    |
|    n_updates        | 21761       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.788    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5868     |
|    fps              | 14       |
|    time_elapsed     | 6115     |
|    total_timesteps  | 88105    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0524429 |
| rollout/            |           |
|    ep_len_mean      | 15        |
|    ep_rew_mean      | 0.791     |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 5928      |
|    fps              | 14        |
|    time_elapsed     | 6179      |
|    total_timesteps  | 89005     |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 0.000193  |
|    n_updates        | 22001     |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.79     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 5932     |
|    fps              | 14       |
|    time_elapsed     | 6183     |
|    total_timesteps  | 89065    |
| train/              |          |
|    learning_rate    | 0.0003   |
|   



Eval num_timesteps=90000, episode_reward=0.78 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.782    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 90000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00015  |
|    n_updates        | 22249    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052394707 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.79        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 5996        |
|    fps              | 14          |
|    time_elapsed     | 6256        |
|    total_timesteps  | 90030       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000112    |
|    n_updates        | 22257       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.789    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6000     |
|    fps              | 14       |
|    time_elapsed     | 6261     |
|    total_timesteps  | 90090    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052347258 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.786       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 6064        |
|    fps              | 14          |
|    time_elapsed     | 6328        |
|    total_timesteps  | 91050       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.07e-05    |
|    n_updates        | 22512       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.787    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6068     |
|    fps              | 14       |
|    time_elapsed     | 6332     |
|    total_timesteps  | 91110    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052175026 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.783       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 6128        |
|    fps              | 14          |
|    time_elapsed     | 6396        |
|    total_timesteps  | 92010       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.99e-05    |
|    n_updates        | 22752       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.783    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6132     |
|    fps              | 14       |
|    time_elapsed     | 6400     |
|    total_timesteps  | 92070    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05203847 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.778      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 6196       |
|    fps              | 14         |
|    time_elapsed     | 6467       |
|    total_timesteps  | 93030      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 3.98e-05   |
|    n_updates        | 23007      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.779    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6200     |
|    fps              | 14       |
|    time_elapsed     | 6471     |
|    total_timesteps  | 93090    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05227162 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.785      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 6264       |
|    fps              | 14         |
|    time_elapsed     | 6540       |
|    total_timesteps  | 94050      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.97e-05   |
|    n_updates        | 23262      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.785    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6268     |
|    fps              | 14       |
|    time_elapsed     | 6545     |
|    total_timesteps  | 94110    |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=95000, episode_reward=0.79 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.794    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 95000    |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 5.38e-05 |
|    n_updates        | 23499    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05275604 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.791      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 6328       |
|    fps              | 14         |
|    time_elapsed     | 6615       |
|    total_timesteps  | 95015      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.77e-05   |
|    n_updates        | 23503      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.792    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6332     |
|    fps              | 14       |
|    time_elapsed     | 6619     |
|    total_timesteps  | 95075    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052441884 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.792       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 6396        |
|    fps              | 14          |
|    time_elapsed     | 6686        |
|    total_timesteps  | 96035       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.2e-05     |
|    n_updates        | 23758       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.791    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6400     |
|    fps              | 14       |
|    time_elapsed     | 6690     |
|    total_timesteps  | 96095    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050837733 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.77        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 6464        |
|    fps              | 14          |
|    time_elapsed     | 6755        |
|    total_timesteps  | 97055       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.82e-05    |
|    n_updates        | 24013       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.77     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6468     |
|    fps              | 14       |
|    time_elapsed     | 6760     |
|    total_timesteps  | 97115    |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05208316 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.782      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 6528       |
|    fps              | 14         |
|    time_elapsed     | 6823       |
|    total_timesteps  | 98015      |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 3.24e-05   |
|    n_updates        | 24253      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.782    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6532     |
|    fps              | 14       |
|    time_elapsed     | 6827     |
|    total_timesteps  | 98075    |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052401997 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.785       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 6596        |
|    fps              | 14          |
|    time_elapsed     | 6893        |
|    total_timesteps  | 99035       |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.63e-05    |
|    n_updates        | 24508       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.786    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6600     |
|    fps              | 14       |
|    time_elapsed     | 6897     |
|    total_timesteps  | 99095    |
| train/              |          |
|    le



Eval num_timesteps=100000, episode_reward=0.80 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.802    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 100000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 2.11e-05 |
|    n_updates        | 24749    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05233799 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.785      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 6664       |
|    fps              | 14         |
|    time_elapsed     | 6970       |
|    total_timesteps  | 100060     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.62e-05   |
|    n_updates        | 24764      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.787    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6668     |
|    fps              | 14       |
|    time_elapsed     | 6974     |
|    total_timesteps  | 100120   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05246544 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.788      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 6728       |
|    fps              | 14         |
|    time_elapsed     | 7038       |
|    total_timesteps  | 101020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.4e-05    |
|    n_updates        | 25004      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.788    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6732     |
|    fps              | 14       |
|    time_elapsed     | 7042     |
|    total_timesteps  | 101080   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052593812 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.787       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 6796        |
|    fps              | 14          |
|    time_elapsed     | 7106        |
|    total_timesteps  | 102040      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.48e-05    |
|    n_updates        | 25259       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.788    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6800     |
|    fps              | 14       |
|    time_elapsed     | 7111     |
|    total_timesteps  | 102100   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.789    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6864     |
|    fps              | 14       |
|    time_elapsed     | 7177     |
|    total_timesteps  | 103060   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 2.78e-05 |
|    n_updates        | 25514    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.789    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6868     |
|    fps              | 14       |
|    time_elapsed     | 7181     |
|    total_timesteps  | 103120   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 1.98e-05 |
|    n_updates      

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.048737895 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.752       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 6928        |
|    fps              | 14          |
|    time_elapsed     | 7244        |
|    total_timesteps  | 104020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.34e-05    |
|    n_updates        | 25754       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.736    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 6932     |
|    fps              | 14       |
|    time_elapsed     | 7248     |
|    total_timesteps  | 104080   |
| train/              |          |
|    le



Eval num_timesteps=105000, episode_reward=0.75 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.752    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 105000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000119 |
|    n_updates        | 25999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.032331146 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.55        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 6996        |
|    fps              | 14          |
|    time_elapsed     | 7318        |
|    total_timesteps  | 105045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 7.78e-05    |
|    n_updates        | 26011       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.534    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7000     |
|    fps              | 14       |
|    time_elapsed     | 7322     |
|    total_timesteps  | 105105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.032784484 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.455       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 7060        |
|    fps              | 14          |
|    time_elapsed     | 7383        |
|    total_timesteps  | 106005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000288    |
|    n_updates        | 26251       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.469    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7064     |
|    fps              | 14       |
|    time_elapsed     | 7387     |
|    total_timesteps  | 106065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.01775002 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.341      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 7128       |
|    fps              | 14         |
|    time_elapsed     | 7452       |
|    total_timesteps  | 107025     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.0071     |
|    n_updates        | 26506      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.328    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7132     |
|    fps              | 14       |
|    time_elapsed     | 7456     |
|    total_timesteps  | 107085   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.013248633 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.15        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 7196        |
|    fps              | 14          |
|    time_elapsed     | 7519        |
|    total_timesteps  | 108045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00673     |
|    n_updates        | 26761       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.15     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7200     |
|    fps              | 14       |
|    time_elapsed     | 7523     |
|    total_timesteps  | 108105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.034111198 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.455       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 7260        |
|    fps              | 14          |
|    time_elapsed     | 7581        |
|    total_timesteps  | 109005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.0087      |
|    n_updates        | 27001       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.485    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7264     |
|    fps              | 14       |
|    time_elapsed     | 7585     |
|    total_timesteps  | 109065   |
| train/              |          |
|    le



Eval num_timesteps=110000, episode_reward=0.45 +/- 0.56
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.453    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 110000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00332  |
|    n_updates        | 27249    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.031241002 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.497       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 7328        |
|    fps              | 14          |
|    time_elapsed     | 7656        |
|    total_timesteps  | 110030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00181     |
|    n_updates        | 27257       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.499    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7332     |
|    fps              | 14       |
|    time_elapsed     | 7660     |
|    total_timesteps  | 110090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.041801993 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.598       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 7396        |
|    fps              | 14          |
|    time_elapsed     | 7724        |
|    total_timesteps  | 111050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00188     |
|    n_updates        | 27512       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.568    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7400     |
|    fps              | 14       |
|    time_elapsed     | 7727     |
|    total_timesteps  | 111110   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.039784197 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.597       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 7460        |
|    fps              | 14          |
|    time_elapsed     | 7787        |
|    total_timesteps  | 112010      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00114     |
|    n_updates        | 27752       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.595    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7464     |
|    fps              | 14       |
|    time_elapsed     | 7791     |
|    total_timesteps  | 112070   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04933099 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.728      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 7528       |
|    fps              | 14         |
|    time_elapsed     | 7857       |
|    total_timesteps  | 113030     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000822   |
|    n_updates        | 28007      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.731    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7532     |
|    fps              | 14       |
|    time_elapsed     | 7862     |
|    total_timesteps  | 113090   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050876275 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.765       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 7596        |
|    fps              | 14          |
|    time_elapsed     | 7926        |
|    total_timesteps  | 114050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00288     |
|    n_updates        | 28262       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.75     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7600     |
|    fps              | 14       |
|    time_elapsed     | 7930     |
|    total_timesteps  | 114110   |
| train/              |          |
|    le



Eval num_timesteps=115000, episode_reward=0.75 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.75     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 115000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00241  |
|    n_updates        | 28499    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.043378204 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.69        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 7660        |
|    fps              | 14          |
|    time_elapsed     | 7996        |
|    total_timesteps  | 115015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00266     |
|    n_updates        | 28503       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.689    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7664     |
|    fps              | 14       |
|    time_elapsed     | 8000     |
|    total_timesteps  | 115075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050673634 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.733       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 7728        |
|    fps              | 14          |
|    time_elapsed     | 8063        |
|    total_timesteps  | 116035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00162     |
|    n_updates        | 28758       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.762    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7732     |
|    fps              | 14       |
|    time_elapsed     | 8067     |
|    total_timesteps  | 116095   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.046488747 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.716       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 7796        |
|    fps              | 14          |
|    time_elapsed     | 8131        |
|    total_timesteps  | 117055      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00281     |
|    n_updates        | 29013       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.716    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7800     |
|    fps              | 14       |
|    time_elapsed     | 8135     |
|    total_timesteps  | 117115   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049859863 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.723       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 7860        |
|    fps              | 14          |
|    time_elapsed     | 8195        |
|    total_timesteps  | 118015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00457     |
|    n_updates        | 29253       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.723    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7864     |
|    fps              | 14       |
|    time_elapsed     | 8199     |
|    total_timesteps  | 118075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04926387 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.753      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 7928       |
|    fps              | 14         |
|    time_elapsed     | 8263       |
|    total_timesteps  | 119035     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00107    |
|    n_updates        | 29508      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.754    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 7932     |
|    fps              | 14       |
|    time_elapsed     | 8267     |
|    total_timesteps  | 119095   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=120000, episode_reward=0.79 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.786    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 120000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000233 |
|    n_updates        | 29749    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049487863 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.752       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 7996        |
|    fps              | 14          |
|    time_elapsed     | 8337        |
|    total_timesteps  | 120060      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.0142      |
|    n_updates        | 29764       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.738    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8000     |
|    fps              | 14       |
|    time_elapsed     | 8341     |
|    total_timesteps  | 120120   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.046462152 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.711       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 8060        |
|    fps              | 14          |
|    time_elapsed     | 8400        |
|    total_timesteps  | 121020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000178    |
|    n_updates        | 30004       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.71     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8064     |
|    fps              | 14       |
|    time_elapsed     | 8404     |
|    total_timesteps  | 121080   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051979452 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.757       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 8128        |
|    fps              | 14          |
|    time_elapsed     | 8467        |
|    total_timesteps  | 122040      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000128    |
|    n_updates        | 30259       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.758    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8132     |
|    fps              | 14       |
|    time_elapsed     | 8471     |
|    total_timesteps  | 122100   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.749    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8196     |
|    fps              | 14       |
|    time_elapsed     | 8538     |
|    total_timesteps  | 123060   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000163 |
|    n_updates        | 30514    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.749    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8200     |
|    fps              | 14       |
|    time_elapsed     | 8542     |
|    total_timesteps  | 123120   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00517  |
|    n_updates      

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04924108 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.738      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 8260       |
|    fps              | 14         |
|    time_elapsed     | 8602       |
|    total_timesteps  | 124020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000707   |
|    n_updates        | 30754      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.74     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8264     |
|    fps              | 14       |
|    time_elapsed     | 8606     |
|    total_timesteps  | 124080   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=125000, episode_reward=0.75 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.751    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 125000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000167 |
|    n_updates        | 30999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04687416 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.719      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 8328       |
|    fps              | 14         |
|    time_elapsed     | 8677       |
|    total_timesteps  | 125045     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00025    |
|    n_updates        | 31011      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.719    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8332     |
|    fps              | 14       |
|    time_elapsed     | 8681     |
|    total_timesteps  | 125105   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.044039074 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.683       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 8392        |
|    fps              | 14          |
|    time_elapsed     | 8744        |
|    total_timesteps  | 126005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000167    |
|    n_updates        | 31251       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.682    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8396     |
|    fps              | 14       |
|    time_elapsed     | 8748     |
|    total_timesteps  | 126065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.046446413 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.702       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 8460        |
|    fps              | 14          |
|    time_elapsed     | 8816        |
|    total_timesteps  | 127025      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.0064      |
|    n_updates        | 31506       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.703    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8464     |
|    fps              | 14       |
|    time_elapsed     | 8820     |
|    total_timesteps  | 127085   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050287507 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.731       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 8528        |
|    fps              | 14          |
|    time_elapsed     | 8886        |
|    total_timesteps  | 128045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000302    |
|    n_updates        | 31761       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.716    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8532     |
|    fps              | 14       |
|    time_elapsed     | 8890     |
|    total_timesteps  | 128105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.040375613 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.666       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 8592        |
|    fps              | 14          |
|    time_elapsed     | 8951        |
|    total_timesteps  | 129005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000192    |
|    n_updates        | 32001       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.663    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8596     |
|    fps              | 14       |
|    time_elapsed     | 8955     |
|    total_timesteps  | 129065   |
| train/              |          |
|    le



Eval num_timesteps=130000, episode_reward=0.81 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.809    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 130000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000551 |
|    n_updates        | 32249    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.042036284 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.598       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 8660        |
|    fps              | 14          |
|    time_elapsed     | 9024        |
|    total_timesteps  | 130030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000205    |
|    n_updates        | 32257       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.614    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8664     |
|    fps              | 14       |
|    time_elapsed     | 9028     |
|    total_timesteps  | 130090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05092093 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.755      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 8728       |
|    fps              | 14         |
|    time_elapsed     | 9092       |
|    total_timesteps  | 131050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000467   |
|    n_updates        | 32512      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.77     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8732     |
|    fps              | 14       |
|    time_elapsed     | 9096     |
|    total_timesteps  | 131110   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050904706 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.763       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 8792        |
|    fps              | 14          |
|    time_elapsed     | 9155        |
|    total_timesteps  | 132010      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000256    |
|    n_updates        | 32752       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.762    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8796     |
|    fps              | 14       |
|    time_elapsed     | 9159     |
|    total_timesteps  | 132070   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.048202332 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.734       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 8860        |
|    fps              | 14          |
|    time_elapsed     | 9225        |
|    total_timesteps  | 133030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.0005      |
|    n_updates        | 33007       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.733    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8864     |
|    fps              | 14       |
|    time_elapsed     | 9229     |
|    total_timesteps  | 133090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.047789928 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.738       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 8928        |
|    fps              | 14          |
|    time_elapsed     | 9295        |
|    total_timesteps  | 134050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000141    |
|    n_updates        | 33262       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.74     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8932     |
|    fps              | 14       |
|    time_elapsed     | 9299     |
|    total_timesteps  | 134110   |
| train/              |          |
|    le



Eval num_timesteps=135000, episode_reward=0.77 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.775    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 135000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000203 |
|    n_updates        | 33499    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050241333 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.741       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 8992        |
|    fps              | 14          |
|    time_elapsed     | 9366        |
|    total_timesteps  | 135015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000117    |
|    n_updates        | 33503       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.744    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8996     |
|    fps              | 14       |
|    time_elapsed     | 9370     |
|    total_timesteps  | 135075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04967263 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.745      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 9060       |
|    fps              | 14         |
|    time_elapsed     | 9433       |
|    total_timesteps  | 136035     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000116   |
|    n_updates        | 33758      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.76     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9064     |
|    fps              | 14       |
|    time_elapsed     | 9437     |
|    total_timesteps  | 136095   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050193064 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.743       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 9128        |
|    fps              | 14          |
|    time_elapsed     | 9501        |
|    total_timesteps  | 137055      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000136    |
|    n_updates        | 34013       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.744    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9132     |
|    fps              | 14       |
|    time_elapsed     | 9505     |
|    total_timesteps  | 137115   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0484907 |
| rollout/            |           |
|    ep_len_mean      | 15        |
|    ep_rew_mean      | 0.725     |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 9192      |
|    fps              | 14        |
|    time_elapsed     | 9565      |
|    total_timesteps  | 138015    |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 0.000509  |
|    n_updates        | 34253     |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.726    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9196     |
|    fps              | 14       |
|    time_elapsed     | 9569     |
|    total_timesteps  | 138075   |
| train/              |          |
|    learning_rate    | 0.0003   |
|   

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04826002 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.711      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 9260       |
|    fps              | 14         |
|    time_elapsed     | 9635       |
|    total_timesteps  | 139035     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000124   |
|    n_updates        | 34508      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.71     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9264     |
|    fps              | 14       |
|    time_elapsed     | 9639     |
|    total_timesteps  | 139095   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=140000, episode_reward=0.81 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.807    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 140000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00059  |
|    n_updates        | 34749    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050601143 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.751       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 9328        |
|    fps              | 14          |
|    time_elapsed     | 9709        |
|    total_timesteps  | 140060      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000276    |
|    n_updates        | 34764       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.751    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9332     |
|    fps              | 14       |
|    time_elapsed     | 9713     |
|    total_timesteps  | 140120   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050630298 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.771       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 9392        |
|    fps              | 14          |
|    time_elapsed     | 9775        |
|    total_timesteps  | 141020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 9.39e-05    |
|    n_updates        | 35004       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.77     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9396     |
|    fps              | 14       |
|    time_elapsed     | 9779     |
|    total_timesteps  | 141080   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.048412353 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.729       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 9460        |
|    fps              | 14          |
|    time_elapsed     | 9844        |
|    total_timesteps  | 142040      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 6.61e-05    |
|    n_updates        | 35259       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.744    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9464     |
|    fps              | 14       |
|    time_elapsed     | 9848     |
|    total_timesteps  | 142100   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.774    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9528     |
|    fps              | 14       |
|    time_elapsed     | 9912     |
|    total_timesteps  | 143060   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000462 |
|    n_updates        | 35514    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.787    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9532     |
|    fps              | 14       |
|    time_elapsed     | 9916     |
|    total_timesteps  | 143120   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000218 |
|    n_updates      

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05176866 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.767      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 9592       |
|    fps              | 14         |
|    time_elapsed     | 9978       |
|    total_timesteps  | 144020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000133   |
|    n_updates        | 35754      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.767    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9596     |
|    fps              | 14       |
|    time_elapsed     | 9982     |
|    total_timesteps  | 144080   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=145000, episode_reward=0.75 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.747    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 145000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000213 |
|    n_updates        | 35999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04930366 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.75       |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 9660       |
|    fps              | 14         |
|    time_elapsed     | 10051      |
|    total_timesteps  | 145045     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000146   |
|    n_updates        | 36011      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.752    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9664     |
|    fps              | 14       |
|    time_elapsed     | 10055    |
|    total_timesteps  | 145105   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050091136 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.752       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 9724        |
|    fps              | 14          |
|    time_elapsed     | 10114       |
|    total_timesteps  | 146005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00119     |
|    n_updates        | 36251       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.751    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9728     |
|    fps              | 14       |
|    time_elapsed     | 10118    |
|    total_timesteps  | 146065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04512162 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.708      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 9792       |
|    fps              | 14         |
|    time_elapsed     | 10180      |
|    total_timesteps  | 147025     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00123    |
|    n_updates        | 36506      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.705    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9796     |
|    fps              | 14       |
|    time_elapsed     | 10184    |
|    total_timesteps  | 147085   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.048027445 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.712       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 9860        |
|    fps              | 14          |
|    time_elapsed     | 10247       |
|    total_timesteps  | 148045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 6.93e-05    |
|    n_updates        | 36761       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.712    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9864     |
|    fps              | 14       |
|    time_elapsed     | 10251    |
|    total_timesteps  | 148105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051826466 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.777       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 9924        |
|    fps              | 14          |
|    time_elapsed     | 10310       |
|    total_timesteps  | 149005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00541     |
|    n_updates        | 37001       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.775    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9928     |
|    fps              | 14       |
|    time_elapsed     | 10314    |
|    total_timesteps  | 149065   |
| train/              |          |
|    le



Eval num_timesteps=150000, episode_reward=0.73 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.731    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 150000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000108 |
|    n_updates        | 37249    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051902175 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.778       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 9992        |
|    fps              | 14          |
|    time_elapsed     | 10382       |
|    total_timesteps  | 150030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 9.39e-05    |
|    n_updates        | 37257       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.778    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 9996     |
|    fps              | 14       |
|    time_elapsed     | 10386    |
|    total_timesteps  | 150090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049638364 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.757       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 10060       |
|    fps              | 14          |
|    time_elapsed     | 10452       |
|    total_timesteps  | 151050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000117    |
|    n_updates        | 37512       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.757    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10064    |
|    fps              | 14       |
|    time_elapsed     | 10456    |
|    total_timesteps  | 151110   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.047238424 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.729       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 10124       |
|    fps              | 14          |
|    time_elapsed     | 10518       |
|    total_timesteps  | 152010      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000116    |
|    n_updates        | 37752       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.729    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10128    |
|    fps              | 14       |
|    time_elapsed     | 10523    |
|    total_timesteps  | 152070   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.046875075 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.728       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 10192       |
|    fps              | 14          |
|    time_elapsed     | 10593       |
|    total_timesteps  | 153030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000331    |
|    n_updates        | 38007       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.727    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10196    |
|    fps              | 14       |
|    time_elapsed     | 10597    |
|    total_timesteps  | 153090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050065313 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.714       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 10260       |
|    fps              | 14          |
|    time_elapsed     | 10665       |
|    total_timesteps  | 154050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000717    |
|    n_updates        | 38262       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.696    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10264    |
|    fps              | 14       |
|    time_elapsed     | 10669    |
|    total_timesteps  | 154110   |
| train/              |          |
|    le



Eval num_timesteps=155000, episode_reward=0.79 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.789    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 155000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000153 |
|    n_updates        | 38499    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.042456016 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.671       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 10324       |
|    fps              | 14          |
|    time_elapsed     | 10739       |
|    total_timesteps  | 155015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000371    |
|    n_updates        | 38503       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.671    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10328    |
|    fps              | 14       |
|    time_elapsed     | 10743    |
|    total_timesteps  | 155075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04957442 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.73       |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 10392      |
|    fps              | 14         |
|    time_elapsed     | 10809      |
|    total_timesteps  | 156035     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00288    |
|    n_updates        | 38758      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.731    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10396    |
|    fps              | 14       |
|    time_elapsed     | 10813    |
|    total_timesteps  | 156095   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050143838 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.74        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 10460       |
|    fps              | 14          |
|    time_elapsed     | 10878       |
|    total_timesteps  | 157055      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00013     |
|    n_updates        | 39013       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.739    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10464    |
|    fps              | 14       |
|    time_elapsed     | 10882    |
|    total_timesteps  | 157115   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051292088 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.771       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 10524       |
|    fps              | 14          |
|    time_elapsed     | 10944       |
|    total_timesteps  | 158015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00392     |
|    n_updates        | 39253       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.77     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10528    |
|    fps              | 14       |
|    time_elapsed     | 10948    |
|    total_timesteps  | 158075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051197577 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.773       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 10592       |
|    fps              | 14          |
|    time_elapsed     | 11013       |
|    total_timesteps  | 159035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000561    |
|    n_updates        | 39508       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.775    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10596    |
|    fps              | 14       |
|    time_elapsed     | 11018    |
|    total_timesteps  | 159095   |
| train/              |          |
|    le



Eval num_timesteps=160000, episode_reward=0.77 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.773    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 160000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00136  |
|    n_updates        | 39749    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052581284 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.782       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 10660       |
|    fps              | 14          |
|    time_elapsed     | 11090       |
|    total_timesteps  | 160060      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000124    |
|    n_updates        | 39764       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.782    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10664    |
|    fps              | 14       |
|    time_elapsed     | 11094    |
|    total_timesteps  | 160120   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04998326 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.764      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 10724      |
|    fps              | 14         |
|    time_elapsed     | 11155      |
|    total_timesteps  | 161020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00218    |
|    n_updates        | 40004      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.75     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10728    |
|    fps              | 14       |
|    time_elapsed     | 11159    |
|    total_timesteps  | 161080   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050790276 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.769       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 10792       |
|    fps              | 14          |
|    time_elapsed     | 11226       |
|    total_timesteps  | 162040      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.9e-05     |
|    n_updates        | 40259       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.768    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10796    |
|    fps              | 14       |
|    time_elapsed     | 11230    |
|    total_timesteps  | 162100   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.785    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10860    |
|    fps              | 14       |
|    time_elapsed     | 11296    |
|    total_timesteps  | 163060   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000151 |
|    n_updates        | 40514    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.784    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10864    |
|    fps              | 14       |
|    time_elapsed     | 11300    |
|    total_timesteps  | 163120   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 7.34e-05 |
|    n_updates      

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052484307 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.788       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 10924       |
|    fps              | 14          |
|    time_elapsed     | 11361       |
|    total_timesteps  | 164020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 6.13e-05    |
|    n_updates        | 40754       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.788    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10928    |
|    fps              | 14       |
|    time_elapsed     | 11366    |
|    total_timesteps  | 164080   |
| train/              |          |
|    le



Eval num_timesteps=165000, episode_reward=0.79 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.791    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 165000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000129 |
|    n_updates        | 40999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052667968 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.793       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 10992       |
|    fps              | 14          |
|    time_elapsed     | 11439       |
|    total_timesteps  | 165045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.98e-05    |
|    n_updates        | 41011       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.793    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 10996    |
|    fps              | 14       |
|    time_elapsed     | 11443    |
|    total_timesteps  | 165105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052995615 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.795       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 11056       |
|    fps              | 14          |
|    time_elapsed     | 11507       |
|    total_timesteps  | 166005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.78e-05    |
|    n_updates        | 41251       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.794    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11060    |
|    fps              | 14       |
|    time_elapsed     | 11512    |
|    total_timesteps  | 166065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050349534 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.77        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 11124       |
|    fps              | 14          |
|    time_elapsed     | 11581       |
|    total_timesteps  | 167025      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.05e-05    |
|    n_updates        | 41506       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.77     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11128    |
|    fps              | 14       |
|    time_elapsed     | 11585    |
|    total_timesteps  | 167085   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.042737495 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.663       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 11192       |
|    fps              | 14          |
|    time_elapsed     | 11655       |
|    total_timesteps  | 168045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4e-05       |
|    n_updates        | 41761       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.663    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11196    |
|    fps              | 14       |
|    time_elapsed     | 11659    |
|    total_timesteps  | 168105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051894456 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.746       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 11256       |
|    fps              | 14          |
|    time_elapsed     | 11724       |
|    total_timesteps  | 169005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 7.79e-05    |
|    n_updates        | 42001       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.744    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11260    |
|    fps              | 14       |
|    time_elapsed     | 11728    |
|    total_timesteps  | 169065   |
| train/              |          |
|    le



Eval num_timesteps=170000, episode_reward=0.17 +/- 0.68
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.174    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 170000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000114 |
|    n_updates        | 42249    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049837977 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.76        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 11324       |
|    fps              | 14          |
|    time_elapsed     | 11800       |
|    total_timesteps  | 170030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000421    |
|    n_updates        | 42257       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.759    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11328    |
|    fps              | 14       |
|    time_elapsed     | 11804    |
|    total_timesteps  | 170090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049931563 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.745       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 11392       |
|    fps              | 14          |
|    time_elapsed     | 11870       |
|    total_timesteps  | 171050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000117    |
|    n_updates        | 42512       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.746    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11396    |
|    fps              | 14       |
|    time_elapsed     | 11874    |
|    total_timesteps  | 171110   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052549094 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.785       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 11456       |
|    fps              | 14          |
|    time_elapsed     | 11937       |
|    total_timesteps  | 172010      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00132     |
|    n_updates        | 42752       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.785    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11460    |
|    fps              | 14       |
|    time_elapsed     | 11941    |
|    total_timesteps  | 172070   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05133235 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.778      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 11524      |
|    fps              | 14         |
|    time_elapsed     | 12011      |
|    total_timesteps  | 173030     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00199    |
|    n_updates        | 43007      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.78     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11528    |
|    fps              | 14       |
|    time_elapsed     | 12015    |
|    total_timesteps  | 173090   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053190712 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.799       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 11592       |
|    fps              | 14          |
|    time_elapsed     | 12084       |
|    total_timesteps  | 174050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000125    |
|    n_updates        | 43262       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.799    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11596    |
|    fps              | 14       |
|    time_elapsed     | 12088    |
|    total_timesteps  | 174110   |
| train/              |          |
|    le



Eval num_timesteps=175000, episode_reward=0.82 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.816    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 175000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000181 |
|    n_updates        | 43499    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04824279 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.75       |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 11656      |
|    fps              | 14         |
|    time_elapsed     | 12158      |
|    total_timesteps  | 175015     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000102   |
|    n_updates        | 43503      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.749    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11660    |
|    fps              | 14       |
|    time_elapsed     | 12163    |
|    total_timesteps  | 175075   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050105277 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.751       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 11724       |
|    fps              | 14          |
|    time_elapsed     | 12231       |
|    total_timesteps  | 176035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.97e-05    |
|    n_updates        | 43758       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.751    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11728    |
|    fps              | 14       |
|    time_elapsed     | 12236    |
|    total_timesteps  | 176095   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.048775293 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.748       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 11792       |
|    fps              | 14          |
|    time_elapsed     | 12301       |
|    total_timesteps  | 177055      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000375    |
|    n_updates        | 44013       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.747    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11796    |
|    fps              | 14       |
|    time_elapsed     | 12305    |
|    total_timesteps  | 177115   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052716736 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.759       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 11856       |
|    fps              | 14          |
|    time_elapsed     | 12367       |
|    total_timesteps  | 178015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.78e-05    |
|    n_updates        | 44253       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.773    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11860    |
|    fps              | 14       |
|    time_elapsed     | 12371    |
|    total_timesteps  | 178075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051958345 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.785       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 11924       |
|    fps              | 14          |
|    time_elapsed     | 12437       |
|    total_timesteps  | 179035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 7.24e-05    |
|    n_updates        | 44508       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.785    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11928    |
|    fps              | 14       |
|    time_elapsed     | 12441    |
|    total_timesteps  | 179095   |
| train/              |          |
|    le



Eval num_timesteps=180000, episode_reward=0.78 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.785    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 180000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.004    |
|    n_updates        | 44749    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050615713 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.766       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 11992       |
|    fps              | 14          |
|    time_elapsed     | 12512       |
|    total_timesteps  | 180060      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 6.58e-05    |
|    n_updates        | 44764       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.766    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 11996    |
|    fps              | 14       |
|    time_elapsed     | 12516    |
|    total_timesteps  | 180120   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049464237 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.746       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 12056       |
|    fps              | 14          |
|    time_elapsed     | 12578       |
|    total_timesteps  | 181020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000538    |
|    n_updates        | 45004       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.746    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12060    |
|    fps              | 14       |
|    time_elapsed     | 12582    |
|    total_timesteps  | 181080   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04834436 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.742      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 12124      |
|    fps              | 14         |
|    time_elapsed     | 12647      |
|    total_timesteps  | 182040     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00284    |
|    n_updates        | 45259      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.742    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12128    |
|    fps              | 14       |
|    time_elapsed     | 12652    |
|    total_timesteps  | 182100   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.762    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12192    |
|    fps              | 14       |
|    time_elapsed     | 12718    |
|    total_timesteps  | 183060   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.0001   |
|    n_updates        | 45514    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.763    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12196    |
|    fps              | 14       |
|    time_elapsed     | 12722    |
|    total_timesteps  | 183120   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 8.04e-05 |
|    n_updates      

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04863419 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.75       |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 12256      |
|    fps              | 14         |
|    time_elapsed     | 12786      |
|    total_timesteps  | 184020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00303    |
|    n_updates        | 45754      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.748    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12260    |
|    fps              | 14       |
|    time_elapsed     | 12790    |
|    total_timesteps  | 184080   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=185000, episode_reward=0.81 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.814    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 185000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000291 |
|    n_updates        | 45999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05237658 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.783      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 12324      |
|    fps              | 14         |
|    time_elapsed     | 12863      |
|    total_timesteps  | 185045     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000121   |
|    n_updates        | 46011      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.784    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12328    |
|    fps              | 14       |
|    time_elapsed     | 12867    |
|    total_timesteps  | 185105   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052916087 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.796       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 12388       |
|    fps              | 14          |
|    time_elapsed     | 12931       |
|    total_timesteps  | 186005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00129     |
|    n_updates        | 46251       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.796    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12392    |
|    fps              | 14       |
|    time_elapsed     | 12935    |
|    total_timesteps  | 186065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0524755 |
| rollout/            |           |
|    ep_len_mean      | 15        |
|    ep_rew_mean      | 0.775     |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 12456     |
|    fps              | 14        |
|    time_elapsed     | 13002     |
|    total_timesteps  | 187025    |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 0.00148   |
|    n_updates        | 46506     |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.774    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12460    |
|    fps              | 14       |
|    time_elapsed     | 13006    |
|    total_timesteps  | 187085   |
| train/              |          |
|    learning_rate    | 0.0003   |
|   

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.046828225 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.715       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 12524       |
|    fps              | 14          |
|    time_elapsed     | 13076       |
|    total_timesteps  | 188045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 9.86e-05    |
|    n_updates        | 46761       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.702    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12528    |
|    fps              | 14       |
|    time_elapsed     | 13080    |
|    total_timesteps  | 188105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049187824 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.713       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 12588       |
|    fps              | 14          |
|    time_elapsed     | 13142       |
|    total_timesteps  | 189005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 7.42e-05    |
|    n_updates        | 47001       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.713    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12592    |
|    fps              | 14       |
|    time_elapsed     | 13146    |
|    total_timesteps  | 189065   |
| train/              |          |
|    le



Eval num_timesteps=190000, episode_reward=0.80 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.8      |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 190000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000335 |
|    n_updates        | 47249    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05283166 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.79       |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 12656      |
|    fps              | 14         |
|    time_elapsed     | 13217      |
|    total_timesteps  | 190030     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.0033     |
|    n_updates        | 47257      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.789    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12660    |
|    fps              | 14       |
|    time_elapsed     | 13221    |
|    total_timesteps  | 190090   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0496659 |
| rollout/            |           |
|    ep_len_mean      | 15.1      |
|    ep_rew_mean      | 0.764     |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 12724     |
|    fps              | 14        |
|    time_elapsed     | 13287     |
|    total_timesteps  | 191050    |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 4.83e-05  |
|    n_updates        | 47512     |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.763    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12728    |
|    fps              | 14       |
|    time_elapsed     | 13292    |
|    total_timesteps  | 191110   |
| train/              |          |
|    learning_rate    | 0.0003   |
|   

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052684937 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.79        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 12788       |
|    fps              | 14          |
|    time_elapsed     | 13353       |
|    total_timesteps  | 192010      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.37e-05    |
|    n_updates        | 47752       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.789    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12792    |
|    fps              | 14       |
|    time_elapsed     | 13358    |
|    total_timesteps  | 192070   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05247789 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.788      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 12856      |
|    fps              | 14         |
|    time_elapsed     | 13424      |
|    total_timesteps  | 193030     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 8.13e-05   |
|    n_updates        | 48007      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.786    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12860    |
|    fps              | 14       |
|    time_elapsed     | 13428    |
|    total_timesteps  | 193090   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04970013 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.761      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 12924      |
|    fps              | 14         |
|    time_elapsed     | 13494      |
|    total_timesteps  | 194050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 8.41e-05   |
|    n_updates        | 48262      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.762    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12928    |
|    fps              | 14       |
|    time_elapsed     | 13498    |
|    total_timesteps  | 194110   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=195000, episode_reward=0.78 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.777    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 195000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 5.86e-05 |
|    n_updates        | 48499    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05139715 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.781      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 12988      |
|    fps              | 14         |
|    time_elapsed     | 13565      |
|    total_timesteps  | 195015     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 3.24e-05   |
|    n_updates        | 48503      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.782    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 12992    |
|    fps              | 14       |
|    time_elapsed     | 13569    |
|    total_timesteps  | 195075   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049804486 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.754       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 13056       |
|    fps              | 14          |
|    time_elapsed     | 13635       |
|    total_timesteps  | 196035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.27e-05    |
|    n_updates        | 48758       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.753    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13060    |
|    fps              | 14       |
|    time_elapsed     | 13640    |
|    total_timesteps  | 196095   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05325026 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.78       |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 13124      |
|    fps              | 14         |
|    time_elapsed     | 13707      |
|    total_timesteps  | 197055     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 7.73e-05   |
|    n_updates        | 49013      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.782    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13128    |
|    fps              | 14       |
|    time_elapsed     | 13711    |
|    total_timesteps  | 197115   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049759977 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.762       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 13188       |
|    fps              | 14          |
|    time_elapsed     | 13770       |
|    total_timesteps  | 198015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.64e-05    |
|    n_updates        | 49253       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.762    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13192    |
|    fps              | 14       |
|    time_elapsed     | 13774    |
|    total_timesteps  | 198075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05215779 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.77       |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 13256      |
|    fps              | 14         |
|    time_elapsed     | 13838      |
|    total_timesteps  | 199035     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 7.96e-05   |
|    n_updates        | 49508      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.771    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13260    |
|    fps              | 14       |
|    time_elapsed     | 13841    |
|    total_timesteps  | 199095   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=200000, episode_reward=0.78 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.783    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 200000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000205 |
|    n_updates        | 49749    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052793663 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.785       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 13324       |
|    fps              | 14          |
|    time_elapsed     | 13910       |
|    total_timesteps  | 200060      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.0125      |
|    n_updates        | 49764       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.785    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13328    |
|    fps              | 14       |
|    time_elapsed     | 13914    |
|    total_timesteps  | 200120   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052680936 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.794       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 13388       |
|    fps              | 14          |
|    time_elapsed     | 13973       |
|    total_timesteps  | 201020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.73e-05    |
|    n_updates        | 50004       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.793    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13392    |
|    fps              | 14       |
|    time_elapsed     | 13977    |
|    total_timesteps  | 201080   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053238995 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.799       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 13456       |
|    fps              | 14          |
|    time_elapsed     | 14041       |
|    total_timesteps  | 202040      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000171    |
|    n_updates        | 50259       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.799    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13460    |
|    fps              | 14       |
|    time_elapsed     | 14045    |
|    total_timesteps  | 202100   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.765    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13524    |
|    fps              | 14       |
|    time_elapsed     | 14108    |
|    total_timesteps  | 203060   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 9.28e-05 |
|    n_updates        | 50514    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.764    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13528    |
|    fps              | 14       |
|    time_elapsed     | 14112    |
|    total_timesteps  | 203120   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 8.66e-05 |
|    n_updates      

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.047866248 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.743       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 13588       |
|    fps              | 14          |
|    time_elapsed     | 14171       |
|    total_timesteps  | 204020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.67e-05    |
|    n_updates        | 50754       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.744    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13592    |
|    fps              | 14       |
|    time_elapsed     | 14175    |
|    total_timesteps  | 204080   |
| train/              |          |
|    le



Eval num_timesteps=205000, episode_reward=0.78 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.776    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 205000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00168  |
|    n_updates        | 50999    |
----------------------------------
New best mean reward: 0.05325879529118538. Model saved to ./logs/DQN_lr_0.0003_gamma_0.99_20240826-200734/best_model_20240826-200734


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053258795 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.78        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 13656       |
|    fps              | 14          |
|    time_elapsed     | 14244       |
|    total_timesteps  | 205045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.1e-05     |
|    n_updates        | 51011       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.781    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13660    |
|    fps              | 14       |
|    time_elapsed     | 14248    |
|    total_timesteps  | 205105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052911643 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.797       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 13720       |
|    fps              | 14          |
|    time_elapsed     | 14307       |
|    total_timesteps  | 206005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 7.66e-05    |
|    n_updates        | 51251       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.784    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13724    |
|    fps              | 14       |
|    time_elapsed     | 14311    |
|    total_timesteps  | 206065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05227147 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.787      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 13788      |
|    fps              | 14         |
|    time_elapsed     | 14374      |
|    total_timesteps  | 207025     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 4.44e-05   |
|    n_updates        | 51506      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.788    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13792    |
|    fps              | 14       |
|    time_elapsed     | 14378    |
|    total_timesteps  | 207085   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05069175 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.774      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 13856      |
|    fps              | 14         |
|    time_elapsed     | 14442      |
|    total_timesteps  | 208045     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 8.7e-05    |
|    n_updates        | 51761      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.775    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13860    |
|    fps              | 14       |
|    time_elapsed     | 14446    |
|    total_timesteps  | 208105   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.043727968 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.689       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 13920       |
|    fps              | 14          |
|    time_elapsed     | 14505       |
|    total_timesteps  | 209005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 6.34e-05    |
|    n_updates        | 52001       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.673    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13924    |
|    fps              | 14       |
|    time_elapsed     | 14509    |
|    total_timesteps  | 209065   |
| train/              |          |
|    le



Eval num_timesteps=210000, episode_reward=0.81 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.807    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 210000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000207 |
|    n_updates        | 52249    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.036397986 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.598       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 13988       |
|    fps              | 14          |
|    time_elapsed     | 14577       |
|    total_timesteps  | 210030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 7.4e-05     |
|    n_updates        | 52257       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.598    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 13992    |
|    fps              | 14       |
|    time_elapsed     | 14581    |
|    total_timesteps  | 210090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.041838188 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.63        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 14056       |
|    fps              | 14          |
|    time_elapsed     | 14644       |
|    total_timesteps  | 211050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000105    |
|    n_updates        | 52512       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.645    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14060    |
|    fps              | 14       |
|    time_elapsed     | 14648    |
|    total_timesteps  | 211110   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.045668982 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.678       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 14120       |
|    fps              | 14          |
|    time_elapsed     | 14711       |
|    total_timesteps  | 212010      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000108    |
|    n_updates        | 52752       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.68     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14124    |
|    fps              | 14       |
|    time_elapsed     | 14716    |
|    total_timesteps  | 212070   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.048413184 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.715       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 14188       |
|    fps              | 14          |
|    time_elapsed     | 14786       |
|    total_timesteps  | 213030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00107     |
|    n_updates        | 53007       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.714    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14192    |
|    fps              | 14       |
|    time_elapsed     | 14791    |
|    total_timesteps  | 213090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.037936807 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.607       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 14256       |
|    fps              | 14          |
|    time_elapsed     | 14861       |
|    total_timesteps  | 214050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000413    |
|    n_updates        | 53262       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.605    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14260    |
|    fps              | 14       |
|    time_elapsed     | 14865    |
|    total_timesteps  | 214110   |
| train/              |          |
|    le



Eval num_timesteps=215000, episode_reward=0.46 +/- 0.57
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.462    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 215000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000215 |
|    n_updates        | 53499    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.043362953 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.611       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 14320       |
|    fps              | 14          |
|    time_elapsed     | 14937       |
|    total_timesteps  | 215015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.0046      |
|    n_updates        | 53503       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.61     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14324    |
|    fps              | 14       |
|    time_elapsed     | 14942    |
|    total_timesteps  | 215075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04504551 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.695      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 14388      |
|    fps              | 14         |
|    time_elapsed     | 15013      |
|    total_timesteps  | 216035     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000603   |
|    n_updates        | 53758      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.694    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14392    |
|    fps              | 14       |
|    time_elapsed     | 15017    |
|    total_timesteps  | 216095   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050462082 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.765       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 14456       |
|    fps              | 14          |
|    time_elapsed     | 15092       |
|    total_timesteps  | 217055      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000911    |
|    n_updates        | 54013       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.765    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14460    |
|    fps              | 14       |
|    time_elapsed     | 15096    |
|    total_timesteps  | 217115   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05206935 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.763      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 14520      |
|    fps              | 14         |
|    time_elapsed     | 15164      |
|    total_timesteps  | 218015     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00151    |
|    n_updates        | 54253      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.763    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14524    |
|    fps              | 14       |
|    time_elapsed     | 15169    |
|    total_timesteps  | 218075   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050576806 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.767       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 14588       |
|    fps              | 14          |
|    time_elapsed     | 15240       |
|    total_timesteps  | 219035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00171     |
|    n_updates        | 54508       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.766    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14592    |
|    fps              | 14       |
|    time_elapsed     | 15245    |
|    total_timesteps  | 219095   |
| train/              |          |
|    le



Eval num_timesteps=220000, episode_reward=0.78 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.776    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 220000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000868 |
|    n_updates        | 54749    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052443296 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.787       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 14656       |
|    fps              | 14          |
|    time_elapsed     | 15323       |
|    total_timesteps  | 220060      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000503    |
|    n_updates        | 54764       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.788    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14660    |
|    fps              | 14       |
|    time_elapsed     | 15328    |
|    total_timesteps  | 220120   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050918233 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.773       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 14720       |
|    fps              | 14          |
|    time_elapsed     | 15393       |
|    total_timesteps  | 221020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000344    |
|    n_updates        | 55004       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.774    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14724    |
|    fps              | 14       |
|    time_elapsed     | 15397    |
|    total_timesteps  | 221080   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04697416 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.717      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 14788      |
|    fps              | 14         |
|    time_elapsed     | 15462      |
|    total_timesteps  | 222040     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000551   |
|    n_updates        | 55259      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.732    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14792    |
|    fps              | 14       |
|    time_elapsed     | 15466    |
|    total_timesteps  | 222100   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.75     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14856    |
|    fps              | 14       |
|    time_elapsed     | 15533    |
|    total_timesteps  | 223060   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00109  |
|    n_updates        | 55514    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.751    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14860    |
|    fps              | 14       |
|    time_elapsed     | 15537    |
|    total_timesteps  | 223120   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000117 |
|    n_updates      

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05228109 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.774      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 14920      |
|    fps              | 14         |
|    time_elapsed     | 15599      |
|    total_timesteps  | 224020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000216   |
|    n_updates        | 55754      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.774    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14924    |
|    fps              | 14       |
|    time_elapsed     | 15603    |
|    total_timesteps  | 224080   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=225000, episode_reward=0.80 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.799    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 225000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000243 |
|    n_updates        | 55999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05159867 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.778      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 14988      |
|    fps              | 14         |
|    time_elapsed     | 15675      |
|    total_timesteps  | 225045     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000151   |
|    n_updates        | 56011      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.777    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 14992    |
|    fps              | 14       |
|    time_elapsed     | 15679    |
|    total_timesteps  | 225105   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051573228 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.771       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 15052       |
|    fps              | 14          |
|    time_elapsed     | 15739       |
|    total_timesteps  | 226005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 7.34e-05    |
|    n_updates        | 56251       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.786    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 15056    |
|    fps              | 14       |
|    time_elapsed     | 15743    |
|    total_timesteps  | 226065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052900147 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.78        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 15120       |
|    fps              | 14          |
|    time_elapsed     | 15808       |
|    total_timesteps  | 227025      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000349    |
|    n_updates        | 56506       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.78     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 15124    |
|    fps              | 14       |
|    time_elapsed     | 15812    |
|    total_timesteps  | 227085   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053679295 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.798       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 15188       |
|    fps              | 14          |
|    time_elapsed     | 15876       |
|    total_timesteps  | 228045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.14e-05    |
|    n_updates        | 56761       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.798    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 15192    |
|    fps              | 14       |
|    time_elapsed     | 15880    |
|    total_timesteps  | 228105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05313044 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.799      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 15252      |
|    fps              | 14         |
|    time_elapsed     | 15942      |
|    total_timesteps  | 229005     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 3.52e-05   |
|    n_updates        | 57001      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.799    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 15256    |
|    fps              | 14       |
|    time_elapsed     | 15946    |
|    total_timesteps  | 229065   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=230000, episode_reward=0.80 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.805    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 230000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 5.12e-05 |
|    n_updates        | 57249    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053100795 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.798       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 15320       |
|    fps              | 14          |
|    time_elapsed     | 16016       |
|    total_timesteps  | 230030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.02e-05    |
|    n_updates        | 57257       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.798    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 15324    |
|    fps              | 14       |
|    time_elapsed     | 16020    |
|    total_timesteps  | 230090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052817404 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.797       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 15388       |
|    fps              | 14          |
|    time_elapsed     | 16085       |
|    total_timesteps  | 231050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.22e-05    |
|    n_updates        | 57512       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.798    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 15392    |
|    fps              | 14       |
|    time_elapsed     | 16089    |
|    total_timesteps  | 231110   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053535562 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.798       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 15452       |
|    fps              | 14          |
|    time_elapsed     | 16150       |
|    total_timesteps  | 232010      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.7e-05     |
|    n_updates        | 57752       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.8      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 15456    |
|    fps              | 14       |
|    time_elapsed     | 16154    |
|    total_timesteps  | 232070   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053953502 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.806       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 15520       |
|    fps              | 14          |
|    time_elapsed     | 16218       |
|    total_timesteps  | 233030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.06e-05    |
|    n_updates        | 58007       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.806    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 15524    |
|    fps              | 14       |
|    time_elapsed     | 16222    |
|    total_timesteps  | 233090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05356219 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.807      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 15588      |
|    fps              | 14         |
|    time_elapsed     | 16288      |
|    total_timesteps  | 234050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.94e-05   |
|    n_updates        | 58262      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.807    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 15592    |
|    fps              | 14       |
|    time_elapsed     | 16292    |
|    total_timesteps  | 234110   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=235000, episode_reward=0.82 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.823    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 235000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 4.52e-05 |
|    n_updates        | 58499    |
----------------------------------
New best mean reward!
New best mean reward: 0.05407243221998215. Model saved to ./logs/DQN_lr_0.0003_gamma_0.99_20240826-200734/best_model_20240826-200734


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054072432 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.81        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 15652       |
|    fps              | 14          |
|    time_elapsed     | 16358       |
|    total_timesteps  | 235015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.71e-05    |
|    n_updates        | 58503       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.81     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 15656    |
|    fps              | 14       |
|    time_elapsed     | 16362    |
|    total_timesteps  | 235075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053334735 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.806       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 15720       |
|    fps              | 14          |
|    time_elapsed     | 16429       |
|    total_timesteps  | 236035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.72e-05    |
|    n_updates        | 58758       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.806    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 15724    |
|    fps              | 14       |
|    time_elapsed     | 16433    |
|    total_timesteps  | 236095   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05415819 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.81       |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 15788      |
|    fps              | 14         |
|    time_elapsed     | 16499      |
|    total_timesteps  | 237055     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.19e-05   |
|    n_updates        | 59013      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.811    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 15792    |
|    fps              | 14       |
|    time_elapsed     | 16504    |
|    total_timesteps  | 237115   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051067483 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.78        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 15852       |
|    fps              | 14          |
|    time_elapsed     | 16565       |
|    total_timesteps  | 238015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.39e-05    |
|    n_updates        | 59253       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.779    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 15856    |
|    fps              | 14       |
|    time_elapsed     | 16569    |
|    total_timesteps  | 238075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052894693 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.785       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 15920       |
|    fps              | 14          |
|    time_elapsed     | 16634       |
|    total_timesteps  | 239035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.58e-05    |
|    n_updates        | 59508       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.785    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 15924    |
|    fps              | 14       |
|    time_elapsed     | 16639    |
|    total_timesteps  | 239095   |
| train/              |          |
|    le



Eval num_timesteps=240000, episode_reward=0.74 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.738    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 240000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 5.9e-05  |
|    n_updates        | 59749    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049408395 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.758       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 15988       |
|    fps              | 14          |
|    time_elapsed     | 16714       |
|    total_timesteps  | 240060      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.69e-05    |
|    n_updates        | 59764       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.758    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 15992    |
|    fps              | 14       |
|    time_elapsed     | 16718    |
|    total_timesteps  | 240120   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05356136 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.792      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 16052      |
|    fps              | 14         |
|    time_elapsed     | 16782      |
|    total_timesteps  | 241020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.75e-05   |
|    n_updates        | 60004      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.792    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16056    |
|    fps              | 14       |
|    time_elapsed     | 16786    |
|    total_timesteps  | 241080   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049662605 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.764       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 16120       |
|    fps              | 14          |
|    time_elapsed     | 16854       |
|    total_timesteps  | 242040      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.3e-05     |
|    n_updates        | 60259       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.763    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16124    |
|    fps              | 14       |
|    time_elapsed     | 16858    |
|    total_timesteps  | 242100   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049855728 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.729       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 16184       |
|    fps              | 14          |
|    time_elapsed     | 16921       |
|    total_timesteps  | 243000      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 6.91e-05    |
|    n_updates        | 60499       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.729    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16188    |
|    fps              | 14       |
|    time_elapsed     | 16925    |
|    total_timesteps  | 243060   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052872535 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.796       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 16252       |
|    fps              | 14          |
|    time_elapsed     | 16992       |
|    total_timesteps  | 244020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000168    |
|    n_updates        | 60754       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.796    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16256    |
|    fps              | 14       |
|    time_elapsed     | 16997    |
|    total_timesteps  | 244080   |
| train/              |          |
|    le



Eval num_timesteps=245000, episode_reward=0.82 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.821    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 245000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00193  |
|    n_updates        | 60999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05093618 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.775      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 16320      |
|    fps              | 14         |
|    time_elapsed     | 17070      |
|    total_timesteps  | 245045     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 6.32e-05   |
|    n_updates        | 61011      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.776    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16324    |
|    fps              | 14       |
|    time_elapsed     | 17074    |
|    total_timesteps  | 245105   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053588703 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.807       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 16384       |
|    fps              | 14          |
|    time_elapsed     | 17137       |
|    total_timesteps  | 246005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 6.29e-05    |
|    n_updates        | 61251       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.807    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16388    |
|    fps              | 14       |
|    time_elapsed     | 17141    |
|    total_timesteps  | 246065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052515063 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.794       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 16452       |
|    fps              | 14          |
|    time_elapsed     | 17207       |
|    total_timesteps  | 247025      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 6.52e-05    |
|    n_updates        | 61506       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.795    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16456    |
|    fps              | 14       |
|    time_elapsed     | 17212    |
|    total_timesteps  | 247085   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053629074 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.789       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 16520       |
|    fps              | 14          |
|    time_elapsed     | 17278       |
|    total_timesteps  | 248045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.09e-05    |
|    n_updates        | 61761       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.788    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16524    |
|    fps              | 14       |
|    time_elapsed     | 17282    |
|    total_timesteps  | 248105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0533473 |
| rollout/            |           |
|    ep_len_mean      | 15        |
|    ep_rew_mean      | 0.802     |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 16584     |
|    fps              | 14        |
|    time_elapsed     | 17345     |
|    total_timesteps  | 249005    |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 3.13e-05  |
|    n_updates        | 62001     |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.803    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16588    |
|    fps              | 14       |
|    time_elapsed     | 17350    |
|    total_timesteps  | 249065   |
| train/              |          |
|    learning_rate    | 0.0003   |
|   



Eval num_timesteps=250000, episode_reward=0.81 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.809    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 250000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 5.15e-05 |
|    n_updates        | 62249    |
----------------------------------
New best mean reward: 0.054354485124349594. Model saved to ./logs/DQN_lr_0.0003_gamma_0.99_20240826-200734/best_model_20240826-200734


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054354485 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.813       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 16652       |
|    fps              | 14          |
|    time_elapsed     | 17422       |
|    total_timesteps  | 250030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.77e-05    |
|    n_updates        | 62257       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.814    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16656    |
|    fps              | 14       |
|    time_elapsed     | 17426    |
|    total_timesteps  | 250090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05405852 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.817      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 16720      |
|    fps              | 14         |
|    time_elapsed     | 17491      |
|    total_timesteps  | 251050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000121   |
|    n_updates        | 62512      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.816    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16724    |
|    fps              | 14       |
|    time_elapsed     | 17495    |
|    total_timesteps  | 251110   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054259166 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.812       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 16784       |
|    fps              | 14          |
|    time_elapsed     | 17558       |
|    total_timesteps  | 252010      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.2e-05     |
|    n_updates        | 62752       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.812    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16788    |
|    fps              | 14       |
|    time_elapsed     | 17562    |
|    total_timesteps  | 252070   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054199904 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.811       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 16852       |
|    fps              | 14          |
|    time_elapsed     | 17629       |
|    total_timesteps  | 253030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.1e-05     |
|    n_updates        | 63007       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.811    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16856    |
|    fps              | 14       |
|    time_elapsed     | 17634    |
|    total_timesteps  | 253090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054402508 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.819       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 16920       |
|    fps              | 14          |
|    time_elapsed     | 17704       |
|    total_timesteps  | 254050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.5e-05     |
|    n_updates        | 63262       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.819    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16924    |
|    fps              | 14       |
|    time_elapsed     | 17708    |
|    total_timesteps  | 254110   |
| train/              |          |
|    le



Eval num_timesteps=255000, episode_reward=0.81 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.812    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 255000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 3.34e-05 |
|    n_updates        | 63499    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054288376 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.817       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 16984       |
|    fps              | 14          |
|    time_elapsed     | 17776       |
|    total_timesteps  | 255015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.48e-05    |
|    n_updates        | 63503       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.816    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 16988    |
|    fps              | 14       |
|    time_elapsed     | 17780    |
|    total_timesteps  | 255075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054376043 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.818       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 17052       |
|    fps              | 14          |
|    time_elapsed     | 17845       |
|    total_timesteps  | 256035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.46e-05    |
|    n_updates        | 63758       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.803    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17056    |
|    fps              | 14       |
|    time_elapsed     | 17849    |
|    total_timesteps  | 256095   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05016054 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.776      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 17120      |
|    fps              | 14         |
|    time_elapsed     | 17913      |
|    total_timesteps  | 257055     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 3.96e-05   |
|    n_updates        | 64013      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.775    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17124    |
|    fps              | 14       |
|    time_elapsed     | 17917    |
|    total_timesteps  | 257115   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05308261 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.772      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 17184      |
|    fps              | 14         |
|    time_elapsed     | 17978      |
|    total_timesteps  | 258015     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.64e-05   |
|    n_updates        | 64253      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.773    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17188    |
|    fps              | 14       |
|    time_elapsed     | 17982    |
|    total_timesteps  | 258075   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050793823 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.784       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 17252       |
|    fps              | 14          |
|    time_elapsed     | 18047       |
|    total_timesteps  | 259035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 9.73e-05    |
|    n_updates        | 64508       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.78     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17256    |
|    fps              | 14       |
|    time_elapsed     | 18051    |
|    total_timesteps  | 259095   |
| train/              |          |
|    le



Eval num_timesteps=260000, episode_reward=0.80 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.797    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 260000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 8.11e-05 |
|    n_updates        | 64749    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051790774 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.771       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 17320       |
|    fps              | 14          |
|    time_elapsed     | 18122       |
|    total_timesteps  | 260060      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 7.01e-05    |
|    n_updates        | 64764       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.768    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17324    |
|    fps              | 14       |
|    time_elapsed     | 18126    |
|    total_timesteps  | 260120   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05016475 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.755      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 17384      |
|    fps              | 14         |
|    time_elapsed     | 18188      |
|    total_timesteps  | 261020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.67e-05   |
|    n_updates        | 65004      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.726    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17388    |
|    fps              | 14       |
|    time_elapsed     | 18192    |
|    total_timesteps  | 261080   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.041871436 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.656       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 17452       |
|    fps              | 14          |
|    time_elapsed     | 18257       |
|    total_timesteps  | 262040      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000238    |
|    n_updates        | 65259       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.656    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17456    |
|    fps              | 14       |
|    time_elapsed     | 18262    |
|    total_timesteps  | 262100   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04440002 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.63       |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 17516      |
|    fps              | 14         |
|    time_elapsed     | 18324      |
|    total_timesteps  | 263000     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000117   |
|    n_updates        | 65499      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.63     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17520    |
|    fps              | 14       |
|    time_elapsed     | 18328    |
|    total_timesteps  | 263060   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051422186 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.73        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 17584       |
|    fps              | 14          |
|    time_elapsed     | 18394       |
|    total_timesteps  | 264020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000349    |
|    n_updates        | 65754       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.745    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17588    |
|    fps              | 14       |
|    time_elapsed     | 18398    |
|    total_timesteps  | 264080   |
| train/              |          |
|    le



Eval num_timesteps=265000, episode_reward=0.81 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.806    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 265000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000115 |
|    n_updates        | 65999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05378284 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.804      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 17652      |
|    fps              | 14         |
|    time_elapsed     | 18467      |
|    total_timesteps  | 265045     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 7.12e-05   |
|    n_updates        | 66011      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.804    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17656    |
|    fps              | 14       |
|    time_elapsed     | 18471    |
|    total_timesteps  | 265105   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05151931 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.786      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 17716      |
|    fps              | 14         |
|    time_elapsed     | 18531      |
|    total_timesteps  | 266005     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 8.49e-05   |
|    n_updates        | 66251      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.786    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17720    |
|    fps              | 14       |
|    time_elapsed     | 18535    |
|    total_timesteps  | 266065   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04766827 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.742      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 17784      |
|    fps              | 14         |
|    time_elapsed     | 18600      |
|    total_timesteps  | 267025     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00019    |
|    n_updates        | 66506      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.741    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17788    |
|    fps              | 14       |
|    time_elapsed     | 18604    |
|    total_timesteps  | 267085   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05051998 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.711      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 17852      |
|    fps              | 14         |
|    time_elapsed     | 18667      |
|    total_timesteps  | 268045     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000121   |
|    n_updates        | 66761      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.723    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17856    |
|    fps              | 14       |
|    time_elapsed     | 18671    |
|    total_timesteps  | 268105   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05144091 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.758      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 17916      |
|    fps              | 14         |
|    time_elapsed     | 18732      |
|    total_timesteps  | 269005     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 6.1e-05    |
|    n_updates        | 67001      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.757    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17920    |
|    fps              | 14       |
|    time_elapsed     | 18736    |
|    total_timesteps  | 269065   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=270000, episode_reward=0.77 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.773    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 270000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 9.4e-05  |
|    n_updates        | 67249    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05083046 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.769      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 17984      |
|    fps              | 14         |
|    time_elapsed     | 18805      |
|    total_timesteps  | 270030     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 8.16e-05   |
|    n_updates        | 67257      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.756    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17988    |
|    fps              | 14       |
|    time_elapsed     | 18809    |
|    total_timesteps  | 270090   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04626531 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.711      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 18052      |
|    fps              | 14         |
|    time_elapsed     | 18873      |
|    total_timesteps  | 271050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00011    |
|    n_updates        | 67512      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.711    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18056    |
|    fps              | 14       |
|    time_elapsed     | 18877    |
|    total_timesteps  | 271110   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049179807 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.741       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 18116       |
|    fps              | 14          |
|    time_elapsed     | 18937       |
|    total_timesteps  | 272010      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000153    |
|    n_updates        | 67752       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.757    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18120    |
|    fps              | 14       |
|    time_elapsed     | 18940    |
|    total_timesteps  | 272070   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05163502 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.762      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 18184      |
|    fps              | 14         |
|    time_elapsed     | 19005      |
|    total_timesteps  | 273030     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000104   |
|    n_updates        | 68007      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.763    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18188    |
|    fps              | 14       |
|    time_elapsed     | 19009    |
|    total_timesteps  | 273090   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05276029 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.765      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 18252      |
|    fps              | 14         |
|    time_elapsed     | 19073      |
|    total_timesteps  | 274050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 6.35e-05   |
|    n_updates        | 68262      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.764    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18256    |
|    fps              | 14       |
|    time_elapsed     | 19077    |
|    total_timesteps  | 274110   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=275000, episode_reward=0.83 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.827    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 275000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000665 |
|    n_updates        | 68499    |
----------------------------------
New best mean reward!


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051496826 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.781       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 18316       |
|    fps              | 14          |
|    time_elapsed     | 19143       |
|    total_timesteps  | 275015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000129    |
|    n_updates        | 68503       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.782    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18320    |
|    fps              | 14       |
|    time_elapsed     | 19147    |
|    total_timesteps  | 275075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052604724 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.794       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 18384       |
|    fps              | 14          |
|    time_elapsed     | 19211       |
|    total_timesteps  | 276035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000637    |
|    n_updates        | 68758       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.794    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18388    |
|    fps              | 14       |
|    time_elapsed     | 19215    |
|    total_timesteps  | 276095   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053538285 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.797       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 18452       |
|    fps              | 14          |
|    time_elapsed     | 19279       |
|    total_timesteps  | 277055      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000159    |
|    n_updates        | 69013       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.799    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18456    |
|    fps              | 14       |
|    time_elapsed     | 19283    |
|    total_timesteps  | 277115   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053569756 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.802       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 18516       |
|    fps              | 14          |
|    time_elapsed     | 19342       |
|    total_timesteps  | 278015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000134    |
|    n_updates        | 69253       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.801    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18520    |
|    fps              | 14       |
|    time_elapsed     | 19346    |
|    total_timesteps  | 278075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0538063 |
| rollout/            |           |
|    ep_len_mean      | 15        |
|    ep_rew_mean      | 0.808     |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 18584     |
|    fps              | 14        |
|    time_elapsed     | 19412     |
|    total_timesteps  | 279035    |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 6.4e-05   |
|    n_updates        | 69508     |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.809    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18588    |
|    fps              | 14       |
|    time_elapsed     | 19416    |
|    total_timesteps  | 279095   |
| train/              |          |
|    learning_rate    | 0.0003   |
|   



Eval num_timesteps=280000, episode_reward=0.83 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.83     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 280000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 9.66e-05 |
|    n_updates        | 69749    |
----------------------------------
New best mean reward!


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05407008 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.81       |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 18652      |
|    fps              | 14         |
|    time_elapsed     | 19486      |
|    total_timesteps  | 280060     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 5.85e-05   |
|    n_updates        | 69764      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.809    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18656    |
|    fps              | 14       |
|    time_elapsed     | 19490    |
|    total_timesteps  | 280120   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05369726 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.811      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 18716      |
|    fps              | 14         |
|    time_elapsed     | 19551      |
|    total_timesteps  | 281020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 8.19e-05   |
|    n_updates        | 70004      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.81     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18720    |
|    fps              | 14       |
|    time_elapsed     | 19555    |
|    total_timesteps  | 281080   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05428946 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.815      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 18784      |
|    fps              | 14         |
|    time_elapsed     | 19623      |
|    total_timesteps  | 282040     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 3.5e-05    |
|    n_updates        | 70259      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.813    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18788    |
|    fps              | 14       |
|    time_elapsed     | 19627    |
|    total_timesteps  | 282100   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054789826 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.819       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 18848       |
|    fps              | 14          |
|    time_elapsed     | 19690       |
|    total_timesteps  | 283000      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.44e-05    |
|    n_updates        | 70499       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.82     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18852    |
|    fps              | 14       |
|    time_elapsed     | 19694    |
|    total_timesteps  | 283060   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052517053 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.797       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 18916       |
|    fps              | 14          |
|    time_elapsed     | 19762       |
|    total_timesteps  | 284020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.1e-05     |
|    n_updates        | 70754       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.799    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18920    |
|    fps              | 14       |
|    time_elapsed     | 19766    |
|    total_timesteps  | 284080   |
| train/              |          |
|    le



Eval num_timesteps=285000, episode_reward=0.82 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.819    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 285000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 4.03e-05 |
|    n_updates        | 70999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05456457 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.802      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 18984      |
|    fps              | 14         |
|    time_elapsed     | 19842      |
|    total_timesteps  | 285045     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.67e-05   |
|    n_updates        | 71011      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.804    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18988    |
|    fps              | 14       |
|    time_elapsed     | 19846    |
|    total_timesteps  | 285105   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054070175 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.819       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 19048       |
|    fps              | 14          |
|    time_elapsed     | 19911       |
|    total_timesteps  | 286005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.34e-05    |
|    n_updates        | 71251       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.818    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19052    |
|    fps              | 14       |
|    time_elapsed     | 19915    |
|    total_timesteps  | 286065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053206306 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.802       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 19116       |
|    fps              | 14          |
|    time_elapsed     | 19986       |
|    total_timesteps  | 287025      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.5e-05     |
|    n_updates        | 71506       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.802    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19120    |
|    fps              | 14       |
|    time_elapsed     | 19990    |
|    total_timesteps  | 287085   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04944772 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.753      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 19184      |
|    fps              | 14         |
|    time_elapsed     | 20062      |
|    total_timesteps  | 288045     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00622    |
|    n_updates        | 71761      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.752    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19188    |
|    fps              | 14       |
|    time_elapsed     | 20066    |
|    total_timesteps  | 288105   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.026149876 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.499       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 19248       |
|    fps              | 14          |
|    time_elapsed     | 20129       |
|    total_timesteps  | 289005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 6.67e-05    |
|    n_updates        | 72001       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.497    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19252    |
|    fps              | 14       |
|    time_elapsed     | 20133    |
|    total_timesteps  | 289065   |
| train/              |          |
|    le



Eval num_timesteps=290000, episode_reward=0.83 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.83     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 290000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000571 |
|    n_updates        | 72249    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0498781 |
| rollout/            |           |
|    ep_len_mean      | 15.1      |
|    ep_rew_mean      | 0.586     |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 19316     |
|    fps              | 14        |
|    time_elapsed     | 20205     |
|    total_timesteps  | 290030    |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 0.000104  |
|    n_updates        | 72257     |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.601    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19320    |
|    fps              | 14       |
|    time_elapsed     | 20210    |
|    total_timesteps  | 290090   |
| train/              |          |
|    learning_rate    | 0.0003   |
|   

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05207576 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.774      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 19384      |
|    fps              | 14         |
|    time_elapsed     | 20276      |
|    total_timesteps  | 291050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000172   |
|    n_updates        | 72512      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.774    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19388    |
|    fps              | 14       |
|    time_elapsed     | 20280    |
|    total_timesteps  | 291110   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05037454 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.771      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 19448      |
|    fps              | 14         |
|    time_elapsed     | 20343      |
|    total_timesteps  | 292010     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000599   |
|    n_updates        | 72752      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.773    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19452    |
|    fps              | 14       |
|    time_elapsed     | 20347    |
|    total_timesteps  | 292070   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054557484 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.778       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 19516       |
|    fps              | 14          |
|    time_elapsed     | 20414       |
|    total_timesteps  | 293030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.89e-05    |
|    n_updates        | 73007       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.78     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19520    |
|    fps              | 14       |
|    time_elapsed     | 20418    |
|    total_timesteps  | 293090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05408285 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.816      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 19584      |
|    fps              | 14         |
|    time_elapsed     | 20484      |
|    total_timesteps  | 294050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 5.3e-05    |
|    n_updates        | 73262      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.817    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19588    |
|    fps              | 14       |
|    time_elapsed     | 20489    |
|    total_timesteps  | 294110   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=295000, episode_reward=0.77 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.772    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 295000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000148 |
|    n_updates        | 73499    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05381689 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.811      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 19648      |
|    fps              | 14         |
|    time_elapsed     | 20557      |
|    total_timesteps  | 295015     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 6.5e-05    |
|    n_updates        | 73503      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.81     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19652    |
|    fps              | 14       |
|    time_elapsed     | 20561    |
|    total_timesteps  | 295075   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049754623 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.768       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 19716       |
|    fps              | 14          |
|    time_elapsed     | 20628       |
|    total_timesteps  | 296035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 9.51e-05    |
|    n_updates        | 73758       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.768    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19720    |
|    fps              | 14       |
|    time_elapsed     | 20632    |
|    total_timesteps  | 296095   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05391468 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.772      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 19784      |
|    fps              | 14         |
|    time_elapsed     | 20699      |
|    total_timesteps  | 297055     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000788   |
|    n_updates        | 74013      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.772    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19788    |
|    fps              | 14       |
|    time_elapsed     | 20703    |
|    total_timesteps  | 297115   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053361755 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.802       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 19848       |
|    fps              | 14          |
|    time_elapsed     | 20765       |
|    total_timesteps  | 298015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000376    |
|    n_updates        | 74253       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.803    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19852    |
|    fps              | 14       |
|    time_elapsed     | 20770    |
|    total_timesteps  | 298075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05372806 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.804      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 19916      |
|    fps              | 14         |
|    time_elapsed     | 20834      |
|    total_timesteps  | 299035     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 8.17e-05   |
|    n_updates        | 74508      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.804    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19920    |
|    fps              | 14       |
|    time_elapsed     | 20838    |
|    total_timesteps  | 299095   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=300000, episode_reward=0.82 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.819    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 300000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000118 |
|    n_updates        | 74749    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05432705 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.812      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 19984      |
|    fps              | 14         |
|    time_elapsed     | 20908      |
|    total_timesteps  | 300060     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 7.42e-05   |
|    n_updates        | 74764      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.812    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19988    |
|    fps              | 14       |
|    time_elapsed     | 20912    |
|    total_timesteps  | 300120   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053501718 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.812       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 20048       |
|    fps              | 14          |
|    time_elapsed     | 20972       |
|    total_timesteps  | 301020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.72e-05    |
|    n_updates        | 75004       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.811    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20052    |
|    fps              | 14       |
|    time_elapsed     | 20976    |
|    total_timesteps  | 301080   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054230057 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.811       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 20116       |
|    fps              | 14          |
|    time_elapsed     | 21040       |
|    total_timesteps  | 302040      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.47e-05    |
|    n_updates        | 75259       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.811    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20120    |
|    fps              | 14       |
|    time_elapsed     | 21044    |
|    total_timesteps  | 302100   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05450584 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.816      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 20180      |
|    fps              | 14         |
|    time_elapsed     | 21104      |
|    total_timesteps  | 303000     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 3.37e-05   |
|    n_updates        | 75499      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.815    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20184    |
|    fps              | 14       |
|    time_elapsed     | 21108    |
|    total_timesteps  | 303060   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05397857 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.812      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 20248      |
|    fps              | 14         |
|    time_elapsed     | 21172      |
|    total_timesteps  | 304020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 4.7e-05    |
|    n_updates        | 75754      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.812    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20252    |
|    fps              | 14       |
|    time_elapsed     | 21176    |
|    total_timesteps  | 304080   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=305000, episode_reward=0.83 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.825    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 305000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 2.37e-05 |
|    n_updates        | 75999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054519754 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.819       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 20316       |
|    fps              | 14          |
|    time_elapsed     | 21245       |
|    total_timesteps  | 305045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.47e-05    |
|    n_updates        | 76011       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.818    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20320    |
|    fps              | 14       |
|    time_elapsed     | 21249    |
|    total_timesteps  | 305105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054050963 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.817       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 20380       |
|    fps              | 14          |
|    time_elapsed     | 21309       |
|    total_timesteps  | 306005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.37e-05    |
|    n_updates        | 76251       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.818    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20384    |
|    fps              | 14       |
|    time_elapsed     | 21313    |
|    total_timesteps  | 306065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05444167 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.815      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 20448      |
|    fps              | 14         |
|    time_elapsed     | 21377      |
|    total_timesteps  | 307025     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.25e-05   |
|    n_updates        | 76506      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.815    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20452    |
|    fps              | 14       |
|    time_elapsed     | 21381    |
|    total_timesteps  | 307085   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05419734 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.814      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 20516      |
|    fps              | 14         |
|    time_elapsed     | 21445      |
|    total_timesteps  | 308045     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.09e-05   |
|    n_updates        | 76761      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.813    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20520    |
|    fps              | 14       |
|    time_elapsed     | 21449    |
|    total_timesteps  | 308105   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05381868 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.809      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 20580      |
|    fps              | 14         |
|    time_elapsed     | 21509      |
|    total_timesteps  | 309005     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 3.18e-05   |
|    n_updates        | 77001      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.81     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20584    |
|    fps              | 14       |
|    time_elapsed     | 21513    |
|    total_timesteps  | 309065   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=310000, episode_reward=0.84 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.837    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 310000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 2.48e-05 |
|    n_updates        | 77249    |
----------------------------------
New best mean reward!


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054689474 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.818       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 20648       |
|    fps              | 14          |
|    time_elapsed     | 21582       |
|    total_timesteps  | 310030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.25e-05    |
|    n_updates        | 77257       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.818    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20652    |
|    fps              | 14       |
|    time_elapsed     | 21586    |
|    total_timesteps  | 310090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054418825 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.82        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 20716       |
|    fps              | 14          |
|    time_elapsed     | 21651       |
|    total_timesteps  | 311050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.39e-05    |
|    n_updates        | 77512       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.82     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20720    |
|    fps              | 14       |
|    time_elapsed     | 21654    |
|    total_timesteps  | 311110   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054637145 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.818       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 20780       |
|    fps              | 14          |
|    time_elapsed     | 21714       |
|    total_timesteps  | 312010      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.22e-05    |
|    n_updates        | 77752       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.818    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20784    |
|    fps              | 14       |
|    time_elapsed     | 21718    |
|    total_timesteps  | 312070   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054268993 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.815       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 20848       |
|    fps              | 14          |
|    time_elapsed     | 21783       |
|    total_timesteps  | 313030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.32e-05    |
|    n_updates        | 78007       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.815    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20852    |
|    fps              | 14       |
|    time_elapsed     | 21787    |
|    total_timesteps  | 313090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054261602 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.816       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 20916       |
|    fps              | 14          |
|    time_elapsed     | 21851       |
|    total_timesteps  | 314050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.19e-05    |
|    n_updates        | 78262       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.817    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20920    |
|    fps              | 14       |
|    time_elapsed     | 21855    |
|    total_timesteps  | 314110   |
| train/              |          |
|    le



Eval num_timesteps=315000, episode_reward=0.83 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.83     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 315000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 1.84e-05 |
|    n_updates        | 78499    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0547019 |
| rollout/            |           |
|    ep_len_mean      | 15.1      |
|    ep_rew_mean      | 0.82      |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 20980     |
|    fps              | 14        |
|    time_elapsed     | 21920     |
|    total_timesteps  | 315015    |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 1.95e-05  |
|    n_updates        | 78503     |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.819    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20984    |
|    fps              | 14       |
|    time_elapsed     | 21924    |
|    total_timesteps  | 315075   |
| train/              |          |
|    learning_rate    | 0.0003   |
|   

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05391712 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.817      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 21048      |
|    fps              | 14         |
|    time_elapsed     | 21988      |
|    total_timesteps  | 316035     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.14e-05   |
|    n_updates        | 78758      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.818    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21052    |
|    fps              | 14       |
|    time_elapsed     | 21992    |
|    total_timesteps  | 316095   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05468471 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.816      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 21116      |
|    fps              | 14         |
|    time_elapsed     | 22056      |
|    total_timesteps  | 317055     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 3.11e-05   |
|    n_updates        | 79013      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.817    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21120    |
|    fps              | 14       |
|    time_elapsed     | 22060    |
|    total_timesteps  | 317115   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0543627 |
| rollout/            |           |
|    ep_len_mean      | 15        |
|    ep_rew_mean      | 0.814     |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 21180     |
|    fps              | 14        |
|    time_elapsed     | 22120     |
|    total_timesteps  | 318015    |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 1.88e-05  |
|    n_updates        | 79253     |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.815    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21184    |
|    fps              | 14       |
|    time_elapsed     | 22124    |
|    total_timesteps  | 318075   |
| train/              |          |
|    learning_rate    | 0.0003   |
|   

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054480918 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.818       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 21248       |
|    fps              | 14          |
|    time_elapsed     | 22188       |
|    total_timesteps  | 319035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.01e-05    |
|    n_updates        | 79508       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.819    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21252    |
|    fps              | 14       |
|    time_elapsed     | 22192    |
|    total_timesteps  | 319095   |
| train/              |          |
|    le



Eval num_timesteps=320000, episode_reward=0.84 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.84     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 320000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 1.31e-05 |
|    n_updates        | 79749    |
----------------------------------
New best mean reward!


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05423947 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.816      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 21316      |
|    fps              | 14         |
|    time_elapsed     | 22261      |
|    total_timesteps  | 320060     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.22e-05   |
|    n_updates        | 79764      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.815    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21320    |
|    fps              | 14       |
|    time_elapsed     | 22265    |
|    total_timesteps  | 320120   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054439433 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.82        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 21380       |
|    fps              | 14          |
|    time_elapsed     | 22325       |
|    total_timesteps  | 321020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.79e-05    |
|    n_updates        | 80004       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.805    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21384    |
|    fps              | 14       |
|    time_elapsed     | 22329    |
|    total_timesteps  | 321080   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05256926 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.782      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 21448      |
|    fps              | 14         |
|    time_elapsed     | 22393      |
|    total_timesteps  | 322040     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.09e-05   |
|    n_updates        | 80259      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.782    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21452    |
|    fps              | 14       |
|    time_elapsed     | 22397    |
|    total_timesteps  | 322100   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.045270134 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.72        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 21512       |
|    fps              | 14          |
|    time_elapsed     | 22458       |
|    total_timesteps  | 323000      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00216     |
|    n_updates        | 80499       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.703    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21516    |
|    fps              | 14       |
|    time_elapsed     | 22462    |
|    total_timesteps  | 323060   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.027823979 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.488       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 21580       |
|    fps              | 14          |
|    time_elapsed     | 22525       |
|    total_timesteps  | 324020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000271    |
|    n_updates        | 80754       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.471    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21584    |
|    fps              | 14       |
|    time_elapsed     | 22529    |
|    total_timesteps  | 324080   |
| train/              |          |
|    le



Eval num_timesteps=325000, episode_reward=0.74 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.74     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 325000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00118  |
|    n_updates        | 80999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.039985675 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.508       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 21648       |
|    fps              | 14          |
|    time_elapsed     | 22599       |
|    total_timesteps  | 325045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000167    |
|    n_updates        | 81011       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.506    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21652    |
|    fps              | 14       |
|    time_elapsed     | 22603    |
|    total_timesteps  | 325105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.042343207 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.62        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 21712       |
|    fps              | 14          |
|    time_elapsed     | 22662       |
|    total_timesteps  | 326005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00042     |
|    n_updates        | 81251       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.606    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21716    |
|    fps              | 14       |
|    time_elapsed     | 22666    |
|    total_timesteps  | 326065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04377043 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.623      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 21780      |
|    fps              | 14         |
|    time_elapsed     | 22730      |
|    total_timesteps  | 327025     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000125   |
|    n_updates        | 81506      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.61     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21784    |
|    fps              | 14       |
|    time_elapsed     | 22734    |
|    total_timesteps  | 327085   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051294073 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.753       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 21848       |
|    fps              | 14          |
|    time_elapsed     | 22798       |
|    total_timesteps  | 328045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.0131      |
|    n_updates        | 81761       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.74     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21852    |
|    fps              | 14       |
|    time_elapsed     | 22802    |
|    total_timesteps  | 328105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049947497 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.77        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 21912       |
|    fps              | 14          |
|    time_elapsed     | 22862       |
|    total_timesteps  | 329005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00314     |
|    n_updates        | 82001       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.769    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21916    |
|    fps              | 14       |
|    time_elapsed     | 22866    |
|    total_timesteps  | 329065   |
| train/              |          |
|    le



Eval num_timesteps=330000, episode_reward=0.77 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.767    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 330000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00079  |
|    n_updates        | 82249    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053154554 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.798       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 21980       |
|    fps              | 14          |
|    time_elapsed     | 22935       |
|    total_timesteps  | 330030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000263    |
|    n_updates        | 82257       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.798    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21984    |
|    fps              | 14       |
|    time_elapsed     | 22939    |
|    total_timesteps  | 330090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052866463 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.799       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 22048       |
|    fps              | 14          |
|    time_elapsed     | 23003       |
|    total_timesteps  | 331050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000674    |
|    n_updates        | 82512       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.798    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22052    |
|    fps              | 14       |
|    time_elapsed     | 23007    |
|    total_timesteps  | 331110   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053471297 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.799       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 22112       |
|    fps              | 14          |
|    time_elapsed     | 23067       |
|    total_timesteps  | 332010      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000462    |
|    n_updates        | 82752       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.799    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22116    |
|    fps              | 14       |
|    time_elapsed     | 23071    |
|    total_timesteps  | 332070   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05343249 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.801      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 22180      |
|    fps              | 14         |
|    time_elapsed     | 23135      |
|    total_timesteps  | 333030     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000371   |
|    n_updates        | 83007      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.801    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22184    |
|    fps              | 14       |
|    time_elapsed     | 23139    |
|    total_timesteps  | 333090   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053462062 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.805       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 22248       |
|    fps              | 14          |
|    time_elapsed     | 23203       |
|    total_timesteps  | 334050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.55e-05    |
|    n_updates        | 83262       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.803    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22252    |
|    fps              | 14       |
|    time_elapsed     | 23207    |
|    total_timesteps  | 334110   |
| train/              |          |
|    le



Eval num_timesteps=335000, episode_reward=0.81 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.813    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 335000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 9.75e-05 |
|    n_updates        | 83499    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053503864 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.805       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 22312       |
|    fps              | 14          |
|    time_elapsed     | 23272       |
|    total_timesteps  | 335015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 9.23e-05    |
|    n_updates        | 83503       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.805    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22316    |
|    fps              | 14       |
|    time_elapsed     | 23276    |
|    total_timesteps  | 335075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053913064 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.811       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 22380       |
|    fps              | 14          |
|    time_elapsed     | 23340       |
|    total_timesteps  | 336035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 7.37e-05    |
|    n_updates        | 83758       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.811    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22384    |
|    fps              | 14       |
|    time_elapsed     | 23344    |
|    total_timesteps  | 336095   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054282833 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.815       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 22448       |
|    fps              | 14          |
|    time_elapsed     | 23408       |
|    total_timesteps  | 337055      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.18e-05    |
|    n_updates        | 84013       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.815    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22452    |
|    fps              | 14       |
|    time_elapsed     | 23412    |
|    total_timesteps  | 337115   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0543732 |
| rollout/            |           |
|    ep_len_mean      | 15        |
|    ep_rew_mean      | 0.814     |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 22512     |
|    fps              | 14        |
|    time_elapsed     | 23472     |
|    total_timesteps  | 338015    |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 4.1e-05   |
|    n_updates        | 84253     |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.814    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22516    |
|    fps              | 14       |
|    time_elapsed     | 23476    |
|    total_timesteps  | 338075   |
| train/              |          |
|    learning_rate    | 0.0003   |
|   

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05458507 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.818      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 22580      |
|    fps              | 14         |
|    time_elapsed     | 23540      |
|    total_timesteps  | 339035     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.12e-05   |
|    n_updates        | 84508      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.817    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22584    |
|    fps              | 14       |
|    time_elapsed     | 23544    |
|    total_timesteps  | 339095   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=340000, episode_reward=0.83 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.829    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 340000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 2.25e-05 |
|    n_updates        | 84749    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05453766 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.82       |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 22648      |
|    fps              | 14         |
|    time_elapsed     | 23613      |
|    total_timesteps  | 340060     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 3.5e-05    |
|    n_updates        | 84764      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.82     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22652    |
|    fps              | 14       |
|    time_elapsed     | 23617    |
|    total_timesteps  | 340120   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053900514 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.815       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 22712       |
|    fps              | 14          |
|    time_elapsed     | 23677       |
|    total_timesteps  | 341020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.99e-05    |
|    n_updates        | 85004       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.814    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22716    |
|    fps              | 14       |
|    time_elapsed     | 23681    |
|    total_timesteps  | 341080   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05433722 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.812      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 22780      |
|    fps              | 14         |
|    time_elapsed     | 23745      |
|    total_timesteps  | 342040     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.4e-05    |
|    n_updates        | 85259      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.813    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22784    |
|    fps              | 14       |
|    time_elapsed     | 23749    |
|    total_timesteps  | 342100   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054045487 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.812       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 22844       |
|    fps              | 14          |
|    time_elapsed     | 23809       |
|    total_timesteps  | 343000      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.56e-05    |
|    n_updates        | 85499       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.813    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22848    |
|    fps              | 14       |
|    time_elapsed     | 23813    |
|    total_timesteps  | 343060   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05411702 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.812      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 22912      |
|    fps              | 14         |
|    time_elapsed     | 23877      |
|    total_timesteps  | 344020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.91e-05   |
|    n_updates        | 85754      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.813    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22916    |
|    fps              | 14       |
|    time_elapsed     | 23881    |
|    total_timesteps  | 344080   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=345000, episode_reward=0.83 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.828    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 345000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 1.78e-05 |
|    n_updates        | 85999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054063898 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.814       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 22980       |
|    fps              | 14          |
|    time_elapsed     | 23950       |
|    total_timesteps  | 345045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.09e-05    |
|    n_updates        | 86011       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.813    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22984    |
|    fps              | 14       |
|    time_elapsed     | 23954    |
|    total_timesteps  | 345105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054179396 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.815       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 23044       |
|    fps              | 14          |
|    time_elapsed     | 24014       |
|    total_timesteps  | 346005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.62e-05    |
|    n_updates        | 86251       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.816    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23048    |
|    fps              | 14       |
|    time_elapsed     | 24018    |
|    total_timesteps  | 346065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05417689 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.814      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 23112      |
|    fps              | 14         |
|    time_elapsed     | 24082      |
|    total_timesteps  | 347025     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.9e-05    |
|    n_updates        | 86506      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.813    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23116    |
|    fps              | 14       |
|    time_elapsed     | 24086    |
|    total_timesteps  | 347085   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054158997 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.811       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 23180       |
|    fps              | 14          |
|    time_elapsed     | 24150       |
|    total_timesteps  | 348045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.29e-05    |
|    n_updates        | 86761       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.813    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23184    |
|    fps              | 14       |
|    time_elapsed     | 24154    |
|    total_timesteps  | 348105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05430362 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.815      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 23244      |
|    fps              | 14         |
|    time_elapsed     | 24214      |
|    total_timesteps  | 349005     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.48e-05   |
|    n_updates        | 87001      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.817    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23248    |
|    fps              | 14       |
|    time_elapsed     | 24218    |
|    total_timesteps  | 349065   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=350000, episode_reward=0.80 +/- 0.04
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.798    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 350000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 2.58e-05 |
|    n_updates        | 87249    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054287985 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.814       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 23312       |
|    fps              | 14          |
|    time_elapsed     | 24287       |
|    total_timesteps  | 350030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.98e-05    |
|    n_updates        | 87257       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.815    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23316    |
|    fps              | 14       |
|    time_elapsed     | 24291    |
|    total_timesteps  | 350090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054352157 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.817       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 23380       |
|    fps              | 14          |
|    time_elapsed     | 24355       |
|    total_timesteps  | 351050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.98e-05    |
|    n_updates        | 87512       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.817    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23384    |
|    fps              | 14       |
|    time_elapsed     | 24359    |
|    total_timesteps  | 351110   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05438595 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.818      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 23444      |
|    fps              | 14         |
|    time_elapsed     | 24419      |
|    total_timesteps  | 352010     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.38e-05   |
|    n_updates        | 87752      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.803    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23448    |
|    fps              | 14       |
|    time_elapsed     | 24423    |
|    total_timesteps  | 352070   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.042748146 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.696       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 23512       |
|    fps              | 14          |
|    time_elapsed     | 24487       |
|    total_timesteps  | 353030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 8.04e-05    |
|    n_updates        | 88007       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.682    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23516    |
|    fps              | 14       |
|    time_elapsed     | 24491    |
|    total_timesteps  | 353090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.039040893 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.577       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 23580       |
|    fps              | 14          |
|    time_elapsed     | 24555       |
|    total_timesteps  | 354050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 6.93e-05    |
|    n_updates        | 88262       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.576    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23584    |
|    fps              | 14       |
|    time_elapsed     | 24559    |
|    total_timesteps  | 354110   |
| train/              |          |
|    le



Eval num_timesteps=355000, episode_reward=0.80 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.801    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 355000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.0069   |
|    n_updates        | 88499    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.027101727 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.469       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 23644       |
|    fps              | 14          |
|    time_elapsed     | 24624       |
|    total_timesteps  | 355015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00618     |
|    n_updates        | 88503       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.468    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23648    |
|    fps              | 14       |
|    time_elapsed     | 24628    |
|    total_timesteps  | 355075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04785607 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.64       |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 23712      |
|    fps              | 14         |
|    time_elapsed     | 24692      |
|    total_timesteps  | 356035     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00243    |
|    n_updates        | 88758      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.638    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23716    |
|    fps              | 14       |
|    time_elapsed     | 24696    |
|    total_timesteps  | 356095   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.035621803 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.568       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 23780       |
|    fps              | 14          |
|    time_elapsed     | 24760       |
|    total_timesteps  | 357055      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00339     |
|    n_updates        | 89013       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.596    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23784    |
|    fps              | 14       |
|    time_elapsed     | 24764    |
|    total_timesteps  | 357115   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050455943 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.662       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 23844       |
|    fps              | 14          |
|    time_elapsed     | 24824       |
|    total_timesteps  | 358015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000225    |
|    n_updates        | 89253       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.675    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23848    |
|    fps              | 14       |
|    time_elapsed     | 24828    |
|    total_timesteps  | 358075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05217799 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.784      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 23912      |
|    fps              | 14         |
|    time_elapsed     | 24892      |
|    total_timesteps  | 359035     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00147    |
|    n_updates        | 89508      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.785    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23916    |
|    fps              | 14       |
|    time_elapsed     | 24896    |
|    total_timesteps  | 359095   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=360000, episode_reward=0.73 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.735    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 360000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000138 |
|    n_updates        | 89749    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052212927 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.784       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 23980       |
|    fps              | 14          |
|    time_elapsed     | 24966       |
|    total_timesteps  | 360060      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000711    |
|    n_updates        | 89764       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.784    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23984    |
|    fps              | 14       |
|    time_elapsed     | 24970    |
|    total_timesteps  | 360120   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052236192 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.786       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 24044       |
|    fps              | 14          |
|    time_elapsed     | 25030       |
|    total_timesteps  | 361020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000292    |
|    n_updates        | 90004       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.786    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24048    |
|    fps              | 14       |
|    time_elapsed     | 25034    |
|    total_timesteps  | 361080   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05273706 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.791      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 24112      |
|    fps              | 14         |
|    time_elapsed     | 25098      |
|    total_timesteps  | 362040     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000309   |
|    n_updates        | 90259      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.792    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24116    |
|    fps              | 14       |
|    time_elapsed     | 25102    |
|    total_timesteps  | 362100   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053281162 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.796       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 24176       |
|    fps              | 14          |
|    time_elapsed     | 25162       |
|    total_timesteps  | 363000      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000755    |
|    n_updates        | 90499       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.796    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24180    |
|    fps              | 14       |
|    time_elapsed     | 25166    |
|    total_timesteps  | 363060   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05276046 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.796      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 24244      |
|    fps              | 14         |
|    time_elapsed     | 25230      |
|    total_timesteps  | 364020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 5.73e-05   |
|    n_updates        | 90754      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.796    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24248    |
|    fps              | 14       |
|    time_elapsed     | 25234    |
|    total_timesteps  | 364080   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=365000, episode_reward=0.81 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.808    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 365000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 5.38e-05 |
|    n_updates        | 90999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053097557 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.796       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 24312       |
|    fps              | 14          |
|    time_elapsed     | 25303       |
|    total_timesteps  | 365045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.42e-05    |
|    n_updates        | 91011       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.796    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24316    |
|    fps              | 14       |
|    time_elapsed     | 25307    |
|    total_timesteps  | 365105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053379197 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.805       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 24376       |
|    fps              | 14          |
|    time_elapsed     | 25367       |
|    total_timesteps  | 366005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.47e-05    |
|    n_updates        | 91251       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.804    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24380    |
|    fps              | 14       |
|    time_elapsed     | 25371    |
|    total_timesteps  | 366065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053628135 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.804       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 24444       |
|    fps              | 14          |
|    time_elapsed     | 25435       |
|    total_timesteps  | 367025      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.44e-05    |
|    n_updates        | 91506       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.803    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24448    |
|    fps              | 14       |
|    time_elapsed     | 25439    |
|    total_timesteps  | 367085   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05335475 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.802      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 24512      |
|    fps              | 14         |
|    time_elapsed     | 25503      |
|    total_timesteps  | 368045     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.53e-05   |
|    n_updates        | 91761      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.802    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24516    |
|    fps              | 14       |
|    time_elapsed     | 25507    |
|    total_timesteps  | 368105   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053458657 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.803       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 24576       |
|    fps              | 14          |
|    time_elapsed     | 25567       |
|    total_timesteps  | 369005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.56e-05    |
|    n_updates        | 92001       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.804    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24580    |
|    fps              | 14       |
|    time_elapsed     | 25571    |
|    total_timesteps  | 369065   |
| train/              |          |
|    le



Eval num_timesteps=370000, episode_reward=0.81 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.81     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 370000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 2.13e-05 |
|    n_updates        | 92249    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05402161 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.807      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 24644      |
|    fps              | 14         |
|    time_elapsed     | 25640      |
|    total_timesteps  | 370030     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.11e-05   |
|    n_updates        | 92257      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.806    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24648    |
|    fps              | 14       |
|    time_elapsed     | 25644    |
|    total_timesteps  | 370090   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05333602 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.807      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 24712      |
|    fps              | 14         |
|    time_elapsed     | 25709      |
|    total_timesteps  | 371050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.48e-05   |
|    n_updates        | 92512      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.806    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24716    |
|    fps              | 14       |
|    time_elapsed     | 25713    |
|    total_timesteps  | 371110   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05443409 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.813      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 24776      |
|    fps              | 14         |
|    time_elapsed     | 25773      |
|    total_timesteps  | 372010     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.55e-05   |
|    n_updates        | 92752      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.811    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24780    |
|    fps              | 14       |
|    time_elapsed     | 25777    |
|    total_timesteps  | 372070   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054041658 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.811       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 24844       |
|    fps              | 14          |
|    time_elapsed     | 25841       |
|    total_timesteps  | 373030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.13e-05    |
|    n_updates        | 93007       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.811    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24848    |
|    fps              | 14       |
|    time_elapsed     | 25845    |
|    total_timesteps  | 373090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0542055 |
| rollout/            |           |
|    ep_len_mean      | 15        |
|    ep_rew_mean      | 0.816     |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 24912     |
|    fps              | 14        |
|    time_elapsed     | 25909     |
|    total_timesteps  | 374050    |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 2.35e-05  |
|    n_updates        | 93262     |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.817    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24916    |
|    fps              | 14       |
|    time_elapsed     | 25913    |
|    total_timesteps  | 374110   |
| train/              |          |
|    learning_rate    | 0.0003   |
|   



Eval num_timesteps=375000, episode_reward=0.82 +/- 0.00
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.817    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 375000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 1.43e-05 |
|    n_updates        | 93499    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05266979 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.8        |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 24976      |
|    fps              | 14         |
|    time_elapsed     | 25978      |
|    total_timesteps  | 375015     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.08e-05   |
|    n_updates        | 93503      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.8      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24980    |
|    fps              | 14       |
|    time_elapsed     | 25982    |
|    total_timesteps  | 375075   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05411216 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.816      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 25044      |
|    fps              | 14         |
|    time_elapsed     | 26047      |
|    total_timesteps  | 376035     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.02e-05   |
|    n_updates        | 93758      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.816    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25048    |
|    fps              | 14       |
|    time_elapsed     | 26051    |
|    total_timesteps  | 376095   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05478313 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.818      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 25112      |
|    fps              | 14         |
|    time_elapsed     | 26115      |
|    total_timesteps  | 377055     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.73e-05   |
|    n_updates        | 94013      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.818    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25116    |
|    fps              | 14       |
|    time_elapsed     | 26119    |
|    total_timesteps  | 377115   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054110177 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.815       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 25176       |
|    fps              | 14          |
|    time_elapsed     | 26179       |
|    total_timesteps  | 378015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.72e-05    |
|    n_updates        | 94253       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.815    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25180    |
|    fps              | 14       |
|    time_elapsed     | 26183    |
|    total_timesteps  | 378075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054243363 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.812       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 25244       |
|    fps              | 14          |
|    time_elapsed     | 26247       |
|    total_timesteps  | 379035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.82e-05    |
|    n_updates        | 94508       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.813    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25248    |
|    fps              | 14       |
|    time_elapsed     | 26251    |
|    total_timesteps  | 379095   |
| train/              |          |
|    le



Eval num_timesteps=380000, episode_reward=0.84 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.844    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 380000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 1.3e-05  |
|    n_updates        | 94749    |
----------------------------------
New best mean reward!


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05436152 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.816      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 25312      |
|    fps              | 14         |
|    time_elapsed     | 26320      |
|    total_timesteps  | 380060     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 8.78e-06   |
|    n_updates        | 94764      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.816    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25316    |
|    fps              | 14       |
|    time_elapsed     | 26324    |
|    total_timesteps  | 380120   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053743638 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.815       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 25376       |
|    fps              | 14          |
|    time_elapsed     | 26384       |
|    total_timesteps  | 381020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.76e-05    |
|    n_updates        | 95004       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.814    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25380    |
|    fps              | 14       |
|    time_elapsed     | 26388    |
|    total_timesteps  | 381080   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05474142 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.816      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 25444      |
|    fps              | 14         |
|    time_elapsed     | 26453      |
|    total_timesteps  | 382040     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.72e-05   |
|    n_updates        | 95259      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.816    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25448    |
|    fps              | 14       |
|    time_elapsed     | 26457    |
|    total_timesteps  | 382100   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051009957 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.782       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 25508       |
|    fps              | 14          |
|    time_elapsed     | 26517       |
|    total_timesteps  | 383000      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.68e-05    |
|    n_updates        | 95499       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.78     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25512    |
|    fps              | 14       |
|    time_elapsed     | 26521    |
|    total_timesteps  | 383060   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050473146 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.743       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 25576       |
|    fps              | 14          |
|    time_elapsed     | 26585       |
|    total_timesteps  | 384020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.16e-05    |
|    n_updates        | 95754       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.73     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25580    |
|    fps              | 14       |
|    time_elapsed     | 26589    |
|    total_timesteps  | 384080   |
| train/              |          |
|    le



Eval num_timesteps=385000, episode_reward=0.79 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.786    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 385000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 8.48e-05 |
|    n_updates        | 95999    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.03809715 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.619      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 25644      |
|    fps              | 14         |
|    time_elapsed     | 26658      |
|    total_timesteps  | 385045     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 6.36e-05   |
|    n_updates        | 96011      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.603    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25648    |
|    fps              | 14       |
|    time_elapsed     | 26662    |
|    total_timesteps  | 385105   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0388311 |
| rollout/            |           |
|    ep_len_mean      | 15.1      |
|    ep_rew_mean      | 0.547     |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 25708     |
|    fps              | 14        |
|    time_elapsed     | 26723     |
|    total_timesteps  | 386005    |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 0.000148  |
|    n_updates        | 96251     |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.548    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25712    |
|    fps              | 14       |
|    time_elapsed     | 26727    |
|    total_timesteps  | 386065   |
| train/              |          |
|    learning_rate    | 0.0003   |
|   

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.028608786 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.465       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 25776       |
|    fps              | 14          |
|    time_elapsed     | 26791       |
|    total_timesteps  | 387025      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000294    |
|    n_updates        | 96506       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.467    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25780    |
|    fps              | 14       |
|    time_elapsed     | 26795    |
|    total_timesteps  | 387085   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049608603 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.707       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 25844       |
|    fps              | 14          |
|    time_elapsed     | 26859       |
|    total_timesteps  | 388045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00363     |
|    n_updates        | 96761       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.72     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25848    |
|    fps              | 14       |
|    time_elapsed     | 26863    |
|    total_timesteps  | 388105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.049202498 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.752       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 25908       |
|    fps              | 14          |
|    time_elapsed     | 26924       |
|    total_timesteps  | 389005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 6.42e-05    |
|    n_updates        | 97001       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.753    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25912    |
|    fps              | 14       |
|    time_elapsed     | 26928    |
|    total_timesteps  | 389065   |
| train/              |          |
|    le



Eval num_timesteps=390000, episode_reward=0.82 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.816    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 390000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00178  |
|    n_updates        | 97249    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053303577 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.775       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 25976       |
|    fps              | 14          |
|    time_elapsed     | 26998       |
|    total_timesteps  | 390030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 8.67e-05    |
|    n_updates        | 97257       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.774    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25980    |
|    fps              | 14       |
|    time_elapsed     | 27002    |
|    total_timesteps  | 390090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05284909 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.796      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 26044      |
|    fps              | 14         |
|    time_elapsed     | 27066      |
|    total_timesteps  | 391050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00118    |
|    n_updates        | 97512      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.795    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26048    |
|    fps              | 14       |
|    time_elapsed     | 27070    |
|    total_timesteps  | 391110   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053230204 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.798       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 26108       |
|    fps              | 14          |
|    time_elapsed     | 27131       |
|    total_timesteps  | 392010      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000458    |
|    n_updates        | 97752       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.798    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26112    |
|    fps              | 14       |
|    time_elapsed     | 27135    |
|    total_timesteps  | 392070   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053203333 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.798       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 26176       |
|    fps              | 14          |
|    time_elapsed     | 27199       |
|    total_timesteps  | 393030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000143    |
|    n_updates        | 98007       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.798    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26180    |
|    fps              | 14       |
|    time_elapsed     | 27203    |
|    total_timesteps  | 393090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0532806 |
| rollout/            |           |
|    ep_len_mean      | 15        |
|    ep_rew_mean      | 0.803     |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 26244     |
|    fps              | 14        |
|    time_elapsed     | 27268     |
|    total_timesteps  | 394050    |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 0.000285  |
|    n_updates        | 98262     |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.802    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26248    |
|    fps              | 14       |
|    time_elapsed     | 27272    |
|    total_timesteps  | 394110   |
| train/              |          |
|    learning_rate    | 0.0003   |
|   



Eval num_timesteps=395000, episode_reward=0.79 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.794    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 395000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000267 |
|    n_updates        | 98499    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05340435 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.801      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 26308      |
|    fps              | 14         |
|    time_elapsed     | 27338      |
|    total_timesteps  | 395015     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000254   |
|    n_updates        | 98503      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.801    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26312    |
|    fps              | 14       |
|    time_elapsed     | 27342    |
|    total_timesteps  | 395075   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053075127 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.803       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 26376       |
|    fps              | 14          |
|    time_elapsed     | 27407       |
|    total_timesteps  | 396035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.76e-05    |
|    n_updates        | 98758       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.803    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26380    |
|    fps              | 14       |
|    time_elapsed     | 27411    |
|    total_timesteps  | 396095   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05396206 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.808      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 26444      |
|    fps              | 14         |
|    time_elapsed     | 27475      |
|    total_timesteps  | 397055     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 4.45e-05   |
|    n_updates        | 99013      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.809    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26448    |
|    fps              | 14       |
|    time_elapsed     | 27479    |
|    total_timesteps  | 397115   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05268847 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.795      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 26508      |
|    fps              | 14         |
|    time_elapsed     | 27539      |
|    total_timesteps  | 398015     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.62e-05   |
|    n_updates        | 99253      |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.794    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26512    |
|    fps              | 14       |
|    time_elapsed     | 27543    |
|    total_timesteps  | 398075   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054010835 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.812       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 26576       |
|    fps              | 14          |
|    time_elapsed     | 27608       |
|    total_timesteps  | 399035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.45e-05    |
|    n_updates        | 99508       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.813    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26580    |
|    fps              | 14       |
|    time_elapsed     | 27612    |
|    total_timesteps  | 399095   |
| train/              |          |
|    le



Eval num_timesteps=400000, episode_reward=0.82 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.824    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 400000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 2.02e-05 |
|    n_updates        | 99749    |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054622546 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.82        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 26644       |
|    fps              | 14          |
|    time_elapsed     | 27682       |
|    total_timesteps  | 400060      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.78e-05    |
|    n_updates        | 99764       |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.82     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26648    |
|    fps              | 14       |
|    time_elapsed     | 27686    |
|    total_timesteps  | 400120   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054460373 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.82        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 26708       |
|    fps              | 14          |
|    time_elapsed     | 27746       |
|    total_timesteps  | 401020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000113    |
|    n_updates        | 100004      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.82     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26712    |
|    fps              | 14       |
|    time_elapsed     | 27750    |
|    total_timesteps  | 401080   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054442078 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.817       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 26776       |
|    fps              | 14          |
|    time_elapsed     | 27815       |
|    total_timesteps  | 402040      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.21e-05    |
|    n_updates        | 100259      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.817    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26780    |
|    fps              | 14       |
|    time_elapsed     | 27819    |
|    total_timesteps  | 402100   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053228594 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.803       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 26840       |
|    fps              | 14          |
|    time_elapsed     | 27880       |
|    total_timesteps  | 403000      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.13e-05    |
|    n_updates        | 100499      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.802    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26844    |
|    fps              | 14       |
|    time_elapsed     | 27884    |
|    total_timesteps  | 403060   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053951364 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.799       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 26908       |
|    fps              | 14          |
|    time_elapsed     | 27948       |
|    total_timesteps  | 404020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.84e-05    |
|    n_updates        | 100754      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.799    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26912    |
|    fps              | 14       |
|    time_elapsed     | 27952    |
|    total_timesteps  | 404080   |
| train/              |          |
|    le



Eval num_timesteps=405000, episode_reward=0.84 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.837    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 405000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 1.85e-05 |
|    n_updates        | 100999   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054435898 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.817       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 26976       |
|    fps              | 14          |
|    time_elapsed     | 28022       |
|    total_timesteps  | 405045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.11e-05    |
|    n_updates        | 101011      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.818    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26980    |
|    fps              | 14       |
|    time_elapsed     | 28026    |
|    total_timesteps  | 405105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052833702 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.802       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 27040       |
|    fps              | 14          |
|    time_elapsed     | 28087       |
|    total_timesteps  | 406005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.11e-05    |
|    n_updates        | 101251      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.803    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27044    |
|    fps              | 14       |
|    time_elapsed     | 28091    |
|    total_timesteps  | 406065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054538354 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.817       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 27108       |
|    fps              | 14          |
|    time_elapsed     | 28155       |
|    total_timesteps  | 407025      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000114    |
|    n_updates        | 101506      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.817    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27112    |
|    fps              | 14       |
|    time_elapsed     | 28159    |
|    total_timesteps  | 407085   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053308055 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.804       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 27176       |
|    fps              | 14          |
|    time_elapsed     | 28224       |
|    total_timesteps  | 408045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.41e-05    |
|    n_updates        | 101761      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.804    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27180    |
|    fps              | 14       |
|    time_elapsed     | 28228    |
|    total_timesteps  | 408105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054377813 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.803       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 27240       |
|    fps              | 14          |
|    time_elapsed     | 28288       |
|    total_timesteps  | 409005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.01e-05    |
|    n_updates        | 102001      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.804    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27244    |
|    fps              | 14       |
|    time_elapsed     | 28292    |
|    total_timesteps  | 409065   |
| train/              |          |
|    le



Eval num_timesteps=410000, episode_reward=0.84 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.839    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 410000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 1.84e-05 |
|    n_updates        | 102249   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054584328 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.821       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 27308       |
|    fps              | 14          |
|    time_elapsed     | 28362       |
|    total_timesteps  | 410030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.82e-05    |
|    n_updates        | 102257      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.821    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27312    |
|    fps              | 14       |
|    time_elapsed     | 28366    |
|    total_timesteps  | 410090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054504007 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.821       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 27376       |
|    fps              | 14          |
|    time_elapsed     | 28431       |
|    total_timesteps  | 411050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00011     |
|    n_updates        | 102512      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.82     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27380    |
|    fps              | 14       |
|    time_elapsed     | 28435    |
|    total_timesteps  | 411110   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05487064 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.823      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 27440      |
|    fps              | 14         |
|    time_elapsed     | 28496      |
|    total_timesteps  | 412010     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.41e-05   |
|    n_updates        | 102752     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.822    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27444    |
|    fps              | 14       |
|    time_elapsed     | 28500    |
|    total_timesteps  | 412070   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05468069 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.823      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 27508      |
|    fps              | 14         |
|    time_elapsed     | 28565      |
|    total_timesteps  | 413030     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.51e-05   |
|    n_updates        | 103007     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.821    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27512    |
|    fps              | 14       |
|    time_elapsed     | 28569    |
|    total_timesteps  | 413090   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05415976 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.816      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 27576      |
|    fps              | 14         |
|    time_elapsed     | 28633      |
|    total_timesteps  | 414050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.55e-05   |
|    n_updates        | 103262     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.817    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27580    |
|    fps              | 14       |
|    time_elapsed     | 28637    |
|    total_timesteps  | 414110   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=415000, episode_reward=0.82 +/- 0.00
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.819    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 415000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 1.66e-05 |
|    n_updates        | 103499   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054835975 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.821       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 27640       |
|    fps              | 14          |
|    time_elapsed     | 28703       |
|    total_timesteps  | 415015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.93e-05    |
|    n_updates        | 103503      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.819    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27644    |
|    fps              | 14       |
|    time_elapsed     | 28707    |
|    total_timesteps  | 415075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054358173 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.821       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 27708       |
|    fps              | 14          |
|    time_elapsed     | 28771       |
|    total_timesteps  | 416035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.71e-05    |
|    n_updates        | 103758      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.82     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27712    |
|    fps              | 14       |
|    time_elapsed     | 28775    |
|    total_timesteps  | 416095   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052037314 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.778       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 27776       |
|    fps              | 14          |
|    time_elapsed     | 28839       |
|    total_timesteps  | 417055      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.78e-05    |
|    n_updates        | 104013      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.777    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27780    |
|    fps              | 14       |
|    time_elapsed     | 28843    |
|    total_timesteps  | 417115   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051206592 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.758       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 27840       |
|    fps              | 14          |
|    time_elapsed     | 28904       |
|    total_timesteps  | 418015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.76e-05    |
|    n_updates        | 104253      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.757    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27844    |
|    fps              | 14       |
|    time_elapsed     | 28908    |
|    total_timesteps  | 418075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050068878 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.772       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 27908       |
|    fps              | 14          |
|    time_elapsed     | 28972       |
|    total_timesteps  | 419035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.15e-05    |
|    n_updates        | 104508      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.772    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27912    |
|    fps              | 14       |
|    time_elapsed     | 28976    |
|    total_timesteps  | 419095   |
| train/              |          |
|    le



Eval num_timesteps=420000, episode_reward=0.76 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.757    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 420000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 7.56e-05 |
|    n_updates        | 104749   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.044467863 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.658       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 27976       |
|    fps              | 14          |
|    time_elapsed     | 29046       |
|    total_timesteps  | 420060      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 6.95e-05    |
|    n_updates        | 104764      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.654    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27980    |
|    fps              | 14       |
|    time_elapsed     | 29051    |
|    total_timesteps  | 420120   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04291945 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.663      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 28040      |
|    fps              | 14         |
|    time_elapsed     | 29111      |
|    total_timesteps  | 421020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000425   |
|    n_updates        | 105004     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.676    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28044    |
|    fps              | 14       |
|    time_elapsed     | 29115    |
|    total_timesteps  | 421080   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.042886946 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.659       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 28108       |
|    fps              | 14          |
|    time_elapsed     | 29180       |
|    total_timesteps  | 422040      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000459    |
|    n_updates        | 105259      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.657    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28112    |
|    fps              | 14       |
|    time_elapsed     | 29184    |
|    total_timesteps  | 422100   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04846893 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.668      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 28172      |
|    fps              | 14         |
|    time_elapsed     | 29244      |
|    total_timesteps  | 423000     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000206   |
|    n_updates        | 105499     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.669    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28176    |
|    fps              | 14       |
|    time_elapsed     | 29248    |
|    total_timesteps  | 423060   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.041748572 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.66        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 28240       |
|    fps              | 14          |
|    time_elapsed     | 29313       |
|    total_timesteps  | 424020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000132    |
|    n_updates        | 105754      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.676    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28244    |
|    fps              | 14       |
|    time_elapsed     | 29317    |
|    total_timesteps  | 424080   |
| train/              |          |
|    le



Eval num_timesteps=425000, episode_reward=0.79 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.794    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 425000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 6.58e-05 |
|    n_updates        | 105999   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.04741394 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.693      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 28308      |
|    fps              | 14         |
|    time_elapsed     | 29386      |
|    total_timesteps  | 425045     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00511    |
|    n_updates        | 106011     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.697    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28312    |
|    fps              | 14       |
|    time_elapsed     | 29391    |
|    total_timesteps  | 425105   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.045852683 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.708       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 28372       |
|    fps              | 14          |
|    time_elapsed     | 29451       |
|    total_timesteps  | 426005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000191    |
|    n_updates        | 106251      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.708    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28376    |
|    fps              | 14       |
|    time_elapsed     | 29455    |
|    total_timesteps  | 426065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05197879 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.749      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 28440      |
|    fps              | 14         |
|    time_elapsed     | 29520      |
|    total_timesteps  | 427025     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000141   |
|    n_updates        | 106506     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.765    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28444    |
|    fps              | 14       |
|    time_elapsed     | 29524    |
|    total_timesteps  | 427085   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052312925 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.792       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 28508       |
|    fps              | 14          |
|    time_elapsed     | 29588       |
|    total_timesteps  | 428045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000599    |
|    n_updates        | 106761      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.794    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28512    |
|    fps              | 14       |
|    time_elapsed     | 29592    |
|    total_timesteps  | 428105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052155554 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.778       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 28572       |
|    fps              | 14          |
|    time_elapsed     | 29653       |
|    total_timesteps  | 429005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000433    |
|    n_updates        | 107001      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.778    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28576    |
|    fps              | 14       |
|    time_elapsed     | 29657    |
|    total_timesteps  | 429065   |
| train/              |          |
|    le



Eval num_timesteps=430000, episode_reward=0.82 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.82     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 430000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000191 |
|    n_updates        | 107249   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0536869 |
| rollout/            |           |
|    ep_len_mean      | 15.1      |
|    ep_rew_mean      | 0.792     |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 28640     |
|    fps              | 14        |
|    time_elapsed     | 29727     |
|    total_timesteps  | 430030    |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 0.000451  |
|    n_updates        | 107257    |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.791    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28644    |
|    fps              | 14       |
|    time_elapsed     | 29731    |
|    total_timesteps  | 430090   |
| train/              |          |
|    learning_rate    | 0.0003   |
|   

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05139672 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.787      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 28708      |
|    fps              | 14         |
|    time_elapsed     | 29795      |
|    total_timesteps  | 431050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 6.58e-05   |
|    n_updates        | 107512     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.786    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28712    |
|    fps              | 14       |
|    time_elapsed     | 29799    |
|    total_timesteps  | 431110   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054120347 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.803       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 28772       |
|    fps              | 14          |
|    time_elapsed     | 29860       |
|    total_timesteps  | 432010      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 9.69e-05    |
|    n_updates        | 107752      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.804    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28776    |
|    fps              | 14       |
|    time_elapsed     | 29864    |
|    total_timesteps  | 432070   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05388041 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.81       |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 28840      |
|    fps              | 14         |
|    time_elapsed     | 29928      |
|    total_timesteps  | 433030     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 6.9e-05    |
|    n_updates        | 108007     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.811    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28844    |
|    fps              | 14       |
|    time_elapsed     | 29932    |
|    total_timesteps  | 433090   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054352745 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.817       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 28908       |
|    fps              | 14          |
|    time_elapsed     | 29997       |
|    total_timesteps  | 434050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.97e-05    |
|    n_updates        | 108262      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.817    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28912    |
|    fps              | 14       |
|    time_elapsed     | 30001    |
|    total_timesteps  | 434110   |
| train/              |          |
|    le



Eval num_timesteps=435000, episode_reward=0.84 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.84     |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 435000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 4.81e-05 |
|    n_updates        | 108499   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054401323 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.816       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 28972       |
|    fps              | 14          |
|    time_elapsed     | 30067       |
|    total_timesteps  | 435015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.6e-05     |
|    n_updates        | 108503      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.816    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28976    |
|    fps              | 14       |
|    time_elapsed     | 30071    |
|    total_timesteps  | 435075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05421704 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.818      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 29040      |
|    fps              | 14         |
|    time_elapsed     | 30135      |
|    total_timesteps  | 436035     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.42e-05   |
|    n_updates        | 108758     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.815    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29044    |
|    fps              | 14       |
|    time_elapsed     | 30139    |
|    total_timesteps  | 436095   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052890725 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.799       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 29108       |
|    fps              | 14          |
|    time_elapsed     | 30204       |
|    total_timesteps  | 437055      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.07e-05    |
|    n_updates        | 109013      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.799    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29112    |
|    fps              | 14       |
|    time_elapsed     | 30208    |
|    total_timesteps  | 437115   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054942656 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.807       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 29172       |
|    fps              | 14          |
|    time_elapsed     | 30269       |
|    total_timesteps  | 438015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.79e-05    |
|    n_updates        | 109253      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.806    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29176    |
|    fps              | 14       |
|    time_elapsed     | 30273    |
|    total_timesteps  | 438075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054350328 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.804       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 29240       |
|    fps              | 14          |
|    time_elapsed     | 30338       |
|    total_timesteps  | 439035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.8e-05     |
|    n_updates        | 109508      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.804    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29244    |
|    fps              | 14       |
|    time_elapsed     | 30342    |
|    total_timesteps  | 439095   |
| train/              |          |
|    le



Eval num_timesteps=440000, episode_reward=0.82 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.819    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 440000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 3.84e-05 |
|    n_updates        | 109749   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052191027 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.795       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 29308       |
|    fps              | 14          |
|    time_elapsed     | 30411       |
|    total_timesteps  | 440060      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3e-05       |
|    n_updates        | 109764      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.795    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29312    |
|    fps              | 14       |
|    time_elapsed     | 30415    |
|    total_timesteps  | 440120   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054185823 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.814       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 29372       |
|    fps              | 14          |
|    time_elapsed     | 30476       |
|    total_timesteps  | 441020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.98e-05    |
|    n_updates        | 110004      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.814    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29376    |
|    fps              | 14       |
|    time_elapsed     | 30480    |
|    total_timesteps  | 441080   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054763824 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.819       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 29440       |
|    fps              | 14          |
|    time_elapsed     | 30544       |
|    total_timesteps  | 442040      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.49e-05    |
|    n_updates        | 110259      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.818    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29444    |
|    fps              | 14       |
|    time_elapsed     | 30548    |
|    total_timesteps  | 442100   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05423335 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.812      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 29504      |
|    fps              | 14         |
|    time_elapsed     | 30608      |
|    total_timesteps  | 443000     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 3.14e-05   |
|    n_updates        | 110499     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.812    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29508    |
|    fps              | 14       |
|    time_elapsed     | 30612    |
|    total_timesteps  | 443060   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053922866 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.812       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 29572       |
|    fps              | 14          |
|    time_elapsed     | 30677       |
|    total_timesteps  | 444020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.49e-05    |
|    n_updates        | 110754      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.811    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29576    |
|    fps              | 14       |
|    time_elapsed     | 30681    |
|    total_timesteps  | 444080   |
| train/              |          |
|    le



Eval num_timesteps=445000, episode_reward=0.85 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.854    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 445000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 1.39e-05 |
|    n_updates        | 110999   |
----------------------------------
New best mean reward!


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054386612 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.815       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 29640       |
|    fps              | 14          |
|    time_elapsed     | 30751       |
|    total_timesteps  | 445045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.6e-05     |
|    n_updates        | 111011      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.815    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29644    |
|    fps              | 14       |
|    time_elapsed     | 30755    |
|    total_timesteps  | 445105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053968377 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.814       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 29704       |
|    fps              | 14          |
|    time_elapsed     | 30816       |
|    total_timesteps  | 446005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.79e-05    |
|    n_updates        | 111251      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.815    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29708    |
|    fps              | 14       |
|    time_elapsed     | 30820    |
|    total_timesteps  | 446065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05472364 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.817      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 29772      |
|    fps              | 14         |
|    time_elapsed     | 30884      |
|    total_timesteps  | 447025     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000125   |
|    n_updates        | 111506     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.816    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29776    |
|    fps              | 14       |
|    time_elapsed     | 30888    |
|    total_timesteps  | 447085   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054344438 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.817       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 29840       |
|    fps              | 14          |
|    time_elapsed     | 30953       |
|    total_timesteps  | 448045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.93e-05    |
|    n_updates        | 111761      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.818    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29844    |
|    fps              | 14       |
|    time_elapsed     | 30957    |
|    total_timesteps  | 448105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053852554 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.812       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 29904       |
|    fps              | 14          |
|    time_elapsed     | 31017       |
|    total_timesteps  | 449005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.68e-05    |
|    n_updates        | 112001      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.811    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29908    |
|    fps              | 14       |
|    time_elapsed     | 31021    |
|    total_timesteps  | 449065   |
| train/              |          |
|    le



Eval num_timesteps=450000, episode_reward=0.84 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.838    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 450000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 1.54e-05 |
|    n_updates        | 112249   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05458288 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.817      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 29972      |
|    fps              | 14         |
|    time_elapsed     | 31091      |
|    total_timesteps  | 450030     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.43e-05   |
|    n_updates        | 112257     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.818    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29976    |
|    fps              | 14       |
|    time_elapsed     | 31095    |
|    total_timesteps  | 450090   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05430497 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.819      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 30040      |
|    fps              | 14         |
|    time_elapsed     | 31160      |
|    total_timesteps  | 451050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.67e-05   |
|    n_updates        | 112512     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.819    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30044    |
|    fps              | 14       |
|    time_elapsed     | 31164    |
|    total_timesteps  | 451110   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05309852 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.803      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 30104      |
|    fps              | 14         |
|    time_elapsed     | 31225      |
|    total_timesteps  | 452010     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.13e-05   |
|    n_updates        | 112752     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.802    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30108    |
|    fps              | 14       |
|    time_elapsed     | 31229    |
|    total_timesteps  | 452070   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05336885 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.79       |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 30172      |
|    fps              | 14         |
|    time_elapsed     | 31293      |
|    total_timesteps  | 453030     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.75e-05   |
|    n_updates        | 113007     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.791    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30176    |
|    fps              | 14       |
|    time_elapsed     | 31297    |
|    total_timesteps  | 453090   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05461658 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.811      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 30240      |
|    fps              | 14         |
|    time_elapsed     | 31362      |
|    total_timesteps  | 454050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 1.94e-05   |
|    n_updates        | 113262     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.813    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30244    |
|    fps              | 14       |
|    time_elapsed     | 31366    |
|    total_timesteps  | 454110   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=455000, episode_reward=0.82 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.822    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 455000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 2.55e-05 |
|    n_updates        | 113499   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054679655 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.824       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 30304       |
|    fps              | 14          |
|    time_elapsed     | 31431       |
|    total_timesteps  | 455015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.82e-05    |
|    n_updates        | 113503      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.824    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30308    |
|    fps              | 14       |
|    time_elapsed     | 31435    |
|    total_timesteps  | 455075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053922325 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.816       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 30372       |
|    fps              | 14          |
|    time_elapsed     | 31500       |
|    total_timesteps  | 456035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.71e-05    |
|    n_updates        | 113758      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.816    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30376    |
|    fps              | 14       |
|    time_elapsed     | 31504    |
|    total_timesteps  | 456095   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054656457 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.815       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 30440       |
|    fps              | 14          |
|    time_elapsed     | 31569       |
|    total_timesteps  | 457055      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.62e-05    |
|    n_updates        | 114013      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.816    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30444    |
|    fps              | 14       |
|    time_elapsed     | 31573    |
|    total_timesteps  | 457115   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05492302 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.821      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 30504      |
|    fps              | 14         |
|    time_elapsed     | 31633      |
|    total_timesteps  | 458015     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.23e-05   |
|    n_updates        | 114253     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.821    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30508    |
|    fps              | 14       |
|    time_elapsed     | 31637    |
|    total_timesteps  | 458075   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054267544 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.82        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 30572       |
|    fps              | 14          |
|    time_elapsed     | 31702       |
|    total_timesteps  | 459035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.95e-05    |
|    n_updates        | 114508      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.819    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30576    |
|    fps              | 14       |
|    time_elapsed     | 31706    |
|    total_timesteps  | 459095   |
| train/              |          |
|    le



Eval num_timesteps=460000, episode_reward=0.82 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.825    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 460000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 1.87e-05 |
|    n_updates        | 114749   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054743707 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.821       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 30640       |
|    fps              | 14          |
|    time_elapsed     | 31776       |
|    total_timesteps  | 460060      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.05e-05    |
|    n_updates        | 114764      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.821    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30644    |
|    fps              | 14       |
|    time_elapsed     | 31780    |
|    total_timesteps  | 460120   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.03293117 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.591      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 30704      |
|    fps              | 14         |
|    time_elapsed     | 31840      |
|    total_timesteps  | 461020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00556    |
|    n_updates        | 115004     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.542    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30708    |
|    fps              | 14       |
|    time_elapsed     | 31845    |
|    total_timesteps  | 461080   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


----------------------------------------
| mean_reward         | -0.00048606945 |
| rollout/            |                |
|    ep_len_mean      | 15             |
|    ep_rew_mean      | 0.0945         |
|    exploration_rate | 0.05           |
| time/               |                |
|    episodes         | 30772          |
|    fps              | 14             |
|    time_elapsed     | 31909          |
|    total_timesteps  | 462040         |
| train/              |                |
|    learning_rate    | 0.0003         |
|    loss             | 9.31e-05       |
|    n_updates        | 115259         |
----------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.062    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30776    |
|    fps              | 14       |
|    time_elapsed     | 31913    |
|    total_timesteps  | 46210

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.032828987 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.289       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 30836       |
|    fps              | 14          |
|    time_elapsed     | 31974       |
|    total_timesteps  | 463000      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000596    |
|    n_updates        | 115499      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.331    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30840    |
|    fps              | 14       |
|    time_elapsed     | 31978    |
|    total_timesteps  | 463060   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05168524 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.746      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 30904      |
|    fps              | 14         |
|    time_elapsed     | 32043      |
|    total_timesteps  | 464020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00108    |
|    n_updates        | 115754     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.745    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30908    |
|    fps              | 14       |
|    time_elapsed     | 32047    |
|    total_timesteps  | 464080   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=465000, episode_reward=0.80 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.802    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 465000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00309  |
|    n_updates        | 115999   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052903756 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.796       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 30972       |
|    fps              | 14          |
|    time_elapsed     | 32117       |
|    total_timesteps  | 465045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000673    |
|    n_updates        | 116011      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.796    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30976    |
|    fps              | 14       |
|    time_elapsed     | 32121    |
|    total_timesteps  | 465105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-----------------------------------
| mean_reward         | 0.0510794 |
| rollout/            |           |
|    ep_len_mean      | 15.1      |
|    ep_rew_mean      | 0.78      |
|    exploration_rate | 0.05      |
| time/               |           |
|    episodes         | 31036     |
|    fps              | 14        |
|    time_elapsed     | 32181     |
|    total_timesteps  | 466005    |
| train/              |           |
|    learning_rate    | 0.0003    |
|    loss             | 0.00471   |
|    n_updates        | 116251    |
-----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.779    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31040    |
|    fps              | 14       |
|    time_elapsed     | 32185    |
|    total_timesteps  | 466065   |
| train/              |          |
|    learning_rate    | 0.0003   |
|   

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05157031 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.762      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 31104      |
|    fps              | 14         |
|    time_elapsed     | 32250      |
|    total_timesteps  | 467025     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000229   |
|    n_updates        | 116506     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.761    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31108    |
|    fps              | 14       |
|    time_elapsed     | 32254    |
|    total_timesteps  | 467085   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051005837 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.766       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 31172       |
|    fps              | 14          |
|    time_elapsed     | 32319       |
|    total_timesteps  | 468045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.00474     |
|    n_updates        | 116761      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.767    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31176    |
|    fps              | 14       |
|    time_elapsed     | 32323    |
|    total_timesteps  | 468105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05273896 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.796      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 31236      |
|    fps              | 14         |
|    time_elapsed     | 32384      |
|    total_timesteps  | 469005     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00136    |
|    n_updates        | 117001     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.796    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31240    |
|    fps              | 14       |
|    time_elapsed     | 32388    |
|    total_timesteps  | 469065   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=470000, episode_reward=0.79 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.794    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 470000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00058  |
|    n_updates        | 117249   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05003274 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.767      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 31304      |
|    fps              | 14         |
|    time_elapsed     | 32458      |
|    total_timesteps  | 470030     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.00203    |
|    n_updates        | 117257     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.767    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31308    |
|    fps              | 14       |
|    time_elapsed     | 32462    |
|    total_timesteps  | 470090   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052759703 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.768       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 31372       |
|    fps              | 14          |
|    time_elapsed     | 32526       |
|    total_timesteps  | 471050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000134    |
|    n_updates        | 117512      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.769    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31376    |
|    fps              | 14       |
|    time_elapsed     | 32530    |
|    total_timesteps  | 471110   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05029917 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.768      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 31436      |
|    fps              | 14         |
|    time_elapsed     | 32591      |
|    total_timesteps  | 472010     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.0002     |
|    n_updates        | 117752     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.768    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31440    |
|    fps              | 14       |
|    time_elapsed     | 32595    |
|    total_timesteps  | 472070   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054049682 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.804       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 31504       |
|    fps              | 14          |
|    time_elapsed     | 32660       |
|    total_timesteps  | 473030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.76e-05    |
|    n_updates        | 118007      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.806    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31508    |
|    fps              | 14       |
|    time_elapsed     | 32664    |
|    total_timesteps  | 473090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05398804 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.813      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 31572      |
|    fps              | 14         |
|    time_elapsed     | 32729      |
|    total_timesteps  | 474050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 4.36e-05   |
|    n_updates        | 118262     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.813    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31576    |
|    fps              | 14       |
|    time_elapsed     | 32733    |
|    total_timesteps  | 474110   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=475000, episode_reward=0.84 +/- 0.02
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.838    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 475000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 5.01e-05 |
|    n_updates        | 118499   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054002244 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.81        |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 31636       |
|    fps              | 14          |
|    time_elapsed     | 32799       |
|    total_timesteps  | 475015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 4.03e-05    |
|    n_updates        | 118503      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.812    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31640    |
|    fps              | 14       |
|    time_elapsed     | 32803    |
|    total_timesteps  | 475075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052791297 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.802       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 31704       |
|    fps              | 14          |
|    time_elapsed     | 32868       |
|    total_timesteps  | 476035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.25e-05    |
|    n_updates        | 118758      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.802    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31708    |
|    fps              | 14       |
|    time_elapsed     | 32872    |
|    total_timesteps  | 476095   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054508775 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.815       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 31772       |
|    fps              | 14          |
|    time_elapsed     | 32937       |
|    total_timesteps  | 477055      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.5e-05     |
|    n_updates        | 119013      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.816    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31776    |
|    fps              | 14       |
|    time_elapsed     | 32941    |
|    total_timesteps  | 477115   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054254305 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.815       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 31836       |
|    fps              | 14          |
|    time_elapsed     | 33001       |
|    total_timesteps  | 478015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 3.56e-05    |
|    n_updates        | 119253      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.815    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31840    |
|    fps              | 14       |
|    time_elapsed     | 33005    |
|    total_timesteps  | 478075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.050866302 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.782       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 31904       |
|    fps              | 14          |
|    time_elapsed     | 33070       |
|    total_timesteps  | 479035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 5.72e-05    |
|    n_updates        | 119508      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.768    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31908    |
|    fps              | 14       |
|    time_elapsed     | 33074    |
|    total_timesteps  | 479095   |
| train/              |          |
|    le



Eval num_timesteps=480000, episode_reward=0.83 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.825    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 480000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 6.98e-05 |
|    n_updates        | 119749   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05248043 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.762      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 31972      |
|    fps              | 14         |
|    time_elapsed     | 33144      |
|    total_timesteps  | 480060     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 7.39e-05   |
|    n_updates        | 119764     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.762    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31976    |
|    fps              | 14       |
|    time_elapsed     | 33148    |
|    total_timesteps  | 480120   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.047160376 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.742       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 32036       |
|    fps              | 14          |
|    time_elapsed     | 33209       |
|    total_timesteps  | 481020      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 7.44e-05    |
|    n_updates        | 120004      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.74     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32040    |
|    fps              | 14       |
|    time_elapsed     | 33213    |
|    total_timesteps  | 481080   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05392248 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.762      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 32104      |
|    fps              | 14         |
|    time_elapsed     | 33278      |
|    total_timesteps  | 482040     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 2.8e-05    |
|    n_updates        | 120259     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.793    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32108    |
|    fps              | 14       |
|    time_elapsed     | 33282    |
|    total_timesteps  | 482100   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05220269 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.789      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 32168      |
|    fps              | 14         |
|    time_elapsed     | 33343      |
|    total_timesteps  | 483000     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 6.72e-05   |
|    n_updates        | 120499     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.772    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32172    |
|    fps              | 14       |
|    time_elapsed     | 33347    |
|    total_timesteps  | 483060   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05188827 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.774      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 32236      |
|    fps              | 14         |
|    time_elapsed     | 33411      |
|    total_timesteps  | 484020     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000214   |
|    n_updates        | 120754     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.773    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32240    |
|    fps              | 14       |
|    time_elapsed     | 33415    |
|    total_timesteps  | 484080   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=485000, episode_reward=0.82 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.823    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 485000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.000221 |
|    n_updates        | 120999   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05433657 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.814      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 32304      |
|    fps              | 14         |
|    time_elapsed     | 33485      |
|    total_timesteps  | 485045     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000161   |
|    n_updates        | 121011     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.813    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32308    |
|    fps              | 14       |
|    time_elapsed     | 33489    |
|    total_timesteps  | 485105   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052374076 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.798       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 32368       |
|    fps              | 14          |
|    time_elapsed     | 33549       |
|    total_timesteps  | 486005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 8.96e-05    |
|    n_updates        | 121251      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.797    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32372    |
|    fps              | 14       |
|    time_elapsed     | 33553    |
|    total_timesteps  | 486065   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052334093 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.795       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 32436       |
|    fps              | 14          |
|    time_elapsed     | 33618       |
|    total_timesteps  | 487025      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000145    |
|    n_updates        | 121506      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.795    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32440    |
|    fps              | 14       |
|    time_elapsed     | 33622    |
|    total_timesteps  | 487085   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053926613 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.793       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 32504       |
|    fps              | 14          |
|    time_elapsed     | 33687       |
|    total_timesteps  | 488045      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 9.6e-05     |
|    n_updates        | 121761      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.791    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32508    |
|    fps              | 14       |
|    time_elapsed     | 33691    |
|    total_timesteps  | 488105   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053858913 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.809       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 32568       |
|    fps              | 14          |
|    time_elapsed     | 33751       |
|    total_timesteps  | 489005      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 7.42e-05    |
|    n_updates        | 122001      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.81     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32572    |
|    fps              | 14       |
|    time_elapsed     | 33755    |
|    total_timesteps  | 489065   |
| train/              |          |
|    le



Eval num_timesteps=490000, episode_reward=0.83 +/- 0.01
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.831    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 490000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 0.00341  |
|    n_updates        | 122249   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05425224 |
| rollout/            |            |
|    ep_len_mean      | 15.1       |
|    ep_rew_mean      | 0.813      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 32636      |
|    fps              | 14         |
|    time_elapsed     | 33826      |
|    total_timesteps  | 490030     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000149   |
|    n_updates        | 122257     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.812    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32640    |
|    fps              | 14       |
|    time_elapsed     | 33830    |
|    total_timesteps  | 490090   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.051727593 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.789       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 32704       |
|    fps              | 14          |
|    time_elapsed     | 33894       |
|    total_timesteps  | 491050      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.26e-05    |
|    n_updates        | 122512      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.79     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32708    |
|    fps              | 14       |
|    time_elapsed     | 33899    |
|    total_timesteps  | 491110   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05436783 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.809      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 32768      |
|    fps              | 14         |
|    time_elapsed     | 33959      |
|    total_timesteps  | 492010     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 4.03e-05   |
|    n_updates        | 122752     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.811    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32772    |
|    fps              | 14       |
|    time_elapsed     | 33963    |
|    total_timesteps  | 492070   |
| train/              |          |
|    learning_rate    |

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054269273 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.812       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 32836       |
|    fps              | 14          |
|    time_elapsed     | 34028       |
|    total_timesteps  | 493030      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 0.000123    |
|    n_updates        | 123007      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.813    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32840    |
|    fps              | 14       |
|    time_elapsed     | 34032    |
|    total_timesteps  | 493090   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05277308 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.801      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 32904      |
|    fps              | 14         |
|    time_elapsed     | 34097      |
|    total_timesteps  | 494050     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 6.09e-05   |
|    n_updates        | 123262     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.801    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32908    |
|    fps              | 14       |
|    time_elapsed     | 34101    |
|    total_timesteps  | 494110   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=495000, episode_reward=0.84 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.837    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 495000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 2.56e-05 |
|    n_updates        | 123499   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.052974287 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.788       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 32968       |
|    fps              | 14          |
|    time_elapsed     | 34167       |
|    total_timesteps  | 495015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.65e-05    |
|    n_updates        | 123503      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.786    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32972    |
|    fps              | 14       |
|    time_elapsed     | 34171    |
|    total_timesteps  | 495075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.053842694 |
| rollout/            |             |
|    ep_len_mean      | 15.1        |
|    ep_rew_mean      | 0.8         |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 33036       |
|    fps              | 14          |
|    time_elapsed     | 34235       |
|    total_timesteps  | 496035      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 2.52e-05    |
|    n_updates        | 123758      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 0.801    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 33040    |
|    fps              | 14       |
|    time_elapsed     | 34239    |
|    total_timesteps  | 496095   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054525334 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.815       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 33104       |
|    fps              | 14          |
|    time_elapsed     | 34304       |
|    total_timesteps  | 497055      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.75e-05    |
|    n_updates        | 124013      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.816    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 33108    |
|    fps              | 14       |
|    time_elapsed     | 34308    |
|    total_timesteps  | 497115   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


-------------------------------------
| mean_reward         | 0.054012123 |
| rollout/            |             |
|    ep_len_mean      | 15          |
|    ep_rew_mean      | 0.811       |
|    exploration_rate | 0.05        |
| time/               |             |
|    episodes         | 33168       |
|    fps              | 14          |
|    time_elapsed     | 34368       |
|    total_timesteps  | 498015      |
| train/              |             |
|    learning_rate    | 0.0003      |
|    loss             | 1.54e-05    |
|    n_updates        | 124253      |
-------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.811    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 33172    |
|    fps              | 14       |
|    time_elapsed     | 34372    |
|    total_timesteps  | 498075   |
| train/              |          |
|    le

  rewards_list = [float(reward) for reward in self.rewards_history]


------------------------------------
| mean_reward         | 0.05363181 |
| rollout/            |            |
|    ep_len_mean      | 15         |
|    ep_rew_mean      | 0.806      |
|    exploration_rate | 0.05       |
| time/               |            |
|    episodes         | 33236      |
|    fps              | 14         |
|    time_elapsed     | 34437      |
|    total_timesteps  | 499035     |
| train/              |            |
|    learning_rate    | 0.0003     |
|    loss             | 0.000303   |
|    n_updates        | 124508     |
------------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15       |
|    ep_rew_mean      | 0.806    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 33240    |
|    fps              | 14       |
|    time_elapsed     | 34441    |
|    total_timesteps  | 499095   |
| train/              |          |
|    learning_rate    |



Eval num_timesteps=500000, episode_reward=0.80 +/- 0.03
Episode length: 15.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 15       |
|    mean_reward      | 0.798    |
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    total_timesteps  | 500000   |
| train/              |          |
|    learning_rate    | 0.0003   |
|    loss             | 3.55e-05 |
|    n_updates        | 124749   |
----------------------------------


  rewards_list = [float(reward) for reward in self.rewards_history]


<stable_baselines3.dqn.dqn.DQN at 0x352bdc7f0>