In [1]:
from context import *
from stable_baselines3 import PPO,A2C,SAC,TD3,DQN,DDPG
from stable_baselines3.common.save_util import load_from_zip_file
from stable_baselines3.common.monitor import Monitor
import torch as th
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import SubprocVecEnv

from pprint import pprint
import enum

import rlrom.wrappers.stl_wrapper
import stlrom
from rlrom.envs import *
import rlrom.utils
import time
import matplotlib.pyplot as plt


pygame 2.6.1 (SDL 2.28.4, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html


2025-04-15 16:55:01.947019: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-15 16:55:01.960541: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-15 16:55:01.972026: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-15 16:55:01.975702: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-15 16:55:01.987367: I tensorflow/core/platform/cpu_feature_guar

In [2]:
class EnvMode(enum.Enum):
    VANILLA=0
    TERM_SLOW=1
    
def make_env(train=True, env_mode=EnvMode.VANILLA, verbose=0):
    if train:
        env = gym.make("highway-fast-v0")
    else:
        env = gym.make("highway-v0", render_mode='human')

    env.unwrapped.configure({
            "observation": {"type": "Kinematics"},
                "action": {
                    "type": "DiscreteMetaAction",
                },
                "lanes_count": 4,
                "vehicles_count": 50,
                "controlled_vehicles": 1,
                "initial_lane_id": None,
                "duration": 100,  # [s]
                "ego_spacing": 2,
                "vehicles_density": 1,
                "collision_reward": -.4,  # The reward received when colliding with a vehicle.
                "right_lane_reward": 0,  # The reward received when driving on the right-most lanes, linearly mapped to
                # zero for other lanes.
                "high_speed_reward": 1.,  # The reward received when driving at full speed, linearly mapped to zero for
                # lower speeds according to config["reward_speed_range"].
                "lane_change_reward": 0,  # The reward received at each lane change action.
                "reward_speed_range": [20, 30],
                "normalize_reward": True,
                "offroad_terminal": False,        
    })

    if env_mode==EnvMode.TERM_SLOW:
        cfg = cfg_envs['highway-env']
        driver= stlrom.STLDriver()
        driver.parse_string(cfg['specs'])        
        env = rlrom.wrappers.stl_wrapper.STLWrapper(env,driver,signals_map=cfg, terminal_formulas={'ego_slow_too_long'})

    if verbose>=1:
        pprint(cfg)
    return env

# Training

In [3]:
n_cpu = 12
batch_size = 64
neurons = 128
policy_kwargs = dict(
    #activation_fn=th.nn.ReLU,
    net_arch=dict(pi=[neurons, neurons], qf=[neurons, neurons])
)

vec_env = make_vec_env(make_env, n_envs=n_cpu, vec_env_cls=SubprocVecEnv)
model = PPO(
     "MlpPolicy",
     vec_env,
     device='cpu',
     policy_kwargs=policy_kwargs,
     n_steps=batch_size * 12 // n_cpu,
     batch_size=batch_size,
     n_epochs=10,
     learning_rate=5e-4,
     gamma=0.9,
     verbose=1,
     tensorboard_log="./highway_ppo/"
)

2025-04-15 07:55:11.857827: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-15 07:55:11.858993: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-15 07:55:11.860464: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-15 07:55:11.864645: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly 

Using cpu device


In [None]:
# Train the agent
model.learn(
    total_timesteps=200_000,
    progress_bar=True
)


Logging to ./highway_ppo/PPO_24


Output()

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 11.1     |
|    ep_rew_mean     | 6.02     |
| time/              |          |
|    fps             | 134      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 768      |
---------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.2        |
|    ep_rew_mean          | 7.44        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 2           |
|    time_elapsed         | 11          |
|    total_timesteps      | 1536        |
| train/                  |             |
|    approx_kl            | 0.013587017 |
|    clip_fraction        | 0.244       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.6        |
|    explained_variance   | 0.0174      |
|    learning_rate        | 0.0005      |
|    loss                 | 3.36        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0248     |
|    value_loss           | 8.25        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8        |
|    ep_rew_mean          | 7.27        |
| time/                   |             |
|    fps                  | 137         |
|    iterations           | 3           |
|    time_elapsed         | 16          |
|    total_timesteps      | 2304        |
| train/                  |             |
|    approx_kl            | 0.013533906 |
|    clip_fraction        | 0.195       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.57       |
|    explained_variance   | 0.011       |
|    learning_rate        | 0.0005      |
|    loss                 | 2.79        |
|    n_updates            | 20          |
|    policy_gradient_loss | -0.023      |
|    value_loss           | 7.76        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.2        |
|    ep_rew_mean          | 7.33        |
| time/                   |             |
|    fps                  | 135         |
|    iterations           | 4           |
|    time_elapsed         | 22          |
|    total_timesteps      | 3072        |
| train/                  |             |
|    approx_kl            | 0.017339248 |
|    clip_fraction        | 0.286       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.51       |
|    explained_variance   | 0.0236      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.22        |
|    n_updates            | 30          |
|    policy_gradient_loss | -0.0287     |
|    value_loss           | 5.52        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.1        |
|    ep_rew_mean          | 7.5         |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 5           |
|    time_elapsed         | 28          |
|    total_timesteps      | 3840        |
| train/                  |             |
|    approx_kl            | 0.012144707 |
|    clip_fraction        | 0.209       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.46       |
|    explained_variance   | -0.0371     |
|    learning_rate        | 0.0005      |
|    loss                 | 2.04        |
|    n_updates            | 40          |
|    policy_gradient_loss | -0.0197     |
|    value_loss           | 6.95        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.54        |
|    ep_rew_mean          | 7.51        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 6           |
|    time_elapsed         | 34          |
|    total_timesteps      | 4608        |
| train/                  |             |
|    approx_kl            | 0.013178402 |
|    clip_fraction        | 0.172       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | -0.0213     |
|    learning_rate        | 0.0005      |
|    loss                 | 2.96        |
|    n_updates            | 50          |
|    policy_gradient_loss | -0.0142     |
|    value_loss           | 6.4         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.2        |
|    ep_rew_mean          | 8.12        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 7           |
|    time_elapsed         | 40          |
|    total_timesteps      | 5376        |
| train/                  |             |
|    approx_kl            | 0.012500554 |
|    clip_fraction        | 0.115       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.36       |
|    explained_variance   | -0.00488    |
|    learning_rate        | 0.0005      |
|    loss                 | 2.46        |
|    n_updates            | 60          |
|    policy_gradient_loss | -0.0132     |
|    value_loss           | 4.78        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.6        |
|    ep_rew_mean          | 8.47        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 8           |
|    time_elapsed         | 46          |
|    total_timesteps      | 6144        |
| train/                  |             |
|    approx_kl            | 0.009157203 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.33       |
|    explained_variance   | 0.0217      |
|    learning_rate        | 0.0005      |
|    loss                 | 2           |
|    n_updates            | 70          |
|    policy_gradient_loss | -0.00798    |
|    value_loss           | 4.34        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 9.9        |
|    ep_rew_mean          | 7.77       |
| time/                   |            |
|    fps                  | 134        |
|    iterations           | 9          |
|    time_elapsed         | 51         |
|    total_timesteps      | 6912       |
| train/                  |            |
|    approx_kl            | 0.01443418 |
|    clip_fraction        | 0.168      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.3       |
|    explained_variance   | 0.00663    |
|    learning_rate        | 0.0005     |
|    loss                 | 1.69       |
|    n_updates            | 80         |
|    policy_gradient_loss | -0.0106    |
|    value_loss           | 4.14       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 9.26        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 10          |
|    time_elapsed         | 57          |
|    total_timesteps      | 7680        |
| train/                  |             |
|    approx_kl            | 0.009927949 |
|    clip_fraction        | 0.161       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.28       |
|    explained_variance   | 0.000793    |
|    learning_rate        | 0.0005      |
|    loss                 | 2.54        |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.0117     |
|    value_loss           | 4.3         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.2        |
|    ep_rew_mean          | 9.95        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 11          |
|    time_elapsed         | 63          |
|    total_timesteps      | 8448        |
| train/                  |             |
|    approx_kl            | 0.011057979 |
|    clip_fraction        | 0.167       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.24       |
|    explained_variance   | 0.0316      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.19        |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.0152     |
|    value_loss           | 4.12        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 10.6         |
|    ep_rew_mean          | 8.7          |
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 12           |
|    time_elapsed         | 69           |
|    total_timesteps      | 9216         |
| train/                  |              |
|    approx_kl            | 0.0092326645 |
|    clip_fraction        | 0.084        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.23        |
|    explained_variance   | 0.0181       |
|    learning_rate        | 0.0005       |
|    loss                 | 2.22         |
|    n_updates            | 110          |
|    policy_gradient_loss | -0.00659     |
|    value_loss           | 4.65         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.3        |
|    ep_rew_mean          | 8.54        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 13          |
|    time_elapsed         | 74          |
|    total_timesteps      | 9984        |
| train/                  |             |
|    approx_kl            | 0.012450148 |
|    clip_fraction        | 0.129       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.21       |
|    explained_variance   | 0.0352      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.06        |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.00641    |
|    value_loss           | 4.2         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.3        |
|    ep_rew_mean          | 8.7         |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 14          |
|    time_elapsed         | 80          |
|    total_timesteps      | 10752       |
| train/                  |             |
|    approx_kl            | 0.009661569 |
|    clip_fraction        | 0.115       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.18       |
|    explained_variance   | 0.0435      |
|    learning_rate        | 0.0005      |
|    loss                 | 2           |
|    n_updates            | 130         |
|    policy_gradient_loss | -0.00645    |
|    value_loss           | 4.57        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.3        |
|    ep_rew_mean          | 8.66        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 15          |
|    time_elapsed         | 86          |
|    total_timesteps      | 11520       |
| train/                  |             |
|    approx_kl            | 0.016138455 |
|    clip_fraction        | 0.19        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.14       |
|    explained_variance   | 0.0265      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.2         |
|    n_updates            | 140         |
|    policy_gradient_loss | -0.0142     |
|    value_loss           | 4.71        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10          |
|    ep_rew_mean          | 8.54        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 16          |
|    time_elapsed         | 91          |
|    total_timesteps      | 12288       |
| train/                  |             |
|    approx_kl            | 0.012737934 |
|    clip_fraction        | 0.129       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.12       |
|    explained_variance   | 0.0289      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.29        |
|    n_updates            | 150         |
|    policy_gradient_loss | -0.00806    |
|    value_loss           | 4.8         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.5        |
|    ep_rew_mean          | 9.15        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 17          |
|    time_elapsed         | 97          |
|    total_timesteps      | 13056       |
| train/                  |             |
|    approx_kl            | 0.012231865 |
|    clip_fraction        | 0.158       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | 0.0162      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.73        |
|    n_updates            | 160         |
|    policy_gradient_loss | -0.0105     |
|    value_loss           | 4.8         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.3        |
|    ep_rew_mean          | 8.87        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 18          |
|    time_elapsed         | 103         |
|    total_timesteps      | 13824       |
| train/                  |             |
|    approx_kl            | 0.008604974 |
|    clip_fraction        | 0.106       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.08       |
|    explained_variance   | 0.0297      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.41        |
|    n_updates            | 170         |
|    policy_gradient_loss | -0.0104     |
|    value_loss           | 4.73        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.75        |
|    ep_rew_mean          | 8.47        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 19          |
|    time_elapsed         | 109         |
|    total_timesteps      | 14592       |
| train/                  |             |
|    approx_kl            | 0.008112949 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.05       |
|    explained_variance   | 0.0345      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.83        |
|    n_updates            | 180         |
|    policy_gradient_loss | -0.00638    |
|    value_loss           | 5.26        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.7        |
|    ep_rew_mean          | 9.43        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 20          |
|    time_elapsed         | 114         |
|    total_timesteps      | 15360       |
| train/                  |             |
|    approx_kl            | 0.008982056 |
|    clip_fraction        | 0.0823      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.04       |
|    explained_variance   | 0.0397      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.49        |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.00589    |
|    value_loss           | 5.13        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 10.1       |
|    ep_rew_mean          | 8.79       |
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 21         |
|    time_elapsed         | 120        |
|    total_timesteps      | 16128      |
| train/                  |            |
|    approx_kl            | 0.00895415 |
|    clip_fraction        | 0.118      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.01      |
|    explained_variance   | 0.0398     |
|    learning_rate        | 0.0005     |
|    loss                 | 2.36       |
|    n_updates            | 200        |
|    policy_gradient_loss | -0.0124    |
|    value_loss           | 4.84       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.8        |
|    ep_rew_mean          | 9.5         |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 22          |
|    time_elapsed         | 126         |
|    total_timesteps      | 16896       |
| train/                  |             |
|    approx_kl            | 0.009201129 |
|    clip_fraction        | 0.104       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.01       |
|    explained_variance   | 0.029       |
|    learning_rate        | 0.0005      |
|    loss                 | 2.79        |
|    n_updates            | 210         |
|    policy_gradient_loss | -0.00826    |
|    value_loss           | 4.99        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.4        |
|    ep_rew_mean          | 9.16        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 23          |
|    time_elapsed         | 132         |
|    total_timesteps      | 17664       |
| train/                  |             |
|    approx_kl            | 0.012433327 |
|    clip_fraction        | 0.138       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.929      |
|    explained_variance   | 0.0531      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.4         |
|    n_updates            | 220         |
|    policy_gradient_loss | -0.0106     |
|    value_loss           | 4.7         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10          |
|    ep_rew_mean          | 8.84        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 24          |
|    time_elapsed         | 138         |
|    total_timesteps      | 18432       |
| train/                  |             |
|    approx_kl            | 0.008747538 |
|    clip_fraction        | 0.0924      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.955      |
|    explained_variance   | 0.0436      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.42        |
|    n_updates            | 230         |
|    policy_gradient_loss | -0.00366    |
|    value_loss           | 5.14        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.91        |
|    ep_rew_mean          | 8.62        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 25          |
|    time_elapsed         | 143         |
|    total_timesteps      | 19200       |
| train/                  |             |
|    approx_kl            | 0.009640549 |
|    clip_fraction        | 0.143       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.941      |
|    explained_variance   | 0.0225      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.85        |
|    n_updates            | 240         |
|    policy_gradient_loss | -0.00853    |
|    value_loss           | 5.31        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.58        |
|    ep_rew_mean          | 7.45        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 26          |
|    time_elapsed         | 149         |
|    total_timesteps      | 19968       |
| train/                  |             |
|    approx_kl            | 0.009194002 |
|    clip_fraction        | 0.0868      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.911      |
|    explained_variance   | 0.0538      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.47        |
|    n_updates            | 250         |
|    policy_gradient_loss | -0.0108     |
|    value_loss           | 4.79        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.64         |
|    ep_rew_mean          | 8.43         |
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 27           |
|    time_elapsed         | 155          |
|    total_timesteps      | 20736        |
| train/                  |              |
|    approx_kl            | 0.0068770032 |
|    clip_fraction        | 0.107        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.884       |
|    explained_variance   | 0.0341       |
|    learning_rate        | 0.0005       |
|    loss                 | 2.24         |
|    n_updates            | 260          |
|    policy_gradient_loss | -0.00732     |
|    value_loss           | 5.24         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.8        |
|    ep_rew_mean          | 9.53        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 28          |
|    time_elapsed         | 161         |
|    total_timesteps      | 21504       |
| train/                  |             |
|    approx_kl            | 0.010572296 |
|    clip_fraction        | 0.12        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.913      |
|    explained_variance   | 0.0671      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.54        |
|    n_updates            | 270         |
|    policy_gradient_loss | -0.0113     |
|    value_loss           | 4.64        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.2        |
|    ep_rew_mean          | 9.91        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 29          |
|    time_elapsed         | 166         |
|    total_timesteps      | 22272       |
| train/                  |             |
|    approx_kl            | 0.013790101 |
|    clip_fraction        | 0.154       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.909      |
|    explained_variance   | 0.0336      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.07        |
|    n_updates            | 280         |
|    policy_gradient_loss | -0.0113     |
|    value_loss           | 5.18        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.1        |
|    ep_rew_mean          | 10.6        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 30          |
|    time_elapsed         | 172         |
|    total_timesteps      | 23040       |
| train/                  |             |
|    approx_kl            | 0.006180758 |
|    clip_fraction        | 0.114       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.871      |
|    explained_variance   | 0.05        |
|    learning_rate        | 0.0005      |
|    loss                 | 2.58        |
|    n_updates            | 290         |
|    policy_gradient_loss | -0.00846    |
|    value_loss           | 5.08        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.8        |
|    ep_rew_mean          | 9.38        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 31          |
|    time_elapsed         | 177         |
|    total_timesteps      | 23808       |
| train/                  |             |
|    approx_kl            | 0.016818495 |
|    clip_fraction        | 0.134       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.838      |
|    explained_variance   | 0.0391      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.82        |
|    n_updates            | 300         |
|    policy_gradient_loss | -0.00744    |
|    value_loss           | 5.08        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11          |
|    ep_rew_mean          | 9.62        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 32          |
|    time_elapsed         | 183         |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.009060604 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.866      |
|    explained_variance   | 0.0455      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.56        |
|    n_updates            | 310         |
|    policy_gradient_loss | -0.0104     |
|    value_loss           | 4.87        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.9        |
|    ep_rew_mean          | 10.4        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 33          |
|    time_elapsed         | 189         |
|    total_timesteps      | 25344       |
| train/                  |             |
|    approx_kl            | 0.009071275 |
|    clip_fraction        | 0.145       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.849      |
|    explained_variance   | 0.0471      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.3         |
|    n_updates            | 320         |
|    policy_gradient_loss | -0.013      |
|    value_loss           | 4.83        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 10.4        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 34          |
|    time_elapsed         | 195         |
|    total_timesteps      | 26112       |
| train/                  |             |
|    approx_kl            | 0.011677337 |
|    clip_fraction        | 0.135       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.869      |
|    explained_variance   | 0.0402      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.58        |
|    n_updates            | 330         |
|    policy_gradient_loss | -0.00735    |
|    value_loss           | 5.2         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 9.93        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 35          |
|    time_elapsed         | 201         |
|    total_timesteps      | 26880       |
| train/                  |             |
|    approx_kl            | 0.009530999 |
|    clip_fraction        | 0.126       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.878      |
|    explained_variance   | 0.059       |
|    learning_rate        | 0.0005      |
|    loss                 | 2.62        |
|    n_updates            | 340         |
|    policy_gradient_loss | -0.00839    |
|    value_loss           | 5.24        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.9        |
|    ep_rew_mean          | 10.4        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 36          |
|    time_elapsed         | 206         |
|    total_timesteps      | 27648       |
| train/                  |             |
|    approx_kl            | 0.010975479 |
|    clip_fraction        | 0.101       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.885      |
|    explained_variance   | 0.0293      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.24        |
|    n_updates            | 350         |
|    policy_gradient_loss | -0.00893    |
|    value_loss           | 5.05        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.4        |
|    ep_rew_mean          | 10.8        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 37          |
|    time_elapsed         | 212         |
|    total_timesteps      | 28416       |
| train/                  |             |
|    approx_kl            | 0.013531726 |
|    clip_fraction        | 0.137       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.829      |
|    explained_variance   | 0.0375      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.56        |
|    n_updates            | 360         |
|    policy_gradient_loss | -0.0131     |
|    value_loss           | 5.18        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.2        |
|    ep_rew_mean          | 10.5        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 38          |
|    time_elapsed         | 218         |
|    total_timesteps      | 29184       |
| train/                  |             |
|    approx_kl            | 0.008551791 |
|    clip_fraction        | 0.0958      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.848      |
|    explained_variance   | 0.052       |
|    learning_rate        | 0.0005      |
|    loss                 | 1.82        |
|    n_updates            | 370         |
|    policy_gradient_loss | -0.00611    |
|    value_loss           | 4.78        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 11.7         |
|    ep_rew_mean          | 9.92         |
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 39           |
|    time_elapsed         | 224          |
|    total_timesteps      | 29952        |
| train/                  |              |
|    approx_kl            | 0.0124066295 |
|    clip_fraction        | 0.154        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.819       |
|    explained_variance   | 0.046        |
|    learning_rate        | 0.0005       |
|    loss                 | 2.75         |
|    n_updates            | 380          |
|    policy_gradient_loss | -0.0144      |
|    value_loss           | 4.94         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 10.2        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 40          |
|    time_elapsed         | 230         |
|    total_timesteps      | 30720       |
| train/                  |             |
|    approx_kl            | 0.013906061 |
|    clip_fraction        | 0.159       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.812      |
|    explained_variance   | 0.0516      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.77        |
|    n_updates            | 390         |
|    policy_gradient_loss | -0.00814    |
|    value_loss           | 4.93        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 10.1        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 41          |
|    time_elapsed         | 235         |
|    total_timesteps      | 31488       |
| train/                  |             |
|    approx_kl            | 0.007769923 |
|    clip_fraction        | 0.114       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.797      |
|    explained_variance   | 0.0521      |
|    learning_rate        | 0.0005      |
|    loss                 | 2           |
|    n_updates            | 400         |
|    policy_gradient_loss | -0.00347    |
|    value_loss           | 4.86        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13          |
|    ep_rew_mean          | 11          |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 42          |
|    time_elapsed         | 241         |
|    total_timesteps      | 32256       |
| train/                  |             |
|    approx_kl            | 0.011463185 |
|    clip_fraction        | 0.116       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.773      |
|    explained_variance   | 0.0627      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.42        |
|    n_updates            | 410         |
|    policy_gradient_loss | -0.00949    |
|    value_loss           | 4.91        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.8        |
|    ep_rew_mean          | 10.8        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 43          |
|    time_elapsed         | 246         |
|    total_timesteps      | 33024       |
| train/                  |             |
|    approx_kl            | 0.013770991 |
|    clip_fraction        | 0.113       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.823      |
|    explained_variance   | 0.0676      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.14        |
|    n_updates            | 420         |
|    policy_gradient_loss | -0.00724    |
|    value_loss           | 4.68        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.3        |
|    ep_rew_mean          | 11.3        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 44          |
|    time_elapsed         | 252         |
|    total_timesteps      | 33792       |
| train/                  |             |
|    approx_kl            | 0.009420588 |
|    clip_fraction        | 0.0857      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.792      |
|    explained_variance   | 0.0684      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.41        |
|    n_updates            | 430         |
|    policy_gradient_loss | -0.0109     |
|    value_loss           | 4.65        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.8       |
|    ep_rew_mean          | 10         |
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 45         |
|    time_elapsed         | 258        |
|    total_timesteps      | 34560      |
| train/                  |            |
|    approx_kl            | 0.01179447 |
|    clip_fraction        | 0.115      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.711     |
|    explained_variance   | 0.0765     |
|    learning_rate        | 0.0005     |
|    loss                 | 2.67       |
|    n_updates            | 440        |
|    policy_gradient_loss | -0.00906   |
|    value_loss           | 4.76       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13          |
|    ep_rew_mean          | 11.2        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 46          |
|    time_elapsed         | 264         |
|    total_timesteps      | 35328       |
| train/                  |             |
|    approx_kl            | 0.009889281 |
|    clip_fraction        | 0.113       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.742      |
|    explained_variance   | 0.0443      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.43        |
|    n_updates            | 450         |
|    policy_gradient_loss | -0.011      |
|    value_loss           | 5.37        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.8        |
|    ep_rew_mean          | 11.9        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 47          |
|    time_elapsed         | 269         |
|    total_timesteps      | 36096       |
| train/                  |             |
|    approx_kl            | 0.016504431 |
|    clip_fraction        | 0.135       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.748      |
|    explained_variance   | 0.0546      |
|    learning_rate        | 0.0005      |
|    loss                 | 1.89        |
|    n_updates            | 460         |
|    policy_gradient_loss | -0.0153     |
|    value_loss           | 4.49        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.9       |
|    ep_rew_mean          | 11.2       |
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 48         |
|    time_elapsed         | 275        |
|    total_timesteps      | 36864      |
| train/                  |            |
|    approx_kl            | 0.01982482 |
|    clip_fraction        | 0.131      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.761     |
|    explained_variance   | 0.0577     |
|    learning_rate        | 0.0005     |
|    loss                 | 2.39       |
|    n_updates            | 470        |
|    policy_gradient_loss | -0.0102    |
|    value_loss           | 4.81       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.4        |
|    ep_rew_mean          | 11.4        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 49          |
|    time_elapsed         | 281         |
|    total_timesteps      | 37632       |
| train/                  |             |
|    approx_kl            | 0.012019389 |
|    clip_fraction        | 0.168       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.745      |
|    explained_variance   | 0.0438      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.94        |
|    n_updates            | 480         |
|    policy_gradient_loss | -0.0107     |
|    value_loss           | 5.21        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 14.5         |
|    ep_rew_mean          | 12.3         |
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 50           |
|    time_elapsed         | 286          |
|    total_timesteps      | 38400        |
| train/                  |              |
|    approx_kl            | 0.0091495635 |
|    clip_fraction        | 0.111        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.71        |
|    explained_variance   | 0.07         |
|    learning_rate        | 0.0005       |
|    loss                 | 2.21         |
|    n_updates            | 490          |
|    policy_gradient_loss | -0.0107      |
|    value_loss           | 4.95         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.5        |
|    ep_rew_mean          | 11.4        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 51          |
|    time_elapsed         | 292         |
|    total_timesteps      | 39168       |
| train/                  |             |
|    approx_kl            | 0.016189516 |
|    clip_fraction        | 0.152       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.699      |
|    explained_variance   | 0.0588      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.26        |
|    n_updates            | 500         |
|    policy_gradient_loss | -0.00871    |
|    value_loss           | 4.86        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.3        |
|    ep_rew_mean          | 11.3        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 52          |
|    time_elapsed         | 298         |
|    total_timesteps      | 39936       |
| train/                  |             |
|    approx_kl            | 0.017363762 |
|    clip_fraction        | 0.135       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.723      |
|    explained_variance   | 0.0529      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.16        |
|    n_updates            | 510         |
|    policy_gradient_loss | -0.00327    |
|    value_loss           | 4.74        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.7        |
|    ep_rew_mean          | 10.9        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 53          |
|    time_elapsed         | 303         |
|    total_timesteps      | 40704       |
| train/                  |             |
|    approx_kl            | 0.011583403 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.764      |
|    explained_variance   | 0.0659      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.03        |
|    n_updates            | 520         |
|    policy_gradient_loss | 0.0031      |
|    value_loss           | 4.65        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 12.9         |
|    ep_rew_mean          | 10.8         |
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 54           |
|    time_elapsed         | 309          |
|    total_timesteps      | 41472        |
| train/                  |              |
|    approx_kl            | 0.0139115155 |
|    clip_fraction        | 0.159        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.748       |
|    explained_variance   | 0.0652       |
|    learning_rate        | 0.0005       |
|    loss                 | 2            |
|    n_updates            | 530          |
|    policy_gradient_loss | -0.015       |
|    value_loss           | 4.74         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13          |
|    ep_rew_mean          | 10.7        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 55          |
|    time_elapsed         | 315         |
|    total_timesteps      | 42240       |
| train/                  |             |
|    approx_kl            | 0.020614024 |
|    clip_fraction        | 0.143       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.778      |
|    explained_variance   | 0.0716      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.22        |
|    n_updates            | 540         |
|    policy_gradient_loss | -0.0058     |
|    value_loss           | 4.61        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.9       |
|    ep_rew_mean          | 9.81       |
| time/                   |            |
|    fps                  | 133        |
|    iterations           | 56         |
|    time_elapsed         | 321        |
|    total_timesteps      | 43008      |
| train/                  |            |
|    approx_kl            | 0.01641938 |
|    clip_fraction        | 0.156      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.667     |
|    explained_variance   | 0.0624     |
|    learning_rate        | 0.0005     |
|    loss                 | 1.98       |
|    n_updates            | 550        |
|    policy_gradient_loss | -0.0127    |
|    value_loss           | 4.42       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.9        |
|    ep_rew_mean          | 10.9        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 57          |
|    time_elapsed         | 327         |
|    total_timesteps      | 43776       |
| train/                  |             |
|    approx_kl            | 0.011246432 |
|    clip_fraction        | 0.113       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.637      |
|    explained_variance   | 0.0769      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.27        |
|    n_updates            | 560         |
|    policy_gradient_loss | -0.0107     |
|    value_loss           | 4.76        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.3        |
|    ep_rew_mean          | 12.1        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 58          |
|    time_elapsed         | 332         |
|    total_timesteps      | 44544       |
| train/                  |             |
|    approx_kl            | 0.011581411 |
|    clip_fraction        | 0.121       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.618      |
|    explained_variance   | 0.0851      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.05        |
|    n_updates            | 570         |
|    policy_gradient_loss | -0.0116     |
|    value_loss           | 4.42        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.6        |
|    ep_rew_mean          | 12.2        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 59          |
|    time_elapsed         | 338         |
|    total_timesteps      | 45312       |
| train/                  |             |
|    approx_kl            | 0.013920876 |
|    clip_fraction        | 0.108       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.612      |
|    explained_variance   | 0.0663      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.4         |
|    n_updates            | 580         |
|    policy_gradient_loss | -0.00275    |
|    value_loss           | 4.58        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.6        |
|    ep_rew_mean          | 11.4        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 60          |
|    time_elapsed         | 344         |
|    total_timesteps      | 46080       |
| train/                  |             |
|    approx_kl            | 0.010201397 |
|    clip_fraction        | 0.133       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.612      |
|    explained_variance   | 0.0656      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.77        |
|    n_updates            | 590         |
|    policy_gradient_loss | -0.00824    |
|    value_loss           | 5.04        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.5        |
|    ep_rew_mean          | 11.3        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 61          |
|    time_elapsed         | 349         |
|    total_timesteps      | 46848       |
| train/                  |             |
|    approx_kl            | 0.009642946 |
|    clip_fraction        | 0.088       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.583      |
|    explained_variance   | 0.105       |
|    learning_rate        | 0.0005      |
|    loss                 | 1.98        |
|    n_updates            | 600         |
|    policy_gradient_loss | -0.0056     |
|    value_loss           | 4.63        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.3        |
|    ep_rew_mean          | 12.2        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 62          |
|    time_elapsed         | 355         |
|    total_timesteps      | 47616       |
| train/                  |             |
|    approx_kl            | 0.014387942 |
|    clip_fraction        | 0.11        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.571      |
|    explained_variance   | 0.0659      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.42        |
|    n_updates            | 610         |
|    policy_gradient_loss | -0.00815    |
|    value_loss           | 4.51        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.6        |
|    ep_rew_mean          | 12.4        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 63          |
|    time_elapsed         | 361         |
|    total_timesteps      | 48384       |
| train/                  |             |
|    approx_kl            | 0.013205201 |
|    clip_fraction        | 0.124       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.537      |
|    explained_variance   | 0.102       |
|    learning_rate        | 0.0005      |
|    loss                 | 2.39        |
|    n_updates            | 620         |
|    policy_gradient_loss | 0.00226     |
|    value_loss           | 4.66        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 15.4        |
|    ep_rew_mean          | 12.9        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 64          |
|    time_elapsed         | 366         |
|    total_timesteps      | 49152       |
| train/                  |             |
|    approx_kl            | 0.017984098 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.588      |
|    explained_variance   | 0.097       |
|    learning_rate        | 0.0005      |
|    loss                 | 1.65        |
|    n_updates            | 630         |
|    policy_gradient_loss | -0.0109     |
|    value_loss           | 4.22        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 15.2        |
|    ep_rew_mean          | 12.6        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 65          |
|    time_elapsed         | 372         |
|    total_timesteps      | 49920       |
| train/                  |             |
|    approx_kl            | 0.009611509 |
|    clip_fraction        | 0.0785      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.555      |
|    explained_variance   | 0.0684      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.63        |
|    n_updates            | 640         |
|    policy_gradient_loss | -0.00433    |
|    value_loss           | 4.34        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 15.2        |
|    ep_rew_mean          | 12.4        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 66          |
|    time_elapsed         | 378         |
|    total_timesteps      | 50688       |
| train/                  |             |
|    approx_kl            | 0.010471691 |
|    clip_fraction        | 0.0952      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.608      |
|    explained_variance   | 0.119       |
|    learning_rate        | 0.0005      |
|    loss                 | 2.42        |
|    n_updates            | 650         |
|    policy_gradient_loss | -0.00999    |
|    value_loss           | 4.53        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 15.2         |
|    ep_rew_mean          | 12.5         |
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 67           |
|    time_elapsed         | 384          |
|    total_timesteps      | 51456        |
| train/                  |              |
|    approx_kl            | 0.0058284937 |
|    clip_fraction        | 0.105        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.57        |
|    explained_variance   | 0.0803       |
|    learning_rate        | 0.0005       |
|    loss                 | 2.64         |
|    n_updates            | 660          |
|    policy_gradient_loss | -0.00567     |
|    value_loss           | 4.25         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 15.7         |
|    ep_rew_mean          | 13           |
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 68           |
|    time_elapsed         | 389          |
|    total_timesteps      | 52224        |
| train/                  |              |
|    approx_kl            | 0.0062502734 |
|    clip_fraction        | 0.0914       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.569       |
|    explained_variance   | 0.083        |
|    learning_rate        | 0.0005       |
|    loss                 | 2.2          |
|    n_updates            | 670          |
|    policy_gradient_loss | -0.00738     |
|    value_loss           | 4.42         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.6        |
|    ep_rew_mean          | 13.7        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 69          |
|    time_elapsed         | 395         |
|    total_timesteps      | 52992       |
| train/                  |             |
|    approx_kl            | 0.010264453 |
|    clip_fraction        | 0.0977      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.579      |
|    explained_variance   | 0.0452      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.09        |
|    n_updates            | 680         |
|    policy_gradient_loss | -0.0104     |
|    value_loss           | 4.41        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.2        |
|    ep_rew_mean          | 13.3        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 70          |
|    time_elapsed         | 400         |
|    total_timesteps      | 53760       |
| train/                  |             |
|    approx_kl            | 0.009289461 |
|    clip_fraction        | 0.12        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.596      |
|    explained_variance   | 0.118       |
|    learning_rate        | 0.0005      |
|    loss                 | 2.05        |
|    n_updates            | 690         |
|    policy_gradient_loss | -0.00775    |
|    value_loss           | 4.53        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.2        |
|    ep_rew_mean          | 13.3        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 71          |
|    time_elapsed         | 406         |
|    total_timesteps      | 54528       |
| train/                  |             |
|    approx_kl            | 0.010913826 |
|    clip_fraction        | 0.132       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.509      |
|    explained_variance   | 0.0908      |
|    learning_rate        | 0.0005      |
|    loss                 | 1.51        |
|    n_updates            | 700         |
|    policy_gradient_loss | -0.00868    |
|    value_loss           | 4.34        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.4        |
|    ep_rew_mean          | 13.9        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 72          |
|    time_elapsed         | 412         |
|    total_timesteps      | 55296       |
| train/                  |             |
|    approx_kl            | 0.015662806 |
|    clip_fraction        | 0.126       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.535      |
|    explained_variance   | 0.106       |
|    learning_rate        | 0.0005      |
|    loss                 | 2.01        |
|    n_updates            | 710         |
|    policy_gradient_loss | -0.00533    |
|    value_loss           | 4.7         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.3        |
|    ep_rew_mean          | 12.3        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 73          |
|    time_elapsed         | 418         |
|    total_timesteps      | 56064       |
| train/                  |             |
|    approx_kl            | 0.010724665 |
|    clip_fraction        | 0.108       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.587      |
|    explained_variance   | 0.0749      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.53        |
|    n_updates            | 720         |
|    policy_gradient_loss | -0.00573    |
|    value_loss           | 5.08        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 16.1         |
|    ep_rew_mean          | 13.9         |
| time/                   |              |
|    fps                  | 133          |
|    iterations           | 74           |
|    time_elapsed         | 424          |
|    total_timesteps      | 56832        |
| train/                  |              |
|    approx_kl            | 0.0073100016 |
|    clip_fraction        | 0.109        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.575       |
|    explained_variance   | 0.117        |
|    learning_rate        | 0.0005       |
|    loss                 | 1.57         |
|    n_updates            | 730          |
|    policy_gradient_loss | -0.00664     |
|    value_loss           | 4.53         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.5        |
|    ep_rew_mean          | 14.1        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 75          |
|    time_elapsed         | 429         |
|    total_timesteps      | 57600       |
| train/                  |             |
|    approx_kl            | 0.010380663 |
|    clip_fraction        | 0.0962      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.63       |
|    explained_variance   | 0.0899      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.52        |
|    n_updates            | 740         |
|    policy_gradient_loss | -0.00969    |
|    value_loss           | 4.6         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.4        |
|    ep_rew_mean          | 14          |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 76          |
|    time_elapsed         | 435         |
|    total_timesteps      | 58368       |
| train/                  |             |
|    approx_kl            | 0.010034574 |
|    clip_fraction        | 0.116       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.601      |
|    explained_variance   | 0.0754      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.07        |
|    n_updates            | 750         |
|    policy_gradient_loss | -0.00685    |
|    value_loss           | 4.84        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.8        |
|    ep_rew_mean          | 12.6        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 77          |
|    time_elapsed         | 441         |
|    total_timesteps      | 59136       |
| train/                  |             |
|    approx_kl            | 0.011273336 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.571      |
|    explained_variance   | 0.11        |
|    learning_rate        | 0.0005      |
|    loss                 | 2.64        |
|    n_updates            | 760         |
|    policy_gradient_loss | -0.00706    |
|    value_loss           | 4.53        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.7        |
|    ep_rew_mean          | 12.8        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 78          |
|    time_elapsed         | 446         |
|    total_timesteps      | 59904       |
| train/                  |             |
|    approx_kl            | 0.005597891 |
|    clip_fraction        | 0.0949      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.575      |
|    explained_variance   | 0.101       |
|    learning_rate        | 0.0005      |
|    loss                 | 1.96        |
|    n_updates            | 770         |
|    policy_gradient_loss | -0.0072     |
|    value_loss           | 4.56        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.2        |
|    ep_rew_mean          | 14.1        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 79          |
|    time_elapsed         | 452         |
|    total_timesteps      | 60672       |
| train/                  |             |
|    approx_kl            | 0.009416361 |
|    clip_fraction        | 0.0904      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.548      |
|    explained_variance   | 0.0782      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.56        |
|    n_updates            | 780         |
|    policy_gradient_loss | -0.0012     |
|    value_loss           | 5.21        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.7        |
|    ep_rew_mean          | 12.9        |
| time/                   |             |
|    fps                  | 133         |
|    iterations           | 80          |
|    time_elapsed         | 458         |
|    total_timesteps      | 61440       |
| train/                  |             |
|    approx_kl            | 0.018229231 |
|    clip_fraction        | 0.0993      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.57       |
|    explained_variance   | 0.0601      |
|    learning_rate        | 0.0005      |
|    loss                 | 3.58        |
|    n_updates            | 790         |
|    policy_gradient_loss | -0.00279    |
|    value_loss           | 5.64        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 15.1       |
|    ep_rew_mean          | 13.1       |
| time/                   |            |
|    fps                  | 134        |
|    iterations           | 81         |
|    time_elapsed         | 463        |
|    total_timesteps      | 62208      |
| train/                  |            |
|    approx_kl            | 0.01402374 |
|    clip_fraction        | 0.121      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.578     |
|    explained_variance   | 0.0651     |
|    learning_rate        | 0.0005     |
|    loss                 | 2.2        |
|    n_updates            | 800        |
|    policy_gradient_loss | -0.00922   |
|    value_loss           | 5.29       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 15.7       |
|    ep_rew_mean          | 13.6       |
| time/                   |            |
|    fps                  | 134        |
|    iterations           | 82         |
|    time_elapsed         | 469        |
|    total_timesteps      | 62976      |
| train/                  |            |
|    approx_kl            | 0.01049623 |
|    clip_fraction        | 0.116      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.538     |
|    explained_variance   | 0.0709     |
|    learning_rate        | 0.0005     |
|    loss                 | 2.98       |
|    n_updates            | 810        |
|    policy_gradient_loss | -0.00337   |
|    value_loss           | 5.02       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 14.6       |
|    ep_rew_mean          | 12.6       |
| time/                   |            |
|    fps                  | 134        |
|    iterations           | 83         |
|    time_elapsed         | 475        |
|    total_timesteps      | 63744      |
| train/                  |            |
|    approx_kl            | 0.01318634 |
|    clip_fraction        | 0.0934     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.539     |
|    explained_variance   | 0.116      |
|    learning_rate        | 0.0005     |
|    loss                 | 2.64       |
|    n_updates            | 820        |
|    policy_gradient_loss | -0.00142   |
|    value_loss           | 5.24       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.7        |
|    ep_rew_mean          | 12.7        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 84          |
|    time_elapsed         | 481         |
|    total_timesteps      | 64512       |
| train/                  |             |
|    approx_kl            | 0.010047531 |
|    clip_fraction        | 0.0988      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.542      |
|    explained_variance   | 0.154       |
|    learning_rate        | 0.0005      |
|    loss                 | 2.34        |
|    n_updates            | 830         |
|    policy_gradient_loss | -0.005      |
|    value_loss           | 4.47        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.2        |
|    ep_rew_mean          | 14.1        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 85          |
|    time_elapsed         | 486         |
|    total_timesteps      | 65280       |
| train/                  |             |
|    approx_kl            | 0.010109515 |
|    clip_fraction        | 0.109       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.559      |
|    explained_variance   | 0.118       |
|    learning_rate        | 0.0005      |
|    loss                 | 3.12        |
|    n_updates            | 840         |
|    policy_gradient_loss | -0.00445    |
|    value_loss           | 5.11        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 16           |
|    ep_rew_mean          | 14           |
| time/                   |              |
|    fps                  | 134          |
|    iterations           | 86           |
|    time_elapsed         | 491          |
|    total_timesteps      | 66048        |
| train/                  |              |
|    approx_kl            | 0.0069563533 |
|    clip_fraction        | 0.0957       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.578       |
|    explained_variance   | 0.109        |
|    learning_rate        | 0.0005       |
|    loss                 | 2.84         |
|    n_updates            | 850          |
|    policy_gradient_loss | -0.00629     |
|    value_loss           | 5.05         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.5        |
|    ep_rew_mean          | 12.7        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 87          |
|    time_elapsed         | 497         |
|    total_timesteps      | 66816       |
| train/                  |             |
|    approx_kl            | 0.012084653 |
|    clip_fraction        | 0.102       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.592      |
|    explained_variance   | 0.0899      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.87        |
|    n_updates            | 860         |
|    policy_gradient_loss | -0.00452    |
|    value_loss           | 5.38        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.5        |
|    ep_rew_mean          | 11.9        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 88          |
|    time_elapsed         | 503         |
|    total_timesteps      | 67584       |
| train/                  |             |
|    approx_kl            | 0.012747186 |
|    clip_fraction        | 0.105       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.557      |
|    explained_variance   | 0.128       |
|    learning_rate        | 0.0005      |
|    loss                 | 2.4         |
|    n_updates            | 870         |
|    policy_gradient_loss | -0.00895    |
|    value_loss           | 5.41        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.8        |
|    ep_rew_mean          | 13.3        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 89          |
|    time_elapsed         | 509         |
|    total_timesteps      | 68352       |
| train/                  |             |
|    approx_kl            | 0.022016773 |
|    clip_fraction        | 0.127       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.552      |
|    explained_variance   | 0.12        |
|    learning_rate        | 0.0005      |
|    loss                 | 2.22        |
|    n_updates            | 880         |
|    policy_gradient_loss | -0.00356    |
|    value_loss           | 5.32        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 15.7        |
|    ep_rew_mean          | 14.1        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 90          |
|    time_elapsed         | 515         |
|    total_timesteps      | 69120       |
| train/                  |             |
|    approx_kl            | 0.017738516 |
|    clip_fraction        | 0.129       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.525      |
|    explained_variance   | 0.113       |
|    learning_rate        | 0.0005      |
|    loss                 | 2.47        |
|    n_updates            | 890         |
|    policy_gradient_loss | -0.00932    |
|    value_loss           | 4.98        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 17.3        |
|    ep_rew_mean          | 15.7        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 91          |
|    time_elapsed         | 521         |
|    total_timesteps      | 69888       |
| train/                  |             |
|    approx_kl            | 0.010678458 |
|    clip_fraction        | 0.0945      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.527      |
|    explained_variance   | 0.0703      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.57        |
|    n_updates            | 900         |
|    policy_gradient_loss | -0.0101     |
|    value_loss           | 5.69        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.1        |
|    ep_rew_mean          | 14.5        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 92          |
|    time_elapsed         | 525         |
|    total_timesteps      | 70656       |
| train/                  |             |
|    approx_kl            | 0.010068656 |
|    clip_fraction        | 0.0934      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.554      |
|    explained_variance   | 0.104       |
|    learning_rate        | 0.0005      |
|    loss                 | 2.94        |
|    n_updates            | 910         |
|    policy_gradient_loss | -0.00301    |
|    value_loss           | 5.46        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.6        |
|    ep_rew_mean          | 13          |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 93          |
|    time_elapsed         | 531         |
|    total_timesteps      | 71424       |
| train/                  |             |
|    approx_kl            | 0.022640051 |
|    clip_fraction        | 0.097       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.544      |
|    explained_variance   | 0.105       |
|    learning_rate        | 0.0005      |
|    loss                 | 3.01        |
|    n_updates            | 920         |
|    policy_gradient_loss | -0.00295    |
|    value_loss           | 5.47        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 15.2        |
|    ep_rew_mean          | 13.4        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 94          |
|    time_elapsed         | 537         |
|    total_timesteps      | 72192       |
| train/                  |             |
|    approx_kl            | 0.010189914 |
|    clip_fraction        | 0.102       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.539      |
|    explained_variance   | 0.0966      |
|    learning_rate        | 0.0005      |
|    loss                 | 2.46        |
|    n_updates            | 930         |
|    policy_gradient_loss | -0.00865    |
|    value_loss           | 5.25        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.1        |
|    ep_rew_mean          | 14.2        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 95          |
|    time_elapsed         | 543         |
|    total_timesteps      | 72960       |
| train/                  |             |
|    approx_kl            | 0.011785704 |
|    clip_fraction        | 0.0953      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.569      |
|    explained_variance   | 0.14        |
|    learning_rate        | 0.0005      |
|    loss                 | 2.47        |
|    n_updates            | 940         |
|    policy_gradient_loss | -0.00568    |
|    value_loss           | 4.96        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 17.9        |
|    ep_rew_mean          | 15.9        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 96          |
|    time_elapsed         | 549         |
|    total_timesteps      | 73728       |
| train/                  |             |
|    approx_kl            | 0.014611352 |
|    clip_fraction        | 0.0999      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.596      |
|    explained_variance   | 0.101       |
|    learning_rate        | 0.0005      |
|    loss                 | 1.89        |
|    n_updates            | 950         |
|    policy_gradient_loss | -0.0064     |
|    value_loss           | 5.21        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 18.5        |
|    ep_rew_mean          | 16.6        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 97          |
|    time_elapsed         | 554         |
|    total_timesteps      | 74496       |
| train/                  |             |
|    approx_kl            | 0.014690272 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.576      |
|    explained_variance   | 0.106       |
|    learning_rate        | 0.0005      |
|    loss                 | 3.04        |
|    n_updates            | 960         |
|    policy_gradient_loss | -0.00424    |
|    value_loss           | 5.19        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.5        |
|    ep_rew_mean          | 14.9        |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 98          |
|    time_elapsed         | 560         |
|    total_timesteps      | 75264       |
| train/                  |             |
|    approx_kl            | 0.013857766 |
|    clip_fraction        | 0.125       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.625      |
|    explained_variance   | 0.125       |
|    learning_rate        | 0.0005      |
|    loss                 | 3.05        |
|    n_updates            | 970         |
|    policy_gradient_loss | -0.012      |
|    value_loss           | 5.28        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 16.3       |
|    ep_rew_mean          | 14.9       |
| time/                   |            |
|    fps                  | 134        |
|    iterations           | 99         |
|    time_elapsed         | 565        |
|    total_timesteps      | 76032      |
| train/                  |            |
|    approx_kl            | 0.02237595 |
|    clip_fraction        | 0.111      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.58      |
|    explained_variance   | 0.163      |
|    learning_rate        | 0.0005     |
|    loss                 | 2.57       |
|    n_updates            | 980        |
|    policy_gradient_loss | -0.00561   |
|    value_loss           | 5.18       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.8        |
|    ep_rew_mean          | 15          |
| time/                   |             |
|    fps                  | 134         |
|    iterations           | 100         |
|    time_elapsed         | 571         |
|    total_timesteps      | 76800       |
| train/                  |             |
|    approx_kl            | 0.015567064 |
|    clip_fraction        | 0.113       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.582      |
|    explained_variance   | 0.111       |
|    learning_rate        | 0.0005      |
|    loss                 | 2.59        |
|    n_updates            | 990         |
|    policy_gradient_loss | -0.00742    |
|    value_loss           | 5.6         |
-----------------------------------------


In [None]:

model.save('ppo_model')

# Testing

In [None]:
model = PPO.load('ppo_model.zip')

In [None]:
env_mode = EnvMode.TERM_SLOW
env = make_env(train=False,env_mode=EnvMode.TERM_SLOW, verbose=0)
env.unwrapped.configure({
            "observation": {"type": "Kinematics"},
                "action": {
                    "type": "DiscreteMetaAction",
                },
                "lanes_count": 4,
                "vehicles_count": 50,
                "controlled_vehicles": 1,
                "initial_lane_id": None,
                "duration": 100,  # [s]
                "ego_spacing": 2,
                "vehicles_density": 1,
                "collision_reward": -.1,  # The reward received when colliding with a vehicle.
                "right_lane_reward": 0,  # The reward received when driving on the right-most lanes, linearly mapped to
                # zero for other lanes.
                "high_speed_reward": 0.,  # The reward received when driving at full speed, linearly mapped to zero for
                # lower speeds according to config["reward_speed_range"].
                "lane_change_reward": 0.,  # The reward received at each lane change action.
                "reward_speed_range": [20, 30],
                "normalize_reward": False,
                "offroad_terminal": False,
                "manual_control": True        
    })

obs, info = env.reset()
env.stl_driver.set_param('v_slow', 0.3)
env.stl_driver.set_param('v_fast', 0.35)
#wobs = env.wrapped_obs
for _ in range(100):    
    #action, _states = model.predict(wobs, deterministic=True)
    action, _states = model.predict(obs)
    obs, reward, terminated, truncated, info = env.step(action)    
    wobs= env.wrapped_obs

    if terminated:
        print('Crash')
        break    
env.close()

lay = """
 ego_x
 ego_slow
 ego_slow_too_long
"""
lay = utils.get_layout_from_string(lay)

width = 12
height = 4
fig, axs = plt.subplots(len(lay),1, figsize=(width, height))

idx_ax =0
for sig_list in lay:
    for sig in sig_list:
        if len(lay)>1:
            env.plot_signal(sig, axs[idx_ax])
        else:
            env.plot_signal(sig, axs)
    idx_ax +=1

In [None]:
print(model.observation_space)

In [None]:
env.close()

In [None]:

fig, ax = plt.subplots()
env.plot_signal("reward", ax)
# Show the plot with interactive features
plt.show()


In [None]:
def get_fig(env, signals_layout):
    lay = utils.get_layout_from_string(signals_layout)
    status = "Plot ok. Hit reset on top right if not visible."            
    #f= figure(height=200)
    figs = []
    colors = itertools.cycle(palette)    
    for signal_list in enumerate(lay):
        f=None
        for signal in signal_list[1]:                
            #try: 
                color=colors.__next__()                    
                #tr_idx = self.trace_idx
                print(signal.strip())
                if signal.strip().startswith("set_trace_idx(") or signal.strip().startswith("_tr("):            
                    tr_idx = int(signal.split('(')[1][:-1])                         
                    env.set_current_trace(tr_idx)                        
                else: 
                    if f is None:
                        if figs == []:
                            f = figure(height=200)
                        else:
                            f = figure(height=200, x_range=figs[0][0].x_range)
                        figs.append([f])
                    env.plot_signal(f, signal, color=color)
            #except:
            #     status = "Warning: error getting values for " + signal
    fig = gridplot(figs, sizing_mode='stretch_width')        
    
    return fig, status
                

In [None]:
def db_plot_signal(self, signal, fig=None,label=None,  color=None, online=False, horizon=0):
    # signal should be part of the "signal" declaration or a valid formula id 
     
        if self.stl_driver.data == []:
            raise ValueError("No data to plot.")
                 
        time = self.get_time()

        if signal in self.signals_map:
            signal_index = list(self.signals_map.keys()).index(signal)+1        
            sig_values = [s[signal_index] for s in self.stl_driver.data]
            if label is None:
                label=signal
        elif signal in self.formulas:
            sig_values = self.get_rob(signal, online=online,horizon=horizon)
            signal_index = self.formulas.index(signal)+len(self.signals_map)        
            if label is None:
                label=signal
        elif isinstance(signal, np.ndarray) and signal.shape == (len(self.get_time()),):
            sig_values = signal
        elif isinstance(signal, stlrom.Signal):
            pass
        else:
            try:
                sig_values = self.get_rob(signal, online=online,horizon=horizon)
            except Exception as e:
               raise ValueError(f"Name '{signal}' not in signals_map nor in parsed formulas")

        if fig is None:
            fig = figure(height=200)

        fig.xaxis.axis_label = "Time"
        print(sig_values)
        fig.step(time, sig_values)            
        
        
        return fig


In [None]:
f = figure(height=200)
db_plot_signal(env, "ego_x")


In [None]:
f.

In [None]:
fig, status = get_fig(env, lay)