In [1]:
# Make the project root importable (to access functions from the root directory).
import os
import sys

# Location of the project checkout. The default is the original hard-coded path;
# set the MULTILEVEL_PPO_ROOT environment variable to run on another machine.
project_root = os.environ.get('MULTILEVEL_PPO_ROOT', '/data/ad181/RemoteDir/multilevel_ppo')

# Only append once so that re-running this cell does not pile up duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
%matplotlib notebook
import numpy as np
import time
import pickle
import os
import matplotlib.pyplot as plt
from copy import copy, deepcopy
from tqdm.notebook import trange, tqdm

import gym
from stable_baselines3.ppo import PPO, MlpPolicy
from stable_baselines3.ppo_multi_level import PPO_ML
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env.subproc_vec_multi_level_env import SubprocVecMultiLevelEnv
from stable_baselines3.common.envs.multi_level_ressim_env import MultiLevelRessimEnv
from stable_baselines3.common.logger import configure

from utils.custom_eval_callback import CustomEvalCallback, CustomEvalCallbackParallel
from utils.plot_functions import plot_learning
from utils.env_evaluate_functions import eval_actions

In [3]:
# Experiment configuration.
seed = 1             # default random seed
case = 'ppo_1l'      # case name, used as the sub-directory for this experiment's outputs
data_dir = './data'  # parent directory for all outputs
log_dir = f'{data_dir}/{case}'  # evaluates to './data/ppo_1l'

In [4]:
# Create the output directories up front; exist_ok=True keeps the cell safe to re-run.
for directory in (data_dir, log_dir):
    os.makedirs(directory, exist_ok=True)

In [5]:
# Load the pickled dictionary of training environments (indexed by 1-based level in the next cell).
# NOTE(review): pickle.load can execute arbitrary code while unpickling -- only load
# this file if it comes from a trusted source.
# The handle is called `env_file` (not `input`) so the built-in input() is not
# shadowed by a closed file object for the rest of the notebook.
with open('../envs_params/env_data_v1/env_train_dict.pkl', 'rb') as env_file:
    env_ck_dict = pickle.load(env_file)

In [6]:
# Build env_dict_: take the `n_levels` highest-numbered entries of env_ck_dict
# and re-key them 1..n_levels (the last entry of env_ck_dict becomes key n_levels).
n_levels = 1
fine_level = len(env_ck_dict)
env_dict_ = {}
first_source_level = fine_level - n_levels + 1
for new_level, source_level in enumerate(range(first_source_level, fine_level + 1), start=1):
    # report the mapping "new key -> key in env_ck_dict"
    print(new_level, '->', source_level)
    env_dict_[new_level] = env_ck_dict[source_level]

1 -> 4


In [None]:
# Train one PPO_ML model per seed and save it under ./data/<case>/seed_<seed>/.
#
# For each seed the cell
#   1. creates the log directory and an evaluation callback on the fine-level env,
#   2. builds one vectorised environment (N sub-process workers) per level,
#   3. trains PPO_ML for N*T*I timesteps and saves the model,
#   4. closes every vectorised environment, even if training fails or is interrupted.

# Hyper-parameters are identical for every seed, so they are defined once outside the loop.
T = 100   # n_steps
N = 50    # number of actors
M = 500   # minibatch size
I = 1200  # number of iterations
K = 20    # number of epochs

# Number of iterations between evaluations. Integer division keeps eval_freq an int
# (I/120 produced the float 10.0); max() guards against a zero interval for small I.
log_interval = max(1, I // 120)

fine_level = len(env_dict_)

for seed in range(1, 4):
    print(f'seed {seed}')
    log_dir = './data/'+case+'/seed_'+str(seed)
    os.makedirs(log_dir, exist_ok=True)

    print('generate callback ...')
    eval_callback = CustomEvalCallback(env_dict_[fine_level],
                                       best_model_save_path=str(log_dir)+'/best_model',
                                       n_eval_episodes=1,
                                       log_path=str(log_dir)+'/results_eval',
                                       eval_freq=log_interval*T)

    print('vectorize environment ...')

    # generate PPO_ML parameters for MLMC analysis.
    # we choose same n_steps and batch_size values on levels because only fine level values are used in the analysis
    env_dict = {}
    n_steps_dict = {}
    batch_size_dict = {}
    try:
        for level, env in env_dict_.items():
            print(f"vectorize env level {level}")
            env_dict[level] = make_vec_env(MultiLevelRessimEnv,
                                           n_envs=N,
                                           seed=seed,
                                           env_kwargs={"ressim_params": env.ressim_params, "level": env.level},
                                           vec_env_cls=SubprocVecMultiLevelEnv)
            n_steps_dict[level] = T
            batch_size_dict[level] = M

        # index explicitly by fine_level instead of relying on the loop variable
        # `level` still holding the last key after the loop
        print(env_dict_[fine_level].observation_space)
        print('model definition ..')
        model = PPO_ML(policy=MlpPolicy,
                       env=env_dict,
                       learning_rate=1e-6,
                       n_steps=n_steps_dict,
                       batch_size=batch_size_dict,
                       n_epochs=K,
                       clip_range=0.1,
                       ent_coef=0.001,
                       vf_coef=0.5,
                       policy_kwargs=dict(net_arch=[70, 70, 50], log_std_init=-2.9),
                       verbose=1,
                       seed=seed,
                       target_kl=0.05,
                       device="auto")
        # set logger for the model
        new_logger = configure(log_dir)
        model.set_logger(new_logger)
        print('policy learning ..')
        model.learn(total_timesteps=N*T*I, callback=eval_callback)
        model.save(log_dir+'/PPO', exclude=['env_dict'])
        del model
    finally:
        # Always shut down the vectorised environments that were created, so their
        # worker processes are not left running after an exception or kernel interrupt.
        for vec_env in env_dict.values():
            vec_env.close()


seed 1
generate callback ...
vectorize environment ...
vectorize env level 1
Box(-1.0, 1.0, (35,), float64)
model definition ..
Using cuda device
Logging to ./data/ppo_1l/seed_1
policy learning ..




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5        |
|    ep_rew_mean     | 0.679    |
| time/              |          |
|    fps             | 56       |
|    iterations      | 1        |
|    time_elapsed    | 88       |
|    total_timesteps | 5000     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.683       |
| time/                   |             |
|    fps                  | 61          |
|    iterations           | 2           |
|    time_elapsed         | 163         |
|    total_timesteps      | 10000       |
| train/                  |             |
|    approx_kl            | 0.001055611 |
|    clip_fraction        | 0.0089      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.1       |
|    explained_variance   | -7.87       |
|    learning_rate        | 1e

  for j in range(len(p_1)-1):


Eval num_timesteps=50000, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.686        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.678        |
| time/                   |              |
|    fps                  | 64           |
|    iterations           | 10           |
|    time_elapsed         | 778          |
|    total_timesteps      | 50000        |
| train/                  |              |
|    approx_kl            | 0.0010675994 |
|    clip_fraction        | 0.00679      |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -3.92        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.125        |
|    n_updates            | 180  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.686        |
| time/                   |              |
|    fps                  | 65           |
|    iterations           | 19           |
|    time_elapsed         | 1447         |
|    total_timesteps      | 95000        |
| train/                  |              |
|    approx_kl            | 0.0013296006 |
|    clip_fraction        | 0.0136       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -2.22        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0978       |
|    n_updates            | 360          |
|    policy_gradient_loss | -0.00443     |
|    std                  | 0.055        |
|    value_loss           | 0.145        |
------------------------------------------
Eval num_timesteps=100000, episode_reward=0.70 +/- 0.0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.684        |
| time/                   |              |
|    fps                  | 65           |
|    iterations           | 28           |
|    time_elapsed         | 2135         |
|    total_timesteps      | 140000       |
| train/                  |              |
|    approx_kl            | 0.0010838854 |
|    clip_fraction        | 0.00797      |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -1.36        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0729       |
|    n_updates            | 540          |
|    policy_gradient_loss | -0.00412     |
|    std                  | 0.055        |
|    value_loss           | 0.107        |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.685        |
| time/                   |              |
|    fps                  | 65           |
|    iterations           | 37           |
|    time_elapsed         | 2821         |
|    total_timesteps      | 185000       |
| train/                  |              |
|    approx_kl            | 0.0010381336 |
|    clip_fraction        | 0.00634      |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -0.835       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0639       |
|    n_updates            | 720          |
|    policy_gradient_loss | -0.00366     |
|    std                  | 0.055        |
|    value_loss           | 0.0826       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.688        |
| time/                   |              |
|    fps                  | 65           |
|    iterations           | 46           |
|    time_elapsed         | 3500         |
|    total_timesteps      | 230000       |
| train/                  |              |
|    approx_kl            | 0.0012352491 |
|    clip_fraction        | 0.00782      |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -0.395       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0515       |
|    n_updates            | 900          |
|    policy_gradient_loss | -0.00404     |
|    std                  | 0.055        |
|    value_loss           | 0.0628       |
------------------------------------------
-----------------------------------------
| rollout/  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.687      |
| time/                   |            |
|    fps                  | 65         |
|    iterations           | 55         |
|    time_elapsed         | 4168       |
|    total_timesteps      | 275000     |
| train/                  |            |
|    approx_kl            | 0.00110775 |
|    clip_fraction        | 0.0105     |
|    clip_range           | 0.1        |
|    entropy_loss         | -31.1      |
|    explained_variance   | -0.159     |
|    learning_rate        | 1e-06      |
|    loss                 | 0.0543     |
|    n_updates            | 1080       |
|    policy_gradient_loss | -0.00406   |
|    std                  | 0.055      |
|    value_loss           | 0.0519     |
----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.69         |
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 64           |
|    time_elapsed         | 4831         |
|    total_timesteps      | 320000       |
| train/                  |              |
|    approx_kl            | 0.0012554462 |
|    clip_fraction        | 0.00999      |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.0576       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0467       |
|    n_updates            | 1260         |
|    policy_gradient_loss | -0.0041      |
|    std                  | 0.055        |
|    value_loss           | 0.0422       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.691        |
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 73           |
|    time_elapsed         | 5495         |
|    total_timesteps      | 365000       |
| train/                  |              |
|    approx_kl            | 0.0010661578 |
|    clip_fraction        | 0.00677      |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.227        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.04         |
|    n_updates            | 1440         |
|    policy_gradient_loss | -0.00373     |
|    std                  | 0.055        |
|    value_loss           | 0.0347       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.693        |
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 82           |
|    time_elapsed         | 6159         |
|    total_timesteps      | 410000       |
| train/                  |              |
|    approx_kl            | 0.0010344686 |
|    clip_fraction        | 0.00516      |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.345        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0381       |
|    n_updates            | 1620         |
|    policy_gradient_loss | -0.00369     |
|    std                  | 0.055        |
|    value_loss           | 0.0294       |
------------------------------------------
-------------------------------------------
| rollout/

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.698        |
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 91           |
|    time_elapsed         | 6825         |
|    total_timesteps      | 455000       |
| train/                  |              |
|    approx_kl            | 0.0012344695 |
|    clip_fraction        | 0.00614      |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.41         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0368       |
|    n_updates            | 1800         |
|    policy_gradient_loss | -0.00362     |
|    std                  | 0.055        |
|    value_loss           | 0.0267       |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=500000, episode_reward=0.71 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.706        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.699        |
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 100          |
|    time_elapsed         | 7488         |
|    total_timesteps      | 500000       |
| train/                  |              |
|    approx_kl            | 0.0013518205 |
|    clip_fraction        | 0.0157       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.493        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0328       |
|    n_updates            | 1980         |
|    policy

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.696        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 109          |
|    time_elapsed         | 8132         |
|    total_timesteps      | 545000       |
| train/                  |              |
|    approx_kl            | 0.0014700023 |
|    clip_fraction        | 0.0225       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.55         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0352       |
|    n_updates            | 2160         |
|    policy_gradient_loss | -0.00469     |
|    std                  | 0.055        |
|    value_loss           | 0.0206       |
------------------------------------------
Eval num_timesteps=550000, episode_reward=0.71 +/- 0.0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.706        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 118          |
|    time_elapsed         | 8797         |
|    total_timesteps      | 590000       |
| train/                  |              |
|    approx_kl            | 0.0014019193 |
|    clip_fraction        | 0.0138       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.627        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0322       |
|    n_updates            | 2340         |
|    policy_gradient_loss | -0.00408     |
|    std                  | 0.055        |
|    value_loss           | 0.0175       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.712        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 127          |
|    time_elapsed         | 9459         |
|    total_timesteps      | 635000       |
| train/                  |              |
|    approx_kl            | 0.0013259979 |
|    clip_fraction        | 0.0205       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.648        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.032        |
|    n_updates            | 2520         |
|    policy_gradient_loss | -0.00417     |
|    std                  | 0.055        |
|    value_loss           | 0.0166       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.716        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 136          |
|    time_elapsed         | 10119        |
|    total_timesteps      | 680000       |
| train/                  |              |
|    approx_kl            | 0.0017319453 |
|    clip_fraction        | 0.0356       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.696        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0346       |
|    n_updates            | 2700         |
|    policy_gradient_loss | -0.00489     |
|    std                  | 0.055        |
|    value_loss           | 0.0147       |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.729        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 145          |
|    time_elapsed         | 10783        |
|    total_timesteps      | 725000       |
| train/                  |              |
|    approx_kl            | 0.0016849799 |
|    clip_fraction        | 0.0353       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.717        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0297       |
|    n_updates            | 2880         |
|    policy_gradient_loss | -0.00489     |
|    std                  | 0.055        |
|    value_loss           | 0.0139       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.73         |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 154          |
|    time_elapsed         | 11445        |
|    total_timesteps      | 770000       |
| train/                  |              |
|    approx_kl            | 0.0017681314 |
|    clip_fraction        | 0.0274       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.756        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0281       |
|    n_updates            | 3060         |
|    policy_gradient_loss | -0.00442     |
|    std                  | 0.055        |
|    value_loss           | 0.0122       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.737        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 163          |
|    time_elapsed         | 12108        |
|    total_timesteps      | 815000       |
| train/                  |              |
|    approx_kl            | 0.0017540634 |
|    clip_fraction        | 0.0362       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.771        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0289       |
|    n_updates            | 3240         |
|    policy_gradient_loss | -0.0046      |
|    std                  | 0.055        |
|    value_loss           | 0.0117       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.745        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 172          |
|    time_elapsed         | 12773        |
|    total_timesteps      | 860000       |
| train/                  |              |
|    approx_kl            | 0.0015352219 |
|    clip_fraction        | 0.03         |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.798        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0325       |
|    n_updates            | 3420         |
|    policy_gradient_loss | -0.00374     |
|    std                  | 0.055        |
|    value_loss           | 0.0105       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.748        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 181          |
|    time_elapsed         | 13438        |
|    total_timesteps      | 905000       |
| train/                  |              |
|    approx_kl            | 0.0016831727 |
|    clip_fraction        | 0.04         |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.816        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0292       |
|    n_updates            | 3600         |
|    policy_gradient_loss | -0.00436     |
|    std                  | 0.055        |
|    value_loss           | 0.00975      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=950000, episode_reward=0.76 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.758        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.756        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 190          |
|    time_elapsed         | 14102        |
|    total_timesteps      | 950000       |
| train/                  |              |
|    approx_kl            | 0.0016017519 |
|    clip_fraction        | 0.0383       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.828        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0286       |
|    n_updates            | 3780

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.764        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 199          |
|    time_elapsed         | 14746        |
|    total_timesteps      | 995000       |
| train/                  |              |
|    approx_kl            | 0.0014205874 |
|    clip_fraction        | 0.0271       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.845        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.031        |
|    n_updates            | 3960         |
|    policy_gradient_loss | -0.00364     |
|    std                  | 0.055        |
|    value_loss           | 0.00844      |
------------------------------------------
Eval num_timesteps=1000000, episode_reward=0.77 +/- 0.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.77        |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 208         |
|    time_elapsed         | 15408       |
|    total_timesteps      | 1040000     |
| train/                  |             |
|    approx_kl            | 0.001518752 |
|    clip_fraction        | 0.0375      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.1       |
|    explained_variance   | 0.857       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0244      |
|    n_updates            | 4140        |
|    policy_gradient_loss | -0.0039     |
|    std                  | 0.055       |
|    value_loss           | 0.00795     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.771        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 217          |
|    time_elapsed         | 16072        |
|    total_timesteps      | 1085000      |
| train/                  |              |
|    approx_kl            | 0.0015927666 |
|    clip_fraction        | 0.0404       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.864        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0268       |
|    n_updates            | 4320         |
|    policy_gradient_loss | -0.00363     |
|    std                  | 0.055        |
|    value_loss           | 0.00764      |
------------------------------------------
------------------------------------------
| rollout/ 

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.782      |
| time/                   |            |
|    fps                  | 67         |
|    iterations           | 226        |
|    time_elapsed         | 16735      |
|    total_timesteps      | 1130000    |
| train/                  |            |
|    approx_kl            | 0.00156314 |
|    clip_fraction        | 0.0414     |
|    clip_range           | 0.1        |
|    entropy_loss         | -31.1      |
|    explained_variance   | 0.876      |
|    learning_rate        | 1e-06      |
|    loss                 | 0.0267     |
|    n_updates            | 4500       |
|    policy_gradient_loss | -0.00393   |
|    std                  | 0.0549     |
|    value_loss           | 0.00703    |
----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.785       |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 235         |
|    time_elapsed         | 17398       |
|    total_timesteps      | 1175000     |
| train/                  |             |
|    approx_kl            | 0.001245504 |
|    clip_fraction        | 0.0284      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.1       |
|    explained_variance   | 0.889       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.032       |
|    n_updates            | 4680        |
|    policy_gradient_loss | -0.00306    |
|    std                  | 0.0549      |
|    value_loss           | 0.00638     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.784        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 244          |
|    time_elapsed         | 18061        |
|    total_timesteps      | 1220000      |
| train/                  |              |
|    approx_kl            | 0.0015364279 |
|    clip_fraction        | 0.0376       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.888        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0286       |
|    n_updates            | 4860         |
|    policy_gradient_loss | -0.0037      |
|    std                  | 0.0549       |
|    value_loss           | 0.0065       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.791        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 253          |
|    time_elapsed         | 18725        |
|    total_timesteps      | 1265000      |
| train/                  |              |
|    approx_kl            | 0.0015749722 |
|    clip_fraction        | 0.0351       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.896        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.028        |
|    n_updates            | 5040         |
|    policy_gradient_loss | -0.00342     |
|    std                  | 0.0549       |
|    value_loss           | 0.00606      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.794        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 262          |
|    time_elapsed         | 19390        |
|    total_timesteps      | 1310000      |
| train/                  |              |
|    approx_kl            | 0.0015804213 |
|    clip_fraction        | 0.0386       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.9          |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0287       |
|    n_updates            | 5220         |
|    policy_gradient_loss | -0.00355     |
|    std                  | 0.0549       |
|    value_loss           | 0.00593      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.794        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 271          |
|    time_elapsed         | 20055        |
|    total_timesteps      | 1355000      |
| train/                  |              |
|    approx_kl            | 0.0016061779 |
|    clip_fraction        | 0.0529       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.902        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0229       |
|    n_updates            | 5400         |
|    policy_gradient_loss | -0.00424     |
|    std                  | 0.0549       |
|    value_loss           | 0.00585      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=1400000, episode_reward=0.82 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.821        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.802        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 280          |
|    time_elapsed         | 20720        |
|    total_timesteps      | 1400000      |
| train/                  |              |
|    approx_kl            | 0.0017157078 |
|    clip_fraction        | 0.0497       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.907        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0314       |
|    n_updates            | 558

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.804        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 289          |
|    time_elapsed         | 21365        |
|    total_timesteps      | 1445000      |
| train/                  |              |
|    approx_kl            | 0.0018489371 |
|    clip_fraction        | 0.0514       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.906        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0289       |
|    n_updates            | 5760         |
|    policy_gradient_loss | -0.00409     |
|    std                  | 0.0549       |
|    value_loss           | 0.00572      |
------------------------------------------
Eval num_timesteps=1450000, episode_reward=0.83 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.807        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 298          |
|    time_elapsed         | 22029        |
|    total_timesteps      | 1490000      |
| train/                  |              |
|    approx_kl            | 0.0016208047 |
|    clip_fraction        | 0.0451       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.916        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0257       |
|    n_updates            | 5940         |
|    policy_gradient_loss | -0.00401     |
|    std                  | 0.0549       |
|    value_loss           | 0.00515      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.812        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 307          |
|    time_elapsed         | 22693        |
|    total_timesteps      | 1535000      |
| train/                  |              |
|    approx_kl            | 0.0015340802 |
|    clip_fraction        | 0.0438       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.916        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0268       |
|    n_updates            | 6120         |
|    policy_gradient_loss | -0.0041      |
|    std                  | 0.0549       |
|    value_loss           | 0.00517      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.814        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 316          |
|    time_elapsed         | 23356        |
|    total_timesteps      | 1580000      |
| train/                  |              |
|    approx_kl            | 0.0014586985 |
|    clip_fraction        | 0.0395       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.918        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0249       |
|    n_updates            | 6300         |
|    policy_gradient_loss | -0.00377     |
|    std                  | 0.0548       |
|    value_loss           | 0.00509      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.814        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 325          |
|    time_elapsed         | 24018        |
|    total_timesteps      | 1625000      |
| train/                  |              |
|    approx_kl            | 0.0014507374 |
|    clip_fraction        | 0.0335       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.925        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0252       |
|    n_updates            | 6480         |
|    policy_gradient_loss | -0.00323     |
|    std                  | 0.0548       |
|    value_loss           | 0.00465      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.817        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 334          |
|    time_elapsed         | 24683        |
|    total_timesteps      | 1670000      |
| train/                  |              |
|    approx_kl            | 0.0013906388 |
|    clip_fraction        | 0.0244       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.925        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.032        |
|    n_updates            | 6660         |
|    policy_gradient_loss | -0.00262     |
|    std                  | 0.0548       |
|    value_loss           | 0.0047       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.816        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 343          |
|    time_elapsed         | 25343        |
|    total_timesteps      | 1715000      |
| train/                  |              |
|    approx_kl            | 0.0015972666 |
|    clip_fraction        | 0.0576       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.929        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0251       |
|    n_updates            | 6840         |
|    policy_gradient_loss | -0.0045      |
|    std                  | 0.0548       |
|    value_loss           | 0.00446      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.819        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 352          |
|    time_elapsed         | 26005        |
|    total_timesteps      | 1760000      |
| train/                  |              |
|    approx_kl            | 0.0014887162 |
|    clip_fraction        | 0.0331       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.929        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0263       |
|    n_updates            | 7020         |
|    policy_gradient_loss | -0.0029      |
|    std                  | 0.0548       |
|    value_loss           | 0.00446      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.822        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 361          |
|    time_elapsed         | 26671        |
|    total_timesteps      | 1805000      |
| train/                  |              |
|    approx_kl            | 0.0014603854 |
|    clip_fraction        | 0.0483       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.931        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.022        |
|    n_updates            | 7200         |
|    policy_gradient_loss | -0.00397     |
|    std                  | 0.0548       |
|    value_loss           | 0.00435      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=1850000, episode_reward=0.83 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.834        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.822        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 370          |
|    time_elapsed         | 27334        |
|    total_timesteps      | 1850000      |
| train/                  |              |
|    approx_kl            | 0.0014361556 |
|    clip_fraction        | 0.033        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.934        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0289       |
|    n_updates            | 7380         |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.827        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 379          |
|    time_elapsed         | 27980        |
|    total_timesteps      | 1895000      |
| train/                  |              |
|    approx_kl            | 0.0013546356 |
|    clip_fraction        | 0.0398       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.935        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0298       |
|    n_updates            | 7560         |
|    policy_gradient_loss | -0.00348     |
|    std                  | 0.0548       |
|    value_loss           | 0.00411      |
------------------------------------------
Eval num_timesteps=1900000, episode_reward=0.84 +/- 0.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.827       |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 388         |
|    time_elapsed         | 28644       |
|    total_timesteps      | 1940000     |
| train/                  |             |
|    approx_kl            | 0.001556072 |
|    clip_fraction        | 0.034       |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.2       |
|    explained_variance   | 0.933       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0285      |
|    n_updates            | 7740        |
|    policy_gradient_loss | -0.00301    |
|    std                  | 0.0548      |
|    value_loss           | 0.00425     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.827        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 397          |
|    time_elapsed         | 29308        |
|    total_timesteps      | 1985000      |
| train/                  |              |
|    approx_kl            | 0.0013547204 |
|    clip_fraction        | 0.0218       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.935        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.03         |
|    n_updates            | 7920         |
|    policy_gradient_loss | -0.00248     |
|    std                  | 0.0547       |
|    value_loss           | 0.00415      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.825        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 406          |
|    time_elapsed         | 29975        |
|    total_timesteps      | 2030000      |
| train/                  |              |
|    approx_kl            | 0.0016058312 |
|    clip_fraction        | 0.0485       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.938        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0307       |
|    n_updates            | 8100         |
|    policy_gradient_loss | -0.00351     |
|    std                  | 0.0547       |
|    value_loss           | 0.00398      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.828        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 415          |
|    time_elapsed         | 30638        |
|    total_timesteps      | 2075000      |
| train/                  |              |
|    approx_kl            | 0.0014015074 |
|    clip_fraction        | 0.0337       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.937        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0304       |
|    n_updates            | 8280         |
|    policy_gradient_loss | -0.00321     |
|    std                  | 0.0547       |
|    value_loss           | 0.00402      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.83         |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 424          |
|    time_elapsed         | 31303        |
|    total_timesteps      | 2120000      |
| train/                  |              |
|    approx_kl            | 0.0014112229 |
|    clip_fraction        | 0.042        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.939        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.028        |
|    n_updates            | 8460         |
|    policy_gradient_loss | -0.00371     |
|    std                  | 0.0547       |
|    value_loss           | 0.00394      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.834        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 433          |
|    time_elapsed         | 31969        |
|    total_timesteps      | 2165000      |
| train/                  |              |
|    approx_kl            | 0.0019211861 |
|    clip_fraction        | 0.062        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.939        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.029        |
|    n_updates            | 8640         |
|    policy_gradient_loss | -0.00473     |
|    std                  | 0.0547       |
|    value_loss           | 0.00392      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.832        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 442          |
|    time_elapsed         | 32636        |
|    total_timesteps      | 2210000      |
| train/                  |              |
|    approx_kl            | 0.0013180916 |
|    clip_fraction        | 0.0374       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.939        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0293       |
|    n_updates            | 8820         |
|    policy_gradient_loss | -0.00358     |
|    std                  | 0.0547       |
|    value_loss           | 0.00395      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.828        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 451          |
|    time_elapsed         | 33298        |
|    total_timesteps      | 2255000      |
| train/                  |              |
|    approx_kl            | 0.0012240021 |
|    clip_fraction        | 0.0198       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.942        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0278       |
|    n_updates            | 9000         |
|    policy_gradient_loss | -0.00193     |
|    std                  | 0.0547       |
|    value_loss           | 0.00371      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=2300000, episode_reward=0.84 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.836        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.832        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 460          |
|    time_elapsed         | 33962        |
|    total_timesteps      | 2300000      |
| train/                  |              |
|    approx_kl            | 0.0012931643 |
|    clip_fraction        | 0.0254       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.943        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0319       |
|    n_updates            | 9180         |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.831        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 469          |
|    time_elapsed         | 34607        |
|    total_timesteps      | 2345000      |
| train/                  |              |
|    approx_kl            | 0.0014645298 |
|    clip_fraction        | 0.0345       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.943        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0288       |
|    n_updates            | 9360         |
|    policy_gradient_loss | -0.00326     |
|    std                  | 0.0547       |
|    value_loss           | 0.00369      |
------------------------------------------
Eval num_timesteps=2350000, episode_reward=0.84 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.831        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 478          |
|    time_elapsed         | 35273        |
|    total_timesteps      | 2390000      |
| train/                  |              |
|    approx_kl            | 0.0012763294 |
|    clip_fraction        | 0.0338       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.944        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0293       |
|    n_updates            | 9540         |
|    policy_gradient_loss | -0.00291     |
|    std                  | 0.0547       |
|    value_loss           | 0.0036       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.833        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 487          |
|    time_elapsed         | 35939        |
|    total_timesteps      | 2435000      |
| train/                  |              |
|    approx_kl            | 0.0014688626 |
|    clip_fraction        | 0.0335       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.944        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0278       |
|    n_updates            | 9720         |
|    policy_gradient_loss | -0.00271     |
|    std                  | 0.0547       |
|    value_loss           | 0.00364      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.835        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 496          |
|    time_elapsed         | 36605        |
|    total_timesteps      | 2480000      |
| train/                  |              |
|    approx_kl            | 0.0014503375 |
|    clip_fraction        | 0.0395       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.947        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0284       |
|    n_updates            | 9900         |
|    policy_gradient_loss | -0.0032      |
|    std                  | 0.0547       |
|    value_loss           | 0.00347      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.834        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 505          |
|    time_elapsed         | 37270        |
|    total_timesteps      | 2525000      |
| train/                  |              |
|    approx_kl            | 0.0016385993 |
|    clip_fraction        | 0.0527       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.95         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0271       |
|    n_updates            | 10080        |
|    policy_gradient_loss | -0.00398     |
|    std                  | 0.0547       |
|    value_loss           | 0.00327      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.835        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 514          |
|    time_elapsed         | 37936        |
|    total_timesteps      | 2570000      |
| train/                  |              |
|    approx_kl            | 0.0016215744 |
|    clip_fraction        | 0.0405       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.949        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0304       |
|    n_updates            | 10260        |
|    policy_gradient_loss | -0.00361     |
|    std                  | 0.0546       |
|    value_loss           | 0.00331      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.835        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 523          |
|    time_elapsed         | 38600        |
|    total_timesteps      | 2615000      |
| train/                  |              |
|    approx_kl            | 0.0012409122 |
|    clip_fraction        | 0.0285       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.949        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0278       |
|    n_updates            | 10440        |
|    policy_gradient_loss | -0.00292     |
|    std                  | 0.0546       |
|    value_loss           | 0.00335      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.837        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 532          |
|    time_elapsed         | 39267        |
|    total_timesteps      | 2660000      |
| train/                  |              |
|    approx_kl            | 0.0015014135 |
|    clip_fraction        | 0.0375       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.95         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0285       |
|    n_updates            | 10620        |
|    policy_gradient_loss | -0.00343     |
|    std                  | 0.0546       |
|    value_loss           | 0.00322      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.836        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 541          |
|    time_elapsed         | 39932        |
|    total_timesteps      | 2705000      |
| train/                  |              |
|    approx_kl            | 0.0015044258 |
|    clip_fraction        | 0.0343       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.951        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0291       |
|    n_updates            | 10800        |
|    policy_gradient_loss | -0.00273     |
|    std                  | 0.0546       |
|    value_loss           | 0.00317      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=2750000, episode_reward=0.84 +/- 0.00
Episode length: 5.00 +/- 0.00
----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 5          |
|    mean_reward          | 0.839      |
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.834      |
| time/                   |            |
|    fps                  | 67         |
|    iterations           | 550        |
|    time_elapsed         | 40600      |
|    total_timesteps      | 2750000    |
| train/                  |            |
|    approx_kl            | 0.00146961 |
|    clip_fraction        | 0.0352     |
|    clip_range           | 0.1        |
|    entropy_loss         | -31.3      |
|    explained_variance   | 0.951      |
|    learning_rate        | 1e-06      |
|    loss                 | 0.0282     |
|    n_updates            | 10980      |
|    policy_gradient_loss | -0.00283   |
|    std   

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.834        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 559          |
|    time_elapsed         | 41247        |
|    total_timesteps      | 2795000      |
| train/                  |              |
|    approx_kl            | 0.0013565052 |
|    clip_fraction        | 0.0289       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.952        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0281       |
|    n_updates            | 11160        |
|    policy_gradient_loss | -0.00261     |
|    std                  | 0.0546       |
|    value_loss           | 0.00314      |
------------------------------------------
Eval num_timesteps=2800000, episode_reward=0.84 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.838        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 568          |
|    time_elapsed         | 41914        |
|    total_timesteps      | 2840000      |
| train/                  |              |
|    approx_kl            | 0.0017180499 |
|    clip_fraction        | 0.0505       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.952        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0236       |
|    n_updates            | 11340        |
|    policy_gradient_loss | -0.00374     |
|    std                  | 0.0546       |
|    value_loss           | 0.00312      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.838        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 577          |
|    time_elapsed         | 42582        |
|    total_timesteps      | 2885000      |
| train/                  |              |
|    approx_kl            | 0.0013634065 |
|    clip_fraction        | 0.0355       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.954        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0281       |
|    n_updates            | 11520        |
|    policy_gradient_loss | -0.00316     |
|    std                  | 0.0546       |
|    value_loss           | 0.00299      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.841        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 586          |
|    time_elapsed         | 43248        |
|    total_timesteps      | 2930000      |
| train/                  |              |
|    approx_kl            | 0.0012942825 |
|    clip_fraction        | 0.0242       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.954        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0306       |
|    n_updates            | 11700        |
|    policy_gradient_loss | -0.00229     |
|    std                  | 0.0546       |
|    value_loss           | 0.00301      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.841        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 595          |
|    time_elapsed         | 43916        |
|    total_timesteps      | 2975000      |
| train/                  |              |
|    approx_kl            | 0.0011455051 |
|    clip_fraction        | 0.0334       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.954        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0295       |
|    n_updates            | 11880        |
|    policy_gradient_loss | -0.00337     |
|    std                  | 0.0546       |
|    value_loss           | 0.00302      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.842        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 604          |
|    time_elapsed         | 44582        |
|    total_timesteps      | 3020000      |
| train/                  |              |
|    approx_kl            | 0.0014187391 |
|    clip_fraction        | 0.0369       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.955        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0302       |
|    n_updates            | 12060        |
|    policy_gradient_loss | -0.00331     |
|    std                  | 0.0546       |
|    value_loss           | 0.00298      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.839        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 613          |
|    time_elapsed         | 45246        |
|    total_timesteps      | 3065000      |
| train/                  |              |
|    approx_kl            | 0.0014203686 |
|    clip_fraction        | 0.0335       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.954        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0252       |
|    n_updates            | 12240        |
|    policy_gradient_loss | -0.00306     |
|    std                  | 0.0546       |
|    value_loss           | 0.00304      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.841       |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 622         |
|    time_elapsed         | 45906       |
|    total_timesteps      | 3110000     |
| train/                  |             |
|    approx_kl            | 0.001786504 |
|    clip_fraction        | 0.0564      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.955       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0279      |
|    n_updates            | 12420       |
|    policy_gradient_loss | -0.00398    |
|    std                  | 0.0546      |
|    value_loss           | 0.00294     |
-----------------------------------------
----------------------------------------
| rollout/                |        

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.835        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 631          |
|    time_elapsed         | 46563        |
|    total_timesteps      | 3155000      |
| train/                  |              |
|    approx_kl            | 0.0015138394 |
|    clip_fraction        | 0.025        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.954        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0249       |
|    n_updates            | 12600        |
|    policy_gradient_loss | -0.0024      |
|    std                  | 0.0546       |
|    value_loss           | 0.003        |
------------------------------------------
-----------------------------------------
| rollout/  

Eval num_timesteps=3200000, episode_reward=0.84 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.844        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.841        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 640          |
|    time_elapsed         | 47219        |
|    total_timesteps      | 3200000      |
| train/                  |              |
|    approx_kl            | 0.0013732492 |
|    clip_fraction        | 0.0335       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.956        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0296       |
|    n_updates            | 12780        |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.84         |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 649          |
|    time_elapsed         | 47858        |
|    total_timesteps      | 3245000      |
| train/                  |              |
|    approx_kl            | 0.0012663045 |
|    clip_fraction        | 0.0348       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.955        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0268       |
|    n_updates            | 12960        |
|    policy_gradient_loss | -0.00282     |
|    std                  | 0.0545       |
|    value_loss           | 0.00294      |
------------------------------------------
Eval num_timesteps=3250000, episode_reward=0.84 +/- 0.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.836       |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 658         |
|    time_elapsed         | 48515       |
|    total_timesteps      | 3290000     |
| train/                  |             |
|    approx_kl            | 0.001629966 |
|    clip_fraction        | 0.0505      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.956       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0248      |
|    n_updates            | 13140       |
|    policy_gradient_loss | -0.00374    |
|    std                  | 0.0545      |
|    value_loss           | 0.0029      |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.842        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 667          |
|    time_elapsed         | 49174        |
|    total_timesteps      | 3335000      |
| train/                  |              |
|    approx_kl            | 0.0013876589 |
|    clip_fraction        | 0.0322       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.956        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0278       |
|    n_updates            | 13320        |
|    policy_gradient_loss | -0.00298     |
|    std                  | 0.0545       |
|    value_loss           | 0.00289      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.838        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 676          |
|    time_elapsed         | 49830        |
|    total_timesteps      | 3380000      |
| train/                  |              |
|    approx_kl            | 0.0014811829 |
|    clip_fraction        | 0.0241       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.957        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0289       |
|    n_updates            | 13500        |
|    policy_gradient_loss | -0.00243     |
|    std                  | 0.0545       |
|    value_loss           | 0.00282      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.838        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 685          |
|    time_elapsed         | 50490        |
|    total_timesteps      | 3425000      |
| train/                  |              |
|    approx_kl            | 0.0014272575 |
|    clip_fraction        | 0.0285       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.959        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0305       |
|    n_updates            | 13680        |
|    policy_gradient_loss | -0.00272     |
|    std                  | 0.0545       |
|    value_loss           | 0.00271      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.843        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 694          |
|    time_elapsed         | 51147        |
|    total_timesteps      | 3470000      |
| train/                  |              |
|    approx_kl            | 0.0012401661 |
|    clip_fraction        | 0.0257       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.958        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0276       |
|    n_updates            | 13860        |
|    policy_gradient_loss | -0.0023      |
|    std                  | 0.0545       |
|    value_loss           | 0.00279      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.839        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 703          |
|    time_elapsed         | 51805        |
|    total_timesteps      | 3515000      |
| train/                  |              |
|    approx_kl            | 0.0012062031 |
|    clip_fraction        | 0.0259       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.959        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0285       |
|    n_updates            | 14040        |
|    policy_gradient_loss | -0.00248     |
|    std                  | 0.0545       |
|    value_loss           | 0.00273      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.843        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 712          |
|    time_elapsed         | 52463        |
|    total_timesteps      | 3560000      |
| train/                  |              |
|    approx_kl            | 0.0015604153 |
|    clip_fraction        | 0.0386       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.96         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0265       |
|    n_updates            | 14220        |
|    policy_gradient_loss | -0.00323     |
|    std                  | 0.0545       |
|    value_loss           | 0.00266      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.846        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 721          |
|    time_elapsed         | 53123        |
|    total_timesteps      | 3605000      |
| train/                  |              |
|    approx_kl            | 0.0014673654 |
|    clip_fraction        | 0.0303       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.961        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0274       |
|    n_updates            | 14400        |
|    policy_gradient_loss | -0.00282     |
|    std                  | 0.0545       |
|    value_loss           | 0.00257      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=3650000, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.847        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 730          |
|    time_elapsed         | 53780        |
|    total_timesteps      | 3650000      |
| train/                  |              |
|    approx_kl            | 0.0013802293 |
|    clip_fraction        | 0.0518       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.961        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0323       |
|    n_updates            | 145

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 739          |
|    time_elapsed         | 54420        |
|    total_timesteps      | 3695000      |
| train/                  |              |
|    approx_kl            | 0.0015083415 |
|    clip_fraction        | 0.0452       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.96         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0265       |
|    n_updates            | 14760        |
|    policy_gradient_loss | -0.00343     |
|    std                  | 0.0545       |
|    value_loss           | 0.00266      |
------------------------------------------
Eval num_timesteps=3700000, episode_reward=0.85 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 748          |
|    time_elapsed         | 55079        |
|    total_timesteps      | 3740000      |
| train/                  |              |
|    approx_kl            | 0.0013428712 |
|    clip_fraction        | 0.0354       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.961        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0282       |
|    n_updates            | 14940        |
|    policy_gradient_loss | -0.00303     |
|    std                  | 0.0545       |
|    value_loss           | 0.0026       |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.845       |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 757         |
|    time_elapsed         | 55739       |
|    total_timesteps      | 3785000     |
| train/                  |             |
|    approx_kl            | 0.001258174 |
|    clip_fraction        | 0.038       |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.96        |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0276      |
|    n_updates            | 15120       |
|    policy_gradient_loss | -0.00326    |
|    std                  | 0.0545      |
|    value_loss           | 0.00263     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.847        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 766          |
|    time_elapsed         | 56399        |
|    total_timesteps      | 3830000      |
| train/                  |              |
|    approx_kl            | 0.0015001858 |
|    clip_fraction        | 0.0363       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.963        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0276       |
|    n_updates            | 15300        |
|    policy_gradient_loss | -0.00328     |
|    std                  | 0.0545       |
|    value_loss           | 0.00245      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 775          |
|    time_elapsed         | 57060        |
|    total_timesteps      | 3875000      |
| train/                  |              |
|    approx_kl            | 0.0015765813 |
|    clip_fraction        | 0.0594       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.962        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 15480        |
|    policy_gradient_loss | -0.00403     |
|    std                  | 0.0545       |
|    value_loss           | 0.00253      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 784          |
|    time_elapsed         | 57721        |
|    total_timesteps      | 3920000      |
| train/                  |              |
|    approx_kl            | 0.0012921185 |
|    clip_fraction        | 0.0328       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.963        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 15660        |
|    policy_gradient_loss | -0.00277     |
|    std                  | 0.0545       |
|    value_loss           | 0.00246      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.847        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 793          |
|    time_elapsed         | 58381        |
|    total_timesteps      | 3965000      |
| train/                  |              |
|    approx_kl            | 0.0013127688 |
|    clip_fraction        | 0.0353       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.962        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0292       |
|    n_updates            | 15840        |
|    policy_gradient_loss | -0.00305     |
|    std                  | 0.0545       |
|    value_loss           | 0.00254      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 802          |
|    time_elapsed         | 59041        |
|    total_timesteps      | 4010000      |
| train/                  |              |
|    approx_kl            | 0.0014711312 |
|    clip_fraction        | 0.0334       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.963        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.029        |
|    n_updates            | 16020        |
|    policy_gradient_loss | -0.00302     |
|    std                  | 0.0545       |
|    value_loss           | 0.00245      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.849        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 811          |
|    time_elapsed         | 59700        |
|    total_timesteps      | 4055000      |
| train/                  |              |
|    approx_kl            | 0.0015200533 |
|    clip_fraction        | 0.0486       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.963        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0291       |
|    n_updates            | 16200        |
|    policy_gradient_loss | -0.00365     |
|    std                  | 0.0545       |
|    value_loss           | 0.00246      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=4100000, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.85         |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.847        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 820          |
|    time_elapsed         | 60361        |
|    total_timesteps      | 4100000      |
| train/                  |              |
|    approx_kl            | 0.0015032805 |
|    clip_fraction        | 0.0358       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.964        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.03         |
|    n_updates            | 16380        |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.846        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 829          |
|    time_elapsed         | 61000        |
|    total_timesteps      | 4145000      |
| train/                  |              |
|    approx_kl            | 0.0012830754 |
|    clip_fraction        | 0.0267       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.964        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0296       |
|    n_updates            | 16560        |
|    policy_gradient_loss | -0.00247     |
|    std                  | 0.0545       |
|    value_loss           | 0.00239      |
------------------------------------------
Eval num_timesteps=4150000, episode_reward=0.85 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.847        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 838          |
|    time_elapsed         | 61662        |
|    total_timesteps      | 4190000      |
| train/                  |              |
|    approx_kl            | 0.0015037684 |
|    clip_fraction        | 0.037        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.964        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0286       |
|    n_updates            | 16740        |
|    policy_gradient_loss | -0.00295     |
|    std                  | 0.0545       |
|    value_loss           | 0.00242      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 847          |
|    time_elapsed         | 62323        |
|    total_timesteps      | 4235000      |
| train/                  |              |
|    approx_kl            | 0.0017369854 |
|    clip_fraction        | 0.0467       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.963        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0185       |
|    n_updates            | 16920        |
|    policy_gradient_loss | -0.00351     |
|    std                  | 0.0545       |
|    value_loss           | 0.00245      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.844       |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 856         |
|    time_elapsed         | 62986       |
|    total_timesteps      | 4280000     |
| train/                  |             |
|    approx_kl            | 0.001357538 |
|    clip_fraction        | 0.0293      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.965       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0297      |
|    n_updates            | 17100       |
|    policy_gradient_loss | -0.00268    |
|    std                  | 0.0544      |
|    value_loss           | 0.00234     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 865          |
|    time_elapsed         | 63650        |
|    total_timesteps      | 4325000      |
| train/                  |              |
|    approx_kl            | 0.0016381665 |
|    clip_fraction        | 0.0557       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.964        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 17280        |
|    policy_gradient_loss | -0.00437     |
|    std                  | 0.0544       |
|    value_loss           | 0.00242      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.849       |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 874         |
|    time_elapsed         | 64317       |
|    total_timesteps      | 4370000     |
| train/                  |             |
|    approx_kl            | 0.001479826 |
|    clip_fraction        | 0.0404      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.966       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0261      |
|    n_updates            | 17460       |
|    policy_gradient_loss | -0.00328    |
|    std                  | 0.0544      |
|    value_loss           | 0.00229     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 883          |
|    time_elapsed         | 64980        |
|    total_timesteps      | 4415000      |
| train/                  |              |
|    approx_kl            | 0.0013023629 |
|    clip_fraction        | 0.0369       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.964        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0255       |
|    n_updates            | 17640        |
|    policy_gradient_loss | -0.0034      |
|    std                  | 0.0544       |
|    value_loss           | 0.0024       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 892          |
|    time_elapsed         | 65647        |
|    total_timesteps      | 4460000      |
| train/                  |              |
|    approx_kl            | 0.0014192355 |
|    clip_fraction        | 0.0398       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.965        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0273       |
|    n_updates            | 17820        |
|    policy_gradient_loss | -0.00316     |
|    std                  | 0.0544       |
|    value_loss           | 0.00235      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.85         |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 901          |
|    time_elapsed         | 66312        |
|    total_timesteps      | 4505000      |
| train/                  |              |
|    approx_kl            | 0.0013258258 |
|    clip_fraction        | 0.0384       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.966        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0256       |
|    n_updates            | 18000        |
|    policy_gradient_loss | -0.0029      |
|    std                  | 0.0544       |
|    value_loss           | 0.0023       |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=4550000, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.851        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 910          |
|    time_elapsed         | 66978        |
|    total_timesteps      | 4550000      |
| train/                  |              |
|    approx_kl            | 0.0013072351 |
|    clip_fraction        | 0.0313       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.966        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0316       |
|    n_updates            | 181

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.849        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 919          |
|    time_elapsed         | 67628        |
|    total_timesteps      | 4595000      |
| train/                  |              |
|    approx_kl            | 0.0012993384 |
|    clip_fraction        | 0.0307       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.966        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0268       |
|    n_updates            | 18360        |
|    policy_gradient_loss | -0.00294     |
|    std                  | 0.0544       |
|    value_loss           | 0.00229      |
------------------------------------------
Eval num_timesteps=4600000, episode_reward=0.85 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.849        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 928          |
|    time_elapsed         | 68295        |
|    total_timesteps      | 4640000      |
| train/                  |              |
|    approx_kl            | 0.0015655006 |
|    clip_fraction        | 0.0349       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.965        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0297       |
|    n_updates            | 18540        |
|    policy_gradient_loss | -0.00272     |
|    std                  | 0.0544       |
|    value_loss           | 0.00233      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 937          |
|    time_elapsed         | 68961        |
|    total_timesteps      | 4685000      |
| train/                  |              |
|    approx_kl            | 0.0017874672 |
|    clip_fraction        | 0.051        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.965        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0239       |
|    n_updates            | 18720        |
|    policy_gradient_loss | -0.00384     |
|    std                  | 0.0544       |
|    value_loss           | 0.00235      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.847        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 946          |
|    time_elapsed         | 69627        |
|    total_timesteps      | 4730000      |
| train/                  |              |
|    approx_kl            | 0.0015187975 |
|    clip_fraction        | 0.0475       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.966        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0345       |
|    n_updates            | 18900        |
|    policy_gradient_loss | -0.00322     |
|    std                  | 0.0544       |
|    value_loss           | 0.0023       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.849        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 955          |
|    time_elapsed         | 70296        |
|    total_timesteps      | 4775000      |
| train/                  |              |
|    approx_kl            | 0.0013420796 |
|    clip_fraction        | 0.0361       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.967        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0271       |
|    n_updates            | 19080        |
|    policy_gradient_loss | -0.00299     |
|    std                  | 0.0544       |
|    value_loss           | 0.00223      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.85        |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 964         |
|    time_elapsed         | 70963       |
|    total_timesteps      | 4820000     |
| train/                  |             |
|    approx_kl            | 0.001352694 |
|    clip_fraction        | 0.0355      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.968       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0268      |
|    n_updates            | 19260       |
|    policy_gradient_loss | -0.0029     |
|    std                  | 0.0544      |
|    value_loss           | 0.00215     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.851        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 973          |
|    time_elapsed         | 71634        |
|    total_timesteps      | 4865000      |
| train/                  |              |
|    approx_kl            | 0.0012005426 |
|    clip_fraction        | 0.0365       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.966        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0324       |
|    n_updates            | 19440        |
|    policy_gradient_loss | -0.00321     |
|    std                  | 0.0544       |
|    value_loss           | 0.00224      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.849        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 982          |
|    time_elapsed         | 72336        |
|    total_timesteps      | 4910000      |
| train/                  |              |
|    approx_kl            | 0.0012466179 |
|    clip_fraction        | 0.0346       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.966        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0278       |
|    n_updates            | 19620        |
|    policy_gradient_loss | -0.00303     |
|    std                  | 0.0544       |
|    value_loss           | 0.00229      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.852        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 991          |
|    time_elapsed         | 72991        |
|    total_timesteps      | 4955000      |
| train/                  |              |
|    approx_kl            | 0.0013015891 |
|    clip_fraction        | 0.0273       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.967        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0302       |
|    n_updates            | 19800        |
|    policy_gradient_loss | -0.00281     |
|    std                  | 0.0544       |
|    value_loss           | 0.00222      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=5000000, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.85        |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.848       |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 1000        |
|    time_elapsed         | 73643       |
|    total_timesteps      | 5000000     |
| train/                  |             |
|    approx_kl            | 0.001564374 |
|    clip_fraction        | 0.037       |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.967       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0273      |
|    n_updates            | 19980       |
|    policy_gradient_loss | -0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.851        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 1009         |
|    time_elapsed         | 74276        |
|    total_timesteps      | 5045000      |
| train/                  |              |
|    approx_kl            | 0.0012421077 |
|    clip_fraction        | 0.0319       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.966        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0248       |
|    n_updates            | 20160        |
|    policy_gradient_loss | -0.00265     |
|    std                  | 0.0544       |
|    value_loss           | 0.00229      |
------------------------------------------
Eval num_timesteps=5050000, episode_reward=0.85 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.852        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 1018         |
|    time_elapsed         | 74930        |
|    total_timesteps      | 5090000      |
| train/                  |              |
|    approx_kl            | 0.0010321125 |
|    clip_fraction        | 0.0172       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.967        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0274       |
|    n_updates            | 20340        |
|    policy_gradient_loss | -0.00213     |
|    std                  | 0.0544       |
|    value_loss           | 0.00223      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.846       |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 1027        |
|    time_elapsed         | 75584       |
|    total_timesteps      | 5135000     |
| train/                  |             |
|    approx_kl            | 0.001462053 |
|    clip_fraction        | 0.0476      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.966       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0239      |
|    n_updates            | 20520       |
|    policy_gradient_loss | -0.00352    |
|    std                  | 0.0543      |
|    value_loss           | 0.00229     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 1036         |
|    time_elapsed         | 76236        |
|    total_timesteps      | 5180000      |
| train/                  |              |
|    approx_kl            | 0.0013829042 |
|    clip_fraction        | 0.0419       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.967        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0306       |
|    n_updates            | 20700        |
|    policy_gradient_loss | -0.00336     |
|    std                  | 0.0543       |
|    value_loss           | 0.0022       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.851        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 1045         |
|    time_elapsed         | 76891        |
|    total_timesteps      | 5225000      |
| train/                  |              |
|    approx_kl            | 0.0011577794 |
|    clip_fraction        | 0.0335       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.967        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0289       |
|    n_updates            | 20880        |
|    policy_gradient_loss | -0.00326     |
|    std                  | 0.0543       |
|    value_loss           | 0.00218      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.849        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 1054         |
|    time_elapsed         | 77547        |
|    total_timesteps      | 5270000      |
| train/                  |              |
|    approx_kl            | 0.0013827324 |
|    clip_fraction        | 0.0314       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0261       |
|    n_updates            | 21060        |
|    policy_gradient_loss | -0.003       |
|    std                  | 0.0543       |
|    value_loss           | 0.00215      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.85        |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 1063        |
|    time_elapsed         | 78203       |
|    total_timesteps      | 5315000     |
| train/                  |             |
|    approx_kl            | 0.001234274 |
|    clip_fraction        | 0.0356      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.967       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0254      |
|    n_updates            | 21240       |
|    policy_gradient_loss | -0.00284    |
|    std                  | 0.0543      |
|    value_loss           | 0.00219     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 1072         |
|    time_elapsed         | 78858        |
|    total_timesteps      | 5360000      |
| train/                  |              |
|    approx_kl            | 0.0013226126 |
|    clip_fraction        | 0.035        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.969        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0294       |
|    n_updates            | 21420        |
|    policy_gradient_loss | -0.00332     |
|    std                  | 0.0543       |
|    value_loss           | 0.0021       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.851        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 1081         |
|    time_elapsed         | 79512        |
|    total_timesteps      | 5405000      |
| train/                  |              |
|    approx_kl            | 0.0014635476 |
|    clip_fraction        | 0.0349       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.967        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0282       |
|    n_updates            | 21600        |
|    policy_gradient_loss | -0.00309     |
|    std                  | 0.0543       |
|    value_loss           | 0.00223      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=5450000, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.851        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.849        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 1090         |
|    time_elapsed         | 80167        |
|    total_timesteps      | 5450000      |
| train/                  |              |
|    approx_kl            | 0.0012968675 |
|    clip_fraction        | 0.0366       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0274       |
|    n_updates            | 21780        |
|    polic

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.848       |
| time/                   |             |
|    fps                  | 68          |
|    iterations           | 1099        |
|    time_elapsed         | 80802       |
|    total_timesteps      | 5495000     |
| train/                  |             |
|    approx_kl            | 0.001516647 |
|    clip_fraction        | 0.0437      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.967       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0283      |
|    n_updates            | 21960       |
|    policy_gradient_loss | -0.00331    |
|    std                  | 0.0543      |
|    value_loss           | 0.00221     |
-----------------------------------------
Eval num_timesteps=5500000, episode_reward=0.85 +/- 0.00
Episode length: 5.0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.853        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 1108         |
|    time_elapsed         | 81454        |
|    total_timesteps      | 5540000      |
| train/                  |              |
|    approx_kl            | 0.0012837737 |
|    clip_fraction        | 0.0253       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0272       |
|    n_updates            | 22140        |
|    policy_gradient_loss | -0.00264     |
|    std                  | 0.0543       |
|    value_loss           | 0.00217      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.855        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 1117         |
|    time_elapsed         | 82098        |
|    total_timesteps      | 5585000      |
| train/                  |              |
|    approx_kl            | 0.0012572744 |
|    clip_fraction        | 0.0347       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.967        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0271       |
|    n_updates            | 22320        |
|    policy_gradient_loss | -0.00322     |
|    std                  | 0.0543       |
|    value_loss           | 0.00219      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.85         |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 1126         |
|    time_elapsed         | 82742        |
|    total_timesteps      | 5630000      |
| train/                  |              |
|    approx_kl            | 0.0011909375 |
|    clip_fraction        | 0.0203       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0274       |
|    n_updates            | 22500        |
|    policy_gradient_loss | -0.00215     |
|    std                  | 0.0543       |
|    value_loss           | 0.00214      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.85         |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 1135         |
|    time_elapsed         | 83386        |
|    total_timesteps      | 5675000      |
| train/                  |              |
|    approx_kl            | 0.0011844625 |
|    clip_fraction        | 0.0225       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0279       |
|    n_updates            | 22680        |
|    policy_gradient_loss | -0.0026      |
|    std                  | 0.0543       |
|    value_loss           | 0.00216      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.851        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 1144         |
|    time_elapsed         | 84031        |
|    total_timesteps      | 5720000      |
| train/                  |              |
|    approx_kl            | 0.0012840399 |
|    clip_fraction        | 0.027        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.969        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0276       |
|    n_updates            | 22860        |
|    policy_gradient_loss | -0.00279     |
|    std                  | 0.0543       |
|    value_loss           | 0.00212      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.852        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 1153         |
|    time_elapsed         | 84676        |
|    total_timesteps      | 5765000      |
| train/                  |              |
|    approx_kl            | 0.0014203024 |
|    clip_fraction        | 0.0378       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0319       |
|    n_updates            | 23040        |
|    policy_gradient_loss | -0.00295     |
|    std                  | 0.0543       |
|    value_loss           | 0.00213      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.852        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 1162         |
|    time_elapsed         | 85320        |
|    total_timesteps      | 5810000      |
| train/                  |              |
|    approx_kl            | 0.0014224512 |
|    clip_fraction        | 0.0435       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0269       |
|    n_updates            | 23220        |
|    policy_gradient_loss | -0.00347     |
|    std                  | 0.0543       |
|    value_loss           | 0.00213      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.85         |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 1171         |
|    time_elapsed         | 85967        |
|    total_timesteps      | 5855000      |
| train/                  |              |
|    approx_kl            | 0.0016901238 |
|    clip_fraction        | 0.0396       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.969        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0277       |
|    n_updates            | 23400        |
|    policy_gradient_loss | -0.00304     |
|    std                  | 0.0543       |
|    value_loss           | 0.00209      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=5900000, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.852        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.85         |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 1180         |
|    time_elapsed         | 86612        |
|    total_timesteps      | 5900000      |
| train/                  |              |
|    approx_kl            | 0.0014948008 |
|    clip_fraction        | 0.0377       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0307       |
|    n_updates            | 23580        |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.849        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 1189         |
|    time_elapsed         | 87238        |
|    total_timesteps      | 5945000      |
| train/                  |              |
|    approx_kl            | 0.0014599157 |
|    clip_fraction        | 0.0463       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.969        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0244       |
|    n_updates            | 23760        |
|    policy_gradient_loss | -0.00388     |
|    std                  | 0.0543       |
|    value_loss           | 0.0021       |
------------------------------------------
Eval num_timesteps=5950000, episode_reward=0.85 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.851        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 1198         |
|    time_elapsed         | 87884        |
|    total_timesteps      | 5990000      |
| train/                  |              |
|    approx_kl            | 0.0017312501 |
|    clip_fraction        | 0.0337       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0286       |
|    n_updates            | 23940        |
|    policy_gradient_loss | -0.00305     |
|    std                  | 0.0543       |
|    value_loss           | 0.00214      |
------------------------------------------
-----------------------------------------
| rollout/  

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):


seed 2
generate callback ...
vectorize environment ...
vectorize env level 1
Box(-1.0, 1.0, (35,), float64)
model definition ..
Using cuda device
Logging to ./data/ppo_1l/seed_2
policy learning ..




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5        |
|    ep_rew_mean     | 0.679    |
| time/              |          |
|    fps             | 56       |
|    iterations      | 1        |
|    time_elapsed    | 88       |
|    total_timesteps | 5000     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.682        |
| time/                   |              |
|    fps                  | 60           |
|    iterations           | 2            |
|    time_elapsed         | 165          |
|    total_timesteps      | 10000        |
| train/                  |              |
|    approx_kl            | 0.0013034223 |
|    clip_fraction        | 0.012        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -3.22        |
|    learning_r

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.683        |
| time/                   |              |
|    fps                  | 62           |
|    iterations           | 11           |
|    time_elapsed         | 874          |
|    total_timesteps      | 55000        |
| train/                  |              |
|    approx_kl            | 0.0014418078 |
|    clip_fraction        | 0.0213       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -1.67        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.103        |
|    n_updates            | 200          |
|    policy_gradient_loss | -0.00469     |
|    std                  | 0.055        |
|    value_loss           | 0.152        |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=100000, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.702        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.687        |
| time/                   |              |
|    fps                  | 63           |
|    iterations           | 20           |
|    time_elapsed         | 1573         |
|    total_timesteps      | 100000       |
| train/                  |              |
|    approx_kl            | 0.0013724508 |
|    clip_fraction        | 0.0152       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -0.947       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0722       |
|    n_updates            | 380 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.689       |
| time/                   |             |
|    fps                  | 64          |
|    iterations           | 29          |
|    time_elapsed         | 2257        |
|    total_timesteps      | 145000      |
| train/                  |             |
|    approx_kl            | 0.001589483 |
|    clip_fraction        | 0.0167      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.1       |
|    explained_variance   | -0.551      |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0566      |
|    n_updates            | 560         |
|    policy_gradient_loss | -0.00432    |
|    std                  | 0.055       |
|    value_loss           | 0.0722      |
-----------------------------------------
Eval num_timesteps=150000, episode_reward=0.71 +/- 0.00
Episode length: 5.00

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.688        |
| time/                   |              |
|    fps                  | 64           |
|    iterations           | 38           |
|    time_elapsed         | 2947         |
|    total_timesteps      | 190000       |
| train/                  |              |
|    approx_kl            | 0.0015840696 |
|    clip_fraction        | 0.0246       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -0.297       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0536       |
|    n_updates            | 740          |
|    policy_gradient_loss | -0.00481     |
|    std                  | 0.055        |
|    value_loss           | 0.0585       |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.695        |
| time/                   |              |
|    fps                  | 64           |
|    iterations           | 47           |
|    time_elapsed         | 3638         |
|    total_timesteps      | 235000       |
| train/                  |              |
|    approx_kl            | 0.0015137998 |
|    clip_fraction        | 0.0225       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -0.0429      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0554       |
|    n_updates            | 920          |
|    policy_gradient_loss | -0.00423     |
|    std                  | 0.055        |
|    value_loss           | 0.0472       |
------------------------------------------
------------------------------------------
| rollout/ 

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.697      |
| time/                   |            |
|    fps                  | 64         |
|    iterations           | 56         |
|    time_elapsed         | 4315       |
|    total_timesteps      | 280000     |
| train/                  |            |
|    approx_kl            | 0.00121024 |
|    clip_fraction        | 0.00836    |
|    clip_range           | 0.1        |
|    entropy_loss         | -31.1      |
|    explained_variance   | 0.144      |
|    learning_rate        | 1e-06      |
|    loss                 | 0.041      |
|    n_updates            | 1100       |
|    policy_gradient_loss | -0.00342   |
|    std                  | 0.055      |
|    value_loss           | 0.0388     |
----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.7         |
| time/                   |             |
|    fps                  | 65          |
|    iterations           | 65          |
|    time_elapsed         | 4979        |
|    total_timesteps      | 325000      |
| train/                  |             |
|    approx_kl            | 0.001414563 |
|    clip_fraction        | 0.0197      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.1       |
|    explained_variance   | 0.321       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0377      |
|    n_updates            | 1280        |
|    policy_gradient_loss | -0.00399    |
|    std                  | 0.055       |
|    value_loss           | 0.0309      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.7        |
| time/                   |            |
|    fps                  | 65         |
|    iterations           | 74         |
|    time_elapsed         | 5641       |
|    total_timesteps      | 370000     |
| train/                  |            |
|    approx_kl            | 0.00154742 |
|    clip_fraction        | 0.0187     |
|    clip_range           | 0.1        |
|    entropy_loss         | -31.1      |
|    explained_variance   | 0.396      |
|    learning_rate        | 1e-06      |
|    loss                 | 0.0345     |
|    n_updates            | 1460       |
|    policy_gradient_loss | -0.00409   |
|    std                  | 0.055      |
|    value_loss           | 0.0278     |
----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.703        |
| time/                   |              |
|    fps                  | 65           |
|    iterations           | 83           |
|    time_elapsed         | 6305         |
|    total_timesteps      | 415000       |
| train/                  |              |
|    approx_kl            | 0.0012035569 |
|    clip_fraction        | 0.00982      |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.491        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0334       |
|    n_updates            | 1640         |
|    policy_gradient_loss | -0.0039      |
|    std                  | 0.055        |
|    value_loss           | 0.0237       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.706        |
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 92           |
|    time_elapsed         | 6968         |
|    total_timesteps      | 460000       |
| train/                  |              |
|    approx_kl            | 0.0013234223 |
|    clip_fraction        | 0.0199       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.565        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0313       |
|    n_updates            | 1820         |
|    policy_gradient_loss | -0.00426     |
|    std                  | 0.055        |
|    value_loss           | 0.0205       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.722        |
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 101          |
|    time_elapsed         | 7632         |
|    total_timesteps      | 505000       |
| train/                  |              |
|    approx_kl            | 0.0013568641 |
|    clip_fraction        | 0.0241       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.598        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0345       |
|    n_updates            | 2000         |
|    policy_gradient_loss | -0.00419     |
|    std                  | 0.055        |
|    value_loss           | 0.0192       |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=550000, episode_reward=0.79 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.788        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.72         |
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 110          |
|    time_elapsed         | 8297         |
|    total_timesteps      | 550000       |
| train/                  |              |
|    approx_kl            | 0.0015113758 |
|    clip_fraction        | 0.0272       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.665        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0351       |
|    n_updates            | 2180

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.73         |
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 119          |
|    time_elapsed         | 8941         |
|    total_timesteps      | 595000       |
| train/                  |              |
|    approx_kl            | 0.0017384435 |
|    clip_fraction        | 0.035        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.697        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0318       |
|    n_updates            | 2360         |
|    policy_gradient_loss | -0.00441     |
|    std                  | 0.055        |
|    value_loss           | 0.015        |
------------------------------------------
Eval num_timesteps=600000, episode_reward=0.79 +/- 0.0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.74         |
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 128          |
|    time_elapsed         | 9605         |
|    total_timesteps      | 640000       |
| train/                  |              |
|    approx_kl            | 0.0015913086 |
|    clip_fraction        | 0.0293       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.741        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0308       |
|    n_updates            | 2540         |
|    policy_gradient_loss | -0.00412     |
|    std                  | 0.055        |
|    value_loss           | 0.013        |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.739        |
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 137          |
|    time_elapsed         | 10266        |
|    total_timesteps      | 685000       |
| train/                  |              |
|    approx_kl            | 0.0014633392 |
|    clip_fraction        | 0.0265       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.764        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0296       |
|    n_updates            | 2720         |
|    policy_gradient_loss | -0.00384     |
|    std                  | 0.055        |
|    value_loss           | 0.0122       |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.753       |
| time/                   |             |
|    fps                  | 66          |
|    iterations           | 146         |
|    time_elapsed         | 10929       |
|    total_timesteps      | 730000      |
| train/                  |             |
|    approx_kl            | 0.001482404 |
|    clip_fraction        | 0.0277      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.1       |
|    explained_variance   | 0.787       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0323      |
|    n_updates            | 2900        |
|    policy_gradient_loss | -0.00395    |
|    std                  | 0.055       |
|    value_loss           | 0.0111      |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.755        |
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 155          |
|    time_elapsed         | 11593        |
|    total_timesteps      | 775000       |
| train/                  |              |
|    approx_kl            | 0.0013273961 |
|    clip_fraction        | 0.0244       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.807        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0281       |
|    n_updates            | 3080         |
|    policy_gradient_loss | -0.00357     |
|    std                  | 0.055        |
|    value_loss           | 0.0102       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.755        |
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 164          |
|    time_elapsed         | 12258        |
|    total_timesteps      | 820000       |
| train/                  |              |
|    approx_kl            | 0.0015714129 |
|    clip_fraction        | 0.0482       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.821        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 3260         |
|    policy_gradient_loss | -0.00468     |
|    std                  | 0.055        |
|    value_loss           | 0.00961      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.766        |
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 173          |
|    time_elapsed         | 12922        |
|    total_timesteps      | 865000       |
| train/                  |              |
|    approx_kl            | 0.0014730192 |
|    clip_fraction        | 0.0336       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.836        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.026        |
|    n_updates            | 3440         |
|    policy_gradient_loss | -0.00405     |
|    std                  | 0.055        |
|    value_loss           | 0.00889      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.77         |
| time/                   |              |
|    fps                  | 66           |
|    iterations           | 182          |
|    time_elapsed         | 13587        |
|    total_timesteps      | 910000       |
| train/                  |              |
|    approx_kl            | 0.0014286721 |
|    clip_fraction        | 0.0319       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.848        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 3620         |
|    policy_gradient_loss | -0.00336     |
|    std                  | 0.0549       |
|    value_loss           | 0.00834      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.772        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 191          |
|    time_elapsed         | 14251        |
|    total_timesteps      | 955000       |
| train/                  |              |
|    approx_kl            | 0.0013908825 |
|    clip_fraction        | 0.0324       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.859        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.034        |
|    n_updates            | 3800         |
|    policy_gradient_loss | -0.00383     |
|    std                  | 0.0549       |
|    value_loss           | 0.00786      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=1000000, episode_reward=0.80 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.799        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.771        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 200          |
|    time_elapsed         | 14912        |
|    total_timesteps      | 1000000      |
| train/                  |              |
|    approx_kl            | 0.0012141409 |
|    clip_fraction        | 0.0268       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.871        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 3980         |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.779        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 209          |
|    time_elapsed         | 15554        |
|    total_timesteps      | 1045000      |
| train/                  |              |
|    approx_kl            | 0.0013743778 |
|    clip_fraction        | 0.0359       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.883        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0306       |
|    n_updates            | 4160         |
|    policy_gradient_loss | -0.00405     |
|    std                  | 0.0549       |
|    value_loss           | 0.00664      |
------------------------------------------
Eval num_timesteps=1050000, episode_reward=0.80 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.778        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 218          |
|    time_elapsed         | 16215        |
|    total_timesteps      | 1090000      |
| train/                  |              |
|    approx_kl            | 0.0010274341 |
|    clip_fraction        | 0.0131       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.884        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0309       |
|    n_updates            | 4340         |
|    policy_gradient_loss | -0.00256     |
|    std                  | 0.0549       |
|    value_loss           | 0.00661      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.782       |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 227         |
|    time_elapsed         | 16875       |
|    total_timesteps      | 1135000     |
| train/                  |             |
|    approx_kl            | 0.001326747 |
|    clip_fraction        | 0.0308      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.2       |
|    explained_variance   | 0.887       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0293      |
|    n_updates            | 4520        |
|    policy_gradient_loss | -0.00358    |
|    std                  | 0.0549      |
|    value_loss           | 0.00648     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.787        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 236          |
|    time_elapsed         | 17538        |
|    total_timesteps      | 1180000      |
| train/                  |              |
|    approx_kl            | 0.0016656254 |
|    clip_fraction        | 0.039        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.898        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0258       |
|    n_updates            | 4700         |
|    policy_gradient_loss | -0.00404     |
|    std                  | 0.0549       |
|    value_loss           | 0.00588      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.784        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 245          |
|    time_elapsed         | 18196        |
|    total_timesteps      | 1225000      |
| train/                  |              |
|    approx_kl            | 0.0014787967 |
|    clip_fraction        | 0.0387       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.9          |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0268       |
|    n_updates            | 4880         |
|    policy_gradient_loss | -0.00409     |
|    std                  | 0.0549       |
|    value_loss           | 0.00581      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.788        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 254          |
|    time_elapsed         | 18854        |
|    total_timesteps      | 1270000      |
| train/                  |              |
|    approx_kl            | 0.0016332044 |
|    clip_fraction        | 0.0461       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.902        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0301       |
|    n_updates            | 5060         |
|    policy_gradient_loss | -0.00399     |
|    std                  | 0.0549       |
|    value_loss           | 0.00572      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.793        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 263          |
|    time_elapsed         | 19513        |
|    total_timesteps      | 1315000      |
| train/                  |              |
|    approx_kl            | 0.0015353799 |
|    clip_fraction        | 0.0254       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.911        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0304       |
|    n_updates            | 5240         |
|    policy_gradient_loss | -0.0031      |
|    std                  | 0.0549       |
|    value_loss           | 0.00525      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.794        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 272          |
|    time_elapsed         | 20171        |
|    total_timesteps      | 1360000      |
| train/                  |              |
|    approx_kl            | 0.0017583054 |
|    clip_fraction        | 0.05         |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.913        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0258       |
|    n_updates            | 5420         |
|    policy_gradient_loss | -0.00434     |
|    std                  | 0.0548       |
|    value_loss           | 0.00514      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.795        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 281          |
|    time_elapsed         | 20833        |
|    total_timesteps      | 1405000      |
| train/                  |              |
|    approx_kl            | 0.0015721684 |
|    clip_fraction        | 0.0351       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.913        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.033        |
|    n_updates            | 5600         |
|    policy_gradient_loss | -0.00376     |
|    std                  | 0.0548       |
|    value_loss           | 0.00512      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=1450000, episode_reward=0.81 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.805        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.793        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 290          |
|    time_elapsed         | 21495        |
|    total_timesteps      | 1450000      |
| train/                  |              |
|    approx_kl            | 0.0015227257 |
|    clip_fraction        | 0.0316       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.916        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0323       |
|    n_updates            | 5780         |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.796        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 299          |
|    time_elapsed         | 22136        |
|    total_timesteps      | 1495000      |
| train/                  |              |
|    approx_kl            | 0.0015006427 |
|    clip_fraction        | 0.0357       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.918        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0311       |
|    n_updates            | 5960         |
|    policy_gradient_loss | -0.0033      |
|    std                  | 0.0548       |
|    value_loss           | 0.00489      |
------------------------------------------
Eval num_timesteps=1500000, episode_reward=0.81 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.794        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 308          |
|    time_elapsed         | 22796        |
|    total_timesteps      | 1540000      |
| train/                  |              |
|    approx_kl            | 0.0013957446 |
|    clip_fraction        | 0.0289       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.922        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0273       |
|    n_updates            | 6140         |
|    policy_gradient_loss | -0.00335     |
|    std                  | 0.0548       |
|    value_loss           | 0.00466      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.796        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 317          |
|    time_elapsed         | 23454        |
|    total_timesteps      | 1585000      |
| train/                  |              |
|    approx_kl            | 0.0015028995 |
|    clip_fraction        | 0.0337       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.926        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0293       |
|    n_updates            | 6320         |
|    policy_gradient_loss | -0.00337     |
|    std                  | 0.0548       |
|    value_loss           | 0.00443      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.803        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 326          |
|    time_elapsed         | 24113        |
|    total_timesteps      | 1630000      |
| train/                  |              |
|    approx_kl            | 0.0015922948 |
|    clip_fraction        | 0.0371       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.926        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0261       |
|    n_updates            | 6500         |
|    policy_gradient_loss | -0.00352     |
|    std                  | 0.0548       |
|    value_loss           | 0.00444      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.802        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 335          |
|    time_elapsed         | 24772        |
|    total_timesteps      | 1675000      |
| train/                  |              |
|    approx_kl            | 0.0012913556 |
|    clip_fraction        | 0.0226       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.93         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0249       |
|    n_updates            | 6680         |
|    policy_gradient_loss | -0.00295     |
|    std                  | 0.0548       |
|    value_loss           | 0.00423      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.805        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 344          |
|    time_elapsed         | 25432        |
|    total_timesteps      | 1720000      |
| train/                  |              |
|    approx_kl            | 0.0014031277 |
|    clip_fraction        | 0.036        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.933        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0272       |
|    n_updates            | 6860         |
|    policy_gradient_loss | -0.0034      |
|    std                  | 0.0548       |
|    value_loss           | 0.00406      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.804        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 353          |
|    time_elapsed         | 26092        |
|    total_timesteps      | 1765000      |
| train/                  |              |
|    approx_kl            | 0.0015525016 |
|    clip_fraction        | 0.0306       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.936        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0249       |
|    n_updates            | 7040         |
|    policy_gradient_loss | -0.00326     |
|    std                  | 0.0548       |
|    value_loss           | 0.00387      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.809        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 362          |
|    time_elapsed         | 26751        |
|    total_timesteps      | 1810000      |
| train/                  |              |
|    approx_kl            | 0.0011894618 |
|    clip_fraction        | 0.0221       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.936        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0291       |
|    n_updates            | 7220         |
|    policy_gradient_loss | -0.00246     |
|    std                  | 0.0548       |
|    value_loss           | 0.00388      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.808        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 371          |
|    time_elapsed         | 27412        |
|    total_timesteps      | 1855000      |
| train/                  |              |
|    approx_kl            | 0.0015395621 |
|    clip_fraction        | 0.0436       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.937        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.025        |
|    n_updates            | 7400         |
|    policy_gradient_loss | -0.00373     |
|    std                  | 0.0548       |
|    value_loss           | 0.00383      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=1900000, episode_reward=0.82 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.817        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.807        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 380          |
|    time_elapsed         | 28073        |
|    total_timesteps      | 1900000      |
| train/                  |              |
|    approx_kl            | 0.0013601233 |
|    clip_fraction        | 0.0355       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.936        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0284       |
|    n_updates            | 758

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.809        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 389          |
|    time_elapsed         | 28715        |
|    total_timesteps      | 1945000      |
| train/                  |              |
|    approx_kl            | 0.0014370783 |
|    clip_fraction        | 0.0375       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.941        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.026        |
|    n_updates            | 7760         |
|    policy_gradient_loss | -0.00392     |
|    std                  | 0.0547       |
|    value_loss           | 0.00359      |
------------------------------------------
Eval num_timesteps=1950000, episode_reward=0.82 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.808        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 398          |
|    time_elapsed         | 29379        |
|    total_timesteps      | 1990000      |
| train/                  |              |
|    approx_kl            | 0.0016062723 |
|    clip_fraction        | 0.0344       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.943        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0252       |
|    n_updates            | 7940         |
|    policy_gradient_loss | -0.00356     |
|    std                  | 0.0547       |
|    value_loss           | 0.0035       |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.81        |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 407         |
|    time_elapsed         | 30041       |
|    total_timesteps      | 2035000     |
| train/                  |             |
|    approx_kl            | 0.001574795 |
|    clip_fraction        | 0.0489      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.2       |
|    explained_variance   | 0.945       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0273      |
|    n_updates            | 8120        |
|    policy_gradient_loss | -0.00427    |
|    std                  | 0.0547      |
|    value_loss           | 0.00335     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.814        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 416          |
|    time_elapsed         | 30703        |
|    total_timesteps      | 2080000      |
| train/                  |              |
|    approx_kl            | 0.0015093663 |
|    clip_fraction        | 0.0344       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.947        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0322       |
|    n_updates            | 8300         |
|    policy_gradient_loss | -0.00372     |
|    std                  | 0.0547       |
|    value_loss           | 0.00325      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.815        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 425          |
|    time_elapsed         | 31367        |
|    total_timesteps      | 2125000      |
| train/                  |              |
|    approx_kl            | 0.0016931582 |
|    clip_fraction        | 0.0397       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.948        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0235       |
|    n_updates            | 8480         |
|    policy_gradient_loss | -0.00384     |
|    std                  | 0.0547       |
|    value_loss           | 0.00323      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.817        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 434          |
|    time_elapsed         | 32030        |
|    total_timesteps      | 2170000      |
| train/                  |              |
|    approx_kl            | 0.0017058376 |
|    clip_fraction        | 0.059        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.95         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0277       |
|    n_updates            | 8660         |
|    policy_gradient_loss | -0.0044      |
|    std                  | 0.0547       |
|    value_loss           | 0.00308      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.818        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 443          |
|    time_elapsed         | 32694        |
|    total_timesteps      | 2215000      |
| train/                  |              |
|    approx_kl            | 0.0014014794 |
|    clip_fraction        | 0.0362       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.949        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0284       |
|    n_updates            | 8840         |
|    policy_gradient_loss | -0.00363     |
|    std                  | 0.0547       |
|    value_loss           | 0.00316      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.82         |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 452          |
|    time_elapsed         | 33357        |
|    total_timesteps      | 2260000      |
| train/                  |              |
|    approx_kl            | 0.0012275126 |
|    clip_fraction        | 0.0212       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.952        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0261       |
|    n_updates            | 9020         |
|    policy_gradient_loss | -0.00309     |
|    std                  | 0.0547       |
|    value_loss           | 0.00301      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.821        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 461          |
|    time_elapsed         | 34021        |
|    total_timesteps      | 2305000      |
| train/                  |              |
|    approx_kl            | 0.0013877181 |
|    clip_fraction        | 0.026        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.953        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0267       |
|    n_updates            | 9200         |
|    policy_gradient_loss | -0.00303     |
|    std                  | 0.0547       |
|    value_loss           | 0.00294      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=2350000, episode_reward=0.83 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.834        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.82         |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 470          |
|    time_elapsed         | 34683        |
|    total_timesteps      | 2350000      |
| train/                  |              |
|    approx_kl            | 0.0016960064 |
|    clip_fraction        | 0.039        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.952        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0278       |
|    n_updates            | 938

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.822        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 479          |
|    time_elapsed         | 35327        |
|    total_timesteps      | 2395000      |
| train/                  |              |
|    approx_kl            | 0.0012852203 |
|    clip_fraction        | 0.0324       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.954        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0255       |
|    n_updates            | 9560         |
|    policy_gradient_loss | -0.00345     |
|    std                  | 0.0547       |
|    value_loss           | 0.00289      |
------------------------------------------
Eval num_timesteps=2400000, episode_reward=0.83 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.821        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 488          |
|    time_elapsed         | 35989        |
|    total_timesteps      | 2440000      |
| train/                  |              |
|    approx_kl            | 0.0015340155 |
|    clip_fraction        | 0.0422       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.954        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0248       |
|    n_updates            | 9740         |
|    policy_gradient_loss | -0.00377     |
|    std                  | 0.0547       |
|    value_loss           | 0.00291      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.821        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 497          |
|    time_elapsed         | 36654        |
|    total_timesteps      | 2485000      |
| train/                  |              |
|    approx_kl            | 0.0016552578 |
|    clip_fraction        | 0.0425       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.955        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0249       |
|    n_updates            | 9920         |
|    policy_gradient_loss | -0.00396     |
|    std                  | 0.0546       |
|    value_loss           | 0.00283      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.824        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 506          |
|    time_elapsed         | 37317        |
|    total_timesteps      | 2530000      |
| train/                  |              |
|    approx_kl            | 0.0014277957 |
|    clip_fraction        | 0.044        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.955        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0282       |
|    n_updates            | 10100        |
|    policy_gradient_loss | -0.00372     |
|    std                  | 0.0546       |
|    value_loss           | 0.00287      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.827        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 515          |
|    time_elapsed         | 37980        |
|    total_timesteps      | 2575000      |
| train/                  |              |
|    approx_kl            | 0.0015881058 |
|    clip_fraction        | 0.0385       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.957        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0274       |
|    n_updates            | 10280        |
|    policy_gradient_loss | -0.00361     |
|    std                  | 0.0546       |
|    value_loss           | 0.0027       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.828        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 524          |
|    time_elapsed         | 38642        |
|    total_timesteps      | 2620000      |
| train/                  |              |
|    approx_kl            | 0.0012745841 |
|    clip_fraction        | 0.0325       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.956        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 10460        |
|    policy_gradient_loss | -0.00322     |
|    std                  | 0.0546       |
|    value_loss           | 0.00279      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.831        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 533          |
|    time_elapsed         | 39304        |
|    total_timesteps      | 2665000      |
| train/                  |              |
|    approx_kl            | 0.0018366419 |
|    clip_fraction        | 0.0553       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.958        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0271       |
|    n_updates            | 10640        |
|    policy_gradient_loss | -0.00422     |
|    std                  | 0.0546       |
|    value_loss           | 0.00266      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.83        |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 542         |
|    time_elapsed         | 39966       |
|    total_timesteps      | 2710000     |
| train/                  |             |
|    approx_kl            | 0.001737878 |
|    clip_fraction        | 0.0487      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.957       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0212      |
|    n_updates            | 10820       |
|    policy_gradient_loss | -0.00408    |
|    std                  | 0.0546      |
|    value_loss           | 0.00273     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.831        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 551          |
|    time_elapsed         | 40629        |
|    total_timesteps      | 2755000      |
| train/                  |              |
|    approx_kl            | 0.0017019728 |
|    clip_fraction        | 0.0458       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.958        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0327       |
|    n_updates            | 11000        |
|    policy_gradient_loss | -0.00375     |
|    std                  | 0.0546       |
|    value_loss           | 0.00265      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=2800000, episode_reward=0.84 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.843        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.828        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 560          |
|    time_elapsed         | 41292        |
|    total_timesteps      | 2800000      |
| train/                  |              |
|    approx_kl            | 0.0013904788 |
|    clip_fraction        | 0.0366       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.961        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0248       |
|    n_updates            | 111

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.832        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 569          |
|    time_elapsed         | 41937        |
|    total_timesteps      | 2845000      |
| train/                  |              |
|    approx_kl            | 0.0013019366 |
|    clip_fraction        | 0.0323       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.961        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 11360        |
|    policy_gradient_loss | -0.00362     |
|    std                  | 0.0546       |
|    value_loss           | 0.00251      |
------------------------------------------
Eval num_timesteps=2850000, episode_reward=0.84 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.831        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 578          |
|    time_elapsed         | 42601        |
|    total_timesteps      | 2890000      |
| train/                  |              |
|    approx_kl            | 0.0014469854 |
|    clip_fraction        | 0.037        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.961        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0267       |
|    n_updates            | 11540        |
|    policy_gradient_loss | -0.00363     |
|    std                  | 0.0546       |
|    value_loss           | 0.0025       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.834        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 587          |
|    time_elapsed         | 43268        |
|    total_timesteps      | 2935000      |
| train/                  |              |
|    approx_kl            | 0.0015956595 |
|    clip_fraction        | 0.0446       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.962        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0311       |
|    n_updates            | 11720        |
|    policy_gradient_loss | -0.00415     |
|    std                  | 0.0546       |
|    value_loss           | 0.00246      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.836       |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 596         |
|    time_elapsed         | 43933       |
|    total_timesteps      | 2980000     |
| train/                  |             |
|    approx_kl            | 0.001614998 |
|    clip_fraction        | 0.0387      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.963       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0289      |
|    n_updates            | 11900       |
|    policy_gradient_loss | -0.00351    |
|    std                  | 0.0546      |
|    value_loss           | 0.0024      |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.834        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 605          |
|    time_elapsed         | 44597        |
|    total_timesteps      | 3025000      |
| train/                  |              |
|    approx_kl            | 0.0016122039 |
|    clip_fraction        | 0.0421       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.965        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0271       |
|    n_updates            | 12080        |
|    policy_gradient_loss | -0.00383     |
|    std                  | 0.0546       |
|    value_loss           | 0.00228      |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.836       |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 614         |
|    time_elapsed         | 45258       |
|    total_timesteps      | 3070000     |
| train/                  |             |
|    approx_kl            | 0.001396445 |
|    clip_fraction        | 0.0382      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.964       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0265      |
|    n_updates            | 12260       |
|    policy_gradient_loss | -0.00346    |
|    std                  | 0.0545      |
|    value_loss           | 0.00232     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.835        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 623          |
|    time_elapsed         | 45917        |
|    total_timesteps      | 3115000      |
| train/                  |              |
|    approx_kl            | 0.0015497714 |
|    clip_fraction        | 0.0292       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.965        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0267       |
|    n_updates            | 12440        |
|    policy_gradient_loss | -0.00326     |
|    std                  | 0.0545       |
|    value_loss           | 0.00227      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.837        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 632          |
|    time_elapsed         | 46573        |
|    total_timesteps      | 3160000      |
| train/                  |              |
|    approx_kl            | 0.0012766588 |
|    clip_fraction        | 0.0257       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0294       |
|    n_updates            | 12620        |
|    policy_gradient_loss | -0.00283     |
|    std                  | 0.0545       |
|    value_loss           | 0.00209      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.839        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 641          |
|    time_elapsed         | 47230        |
|    total_timesteps      | 3205000      |
| train/                  |              |
|    approx_kl            | 0.0013546847 |
|    clip_fraction        | 0.0425       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.967        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0277       |
|    n_updates            | 12800        |
|    policy_gradient_loss | -0.00368     |
|    std                  | 0.0545       |
|    value_loss           | 0.00213      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=3250000, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.846        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.837        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 650          |
|    time_elapsed         | 47885        |
|    total_timesteps      | 3250000      |
| train/                  |              |
|    approx_kl            | 0.0014887048 |
|    clip_fraction        | 0.0438       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.969        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0285       |
|    n_updates            | 129

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.839        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 659          |
|    time_elapsed         | 48520        |
|    total_timesteps      | 3295000      |
| train/                  |              |
|    approx_kl            | 0.0011248881 |
|    clip_fraction        | 0.0179       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.969        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0276       |
|    n_updates            | 13160        |
|    policy_gradient_loss | -0.00256     |
|    std                  | 0.0545       |
|    value_loss           | 0.00204      |
------------------------------------------
Eval num_timesteps=3300000, episode_reward=0.85 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.838        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 668          |
|    time_elapsed         | 49177        |
|    total_timesteps      | 3340000      |
| train/                  |              |
|    approx_kl            | 0.0015302441 |
|    clip_fraction        | 0.0376       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0244       |
|    n_updates            | 13340        |
|    policy_gradient_loss | -0.00357     |
|    std                  | 0.0545       |
|    value_loss           | 0.00207      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.839        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 677          |
|    time_elapsed         | 49834        |
|    total_timesteps      | 3385000      |
| train/                  |              |
|    approx_kl            | 0.0015441413 |
|    clip_fraction        | 0.0293       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0272       |
|    n_updates            | 13520        |
|    policy_gradient_loss | -0.00325     |
|    std                  | 0.0545       |
|    value_loss           | 0.00212      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.84         |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 686          |
|    time_elapsed         | 50490        |
|    total_timesteps      | 3430000      |
| train/                  |              |
|    approx_kl            | 0.0015983217 |
|    clip_fraction        | 0.0398       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0273       |
|    n_updates            | 13700        |
|    policy_gradient_loss | -0.00354     |
|    std                  | 0.0545       |
|    value_loss           | 0.00209      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.842        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 695          |
|    time_elapsed         | 51147        |
|    total_timesteps      | 3475000      |
| train/                  |              |
|    approx_kl            | 0.0010949832 |
|    clip_fraction        | 0.025        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.97         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0319       |
|    n_updates            | 13880        |
|    policy_gradient_loss | -0.00268     |
|    std                  | 0.0545       |
|    value_loss           | 0.00196      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.84        |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 704         |
|    time_elapsed         | 51804       |
|    total_timesteps      | 3520000     |
| train/                  |             |
|    approx_kl            | 0.001750167 |
|    clip_fraction        | 0.0406      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.972       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0313      |
|    n_updates            | 14060       |
|    policy_gradient_loss | -0.00317    |
|    std                  | 0.0545      |
|    value_loss           | 0.00184     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.84         |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 713          |
|    time_elapsed         | 52459        |
|    total_timesteps      | 3565000      |
| train/                  |              |
|    approx_kl            | 0.0014777293 |
|    clip_fraction        | 0.0416       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.97         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0286       |
|    n_updates            | 14240        |
|    policy_gradient_loss | -0.00358     |
|    std                  | 0.0545       |
|    value_loss           | 0.00199      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.84         |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 722          |
|    time_elapsed         | 53116        |
|    total_timesteps      | 3610000      |
| train/                  |              |
|    approx_kl            | 0.0014039314 |
|    clip_fraction        | 0.0299       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.97         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0259       |
|    n_updates            | 14420        |
|    policy_gradient_loss | -0.00283     |
|    std                  | 0.0545       |
|    value_loss           | 0.00194      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.842        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 731          |
|    time_elapsed         | 53772        |
|    total_timesteps      | 3655000      |
| train/                  |              |
|    approx_kl            | 0.0016750663 |
|    clip_fraction        | 0.0478       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.971        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.027        |
|    n_updates            | 14600        |
|    policy_gradient_loss | -0.00415     |
|    std                  | 0.0544       |
|    value_loss           | 0.00189      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=3700000, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.85         |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.843        |
| time/                   |              |
|    fps                  | 67           |
|    iterations           | 740          |
|    time_elapsed         | 54429        |
|    total_timesteps      | 3700000      |
| train/                  |              |
|    approx_kl            | 0.0014862424 |
|    clip_fraction        | 0.0369       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.973        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.025        |
|    n_updates            | 147

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.843        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 749          |
|    time_elapsed         | 55066        |
|    total_timesteps      | 3745000      |
| train/                  |              |
|    approx_kl            | 0.0015693343 |
|    clip_fraction        | 0.0518       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.973        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0243       |
|    n_updates            | 14960        |
|    policy_gradient_loss | -0.00416     |
|    std                  | 0.0544       |
|    value_loss           | 0.00178      |
------------------------------------------
Eval num_timesteps=3750000, episode_reward=0.85 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 758          |
|    time_elapsed         | 55724        |
|    total_timesteps      | 3790000      |
| train/                  |              |
|    approx_kl            | 0.0013902729 |
|    clip_fraction        | 0.0312       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.973        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0264       |
|    n_updates            | 15140        |
|    policy_gradient_loss | -0.00352     |
|    std                  | 0.0544       |
|    value_loss           | 0.00181      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 767          |
|    time_elapsed         | 56379        |
|    total_timesteps      | 3835000      |
| train/                  |              |
|    approx_kl            | 0.0014958249 |
|    clip_fraction        | 0.0509       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.974        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.031        |
|    n_updates            | 15320        |
|    policy_gradient_loss | -0.00441     |
|    std                  | 0.0544       |
|    value_loss           | 0.00175      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 776          |
|    time_elapsed         | 57036        |
|    total_timesteps      | 3880000      |
| train/                  |              |
|    approx_kl            | 0.0015072465 |
|    clip_fraction        | 0.0334       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.973        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0268       |
|    n_updates            | 15500        |
|    policy_gradient_loss | -0.00329     |
|    std                  | 0.0544       |
|    value_loss           | 0.00176      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 785          |
|    time_elapsed         | 57694        |
|    total_timesteps      | 3925000      |
| train/                  |              |
|    approx_kl            | 0.0013515549 |
|    clip_fraction        | 0.0345       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.974        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0286       |
|    n_updates            | 15680        |
|    policy_gradient_loss | -0.00292     |
|    std                  | 0.0544       |
|    value_loss           | 0.00175      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.846        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 794          |
|    time_elapsed         | 58351        |
|    total_timesteps      | 3970000      |
| train/                  |              |
|    approx_kl            | 0.0014208092 |
|    clip_fraction        | 0.0475       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.973        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0266       |
|    n_updates            | 15860        |
|    policy_gradient_loss | -0.00406     |
|    std                  | 0.0544       |
|    value_loss           | 0.00176      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.842        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 803          |
|    time_elapsed         | 59009        |
|    total_timesteps      | 4015000      |
| train/                  |              |
|    approx_kl            | 0.0015317335 |
|    clip_fraction        | 0.0389       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.974        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0235       |
|    n_updates            | 16040        |
|    policy_gradient_loss | -0.0033      |
|    std                  | 0.0544       |
|    value_loss           | 0.00174      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 812          |
|    time_elapsed         | 59666        |
|    total_timesteps      | 4060000      |
| train/                  |              |
|    approx_kl            | 0.0013803331 |
|    clip_fraction        | 0.0346       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.975        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0301       |
|    n_updates            | 16220        |
|    policy_gradient_loss | -0.00327     |
|    std                  | 0.0544       |
|    value_loss           | 0.00168      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 821          |
|    time_elapsed         | 60324        |
|    total_timesteps      | 4105000      |
| train/                  |              |
|    approx_kl            | 0.0016107017 |
|    clip_fraction        | 0.0529       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.975        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0204       |
|    n_updates            | 16400        |
|    policy_gradient_loss | -0.00424     |
|    std                  | 0.0544       |
|    value_loss           | 0.00163      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=4150000, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.853        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 830          |
|    time_elapsed         | 60980        |
|    total_timesteps      | 4150000      |
| train/                  |              |
|    approx_kl            | 0.0013594363 |
|    clip_fraction        | 0.0259       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.975        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0292       |
|    n_updates            | 16580        |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.847        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 839          |
|    time_elapsed         | 61617        |
|    total_timesteps      | 4195000      |
| train/                  |              |
|    approx_kl            | 0.0012860068 |
|    clip_fraction        | 0.0288       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.976        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0311       |
|    n_updates            | 16760        |
|    policy_gradient_loss | -0.00312     |
|    std                  | 0.0544       |
|    value_loss           | 0.00159      |
------------------------------------------
Eval num_timesteps=4200000, episode_reward=0.85 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 848          |
|    time_elapsed         | 62276        |
|    total_timesteps      | 4240000      |
| train/                  |              |
|    approx_kl            | 0.0016437289 |
|    clip_fraction        | 0.0431       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.976        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0293       |
|    n_updates            | 16940        |
|    policy_gradient_loss | -0.00358     |
|    std                  | 0.0544       |
|    value_loss           | 0.00161      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 857          |
|    time_elapsed         | 62935        |
|    total_timesteps      | 4285000      |
| train/                  |              |
|    approx_kl            | 0.0014802166 |
|    clip_fraction        | 0.0382       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.976        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.023        |
|    n_updates            | 17120        |
|    policy_gradient_loss | -0.00325     |
|    std                  | 0.0544       |
|    value_loss           | 0.00157      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.847        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 866          |
|    time_elapsed         | 63599        |
|    total_timesteps      | 4330000      |
| train/                  |              |
|    approx_kl            | 0.0013553415 |
|    clip_fraction        | 0.0329       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.977        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 17300        |
|    policy_gradient_loss | -0.00311     |
|    std                  | 0.0544       |
|    value_loss           | 0.00152      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.846        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 875          |
|    time_elapsed         | 64264        |
|    total_timesteps      | 4375000      |
| train/                  |              |
|    approx_kl            | 0.0013849032 |
|    clip_fraction        | 0.03         |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.977        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.03         |
|    n_updates            | 17480        |
|    policy_gradient_loss | -0.00286     |
|    std                  | 0.0544       |
|    value_loss           | 0.00154      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.847        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 884          |
|    time_elapsed         | 64928        |
|    total_timesteps      | 4420000      |
| train/                  |              |
|    approx_kl            | 0.0014875763 |
|    clip_fraction        | 0.0468       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.977        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0259       |
|    n_updates            | 17660        |
|    policy_gradient_loss | -0.00385     |
|    std                  | 0.0543       |
|    value_loss           | 0.00154      |
------------------------------------------
----------------------------------------
| rollout/   

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.849        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 893          |
|    time_elapsed         | 65594        |
|    total_timesteps      | 4465000      |
| train/                  |              |
|    approx_kl            | 0.0017251879 |
|    clip_fraction        | 0.0524       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.977        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0269       |
|    n_updates            | 17840        |
|    policy_gradient_loss | -0.00372     |
|    std                  | 0.0543       |
|    value_loss           | 0.00155      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 902          |
|    time_elapsed         | 66262        |
|    total_timesteps      | 4510000      |
| train/                  |              |
|    approx_kl            | 0.0012831014 |
|    clip_fraction        | 0.0355       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.976        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 18020        |
|    policy_gradient_loss | -0.00356     |
|    std                  | 0.0543       |
|    value_loss           | 0.0016       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.85         |
| time/                   |              |
|    fps                  | 68           |
|    iterations           | 911          |
|    time_elapsed         | 66928        |
|    total_timesteps      | 4555000      |
| train/                  |              |
|    approx_kl            | 0.0016817048 |
|    clip_fraction        | 0.0393       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.978        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0241       |
|    n_updates            | 18200        |
|    policy_gradient_loss | -0.00357     |
|    std                  | 0.0543       |
|    value_loss           | 0.00146      |
------------------------------------------
------------------------------------------
| rollout/ 