In [1]:
# Make project-local modules importable by putting the repository root on sys.path.
# NOTE(review): this is a machine-specific absolute path; anyone running the
# notebook elsewhere must point PROJECT_ROOT at their own checkout.
import sys

PROJECT_ROOT = '/data/ad181/RemoteDir/multilevel_ppo'

# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path (the cell stays idempotent across re-executions).
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [2]:
%matplotlib notebook
import numpy as np
import time
import pickle
import os
import matplotlib.pyplot as plt
from copy import copy, deepcopy
from tqdm.notebook import trange, tqdm

import gym
from stable_baselines3.ppo import PPO, MlpPolicy
from stable_baselines3.ppo_multi_level import PPO_ML
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env.subproc_vec_multi_level_env import SubprocVecMultiLevelEnv
from stable_baselines3.common.envs.multi_level_ressim_env import MultiLevelRessimEnv
from stable_baselines3.common.logger import configure

from utils.custom_eval_callback import CustomEvalCallback, CustomEvalCallbackParallel
from utils.plot_functions import plot_learning
from utils.env_evaluate_functions import eval_actions

In [3]:
# Run configuration: default RNG seed, experiment label, and output locations.
seed = 1
case = 'ppo_2l'
data_dir = './data'
# Per-experiment log directory lives directly under the data directory.
log_dir = f'{data_dir}/{case}'

In [4]:
# Create the output directories up front; existing directories are left as they are.
for directory in (data_dir, log_dir):
    os.makedirs(directory, exist_ok=True)

In [5]:
# Load the pickled dictionary of training environments.
# The handle is deliberately NOT called `input`: binding that name at notebook
# scope would shadow the builtin input() for every later cell.
# NOTE(review): pickle.load executes arbitrary code embedded in the file, so this
# must only ever be pointed at a trusted, locally generated pickle.
with open('../envs_params/env_data_v1/env_train_dict.pkl', 'rb') as env_file:
    env_ck_dict = pickle.load(env_file)

In [6]:
# Pick the `n_levels` highest-numbered entries of env_ck_dict and re-key them
# 1..n_levels in env_dict_ (the highest source key equals len(env_ck_dict)).
# Each mapping "new key -> source key" is printed for reference.
n_levels = 2
fine_level = len(env_ck_dict)
first_level = fine_level - n_levels + 1
env_dict_ = {}
for new_key, src_key in enumerate(range(first_level, fine_level + 1), start=1):
    print(new_key, '->', src_key)
    env_dict_[new_key] = env_ck_dict[src_key]

1 -> 3
2 -> 4


In [7]:
# Train one PPO_ML model per seed and store its logs, best model and final
# weights under ./data/<case>/seed_<seed>.
#
# Hyperparameters are identical for every seed, so they are defined once here
# instead of being rebuilt on each loop iteration.
T = {1:140, 2:15} # n_steps, keyed by level
N = 50 # number of actors (parallel envs per level)
M = {1:700, 2:75} # minibatch size, keyed by level
I = 1200 # number of iterations
K = 20 # number of epochs

# Evaluate every `log_interval` iterations. Integer division keeps `eval_freq`
# an int (the original `I/120` produced a float); the value is unchanged for I=1200.
log_interval = I // 120

# Key of the last entry in env_dict_ (keys run 1..len); this env is used for evaluation.
fine_level = len(env_dict_)

# NOTE: `seed` and `log_dir` intentionally rebind the notebook-level names of the
# same name, exactly as the original cell did.
for seed in range(2,4):
    print(f'seed {seed}')
    log_dir = './data/'+case+'/seed_'+str(seed)
    os.makedirs(log_dir, exist_ok=True)

    print('generate callback ...')
    # eval_freq = log_interval * (total n_steps over all levels).
    # NOTE(review): presumably counted in callback calls, i.e. one evaluation
    # every `log_interval` training iterations - confirm against CustomEvalCallback.
    eval_callback = CustomEvalCallback( env_dict_[fine_level],
                                        best_model_save_path=str(log_dir)+'/best_model',
                                        n_eval_episodes=1,
                                        log_path=str(log_dir)+'/results_eval',
                                        eval_freq=log_interval*sum(T.values()) )

    print('vectorize environment ...')

    # generate PPO_ML parameters for MLMC analysis.
    env_dict = {}
    n_steps_dict = {}
    batch_size_dict = {}
    try:
        for level, env in env_dict_.items():
            print(f"vectorize env level {level}")
            # N subprocess-backed copies of the env for this level, rebuilt from
            # the template env's ressim_params and level.
            env_dict[level] = make_vec_env( MultiLevelRessimEnv,
                                    n_envs=N,
                                    seed=seed,
                                    env_kwargs= {"ressim_params":env.ressim_params, "level":env.level},
                                    vec_env_cls=SubprocVecMultiLevelEnv )
            n_steps_dict[level] = T[level]
            batch_size_dict[level] = M[level]

        # Index with fine_level explicitly rather than relying on the loop
        # variable `level` leaking out of the loop above (same key either way).
        print(env_dict_[fine_level].observation_space)
        print('model definition ..')
        model = PPO_ML(policy=MlpPolicy,
                           env=env_dict,
                           learning_rate = 1e-6,
                           n_steps = n_steps_dict,
                           batch_size = batch_size_dict,
                           n_epochs = K,
                           clip_range = 0.1,
                           ent_coef = 0.001,
                           vf_coef = 0.5,
                           policy_kwargs = dict(net_arch=[70,70,50], log_std_init=-2.9),
                           verbose = 1,
                           seed = seed,
                           target_kl = 0.05,
                           device = "auto")
        # set logger for the model
        new_logger = configure(log_dir)
        model.set_logger(new_logger)
        print('policy learning ..')
        # Total budget: N actors * summed n_steps per iteration * I iterations.
        model.learn(total_timesteps=N*sum(T.values())*I, callback=eval_callback)
        model.save(log_dir+'/PPO', exclude=['env_dict'])
        del model
    finally:
        # Always shut down the vectorized envs, even if construction, training
        # or saving raised; otherwise their worker processes are left running.
        for vec_env in env_dict.values():
            vec_env.close()


seed 2
generate callback ...
vectorize environment ...
vectorize env level 1
vectorize env level 2
Box(-1.0, 1.0, (35,), float64)
model definition ..
Using cuda device
Logging to ./data/ppo_2l/seed_2
policy learning ..




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5        |
|    ep_rew_mean     | 0.687    |
| time/              |          |
|    fps             | 121      |
|    iterations      | 1        |
|    time_elapsed    | 63       |
|    total_timesteps | 7750     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.674        |
| time/                   |              |
|    fps                  | 135          |
|    iterations           | 2            |
|    time_elapsed         | 114          |
|    total_timesteps      | 15500        |
| train/                  |              |
|    approx_kl            | 0.0011907865 |
|    clip_fraction        | 0.0113       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -3.23        |
|    learning_r

  for j in range(len(p_1)-1):


Eval num_timesteps=77500, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.692        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.686        |
| time/                   |              |
|    fps                  | 146          |
|    iterations           | 10           |
|    time_elapsed         | 530          |
|    total_timesteps      | 77500        |
| train/                  |              |
|    approx_kl            | 0.0012481255 |
|    clip_fraction        | 0.0147       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -1.82        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.098        |
|    n_updates            | 180  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.689        |
| time/                   |              |
|    fps                  | 152          |
|    iterations           | 19           |
|    time_elapsed         | 968          |
|    total_timesteps      | 147250       |
| train/                  |              |
|    approx_kl            | 0.0013719711 |
|    clip_fraction        | 0.0135       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -1.05        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0765       |
|    n_updates            | 360          |
|    policy_gradient_loss | -0.00115     |
|    std                  | 0.055        |
|    value_loss           | 0.104        |
------------------------------------------
Eval num_timesteps=155000, episode_reward=0.71 +/- 0.0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.682        |
| time/                   |              |
|    fps                  | 152          |
|    iterations           | 28           |
|    time_elapsed         | 1418         |
|    total_timesteps      | 217000       |
| train/                  |              |
|    approx_kl            | 0.0010494066 |
|    clip_fraction        | 0.0101       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -0.652       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0595       |
|    n_updates            | 540          |
|    policy_gradient_loss | -0.00196     |
|    std                  | 0.055        |
|    value_loss           | 0.0751       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.689        |
| time/                   |              |
|    fps                  | 153          |
|    iterations           | 37           |
|    time_elapsed         | 1871         |
|    total_timesteps      | 286750       |
| train/                  |              |
|    approx_kl            | 0.0011334454 |
|    clip_fraction        | 0.0131       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -0.294       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0516       |
|    n_updates            | 720          |
|    policy_gradient_loss | -0.00212     |
|    std                  | 0.055        |
|    value_loss           | 0.0579       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.694        |
| time/                   |              |
|    fps                  | 153          |
|    iterations           | 46           |
|    time_elapsed         | 2321         |
|    total_timesteps      | 356500       |
| train/                  |              |
|    approx_kl            | 0.0011652018 |
|    clip_fraction        | 0.0149       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -0.0708      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0533       |
|    n_updates            | 900          |
|    policy_gradient_loss | -0.00187     |
|    std                  | 0.055        |
|    value_loss           | 0.0492       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.695        |
| time/                   |              |
|    fps                  | 153          |
|    iterations           | 55           |
|    time_elapsed         | 2770         |
|    total_timesteps      | 426250       |
| train/                  |              |
|    approx_kl            | 0.0013951347 |
|    clip_fraction        | 0.0175       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.129        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0407       |
|    n_updates            | 1080         |
|    policy_gradient_loss | -0.00172     |
|    std                  | 0.055        |
|    value_loss           | 0.0397       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.7          |
| time/                   |              |
|    fps                  | 154          |
|    iterations           | 64           |
|    time_elapsed         | 3216         |
|    total_timesteps      | 496000       |
| train/                  |              |
|    approx_kl            | 0.0012682877 |
|    clip_fraction        | 0.0143       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.278        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0402       |
|    n_updates            | 1260         |
|    policy_gradient_loss | -0.00196     |
|    std                  | 0.055        |
|    value_loss           | 0.0332       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.702        |
| time/                   |              |
|    fps                  | 154          |
|    iterations           | 73           |
|    time_elapsed         | 3661         |
|    total_timesteps      | 565750       |
| train/                  |              |
|    approx_kl            | 0.0015626797 |
|    clip_fraction        | 0.0212       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.38         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0349       |
|    n_updates            | 1440         |
|    policy_gradient_loss | -0.00176     |
|    std                  | 0.055        |
|    value_loss           | 0.0296       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.695        |
| time/                   |              |
|    fps                  | 154          |
|    iterations           | 82           |
|    time_elapsed         | 4104         |
|    total_timesteps      | 635500       |
| train/                  |              |
|    approx_kl            | 0.0014465288 |
|    clip_fraction        | 0.0258       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.472        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0332       |
|    n_updates            | 1620         |
|    policy_gradient_loss | -0.00146     |
|    std                  | 0.055        |
|    value_loss           | 0.0246       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.711        |
| time/                   |              |
|    fps                  | 155          |
|    iterations           | 91           |
|    time_elapsed         | 4549         |
|    total_timesteps      | 705250       |
| train/                  |              |
|    approx_kl            | 0.0018123412 |
|    clip_fraction        | 0.0297       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.536        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.033        |
|    n_updates            | 1800         |
|    policy_gradient_loss | -0.00123     |
|    std                  | 0.055        |
|    value_loss           | 0.0218       |
------------------------------------------
-------------------------------------------
| rollout/

Eval num_timesteps=775000, episode_reward=0.75 +/- 0.00
Episode length: 5.00 +/- 0.00
----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 5          |
|    mean_reward          | 0.755      |
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.711      |
| time/                   |            |
|    fps                  | 155        |
|    iterations           | 100        |
|    time_elapsed         | 4993       |
|    total_timesteps      | 775000     |
| train/                  |            |
|    approx_kl            | 0.00171436 |
|    clip_fraction        | 0.0325     |
|    clip_range           | 0.1        |
|    entropy_loss         | -31.1      |
|    explained_variance   | 0.594      |
|    learning_rate        | 1e-06      |
|    loss                 | 0.0355     |
|    n_updates            | 1980       |
|    policy_gradient_loss | -0.00134   |
|    std    

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.712        |
| time/                   |              |
|    fps                  | 155          |
|    iterations           | 109          |
|    time_elapsed         | 5418         |
|    total_timesteps      | 844750       |
| train/                  |              |
|    approx_kl            | 0.0014921662 |
|    clip_fraction        | 0.0272       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.628        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0319       |
|    n_updates            | 2160         |
|    policy_gradient_loss | 6.61e-05     |
|    std                  | 0.055        |
|    value_loss           | 0.0175       |
------------------------------------------
Eval num_timesteps=852500, episode_reward=0.75 +/- 0.0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.722        |
| time/                   |              |
|    fps                  | 155          |
|    iterations           | 118          |
|    time_elapsed         | 5863         |
|    total_timesteps      | 914500       |
| train/                  |              |
|    approx_kl            | 0.0017409634 |
|    clip_fraction        | 0.0311       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.663        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0346       |
|    n_updates            | 2340         |
|    policy_gradient_loss | -0.00134     |
|    std                  | 0.055        |
|    value_loss           | 0.0174       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.728        |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 127          |
|    time_elapsed         | 6307         |
|    total_timesteps      | 984250       |
| train/                  |              |
|    approx_kl            | 0.0014803344 |
|    clip_fraction        | 0.0231       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.719        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0308       |
|    n_updates            | 2520         |
|    policy_gradient_loss | -0.00208     |
|    std                  | 0.055        |
|    value_loss           | 0.0142       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.731        |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 136          |
|    time_elapsed         | 6750         |
|    total_timesteps      | 1054000      |
| train/                  |              |
|    approx_kl            | 0.0019716923 |
|    clip_fraction        | 0.0388       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.724        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0291       |
|    n_updates            | 2700         |
|    policy_gradient_loss | -0.0012      |
|    std                  | 0.055        |
|    value_loss           | 0.0139       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.742        |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 145          |
|    time_elapsed         | 7196         |
|    total_timesteps      | 1123750      |
| train/                  |              |
|    approx_kl            | 0.0016642434 |
|    clip_fraction        | 0.0285       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.756        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0295       |
|    n_updates            | 2880         |
|    policy_gradient_loss | -0.000402    |
|    std                  | 0.055        |
|    value_loss           | 0.0126       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.743        |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 154          |
|    time_elapsed         | 7641         |
|    total_timesteps      | 1193500      |
| train/                  |              |
|    approx_kl            | 0.0017236449 |
|    clip_fraction        | 0.034        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.78         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0311       |
|    n_updates            | 3060         |
|    policy_gradient_loss | -0.00155     |
|    std                  | 0.055        |
|    value_loss           | 0.0115       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.749        |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 163          |
|    time_elapsed         | 8089         |
|    total_timesteps      | 1263250      |
| train/                  |              |
|    approx_kl            | 0.0015902935 |
|    clip_fraction        | 0.0245       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.795        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0277       |
|    n_updates            | 3240         |
|    policy_gradient_loss | -0.00157     |
|    std                  | 0.055        |
|    value_loss           | 0.0112       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.754        |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 172          |
|    time_elapsed         | 8537         |
|    total_timesteps      | 1333000      |
| train/                  |              |
|    approx_kl            | 0.0016019975 |
|    clip_fraction        | 0.0349       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.823        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0293       |
|    n_updates            | 3420         |
|    policy_gradient_loss | -0.00182     |
|    std                  | 0.055        |
|    value_loss           | 0.00893      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.76         |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 181          |
|    time_elapsed         | 8977         |
|    total_timesteps      | 1402750      |
| train/                  |              |
|    approx_kl            | 0.0015127531 |
|    clip_fraction        | 0.0289       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.835        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0305       |
|    n_updates            | 3600         |
|    policy_gradient_loss | -0.00146     |
|    std                  | 0.055        |
|    value_loss           | 0.00884      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=1472500, episode_reward=0.78 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.785        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.766        |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 190          |
|    time_elapsed         | 9420         |
|    total_timesteps      | 1472500      |
| train/                  |              |
|    approx_kl            | 0.0015387533 |
|    clip_fraction        | 0.0338       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.848        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0311       |
|    n_updates            | 378

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.767        |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 199          |
|    time_elapsed         | 9846         |
|    total_timesteps      | 1542250      |
| train/                  |              |
|    approx_kl            | 0.0016410666 |
|    clip_fraction        | 0.0344       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.863        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0268       |
|    n_updates            | 3960         |
|    policy_gradient_loss | -0.00167     |
|    std                  | 0.0549       |
|    value_loss           | 0.00823      |
------------------------------------------
Eval num_timesteps=1550000, episode_reward=0.79 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.769        |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 208          |
|    time_elapsed         | 10292        |
|    total_timesteps      | 1612000      |
| train/                  |              |
|    approx_kl            | 0.0015053374 |
|    clip_fraction        | 0.0364       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.869        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0295       |
|    n_updates            | 4140         |
|    policy_gradient_loss | -0.00284     |
|    std                  | 0.0549       |
|    value_loss           | 0.00732      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.773        |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 217          |
|    time_elapsed         | 10731        |
|    total_timesteps      | 1681750      |
| train/                  |              |
|    approx_kl            | 0.0020855556 |
|    clip_fraction        | 0.0559       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.874        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0195       |
|    n_updates            | 4320         |
|    policy_gradient_loss | -0.00122     |
|    std                  | 0.0549       |
|    value_loss           | 0.00693      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.774        |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 226          |
|    time_elapsed         | 11173        |
|    total_timesteps      | 1751500      |
| train/                  |              |
|    approx_kl            | 0.0018178336 |
|    clip_fraction        | 0.052        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.879        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0312       |
|    n_updates            | 4500         |
|    policy_gradient_loss | -0.00246     |
|    std                  | 0.0549       |
|    value_loss           | 0.00688      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.781        |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 235          |
|    time_elapsed         | 11610        |
|    total_timesteps      | 1821250      |
| train/                  |              |
|    approx_kl            | 0.0015387408 |
|    clip_fraction        | 0.0418       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.891        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0263       |
|    n_updates            | 4680         |
|    policy_gradient_loss | -0.00107     |
|    std                  | 0.0549       |
|    value_loss           | 0.00644      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.787        |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 244          |
|    time_elapsed         | 12051        |
|    total_timesteps      | 1891000      |
| train/                  |              |
|    approx_kl            | 0.0011921984 |
|    clip_fraction        | 0.0355       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.899        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0216       |
|    n_updates            | 4860         |
|    policy_gradient_loss | -0.00163     |
|    std                  | 0.0549       |
|    value_loss           | 0.00607      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.787        |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 253          |
|    time_elapsed         | 12492        |
|    total_timesteps      | 1960750      |
| train/                  |              |
|    approx_kl            | 0.0016259605 |
|    clip_fraction        | 0.0391       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.9          |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0296       |
|    n_updates            | 5040         |
|    policy_gradient_loss | -0.00197     |
|    std                  | 0.0549       |
|    value_loss           | 0.00585      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.79         |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 262          |
|    time_elapsed         | 12932        |
|    total_timesteps      | 2030500      |
| train/                  |              |
|    approx_kl            | 0.0012444847 |
|    clip_fraction        | 0.0286       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.909        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0276       |
|    n_updates            | 5220         |
|    policy_gradient_loss | -0.0023      |
|    std                  | 0.0549       |
|    value_loss           | 0.00565      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.793        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 271          |
|    time_elapsed         | 13371        |
|    total_timesteps      | 2100250      |
| train/                  |              |
|    approx_kl            | 0.0013759118 |
|    clip_fraction        | 0.0323       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.913        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0264       |
|    n_updates            | 5400         |
|    policy_gradient_loss | -0.00176     |
|    std                  | 0.0549       |
|    value_loss           | 0.00544      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=2170000, episode_reward=0.82 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.824        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.795        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 280          |
|    time_elapsed         | 13813        |
|    total_timesteps      | 2170000      |
| train/                  |              |
|    approx_kl            | 0.0012747417 |
|    clip_fraction        | 0.0303       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.916        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0257       |
|    n_updates            | 558

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.797       |
| time/                   |             |
|    fps                  | 157         |
|    iterations           | 289         |
|    time_elapsed         | 14230       |
|    total_timesteps      | 2239750     |
| train/                  |             |
|    approx_kl            | 0.001465902 |
|    clip_fraction        | 0.0292      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.2       |
|    explained_variance   | 0.915       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0268      |
|    n_updates            | 5760        |
|    policy_gradient_loss | -0.000911   |
|    std                  | 0.0548      |
|    value_loss           | 0.00538     |
-----------------------------------------
Eval num_timesteps=2247500, episode_reward=0.83 +/- 0.00
Episode length: 5.0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.806        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 298          |
|    time_elapsed         | 14667        |
|    total_timesteps      | 2309500      |
| train/                  |              |
|    approx_kl            | 0.0014071146 |
|    clip_fraction        | 0.0338       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.921        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 5940         |
|    policy_gradient_loss | -0.000136    |
|    std                  | 0.0548       |
|    value_loss           | 0.00483      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.802        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 307          |
|    time_elapsed         | 15103        |
|    total_timesteps      | 2379250      |
| train/                  |              |
|    approx_kl            | 0.0014242047 |
|    clip_fraction        | 0.0317       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.923        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.029        |
|    n_updates            | 6120         |
|    policy_gradient_loss | -0.00117     |
|    std                  | 0.0548       |
|    value_loss           | 0.00464      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.802        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 316          |
|    time_elapsed         | 15539        |
|    total_timesteps      | 2449000      |
| train/                  |              |
|    approx_kl            | 0.0016613132 |
|    clip_fraction        | 0.0378       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.922        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.03         |
|    n_updates            | 6300         |
|    policy_gradient_loss | -0.0016      |
|    std                  | 0.0548       |
|    value_loss           | 0.00508      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.81        |
| time/                   |             |
|    fps                  | 157         |
|    iterations           | 325         |
|    time_elapsed         | 15977       |
|    total_timesteps      | 2518750     |
| train/                  |             |
|    approx_kl            | 0.001712088 |
|    clip_fraction        | 0.0424      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.2       |
|    explained_variance   | 0.926       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0255      |
|    n_updates            | 6480        |
|    policy_gradient_loss | -0.00136    |
|    std                  | 0.0548      |
|    value_loss           | 0.00452     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.809        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 334          |
|    time_elapsed         | 16414        |
|    total_timesteps      | 2588500      |
| train/                  |              |
|    approx_kl            | 0.0014623669 |
|    clip_fraction        | 0.033        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.932        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0297       |
|    n_updates            | 6660         |
|    policy_gradient_loss | -0.00166     |
|    std                  | 0.0548       |
|    value_loss           | 0.00451      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.811        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 343          |
|    time_elapsed         | 16852        |
|    total_timesteps      | 2658250      |
| train/                  |              |
|    approx_kl            | 0.0015035314 |
|    clip_fraction        | 0.0399       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.931        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0303       |
|    n_updates            | 6840         |
|    policy_gradient_loss | -0.00187     |
|    std                  | 0.0548       |
|    value_loss           | 0.00418      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.814        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 352          |
|    time_elapsed         | 17278        |
|    total_timesteps      | 2728000      |
| train/                  |              |
|    approx_kl            | 0.0014416245 |
|    clip_fraction        | 0.0435       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.933        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0272       |
|    n_updates            | 7020         |
|    policy_gradient_loss | -0.000808    |
|    std                  | 0.0548       |
|    value_loss           | 0.00473      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.813        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 361          |
|    time_elapsed         | 17703        |
|    total_timesteps      | 2797750      |
| train/                  |              |
|    approx_kl            | 0.0014969496 |
|    clip_fraction        | 0.0353       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.935        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0287       |
|    n_updates            | 7200         |
|    policy_gradient_loss | -0.00101     |
|    std                  | 0.0547       |
|    value_loss           | 0.00425      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=2867500, episode_reward=0.84 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.838       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.817       |
| time/                   |             |
|    fps                  | 158         |
|    iterations           | 370         |
|    time_elapsed         | 18133       |
|    total_timesteps      | 2867500     |
| train/                  |             |
|    approx_kl            | 0.001411417 |
|    clip_fraction        | 0.0418      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.2       |
|    explained_variance   | 0.933       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0272      |
|    n_updates            | 7380        |
|    policy_gradient_loss | -0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.815        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 379          |
|    time_elapsed         | 18539        |
|    total_timesteps      | 2937250      |
| train/                  |              |
|    approx_kl            | 0.0015048379 |
|    clip_fraction        | 0.0356       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.936        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0296       |
|    n_updates            | 7560         |
|    policy_gradient_loss | -0.00164     |
|    std                  | 0.0547       |
|    value_loss           | 0.00419      |
------------------------------------------
Eval num_timesteps=2945000, episode_reward=0.84 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.822        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 388          |
|    time_elapsed         | 18969        |
|    total_timesteps      | 3007000      |
| train/                  |              |
|    approx_kl            | 0.0013347911 |
|    clip_fraction        | 0.0379       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.938        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0291       |
|    n_updates            | 7740         |
|    policy_gradient_loss | -0.00308     |
|    std                  | 0.0547       |
|    value_loss           | 0.00397      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.818        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 397          |
|    time_elapsed         | 19396        |
|    total_timesteps      | 3076750      |
| train/                  |              |
|    approx_kl            | 0.0017283171 |
|    clip_fraction        | 0.0388       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.938        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0256       |
|    n_updates            | 7920         |
|    policy_gradient_loss | -0.000651    |
|    std                  | 0.0547       |
|    value_loss           | 0.00425      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.822        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 406          |
|    time_elapsed         | 19823        |
|    total_timesteps      | 3146500      |
| train/                  |              |
|    approx_kl            | 0.0015113279 |
|    clip_fraction        | 0.0488       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.939        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0325       |
|    n_updates            | 8100         |
|    policy_gradient_loss | -0.00187     |
|    std                  | 0.0547       |
|    value_loss           | 0.00395      |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.823       |
| time/                   |             |
|    fps                  | 158         |
|    iterations           | 415         |
|    time_elapsed         | 20253       |
|    total_timesteps      | 3216250     |
| train/                  |             |
|    approx_kl            | 0.001210579 |
|    clip_fraction        | 0.0221      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.2       |
|    explained_variance   | 0.944       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0259      |
|    n_updates            | 8280        |
|    policy_gradient_loss | -0.00143    |
|    std                  | 0.0547      |
|    value_loss           | 0.00376     |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.822       |
| time/                   |             |
|    fps                  | 158         |
|    iterations           | 424         |
|    time_elapsed         | 20681       |
|    total_timesteps      | 3286000     |
| train/                  |             |
|    approx_kl            | 0.001228553 |
|    clip_fraction        | 0.0243      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.2       |
|    explained_variance   | 0.943       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0284      |
|    n_updates            | 8460        |
|    policy_gradient_loss | -0.00165    |
|    std                  | 0.0547      |
|    value_loss           | 0.00396     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.824        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 433          |
|    time_elapsed         | 21109        |
|    total_timesteps      | 3355750      |
| train/                  |              |
|    approx_kl            | 0.0012830932 |
|    clip_fraction        | 0.0408       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.945        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0304       |
|    n_updates            | 8640         |
|    policy_gradient_loss | -0.00161     |
|    std                  | 0.0547       |
|    value_loss           | 0.00377      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.824        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 442          |
|    time_elapsed         | 21535        |
|    total_timesteps      | 3425500      |
| train/                  |              |
|    approx_kl            | 0.0016160074 |
|    clip_fraction        | 0.0455       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.946        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0243       |
|    n_updates            | 8820         |
|    policy_gradient_loss | -0.000738    |
|    std                  | 0.0547       |
|    value_loss           | 0.00363      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.827        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 451          |
|    time_elapsed         | 21962        |
|    total_timesteps      | 3495250      |
| train/                  |              |
|    approx_kl            | 0.0012249033 |
|    clip_fraction        | 0.0319       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.945        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 9000         |
|    policy_gradient_loss | -0.000902    |
|    std                  | 0.0546       |
|    value_loss           | 0.00369      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=3565000, episode_reward=0.84 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.842        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.827        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 460          |
|    time_elapsed         | 22389        |
|    total_timesteps      | 3565000      |
| train/                  |              |
|    approx_kl            | 0.0018275997 |
|    clip_fraction        | 0.0444       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.943        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0272       |
|    n_updates            | 9180         |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.829        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 469          |
|    time_elapsed         | 22796        |
|    total_timesteps      | 3634750      |
| train/                  |              |
|    approx_kl            | 0.0014476134 |
|    clip_fraction        | 0.0348       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.948        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.028        |
|    n_updates            | 9360         |
|    policy_gradient_loss | -0.000814    |
|    std                  | 0.0546       |
|    value_loss           | 0.00369      |
------------------------------------------
Eval num_timesteps=3642500, episode_reward=0.84 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.833        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 478          |
|    time_elapsed         | 23224        |
|    total_timesteps      | 3704500      |
| train/                  |              |
|    approx_kl            | 0.0014385057 |
|    clip_fraction        | 0.0308       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.947        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0295       |
|    n_updates            | 9540         |
|    policy_gradient_loss | -0.000665    |
|    std                  | 0.0546       |
|    value_loss           | 0.0035       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.831        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 487          |
|    time_elapsed         | 23652        |
|    total_timesteps      | 3774250      |
| train/                  |              |
|    approx_kl            | 0.0019013945 |
|    clip_fraction        | 0.0423       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.948        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.031        |
|    n_updates            | 9720         |
|    policy_gradient_loss | -0.00207     |
|    std                  | 0.0546       |
|    value_loss           | 0.00408      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.831        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 496          |
|    time_elapsed         | 24079        |
|    total_timesteps      | 3844000      |
| train/                  |              |
|    approx_kl            | 0.0018257061 |
|    clip_fraction        | 0.0638       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.951        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0287       |
|    n_updates            | 9900         |
|    policy_gradient_loss | -0.0016      |
|    std                  | 0.0546       |
|    value_loss           | 0.00342      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.832        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 505          |
|    time_elapsed         | 24502        |
|    total_timesteps      | 3913750      |
| train/                  |              |
|    approx_kl            | 0.0017427335 |
|    clip_fraction        | 0.0393       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.948        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0251       |
|    n_updates            | 10080        |
|    policy_gradient_loss | -0.00188     |
|    std                  | 0.0546       |
|    value_loss           | 0.00356      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.83         |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 514          |
|    time_elapsed         | 24927        |
|    total_timesteps      | 3983500      |
| train/                  |              |
|    approx_kl            | 0.0018003461 |
|    clip_fraction        | 0.0393       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.948        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.031        |
|    n_updates            | 10260        |
|    policy_gradient_loss | -0.000624    |
|    std                  | 0.0546       |
|    value_loss           | 0.0036       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.832        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 523          |
|    time_elapsed         | 25353        |
|    total_timesteps      | 4053250      |
| train/                  |              |
|    approx_kl            | 0.0013315675 |
|    clip_fraction        | 0.0365       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.951        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0284       |
|    n_updates            | 10440        |
|    policy_gradient_loss | -0.00222     |
|    std                  | 0.0546       |
|    value_loss           | 0.00342      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.832        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 532          |
|    time_elapsed         | 25783        |
|    total_timesteps      | 4123000      |
| train/                  |              |
|    approx_kl            | 0.0015102522 |
|    clip_fraction        | 0.0457       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.951        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.028        |
|    n_updates            | 10620        |
|    policy_gradient_loss | -0.000685    |
|    std                  | 0.0546       |
|    value_loss           | 0.00346      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.832        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 541          |
|    time_elapsed         | 26208        |
|    total_timesteps      | 4192750      |
| train/                  |              |
|    approx_kl            | 0.0015799846 |
|    clip_fraction        | 0.0397       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.953        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0263       |
|    n_updates            | 10800        |
|    policy_gradient_loss | -0.00121     |
|    std                  | 0.0546       |
|    value_loss           | 0.00323      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=4262500, episode_reward=0.84 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.842        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.834        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 550          |
|    time_elapsed         | 26636        |
|    total_timesteps      | 4262500      |
| train/                  |              |
|    approx_kl            | 0.0013062424 |
|    clip_fraction        | 0.0307       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.953        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0288       |
|    n_updates            | 10980        |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.835        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 559          |
|    time_elapsed         | 27041        |
|    total_timesteps      | 4332250      |
| train/                  |              |
|    approx_kl            | 0.0013673641 |
|    clip_fraction        | 0.0323       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.955        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0302       |
|    n_updates            | 11160        |
|    policy_gradient_loss | -0.00166     |
|    std                  | 0.0546       |
|    value_loss           | 0.00316      |
------------------------------------------
Eval num_timesteps=4340000, episode_reward=0.84 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.832        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 568          |
|    time_elapsed         | 27464        |
|    total_timesteps      | 4402000      |
| train/                  |              |
|    approx_kl            | 0.0019034126 |
|    clip_fraction        | 0.0513       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.954        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0233       |
|    n_updates            | 11340        |
|    policy_gradient_loss | -0.00191     |
|    std                  | 0.0545       |
|    value_loss           | 0.00345      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.838        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 577          |
|    time_elapsed         | 27892        |
|    total_timesteps      | 4471750      |
| train/                  |              |
|    approx_kl            | 0.0016484152 |
|    clip_fraction        | 0.0386       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.953        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0296       |
|    n_updates            | 11520        |
|    policy_gradient_loss | -0.000994    |
|    std                  | 0.0545       |
|    value_loss           | 0.00335      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.837        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 586          |
|    time_elapsed         | 28319        |
|    total_timesteps      | 4541500      |
| train/                  |              |
|    approx_kl            | 0.0012015183 |
|    clip_fraction        | 0.0274       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.955        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0304       |
|    n_updates            | 11700        |
|    policy_gradient_loss | -0.00149     |
|    std                  | 0.0545       |
|    value_loss           | 0.0031       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.837        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 595          |
|    time_elapsed         | 28744        |
|    total_timesteps      | 4611250      |
| train/                  |              |
|    approx_kl            | 0.0013914024 |
|    clip_fraction        | 0.0355       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.956        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0308       |
|    n_updates            | 11880        |
|    policy_gradient_loss | -0.000549    |
|    std                  | 0.0545       |
|    value_loss           | 0.00316      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.836        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 604          |
|    time_elapsed         | 29170        |
|    total_timesteps      | 4681000      |
| train/                  |              |
|    approx_kl            | 0.0013173022 |
|    clip_fraction        | 0.0388       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.957        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0276       |
|    n_updates            | 12060        |
|    policy_gradient_loss | -0.000568    |
|    std                  | 0.0545       |
|    value_loss           | 0.0031       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.837        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 613          |
|    time_elapsed         | 29596        |
|    total_timesteps      | 4750750      |
| train/                  |              |
|    approx_kl            | 0.0013245286 |
|    clip_fraction        | 0.0354       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.956        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0286       |
|    n_updates            | 12240        |
|    policy_gradient_loss | -0.00198     |
|    std                  | 0.0545       |
|    value_loss           | 0.00305      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.84         |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 622          |
|    time_elapsed         | 30021        |
|    total_timesteps      | 4820500      |
| train/                  |              |
|    approx_kl            | 0.0014579389 |
|    clip_fraction        | 0.0509       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.957        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 12420        |
|    policy_gradient_loss | -0.00135     |
|    std                  | 0.0545       |
|    value_loss           | 0.00303      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.84         |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 631          |
|    time_elapsed         | 30446        |
|    total_timesteps      | 4890250      |
| train/                  |              |
|    approx_kl            | 0.0012875125 |
|    clip_fraction        | 0.0295       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.958        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0287       |
|    n_updates            | 12600        |
|    policy_gradient_loss | -0.000817    |
|    std                  | 0.0545       |
|    value_loss           | 0.00297      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=4960000, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.848        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.837        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 640          |
|    time_elapsed         | 30874        |
|    total_timesteps      | 4960000      |
| train/                  |              |
|    approx_kl            | 0.0013296498 |
|    clip_fraction        | 0.038        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.957        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0271       |
|    n_updates            | 127

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.841       |
| time/                   |             |
|    fps                  | 160         |
|    iterations           | 649         |
|    time_elapsed         | 31282       |
|    total_timesteps      | 5029750     |
| train/                  |             |
|    approx_kl            | 0.001409123 |
|    clip_fraction        | 0.027       |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.959       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0291      |
|    n_updates            | 12960       |
|    policy_gradient_loss | -0.00196    |
|    std                  | 0.0545      |
|    value_loss           | 0.00301     |
-----------------------------------------
Eval num_timesteps=5037500, episode_reward=0.85 +/- 0.00
Episode length: 5.0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.843        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 658          |
|    time_elapsed         | 31714        |
|    total_timesteps      | 5099500      |
| train/                  |              |
|    approx_kl            | 0.0013251402 |
|    clip_fraction        | 0.0321       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.959        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0291       |
|    n_updates            | 13140        |
|    policy_gradient_loss | -0.000809    |
|    std                  | 0.0545       |
|    value_loss           | 0.00298      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.839        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 667          |
|    time_elapsed         | 32139        |
|    total_timesteps      | 5169250      |
| train/                  |              |
|    approx_kl            | 0.0016414443 |
|    clip_fraction        | 0.0415       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.958        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.027        |
|    n_updates            | 13320        |
|    policy_gradient_loss | -0.000825    |
|    std                  | 0.0545       |
|    value_loss           | 0.00299      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.841        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 676          |
|    time_elapsed         | 32563        |
|    total_timesteps      | 5239000      |
| train/                  |              |
|    approx_kl            | 0.0013969648 |
|    clip_fraction        | 0.0376       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.96         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0271       |
|    n_updates            | 13500        |
|    policy_gradient_loss | -0.00119     |
|    std                  | 0.0545       |
|    value_loss           | 0.00294      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.838        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 685          |
|    time_elapsed         | 32989        |
|    total_timesteps      | 5308750      |
| train/                  |              |
|    approx_kl            | 0.0012789733 |
|    clip_fraction        | 0.0383       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.962        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0292       |
|    n_updates            | 13680        |
|    policy_gradient_loss | -0.000201    |
|    std                  | 0.0545       |
|    value_loss           | 0.00279      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.839        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 694          |
|    time_elapsed         | 33416        |
|    total_timesteps      | 5378500      |
| train/                  |              |
|    approx_kl            | 0.0014178845 |
|    clip_fraction        | 0.0434       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.96         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0306       |
|    n_updates            | 13860        |
|    policy_gradient_loss | -0.00217     |
|    std                  | 0.0544       |
|    value_loss           | 0.00278      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.838        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 703          |
|    time_elapsed         | 33840        |
|    total_timesteps      | 5448250      |
| train/                  |              |
|    approx_kl            | 0.0013884243 |
|    clip_fraction        | 0.0278       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.961        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0251       |
|    n_updates            | 14040        |
|    policy_gradient_loss | -0.000857    |
|    std                  | 0.0544       |
|    value_loss           | 0.00292      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.843        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 712          |
|    time_elapsed         | 34266        |
|    total_timesteps      | 5518000      |
| train/                  |              |
|    approx_kl            | 0.0019149126 |
|    clip_fraction        | 0.0456       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.96         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0245       |
|    n_updates            | 14220        |
|    policy_gradient_loss | -0.00125     |
|    std                  | 0.0544       |
|    value_loss           | 0.00285      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.839        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 721          |
|    time_elapsed         | 34690        |
|    total_timesteps      | 5587750      |
| train/                  |              |
|    approx_kl            | 0.0015659686 |
|    clip_fraction        | 0.0354       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.961        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 14400        |
|    policy_gradient_loss | -0.000488    |
|    std                  | 0.0544       |
|    value_loss           | 0.0029       |
------------------------------------------
-----------------------------------------
| rollout/  

Eval num_timesteps=5657500, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.85         |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 730          |
|    time_elapsed         | 35113        |
|    total_timesteps      | 5657500      |
| train/                  |              |
|    approx_kl            | 0.0012158251 |
|    clip_fraction        | 0.0307       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.961        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0285       |
|    n_updates            | 145

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.842       |
| time/                   |             |
|    fps                  | 161         |
|    iterations           | 739         |
|    time_elapsed         | 35522       |
|    total_timesteps      | 5727250     |
| train/                  |             |
|    approx_kl            | 0.001270341 |
|    clip_fraction        | 0.0239      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.964       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0292      |
|    n_updates            | 14760       |
|    policy_gradient_loss | -0.00113    |
|    std                  | 0.0544      |
|    value_loss           | 0.00264     |
-----------------------------------------
Eval num_timesteps=5735000, episode_reward=0.85 +/- 0.00
Episode length: 5.0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 748          |
|    time_elapsed         | 35947        |
|    total_timesteps      | 5797000      |
| train/                  |              |
|    approx_kl            | 0.0014465998 |
|    clip_fraction        | 0.0386       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.963        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0251       |
|    n_updates            | 14940        |
|    policy_gradient_loss | -0.00187     |
|    std                  | 0.0544       |
|    value_loss           | 0.00275      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.844       |
| time/                   |             |
|    fps                  | 161         |
|    iterations           | 757         |
|    time_elapsed         | 36374       |
|    total_timesteps      | 5866750     |
| train/                  |             |
|    approx_kl            | 0.001596051 |
|    clip_fraction        | 0.0488      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.962       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0257      |
|    n_updates            | 15120       |
|    policy_gradient_loss | -0.00118    |
|    std                  | 0.0544      |
|    value_loss           | 0.00279     |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.846       |
| time/                   |             |
|    fps                  | 161         |
|    iterations           | 766         |
|    time_elapsed         | 36801       |
|    total_timesteps      | 5936500     |
| train/                  |             |
|    approx_kl            | 0.001393406 |
|    clip_fraction        | 0.0311      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.963       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0242      |
|    n_updates            | 15300       |
|    policy_gradient_loss | -0.00124    |
|    std                  | 0.0544      |
|    value_loss           | 0.00283     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.843        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 775          |
|    time_elapsed         | 37232        |
|    total_timesteps      | 6006250      |
| train/                  |              |
|    approx_kl            | 0.0013282867 |
|    clip_fraction        | 0.0215       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.963        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0258       |
|    n_updates            | 15480        |
|    policy_gradient_loss | -0.000889    |
|    std                  | 0.0544       |
|    value_loss           | 0.00271      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.846       |
| time/                   |             |
|    fps                  | 161         |
|    iterations           | 784         |
|    time_elapsed         | 37664       |
|    total_timesteps      | 6076000     |
| train/                  |             |
|    approx_kl            | 0.002164377 |
|    clip_fraction        | 0.084       |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.965       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.021       |
|    n_updates            | 15660       |
|    policy_gradient_loss | -0.00278    |
|    std                  | 0.0544      |
|    value_loss           | 0.00269     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 793          |
|    time_elapsed         | 38096        |
|    total_timesteps      | 6145750      |
| train/                  |              |
|    approx_kl            | 0.0016578417 |
|    clip_fraction        | 0.0424       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.965        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0267       |
|    n_updates            | 15840        |
|    policy_gradient_loss | -0.000838    |
|    std                  | 0.0544       |
|    value_loss           | 0.00252      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.843        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 802          |
|    time_elapsed         | 38525        |
|    total_timesteps      | 6215500      |
| train/                  |              |
|    approx_kl            | 0.0012234496 |
|    clip_fraction        | 0.0258       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.965        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0314       |
|    n_updates            | 16020        |
|    policy_gradient_loss | -0.00148     |
|    std                  | 0.0544       |
|    value_loss           | 0.0027       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 811          |
|    time_elapsed         | 38954        |
|    total_timesteps      | 6285250      |
| train/                  |              |
|    approx_kl            | 0.0014479512 |
|    clip_fraction        | 0.0371       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.966        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0262       |
|    n_updates            | 16200        |
|    policy_gradient_loss | -0.00104     |
|    std                  | 0.0544       |
|    value_loss           | 0.00256      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=6355000, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.853        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 820          |
|    time_elapsed         | 39383        |
|    total_timesteps      | 6355000      |
| train/                  |              |
|    approx_kl            | 0.0010274365 |
|    clip_fraction        | 0.0204       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0309       |
|    n_updates            | 163

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.846        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 829          |
|    time_elapsed         | 39790        |
|    total_timesteps      | 6424750      |
| train/                  |              |
|    approx_kl            | 0.0014960375 |
|    clip_fraction        | 0.0405       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.969        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.024        |
|    n_updates            | 16560        |
|    policy_gradient_loss | -0.000126    |
|    std                  | 0.0544       |
|    value_loss           | 0.00221      |
------------------------------------------
Eval num_timesteps=6432500, episode_reward=0.85 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.846        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 838          |
|    time_elapsed         | 40223        |
|    total_timesteps      | 6494500      |
| train/                  |              |
|    approx_kl            | 0.0016259378 |
|    clip_fraction        | 0.0398       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0289       |
|    n_updates            | 16740        |
|    policy_gradient_loss | -0.000655    |
|    std                  | 0.0543       |
|    value_loss           | 0.0024       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 847          |
|    time_elapsed         | 40655        |
|    total_timesteps      | 6564250      |
| train/                  |              |
|    approx_kl            | 0.0016141279 |
|    clip_fraction        | 0.0339       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0232       |
|    n_updates            | 16920        |
|    policy_gradient_loss | -0.00143     |
|    std                  | 0.0543       |
|    value_loss           | 0.00233      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.847        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 856          |
|    time_elapsed         | 41088        |
|    total_timesteps      | 6634000      |
| train/                  |              |
|    approx_kl            | 0.0012806851 |
|    clip_fraction        | 0.0296       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0288       |
|    n_updates            | 17100        |
|    policy_gradient_loss | -0.00139     |
|    std                  | 0.0543       |
|    value_loss           | 0.00239      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 865          |
|    time_elapsed         | 41520        |
|    total_timesteps      | 6703750      |
| train/                  |              |
|    approx_kl            | 0.0015118743 |
|    clip_fraction        | 0.0426       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0274       |
|    n_updates            | 17280        |
|    policy_gradient_loss | -0.00157     |
|    std                  | 0.0543       |
|    value_loss           | 0.00238      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.846        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 874          |
|    time_elapsed         | 41950        |
|    total_timesteps      | 6773500      |
| train/                  |              |
|    approx_kl            | 0.0014239461 |
|    clip_fraction        | 0.0416       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.97         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0269       |
|    n_updates            | 17460        |
|    policy_gradient_loss | -0.000845    |
|    std                  | 0.0543       |
|    value_loss           | 0.0023       |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.846        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 883          |
|    time_elapsed         | 42383        |
|    total_timesteps      | 6843250      |
| train/                  |              |
|    approx_kl            | 0.0015625523 |
|    clip_fraction        | 0.0471       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.969        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0252       |
|    n_updates            | 17640        |
|    policy_gradient_loss | -0.00178     |
|    std                  | 0.0543       |
|    value_loss           | 0.00229      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 892          |
|    time_elapsed         | 42817        |
|    total_timesteps      | 6913000      |
| train/                  |              |
|    approx_kl            | 0.0015075097 |
|    clip_fraction        | 0.0353       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.969        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0219       |
|    n_updates            | 17820        |
|    policy_gradient_loss | -0.000442    |
|    std                  | 0.0543       |
|    value_loss           | 0.00226      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 901          |
|    time_elapsed         | 43251        |
|    total_timesteps      | 6982750      |
| train/                  |              |
|    approx_kl            | 0.0017526157 |
|    clip_fraction        | 0.0473       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.971        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0282       |
|    n_updates            | 18000        |
|    policy_gradient_loss | -0.00133     |
|    std                  | 0.0543       |
|    value_loss           | 0.00225      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=7052500, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.855        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.85         |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 910          |
|    time_elapsed         | 43681        |
|    total_timesteps      | 7052500      |
| train/                  |              |
|    approx_kl            | 0.0014379211 |
|    clip_fraction        | 0.0298       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.969        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0292       |
|    n_updates            | 181

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.848       |
| time/                   |             |
|    fps                  | 161         |
|    iterations           | 919         |
|    time_elapsed         | 44094       |
|    total_timesteps      | 7122250     |
| train/                  |             |
|    approx_kl            | 0.001378233 |
|    clip_fraction        | 0.0338      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.97        |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0312      |
|    n_updates            | 18360       |
|    policy_gradient_loss | -0.00248    |
|    std                  | 0.0543      |
|    value_loss           | 0.00223     |
-----------------------------------------
Eval num_timesteps=7130000, episode_reward=0.86 +/- 0.00
Episode length: 5.0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 928          |
|    time_elapsed         | 44525        |
|    total_timesteps      | 7192000      |
| train/                  |              |
|    approx_kl            | 0.0015270128 |
|    clip_fraction        | 0.0501       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.97         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0267       |
|    n_updates            | 18540        |
|    policy_gradient_loss | -0.000855    |
|    std                  | 0.0543       |
|    value_loss           | 0.00214      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.848       |
| time/                   |             |
|    fps                  | 161         |
|    iterations           | 937         |
|    time_elapsed         | 44959       |
|    total_timesteps      | 7261750     |
| train/                  |             |
|    approx_kl            | 0.001539476 |
|    clip_fraction        | 0.0393      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.97        |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0212      |
|    n_updates            | 18720       |
|    policy_gradient_loss | -0.000184   |
|    std                  | 0.0543      |
|    value_loss           | 0.00226     |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 946          |
|    time_elapsed         | 45388        |
|    total_timesteps      | 7331500      |
| train/                  |              |
|    approx_kl            | 0.0015341641 |
|    clip_fraction        | 0.0354       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.971        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0281       |
|    n_updates            | 18900        |
|    policy_gradient_loss | -0.000149    |
|    std                  | 0.0543       |
|    value_loss           | 0.00212      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.849        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 955          |
|    time_elapsed         | 45821        |
|    total_timesteps      | 7401250      |
| train/                  |              |
|    approx_kl            | 0.0014546749 |
|    clip_fraction        | 0.029        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.971        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0254       |
|    n_updates            | 19080        |
|    policy_gradient_loss | -0.00179     |
|    std                  | 0.0543       |
|    value_loss           | 0.00229      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.85         |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 964          |
|    time_elapsed         | 46254        |
|    total_timesteps      | 7471000      |
| train/                  |              |
|    approx_kl            | 0.0019163527 |
|    clip_fraction        | 0.0586       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.97         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0219       |
|    n_updates            | 19260        |
|    policy_gradient_loss | -0.00192     |
|    std                  | 0.0543       |
|    value_loss           | 0.00215      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.847        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 973          |
|    time_elapsed         | 46686        |
|    total_timesteps      | 7540750      |
| train/                  |              |
|    approx_kl            | 0.0016223672 |
|    clip_fraction        | 0.0442       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.97         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0221       |
|    n_updates            | 19440        |
|    policy_gradient_loss | -0.000592    |
|    std                  | 0.0543       |
|    value_loss           | 0.00245      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.846        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 982          |
|    time_elapsed         | 47117        |
|    total_timesteps      | 7610500      |
| train/                  |              |
|    approx_kl            | 0.0013161968 |
|    clip_fraction        | 0.0351       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.971        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0234       |
|    n_updates            | 19620        |
|    policy_gradient_loss | -0.00167     |
|    std                  | 0.0543       |
|    value_loss           | 0.00222      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.849        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 991          |
|    time_elapsed         | 47542        |
|    total_timesteps      | 7680250      |
| train/                  |              |
|    approx_kl            | 0.0015652209 |
|    clip_fraction        | 0.05         |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.971        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0244       |
|    n_updates            | 19800        |
|    policy_gradient_loss | -0.000105    |
|    std                  | 0.0543       |
|    value_loss           | 0.0021       |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=7750000, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.854        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1000         |
|    time_elapsed         | 47971        |
|    total_timesteps      | 7750000      |
| train/                  |              |
|    approx_kl            | 0.0018108918 |
|    clip_fraction        | 0.047        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.97         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0241       |
|    n_updates            | 19980        |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.85         |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1009         |
|    time_elapsed         | 48382        |
|    total_timesteps      | 7819750      |
| train/                  |              |
|    approx_kl            | 0.0017283973 |
|    clip_fraction        | 0.0589       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0266       |
|    n_updates            | 20160        |
|    policy_gradient_loss | -0.00221     |
|    std                  | 0.0542       |
|    value_loss           | 0.00238      |
------------------------------------------
Eval num_timesteps=7827500, episode_reward=0.85 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.846        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1018         |
|    time_elapsed         | 48811        |
|    total_timesteps      | 7889500      |
| train/                  |              |
|    approx_kl            | 0.0018879134 |
|    clip_fraction        | 0.0546       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.969        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0251       |
|    n_updates            | 20340        |
|    policy_gradient_loss | -0.000435    |
|    std                  | 0.0542       |
|    value_loss           | 0.00244      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.849        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1027         |
|    time_elapsed         | 49239        |
|    total_timesteps      | 7959250      |
| train/                  |              |
|    approx_kl            | 0.0018019417 |
|    clip_fraction        | 0.0549       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.97         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0263       |
|    n_updates            | 20520        |
|    policy_gradient_loss | -0.00149     |
|    std                  | 0.0542       |
|    value_loss           | 0.00234      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1036         |
|    time_elapsed         | 49664        |
|    total_timesteps      | 8029000      |
| train/                  |              |
|    approx_kl            | 0.0019817166 |
|    clip_fraction        | 0.0521       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.971        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.017        |
|    n_updates            | 20700        |
|    policy_gradient_loss | -2e-05       |
|    std                  | 0.0542       |
|    value_loss           | 0.00223      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.847        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1045         |
|    time_elapsed         | 50092        |
|    total_timesteps      | 8098750      |
| train/                  |              |
|    approx_kl            | 0.0015977982 |
|    clip_fraction        | 0.0532       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0241       |
|    n_updates            | 20880        |
|    policy_gradient_loss | -0.00118     |
|    std                  | 0.0542       |
|    value_loss           | 0.00236      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1054         |
|    time_elapsed         | 50520        |
|    total_timesteps      | 8168500      |
| train/                  |              |
|    approx_kl            | 0.0012652602 |
|    clip_fraction        | 0.0331       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.97         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0272       |
|    n_updates            | 21060        |
|    policy_gradient_loss | -0.00107     |
|    std                  | 0.0542       |
|    value_loss           | 0.00232      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1063         |
|    time_elapsed         | 50948        |
|    total_timesteps      | 8238250      |
| train/                  |              |
|    approx_kl            | 0.0014882025 |
|    clip_fraction        | 0.0262       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.969        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0305       |
|    n_updates            | 21240        |
|    policy_gradient_loss | -0.00179     |
|    std                  | 0.0542       |
|    value_loss           | 0.00227      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.85        |
| time/                   |             |
|    fps                  | 161         |
|    iterations           | 1072        |
|    time_elapsed         | 51377       |
|    total_timesteps      | 8308000     |
| train/                  |             |
|    approx_kl            | 0.001745474 |
|    clip_fraction        | 0.0618      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.97        |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0237      |
|    n_updates            | 21420       |
|    policy_gradient_loss | -0.00126    |
|    std                  | 0.0542      |
|    value_loss           | 0.00207     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.85         |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1081         |
|    time_elapsed         | 51807        |
|    total_timesteps      | 8377750      |
| train/                  |              |
|    approx_kl            | 0.0013699764 |
|    clip_fraction        | 0.0339       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.969        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0282       |
|    n_updates            | 21600        |
|    policy_gradient_loss | -0.00279     |
|    std                  | 0.0542       |
|    value_loss           | 0.00224      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=8447500, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.853        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.852        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1090         |
|    time_elapsed         | 52235        |
|    total_timesteps      | 8447500      |
| train/                  |              |
|    approx_kl            | 0.0013802461 |
|    clip_fraction        | 0.024        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0244       |
|    n_updates            | 21780        |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1099         |
|    time_elapsed         | 52642        |
|    total_timesteps      | 8517250      |
| train/                  |              |
|    approx_kl            | 0.0016867863 |
|    clip_fraction        | 0.0336       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.965        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0262       |
|    n_updates            | 21960        |
|    policy_gradient_loss | -0.00167     |
|    std                  | 0.0542       |
|    value_loss           | 0.00257      |
------------------------------------------
Eval num_timesteps=8525000, episode_reward=0.85 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.841        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1108         |
|    time_elapsed         | 53068        |
|    total_timesteps      | 8587000      |
| train/                  |              |
|    approx_kl            | 0.0018253839 |
|    clip_fraction        | 0.0602       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.967        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.022        |
|    n_updates            | 22140        |
|    policy_gradient_loss | -0.0015      |
|    std                  | 0.0542       |
|    value_loss           | 0.00252      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1117         |
|    time_elapsed         | 53499        |
|    total_timesteps      | 8656750      |
| train/                  |              |
|    approx_kl            | 0.0015154167 |
|    clip_fraction        | 0.0333       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.965        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0266       |
|    n_updates            | 22320        |
|    policy_gradient_loss | -0.000751    |
|    std                  | 0.0542       |
|    value_loss           | 0.00292      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.846        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1126         |
|    time_elapsed         | 53928        |
|    total_timesteps      | 8726500      |
| train/                  |              |
|    approx_kl            | 0.0018722236 |
|    clip_fraction        | 0.0463       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.969        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.018        |
|    n_updates            | 22500        |
|    policy_gradient_loss | -0.000739    |
|    std                  | 0.0542       |
|    value_loss           | 0.00236      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.845        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1135         |
|    time_elapsed         | 54358        |
|    total_timesteps      | 8796250      |
| train/                  |              |
|    approx_kl            | 0.0016631257 |
|    clip_fraction        | 0.038        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.967        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0207       |
|    n_updates            | 22680        |
|    policy_gradient_loss | -0.000344    |
|    std                  | 0.0542       |
|    value_loss           | 0.00233      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.847        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1144         |
|    time_elapsed         | 54785        |
|    total_timesteps      | 8866000      |
| train/                  |              |
|    approx_kl            | 0.0019791108 |
|    clip_fraction        | 0.0665       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.958        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0344       |
|    n_updates            | 22860        |
|    policy_gradient_loss | -0.00147     |
|    std                  | 0.0542       |
|    value_loss           | 0.00284      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.843        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1153         |
|    time_elapsed         | 55216        |
|    total_timesteps      | 8935750      |
| train/                  |              |
|    approx_kl            | 0.0017641922 |
|    clip_fraction        | 0.0564       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.96         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0239       |
|    n_updates            | 23040        |
|    policy_gradient_loss | 0.00114      |
|    std                  | 0.0542       |
|    value_loss           | 0.00309      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1162         |
|    time_elapsed         | 55644        |
|    total_timesteps      | 9005500      |
| train/                  |              |
|    approx_kl            | 0.0019966944 |
|    clip_fraction        | 0.0495       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.961        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0202       |
|    n_updates            | 23220        |
|    policy_gradient_loss | -0.000712    |
|    std                  | 0.0542       |
|    value_loss           | 0.00281      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.846        |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1171         |
|    time_elapsed         | 56074        |
|    total_timesteps      | 9075250      |
| train/                  |              |
|    approx_kl            | 0.0016222673 |
|    clip_fraction        | 0.0294       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.96         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0295       |
|    n_updates            | 23400        |
|    policy_gradient_loss | -0.000925    |
|    std                  | 0.0542       |
|    value_loss           | 0.00296      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=9145000, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.853        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.84         |
| time/                   |              |
|    fps                  | 161          |
|    iterations           | 1180         |
|    time_elapsed         | 56505        |
|    total_timesteps      | 9145000      |
| train/                  |              |
|    approx_kl            | 0.0023315852 |
|    clip_fraction        | 0.0645       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.965        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0145       |
|    n_updates            | 23580        |
|    polic

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.842       |
| time/                   |             |
|    fps                  | 161         |
|    iterations           | 1189        |
|    time_elapsed         | 56911       |
|    total_timesteps      | 9214750     |
| train/                  |             |
|    approx_kl            | 0.001740173 |
|    clip_fraction        | 0.0495      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.958       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0252      |
|    n_updates            | 23760       |
|    policy_gradient_loss | -0.00111    |
|    std                  | 0.0541      |
|    value_loss           | 0.00283     |
-----------------------------------------
Eval num_timesteps=9222500, episode_reward=0.85 +/- 0.00
Episode length: 5.0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.843       |
| time/                   |             |
|    fps                  | 161         |
|    iterations           | 1198        |
|    time_elapsed         | 57342       |
|    total_timesteps      | 9284500     |
| train/                  |             |
|    approx_kl            | 0.002268348 |
|    clip_fraction        | 0.0912      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.957       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0171      |
|    n_updates            | 23940       |
|    policy_gradient_loss | -0.0015     |
|    std                  | 0.0541      |
|    value_loss           | 0.00283     |
-----------------------------------------
------------------------------------------
| rollout/                |      

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

seed 3
generate callback ...
vectorize environment ...
vectorize env level 1
vectorize env level 2
Box(-1.0, 1.0, (35,), float64)
model definition ..
Using cuda device
Logging to ./data/ppo_2l/seed_3
policy learning ..




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5        |
|    ep_rew_mean     | 0.677    |
| time/              |          |
|    fps             | 120      |
|    iterations      | 1        |
|    time_elapsed    | 64       |
|    total_timesteps | 7750     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.681        |
| time/                   |              |
|    fps                  | 137          |
|    iterations           | 2            |
|    time_elapsed         | 112          |
|    total_timesteps      | 15500        |
| train/                  |              |
|    approx_kl            | 0.0012313263 |
|    clip_fraction        | 0.0153       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -3.26        |
|    learning_r

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.676        |
| time/                   |              |
|    fps                  | 152          |
|    iterations           | 11           |
|    time_elapsed         | 560          |
|    total_timesteps      | 85250        |
| train/                  |              |
|    approx_kl            | 0.0010190327 |
|    clip_fraction        | 0.00997      |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -1.57        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.105        |
|    n_updates            | 200          |
|    policy_gradient_loss | -0.00197     |
|    std                  | 0.055        |
|    value_loss           | 0.17         |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=155000, episode_reward=0.68 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.68         |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.681        |
| time/                   |              |
|    fps                  | 153          |
|    iterations           | 20           |
|    time_elapsed         | 1008         |
|    total_timesteps      | 155000       |
| train/                  |              |
|    approx_kl            | 0.0009251061 |
|    clip_fraction        | 0.00814      |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -1.21        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0809       |
|    n_updates            | 380          |
|    policy

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.682        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 29           |
|    time_elapsed         | 1431         |
|    total_timesteps      | 224750       |
| train/                  |              |
|    approx_kl            | 0.0012336738 |
|    clip_fraction        | 0.0191       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -0.844       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0628       |
|    n_updates            | 560          |
|    policy_gradient_loss | -0.00141     |
|    std                  | 0.055        |
|    value_loss           | 0.0891       |
------------------------------------------
Eval num_timesteps=232500, episode_reward=0.68 +/- 0.0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.687        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 38           |
|    time_elapsed         | 1870         |
|    total_timesteps      | 294500       |
| train/                  |              |
|    approx_kl            | 0.0011978496 |
|    clip_fraction        | 0.0118       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -0.491       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0587       |
|    n_updates            | 740          |
|    policy_gradient_loss | -0.00152     |
|    std                  | 0.055        |
|    value_loss           | 0.0662       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.683        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 47           |
|    time_elapsed         | 2308         |
|    total_timesteps      | 364250       |
| train/                  |              |
|    approx_kl            | 0.0010686473 |
|    clip_fraction        | 0.00612      |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | -0.186       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0482       |
|    n_updates            | 920          |
|    policy_gradient_loss | -0.001       |
|    std                  | 0.055        |
|    value_loss           | 0.0537       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.688        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 56           |
|    time_elapsed         | 2746         |
|    total_timesteps      | 434000       |
| train/                  |              |
|    approx_kl            | 0.0013652665 |
|    clip_fraction        | 0.0126       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.000865     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0447       |
|    n_updates            | 1100         |
|    policy_gradient_loss | -0.00111     |
|    std                  | 0.055        |
|    value_loss           | 0.0453       |
------------------------------------------
-----------------------------------------
| rollout/  

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 5             |
|    ep_rew_mean          | 0.69          |
| time/                   |               |
|    fps                  | 158           |
|    iterations           | 65            |
|    time_elapsed         | 3186          |
|    total_timesteps      | 503750        |
| train/                  |               |
|    approx_kl            | 0.00081291364 |
|    clip_fraction        | 0.00386       |
|    clip_range           | 0.1           |
|    entropy_loss         | -31.1         |
|    explained_variance   | 0.169         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.045         |
|    n_updates            | 1280          |
|    policy_gradient_loss | -0.000866     |
|    std                  | 0.055         |
|    value_loss           | 0.0371        |
-------------------------------------------
--------------------------------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.688        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 74           |
|    time_elapsed         | 3630         |
|    total_timesteps      | 573500       |
| train/                  |              |
|    approx_kl            | 0.0014095097 |
|    clip_fraction        | 0.0166       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.318        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.037        |
|    n_updates            | 1460         |
|    policy_gradient_loss | -0.0025      |
|    std                  | 0.055        |
|    value_loss           | 0.0324       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.687        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 83           |
|    time_elapsed         | 4072         |
|    total_timesteps      | 643250       |
| train/                  |              |
|    approx_kl            | 0.0011087803 |
|    clip_fraction        | 0.014        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.395        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0356       |
|    n_updates            | 1640         |
|    policy_gradient_loss | -0.00189     |
|    std                  | 0.055        |
|    value_loss           | 0.0265       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.699        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 92           |
|    time_elapsed         | 4514         |
|    total_timesteps      | 713000       |
| train/                  |              |
|    approx_kl            | 0.0012233378 |
|    clip_fraction        | 0.0135       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.473        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0362       |
|    n_updates            | 1820         |
|    policy_gradient_loss | -0.00113     |
|    std                  | 0.055        |
|    value_loss           | 0.0243       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.695        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 101          |
|    time_elapsed         | 4956         |
|    total_timesteps      | 782750       |
| train/                  |              |
|    approx_kl            | 0.0012875898 |
|    clip_fraction        | 0.00874      |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.543        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0361       |
|    n_updates            | 2000         |
|    policy_gradient_loss | -0.00054     |
|    std                  | 0.055        |
|    value_loss           | 0.0216       |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=852500, episode_reward=0.72 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.724        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.695        |
| time/                   |              |
|    fps                  | 157          |
|    iterations           | 110          |
|    time_elapsed         | 5395         |
|    total_timesteps      | 852500       |
| train/                  |              |
|    approx_kl            | 0.0011681321 |
|    clip_fraction        | 0.00659      |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.6          |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0319       |
|    n_updates            | 2180

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.705        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 119          |
|    time_elapsed         | 5813         |
|    total_timesteps      | 922250       |
| train/                  |              |
|    approx_kl            | 0.0011912764 |
|    clip_fraction        | 0.0112       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.637        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.033        |
|    n_updates            | 2360         |
|    policy_gradient_loss | -0.00107     |
|    std                  | 0.055        |
|    value_loss           | 0.0175       |
------------------------------------------
Eval num_timesteps=930000, episode_reward=0.74 +/- 0.0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.7         |
| time/                   |             |
|    fps                  | 158         |
|    iterations           | 128         |
|    time_elapsed         | 6253        |
|    total_timesteps      | 992000      |
| train/                  |             |
|    approx_kl            | 0.001712068 |
|    clip_fraction        | 0.0172      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.1       |
|    explained_variance   | 0.662       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0302      |
|    n_updates            | 2540        |
|    policy_gradient_loss | -0.00233    |
|    std                  | 0.055       |
|    value_loss           | 0.016       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.712        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 137          |
|    time_elapsed         | 6691         |
|    total_timesteps      | 1061750      |
| train/                  |              |
|    approx_kl            | 0.0016711487 |
|    clip_fraction        | 0.0231       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.698        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0328       |
|    n_updates            | 2720         |
|    policy_gradient_loss | -0.00248     |
|    std                  | 0.055        |
|    value_loss           | 0.0144       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.716        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 146          |
|    time_elapsed         | 7130         |
|    total_timesteps      | 1131500      |
| train/                  |              |
|    approx_kl            | 0.0015190025 |
|    clip_fraction        | 0.0161       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.744        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0334       |
|    n_updates            | 2900         |
|    policy_gradient_loss | -0.00178     |
|    std                  | 0.055        |
|    value_loss           | 0.0123       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.719        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 155          |
|    time_elapsed         | 7569         |
|    total_timesteps      | 1201250      |
| train/                  |              |
|    approx_kl            | 0.0015873639 |
|    clip_fraction        | 0.0377       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.765        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0309       |
|    n_updates            | 3080         |
|    policy_gradient_loss | -0.00223     |
|    std                  | 0.055        |
|    value_loss           | 0.0117       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.722        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 164          |
|    time_elapsed         | 8009         |
|    total_timesteps      | 1271000      |
| train/                  |              |
|    approx_kl            | 0.0015123282 |
|    clip_fraction        | 0.0214       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.797        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0269       |
|    n_updates            | 3260         |
|    policy_gradient_loss | -0.00244     |
|    std                  | 0.055        |
|    value_loss           | 0.011        |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.731        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 173          |
|    time_elapsed         | 8456         |
|    total_timesteps      | 1340750      |
| train/                  |              |
|    approx_kl            | 0.0017758539 |
|    clip_fraction        | 0.0415       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.811        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0284       |
|    n_updates            | 3440         |
|    policy_gradient_loss | -0.00251     |
|    std                  | 0.055        |
|    value_loss           | 0.00942      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.736        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 182          |
|    time_elapsed         | 8899         |
|    total_timesteps      | 1410500      |
| train/                  |              |
|    approx_kl            | 0.0016007527 |
|    clip_fraction        | 0.0261       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.837        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0284       |
|    n_updates            | 3620         |
|    policy_gradient_loss | -0.00266     |
|    std                  | 0.055        |
|    value_loss           | 0.0085       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.749        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 191          |
|    time_elapsed         | 9340         |
|    total_timesteps      | 1480250      |
| train/                  |              |
|    approx_kl            | 0.0018528411 |
|    clip_fraction        | 0.0407       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.849        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0267       |
|    n_updates            | 3800         |
|    policy_gradient_loss | -0.0013      |
|    std                  | 0.055        |
|    value_loss           | 0.0079       |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=1550000, episode_reward=0.80 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.803        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.754        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 200          |
|    time_elapsed         | 9782         |
|    total_timesteps      | 1550000      |
| train/                  |              |
|    approx_kl            | 0.0016477883 |
|    clip_fraction        | 0.0406       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.868        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0316       |
|    n_updates            | 398

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.764        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 209          |
|    time_elapsed         | 10203        |
|    total_timesteps      | 1619750      |
| train/                  |              |
|    approx_kl            | 0.0013966757 |
|    clip_fraction        | 0.0479       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.88         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0289       |
|    n_updates            | 4160         |
|    policy_gradient_loss | -0.00291     |
|    std                  | 0.055        |
|    value_loss           | 0.00682      |
------------------------------------------
Eval num_timesteps=1627500, episode_reward=0.81 +/- 0.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.77        |
| time/                   |             |
|    fps                  | 158         |
|    iterations           | 218         |
|    time_elapsed         | 10644       |
|    total_timesteps      | 1689500     |
| train/                  |             |
|    approx_kl            | 0.001828003 |
|    clip_fraction        | 0.0397      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.1       |
|    explained_variance   | 0.896       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0266      |
|    n_updates            | 4340        |
|    policy_gradient_loss | -0.00183    |
|    std                  | 0.0549      |
|    value_loss           | 0.00614     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.771        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 227          |
|    time_elapsed         | 11083        |
|    total_timesteps      | 1759250      |
| train/                  |              |
|    approx_kl            | 0.0019379307 |
|    clip_fraction        | 0.0484       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.904        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0248       |
|    n_updates            | 4520         |
|    policy_gradient_loss | -0.00228     |
|    std                  | 0.0549       |
|    value_loss           | 0.00569      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.782        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 236          |
|    time_elapsed         | 11520        |
|    total_timesteps      | 1829000      |
| train/                  |              |
|    approx_kl            | 0.0019833972 |
|    clip_fraction        | 0.0534       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.912        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0208       |
|    n_updates            | 4700         |
|    policy_gradient_loss | -0.00156     |
|    std                  | 0.0549       |
|    value_loss           | 0.00532      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.787        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 245          |
|    time_elapsed         | 11959        |
|    total_timesteps      | 1898750      |
| train/                  |              |
|    approx_kl            | 0.0013993187 |
|    clip_fraction        | 0.0262       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.1        |
|    explained_variance   | 0.918        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0272       |
|    n_updates            | 4880         |
|    policy_gradient_loss | -0.00148     |
|    std                  | 0.0549       |
|    value_loss           | 0.00514      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.792        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 254          |
|    time_elapsed         | 12400        |
|    total_timesteps      | 1968500      |
| train/                  |              |
|    approx_kl            | 0.0017778617 |
|    clip_fraction        | 0.0485       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.923        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0289       |
|    n_updates            | 5060         |
|    policy_gradient_loss | -0.00285     |
|    std                  | 0.0549       |
|    value_loss           | 0.00471      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.799        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 263          |
|    time_elapsed         | 12840        |
|    total_timesteps      | 2038250      |
| train/                  |              |
|    approx_kl            | 0.0016130318 |
|    clip_fraction        | 0.0482       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.93         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0286       |
|    n_updates            | 5240         |
|    policy_gradient_loss | -0.00378     |
|    std                  | 0.0549       |
|    value_loss           | 0.00429      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.803        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 272          |
|    time_elapsed         | 13281        |
|    total_timesteps      | 2108000      |
| train/                  |              |
|    approx_kl            | 0.0016015277 |
|    clip_fraction        | 0.0529       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.932        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0265       |
|    n_updates            | 5420         |
|    policy_gradient_loss | -0.00407     |
|    std                  | 0.0549       |
|    value_loss           | 0.00431      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.808        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 281          |
|    time_elapsed         | 13720        |
|    total_timesteps      | 2177750      |
| train/                  |              |
|    approx_kl            | 0.0016491534 |
|    clip_fraction        | 0.033        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.939        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0287       |
|    n_updates            | 5600         |
|    policy_gradient_loss | -0.000954    |
|    std                  | 0.0549       |
|    value_loss           | 0.00413      |
------------------------------------------
-----------------------------------------
| rollout/  

Eval num_timesteps=2247500, episode_reward=0.84 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.841        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.809        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 290          |
|    time_elapsed         | 14158        |
|    total_timesteps      | 2247500      |
| train/                  |              |
|    approx_kl            | 0.0018486569 |
|    clip_fraction        | 0.0519       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.942        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0256       |
|    n_updates            | 578

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.813        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 299          |
|    time_elapsed         | 14581        |
|    total_timesteps      | 2317250      |
| train/                  |              |
|    approx_kl            | 0.0015803863 |
|    clip_fraction        | 0.0539       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.942        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.024        |
|    n_updates            | 5960         |
|    policy_gradient_loss | -0.00246     |
|    std                  | 0.0548       |
|    value_loss           | 0.00391      |
------------------------------------------
Eval num_timesteps=2325000, episode_reward=0.84 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.819        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 308          |
|    time_elapsed         | 15023        |
|    total_timesteps      | 2387000      |
| train/                  |              |
|    approx_kl            | 0.0015743785 |
|    clip_fraction        | 0.0326       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.944        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0325       |
|    n_updates            | 6140         |
|    policy_gradient_loss | -0.000929    |
|    std                  | 0.0548       |
|    value_loss           | 0.0038       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.819        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 317          |
|    time_elapsed         | 15462        |
|    total_timesteps      | 2456750      |
| train/                  |              |
|    approx_kl            | 0.0018215266 |
|    clip_fraction        | 0.0413       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.946        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0259       |
|    n_updates            | 6320         |
|    policy_gradient_loss | -0.000967    |
|    std                  | 0.0548       |
|    value_loss           | 0.00365      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.821        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 326          |
|    time_elapsed         | 15904        |
|    total_timesteps      | 2526500      |
| train/                  |              |
|    approx_kl            | 0.0014541829 |
|    clip_fraction        | 0.0301       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.949        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0316       |
|    n_updates            | 6500         |
|    policy_gradient_loss | -0.0012      |
|    std                  | 0.0548       |
|    value_loss           | 0.0033       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.823        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 335          |
|    time_elapsed         | 16346        |
|    total_timesteps      | 2596250      |
| train/                  |              |
|    approx_kl            | 0.0018178681 |
|    clip_fraction        | 0.0511       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.948        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.02         |
|    n_updates            | 6680         |
|    policy_gradient_loss | -0.00246     |
|    std                  | 0.0548       |
|    value_loss           | 0.00362      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.825        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 344          |
|    time_elapsed         | 16786        |
|    total_timesteps      | 2666000      |
| train/                  |              |
|    approx_kl            | 0.0016201864 |
|    clip_fraction        | 0.0464       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.953        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0299       |
|    n_updates            | 6860         |
|    policy_gradient_loss | -0.0008      |
|    std                  | 0.0548       |
|    value_loss           | 0.00339      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.832        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 353          |
|    time_elapsed         | 17219        |
|    total_timesteps      | 2735750      |
| train/                  |              |
|    approx_kl            | 0.0018139062 |
|    clip_fraction        | 0.0418       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.952        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0234       |
|    n_updates            | 7040         |
|    policy_gradient_loss | -0.00261     |
|    std                  | 0.0548       |
|    value_loss           | 0.00361      |
------------------------------------------
----------------------------------------
| rollout/   

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.833        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 362          |
|    time_elapsed         | 17654        |
|    total_timesteps      | 2805500      |
| train/                  |              |
|    approx_kl            | 0.0014951933 |
|    clip_fraction        | 0.0398       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.955        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0307       |
|    n_updates            | 7220         |
|    policy_gradient_loss | -0.000906    |
|    std                  | 0.0547       |
|    value_loss           | 0.00311      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.837        |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 371          |
|    time_elapsed         | 18088        |
|    total_timesteps      | 2875250      |
| train/                  |              |
|    approx_kl            | 0.0014955959 |
|    clip_fraction        | 0.0305       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.959        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 7400         |
|    policy_gradient_loss | -0.000754    |
|    std                  | 0.0547       |
|    value_loss           | 0.00282      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=2945000, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.854        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.837        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 380          |
|    time_elapsed         | 18521        |
|    total_timesteps      | 2945000      |
| train/                  |              |
|    approx_kl            | 0.0015068704 |
|    clip_fraction        | 0.0482       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.957        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.027        |
|    n_updates            | 758

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.839      |
| time/                   |            |
|    fps                  | 159        |
|    iterations           | 389        |
|    time_elapsed         | 18933      |
|    total_timesteps      | 3014750    |
| train/                  |            |
|    approx_kl            | 0.00158485 |
|    clip_fraction        | 0.0378     |
|    clip_range           | 0.1        |
|    entropy_loss         | -31.2      |
|    explained_variance   | 0.96       |
|    learning_rate        | 1e-06      |
|    loss                 | 0.0287     |
|    n_updates            | 7760       |
|    policy_gradient_loss | -0.00135   |
|    std                  | 0.0547     |
|    value_loss           | 0.00305    |
----------------------------------------
Eval num_timesteps=3022500, episode_reward=0.85 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.836        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 398          |
|    time_elapsed         | 19370        |
|    total_timesteps      | 3084500      |
| train/                  |              |
|    approx_kl            | 0.0013655575 |
|    clip_fraction        | 0.0405       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.962        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0263       |
|    n_updates            | 7940         |
|    policy_gradient_loss | -0.00181     |
|    std                  | 0.0547       |
|    value_loss           | 0.00283      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.841        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 407          |
|    time_elapsed         | 19808        |
|    total_timesteps      | 3154250      |
| train/                  |              |
|    approx_kl            | 0.0014650014 |
|    clip_fraction        | 0.0405       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.962        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0266       |
|    n_updates            | 8120         |
|    policy_gradient_loss | -0.00163     |
|    std                  | 0.0547       |
|    value_loss           | 0.00284      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.842        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 416          |
|    time_elapsed         | 20243        |
|    total_timesteps      | 3224000      |
| train/                  |              |
|    approx_kl            | 0.0015219941 |
|    clip_fraction        | 0.0389       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.962        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0302       |
|    n_updates            | 8300         |
|    policy_gradient_loss | -0.00157     |
|    std                  | 0.0547       |
|    value_loss           | 0.00275      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.843        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 425          |
|    time_elapsed         | 20680        |
|    total_timesteps      | 3293750      |
| train/                  |              |
|    approx_kl            | 0.0014510391 |
|    clip_fraction        | 0.0329       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.966        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0299       |
|    n_updates            | 8480         |
|    policy_gradient_loss | -0.00254     |
|    std                  | 0.0547       |
|    value_loss           | 0.00246      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.844        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 434          |
|    time_elapsed         | 21115        |
|    total_timesteps      | 3363500      |
| train/                  |              |
|    approx_kl            | 0.0015420555 |
|    clip_fraction        | 0.039        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.965        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0287       |
|    n_updates            | 8660         |
|    policy_gradient_loss | -0.00313     |
|    std                  | 0.0547       |
|    value_loss           | 0.00271      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.847        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 443          |
|    time_elapsed         | 21547        |
|    total_timesteps      | 3433250      |
| train/                  |              |
|    approx_kl            | 0.0013149215 |
|    clip_fraction        | 0.0294       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.2        |
|    explained_variance   | 0.966        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0301       |
|    n_updates            | 8840         |
|    policy_gradient_loss | -0.000911    |
|    std                  | 0.0546       |
|    value_loss           | 0.0025       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.848        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 452          |
|    time_elapsed         | 21982        |
|    total_timesteps      | 3503000      |
| train/                  |              |
|    approx_kl            | 0.0014299192 |
|    clip_fraction        | 0.0281       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.967        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0287       |
|    n_updates            | 9020         |
|    policy_gradient_loss | -0.00157     |
|    std                  | 0.0546       |
|    value_loss           | 0.00254      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.85         |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 461          |
|    time_elapsed         | 22419        |
|    total_timesteps      | 3572750      |
| train/                  |              |
|    approx_kl            | 0.0016703745 |
|    clip_fraction        | 0.0366       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0298       |
|    n_updates            | 9200         |
|    policy_gradient_loss | -0.00141     |
|    std                  | 0.0546       |
|    value_loss           | 0.00254      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=3642500, episode_reward=0.86 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.86         |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.849        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 470          |
|    time_elapsed         | 22857        |
|    total_timesteps      | 3642500      |
| train/                  |              |
|    approx_kl            | 0.0015104163 |
|    clip_fraction        | 0.0404       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.968        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0279       |
|    n_updates            | 938

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.851        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 479          |
|    time_elapsed         | 23275        |
|    total_timesteps      | 3712250      |
| train/                  |              |
|    approx_kl            | 0.0015012388 |
|    clip_fraction        | 0.0373       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.969        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0254       |
|    n_updates            | 9560         |
|    policy_gradient_loss | -0.00253     |
|    std                  | 0.0546       |
|    value_loss           | 0.00229      |
------------------------------------------
Eval num_timesteps=3720000, episode_reward=0.86 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.852        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 488          |
|    time_elapsed         | 23711        |
|    total_timesteps      | 3782000      |
| train/                  |              |
|    approx_kl            | 0.0014580488 |
|    clip_fraction        | 0.0446       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.971        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0239       |
|    n_updates            | 9740         |
|    policy_gradient_loss | -0.00189     |
|    std                  | 0.0546       |
|    value_loss           | 0.00231      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.855        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 497          |
|    time_elapsed         | 24146        |
|    total_timesteps      | 3851750      |
| train/                  |              |
|    approx_kl            | 0.0019202747 |
|    clip_fraction        | 0.0372       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.971        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0256       |
|    n_updates            | 9920         |
|    policy_gradient_loss | -0.00106     |
|    std                  | 0.0546       |
|    value_loss           | 0.00221      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.855        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 506          |
|    time_elapsed         | 24583        |
|    total_timesteps      | 3921500      |
| train/                  |              |
|    approx_kl            | 0.0015474071 |
|    clip_fraction        | 0.0372       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.971        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0262       |
|    n_updates            | 10100        |
|    policy_gradient_loss | -0.00235     |
|    std                  | 0.0546       |
|    value_loss           | 0.00227      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.857        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 515          |
|    time_elapsed         | 25015        |
|    total_timesteps      | 3991250      |
| train/                  |              |
|    approx_kl            | 0.0014126322 |
|    clip_fraction        | 0.0375       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.971        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0284       |
|    n_updates            | 10280        |
|    policy_gradient_loss | -0.00145     |
|    std                  | 0.0546       |
|    value_loss           | 0.00217      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.86         |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 524          |
|    time_elapsed         | 25449        |
|    total_timesteps      | 4061000      |
| train/                  |              |
|    approx_kl            | 0.0015735045 |
|    clip_fraction        | 0.036        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.972        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0309       |
|    n_updates            | 10460        |
|    policy_gradient_loss | -0.00012     |
|    std                  | 0.0546       |
|    value_loss           | 0.00212      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.857        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 533          |
|    time_elapsed         | 25881        |
|    total_timesteps      | 4130750      |
| train/                  |              |
|    approx_kl            | 0.0014145209 |
|    clip_fraction        | 0.0422       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.972        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0255       |
|    n_updates            | 10640        |
|    policy_gradient_loss | -0.000663    |
|    std                  | 0.0545       |
|    value_loss           | 0.00211      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.859       |
| time/                   |             |
|    fps                  | 159         |
|    iterations           | 542         |
|    time_elapsed         | 26317       |
|    total_timesteps      | 4200500     |
| train/                  |             |
|    approx_kl            | 0.001518588 |
|    clip_fraction        | 0.0376      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.973       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0273      |
|    n_updates            | 10820       |
|    policy_gradient_loss | -0.00154    |
|    std                  | 0.0545      |
|    value_loss           | 0.00216     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.858        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 551          |
|    time_elapsed         | 26747        |
|    total_timesteps      | 4270250      |
| train/                  |              |
|    approx_kl            | 0.0016595422 |
|    clip_fraction        | 0.0591       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.973        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0262       |
|    n_updates            | 11000        |
|    policy_gradient_loss | -0.00318     |
|    std                  | 0.0545       |
|    value_loss           | 0.00199      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=4340000, episode_reward=0.87 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.866        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.859        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 560          |
|    time_elapsed         | 27179        |
|    total_timesteps      | 4340000      |
| train/                  |              |
|    approx_kl            | 0.0017722397 |
|    clip_fraction        | 0.0332       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.975        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0276       |
|    n_updates            | 111

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.861        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 569          |
|    time_elapsed         | 27591        |
|    total_timesteps      | 4409750      |
| train/                  |              |
|    approx_kl            | 0.0014012344 |
|    clip_fraction        | 0.0229       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.975        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0316       |
|    n_updates            | 11360        |
|    policy_gradient_loss | -0.000642    |
|    std                  | 0.0545       |
|    value_loss           | 0.00205      |
------------------------------------------
Eval num_timesteps=4417500, episode_reward=0.87 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.86         |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 578          |
|    time_elapsed         | 28026        |
|    total_timesteps      | 4479500      |
| train/                  |              |
|    approx_kl            | 0.0017027279 |
|    clip_fraction        | 0.0471       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.975        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0247       |
|    n_updates            | 11540        |
|    policy_gradient_loss | -0.00195     |
|    std                  | 0.0545       |
|    value_loss           | 0.00202      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.86        |
| time/                   |             |
|    fps                  | 159         |
|    iterations           | 587         |
|    time_elapsed         | 28462       |
|    total_timesteps      | 4549250     |
| train/                  |             |
|    approx_kl            | 0.001533177 |
|    clip_fraction        | 0.0331      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.975       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0276      |
|    n_updates            | 11720       |
|    policy_gradient_loss | -0.002      |
|    std                  | 0.0545      |
|    value_loss           | 0.00204     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.859        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 596          |
|    time_elapsed         | 28896        |
|    total_timesteps      | 4619000      |
| train/                  |              |
|    approx_kl            | 0.0015476149 |
|    clip_fraction        | 0.031        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.975        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0256       |
|    n_updates            | 11900        |
|    policy_gradient_loss | -0.00152     |
|    std                  | 0.0545       |
|    value_loss           | 0.0019       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.864        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 605          |
|    time_elapsed         | 29333        |
|    total_timesteps      | 4688750      |
| train/                  |              |
|    approx_kl            | 0.0015479412 |
|    clip_fraction        | 0.0477       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.976        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0246       |
|    n_updates            | 12080        |
|    policy_gradient_loss | -0.00203     |
|    std                  | 0.0545       |
|    value_loss           | 0.00202      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.862        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 614          |
|    time_elapsed         | 29767        |
|    total_timesteps      | 4758500      |
| train/                  |              |
|    approx_kl            | 0.0016251615 |
|    clip_fraction        | 0.0411       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.975        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0277       |
|    n_updates            | 12260        |
|    policy_gradient_loss | -0.00112     |
|    std                  | 0.0545       |
|    value_loss           | 0.00208      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.864        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 623          |
|    time_elapsed         | 30202        |
|    total_timesteps      | 4828250      |
| train/                  |              |
|    approx_kl            | 0.0013068229 |
|    clip_fraction        | 0.0371       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.976        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0273       |
|    n_updates            | 12440        |
|    policy_gradient_loss | -0.000204    |
|    std                  | 0.0545       |
|    value_loss           | 0.00186      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.864        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 632          |
|    time_elapsed         | 30635        |
|    total_timesteps      | 4898000      |
| train/                  |              |
|    approx_kl            | 0.0014893957 |
|    clip_fraction        | 0.0363       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.977        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.028        |
|    n_updates            | 12620        |
|    policy_gradient_loss | -0.00152     |
|    std                  | 0.0545       |
|    value_loss           | 0.00188      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.866        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 641          |
|    time_elapsed         | 31072        |
|    total_timesteps      | 4967750      |
| train/                  |              |
|    approx_kl            | 0.0017226476 |
|    clip_fraction        | 0.0306       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.977        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0255       |
|    n_updates            | 12800        |
|    policy_gradient_loss | -0.00132     |
|    std                  | 0.0545       |
|    value_loss           | 0.00185      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=5037500, episode_reward=0.87 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.87         |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.863        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 650          |
|    time_elapsed         | 31509        |
|    total_timesteps      | 5037500      |
| train/                  |              |
|    approx_kl            | 0.0012617481 |
|    clip_fraction        | 0.027        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.978        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0282       |
|    n_updates            | 12980        |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.864        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 659          |
|    time_elapsed         | 31928        |
|    total_timesteps      | 5107250      |
| train/                  |              |
|    approx_kl            | 0.0014962477 |
|    clip_fraction        | 0.0341       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.977        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0264       |
|    n_updates            | 13160        |
|    policy_gradient_loss | -0.000897    |
|    std                  | 0.0544       |
|    value_loss           | 0.00191      |
------------------------------------------
Eval num_timesteps=5115000, episode_reward=0.87 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.865        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 668          |
|    time_elapsed         | 32363        |
|    total_timesteps      | 5177000      |
| train/                  |              |
|    approx_kl            | 0.0015507943 |
|    clip_fraction        | 0.0546       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.978        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0248       |
|    n_updates            | 13340        |
|    policy_gradient_loss | -0.00305     |
|    std                  | 0.0544       |
|    value_loss           | 0.00171      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.867        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 677          |
|    time_elapsed         | 32794        |
|    total_timesteps      | 5246750      |
| train/                  |              |
|    approx_kl            | 0.0015206059 |
|    clip_fraction        | 0.045        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.978        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0294       |
|    n_updates            | 13520        |
|    policy_gradient_loss | -0.00167     |
|    std                  | 0.0544       |
|    value_loss           | 0.00172      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.868        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 686          |
|    time_elapsed         | 33229        |
|    total_timesteps      | 5316500      |
| train/                  |              |
|    approx_kl            | 0.0013239525 |
|    clip_fraction        | 0.0237       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.978        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0263       |
|    n_updates            | 13700        |
|    policy_gradient_loss | -0.00135     |
|    std                  | 0.0544       |
|    value_loss           | 0.00174      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.867       |
| time/                   |             |
|    fps                  | 160         |
|    iterations           | 695         |
|    time_elapsed         | 33663       |
|    total_timesteps      | 5386250     |
| train/                  |             |
|    approx_kl            | 0.001769283 |
|    clip_fraction        | 0.043       |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.979       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0268      |
|    n_updates            | 13880       |
|    policy_gradient_loss | -0.00107    |
|    std                  | 0.0544      |
|    value_loss           | 0.00166     |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.866       |
| time/                   |             |
|    fps                  | 159         |
|    iterations           | 704         |
|    time_elapsed         | 34101       |
|    total_timesteps      | 5456000     |
| train/                  |             |
|    approx_kl            | 0.001296776 |
|    clip_fraction        | 0.0253      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.3       |
|    explained_variance   | 0.979       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.028       |
|    n_updates            | 14060       |
|    policy_gradient_loss | -0.00177    |
|    std                  | 0.0544      |
|    value_loss           | 0.00181     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.869        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 713          |
|    time_elapsed         | 34537        |
|    total_timesteps      | 5525750      |
| train/                  |              |
|    approx_kl            | 0.0017533549 |
|    clip_fraction        | 0.0431       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.979        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0239       |
|    n_updates            | 14240        |
|    policy_gradient_loss | -0.000742    |
|    std                  | 0.0544       |
|    value_loss           | 0.00174      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.867        |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 722          |
|    time_elapsed         | 34972        |
|    total_timesteps      | 5595500      |
| train/                  |              |
|    approx_kl            | 0.0016084684 |
|    clip_fraction        | 0.0535       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.3        |
|    explained_variance   | 0.979        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0247       |
|    n_updates            | 14420        |
|    policy_gradient_loss | -0.00108     |
|    std                  | 0.0544       |
|    value_loss           | 0.00178      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.869        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 731          |
|    time_elapsed         | 35407        |
|    total_timesteps      | 5665250      |
| train/                  |              |
|    approx_kl            | 0.0016036618 |
|    clip_fraction        | 0.0487       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.98         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0244       |
|    n_updates            | 14600        |
|    policy_gradient_loss | -0.000422    |
|    std                  | 0.0544       |
|    value_loss           | 0.00151      |
------------------------------------------
-----------------------------------------
| rollout/  

Eval num_timesteps=5735000, episode_reward=0.87 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.873        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.868        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 740          |
|    time_elapsed         | 35841        |
|    total_timesteps      | 5735000      |
| train/                  |              |
|    approx_kl            | 0.0014364824 |
|    clip_fraction        | 0.0378       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.98         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0257       |
|    n_updates            | 14780        |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.868        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 749          |
|    time_elapsed         | 36252        |
|    total_timesteps      | 5804750      |
| train/                  |              |
|    approx_kl            | 0.0014535815 |
|    clip_fraction        | 0.0426       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.981        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0237       |
|    n_updates            | 14960        |
|    policy_gradient_loss | 0.000125     |
|    std                  | 0.0544       |
|    value_loss           | 0.00155      |
------------------------------------------
Eval num_timesteps=5812500, episode_reward=0.87 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.868        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 758          |
|    time_elapsed         | 36683        |
|    total_timesteps      | 5874500      |
| train/                  |              |
|    approx_kl            | 0.0015829538 |
|    clip_fraction        | 0.0324       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.98         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.028        |
|    n_updates            | 15140        |
|    policy_gradient_loss | -0.000529    |
|    std                  | 0.0544       |
|    value_loss           | 0.00165      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.867        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 767          |
|    time_elapsed         | 37113        |
|    total_timesteps      | 5944250      |
| train/                  |              |
|    approx_kl            | 0.0017570353 |
|    clip_fraction        | 0.0486       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.981        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0275       |
|    n_updates            | 15320        |
|    policy_gradient_loss | -0.0017      |
|    std                  | 0.0544       |
|    value_loss           | 0.00158      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.87         |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 776          |
|    time_elapsed         | 37545        |
|    total_timesteps      | 6014000      |
| train/                  |              |
|    approx_kl            | 0.0013493942 |
|    clip_fraction        | 0.0305       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.981        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0258       |
|    n_updates            | 15500        |
|    policy_gradient_loss | -0.000384    |
|    std                  | 0.0544       |
|    value_loss           | 0.0015       |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.871       |
| time/                   |             |
|    fps                  | 160         |
|    iterations           | 785         |
|    time_elapsed         | 37977       |
|    total_timesteps      | 6083750     |
| train/                  |             |
|    approx_kl            | 0.001229591 |
|    clip_fraction        | 0.0341      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.981       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0287      |
|    n_updates            | 15680       |
|    policy_gradient_loss | -0.00157    |
|    std                  | 0.0543      |
|    value_loss           | 0.00159     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.867        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 794          |
|    time_elapsed         | 38406        |
|    total_timesteps      | 6153500      |
| train/                  |              |
|    approx_kl            | 0.0014550617 |
|    clip_fraction        | 0.0337       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.981        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0304       |
|    n_updates            | 15860        |
|    policy_gradient_loss | -0.00211     |
|    std                  | 0.0543       |
|    value_loss           | 0.0016       |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.869        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 803          |
|    time_elapsed         | 38838        |
|    total_timesteps      | 6223250      |
| train/                  |              |
|    approx_kl            | 0.0014371563 |
|    clip_fraction        | 0.0358       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.982        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.026        |
|    n_updates            | 16040        |
|    policy_gradient_loss | -0.00129     |
|    std                  | 0.0543       |
|    value_loss           | 0.00151      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.87         |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 812          |
|    time_elapsed         | 39268        |
|    total_timesteps      | 6293000      |
| train/                  |              |
|    approx_kl            | 0.0012776405 |
|    clip_fraction        | 0.0361       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.983        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0243       |
|    n_updates            | 16220        |
|    policy_gradient_loss | -5.29e-05    |
|    std                  | 0.0543       |
|    value_loss           | 0.00133      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.87        |
| time/                   |             |
|    fps                  | 160         |
|    iterations           | 821         |
|    time_elapsed         | 39701       |
|    total_timesteps      | 6362750     |
| train/                  |             |
|    approx_kl            | 0.001490477 |
|    clip_fraction        | 0.0328      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.982       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0274      |
|    n_updates            | 16400       |
|    policy_gradient_loss | -0.00194    |
|    std                  | 0.0543      |
|    value_loss           | 0.00149     |
-----------------------------------------
------------------------------------------
| rollout/                |      

Eval num_timesteps=6432500, episode_reward=0.87 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.873        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.87         |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 830          |
|    time_elapsed         | 40133        |
|    total_timesteps      | 6432500      |
| train/                  |              |
|    approx_kl            | 0.0015792567 |
|    clip_fraction        | 0.0414       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.982        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.023        |
|    n_updates            | 16580        |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.869        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 839          |
|    time_elapsed         | 40547        |
|    total_timesteps      | 6502250      |
| train/                  |              |
|    approx_kl            | 0.0020337263 |
|    clip_fraction        | 0.0541       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.983        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0255       |
|    n_updates            | 16760        |
|    policy_gradient_loss | -0.00053     |
|    std                  | 0.0543       |
|    value_loss           | 0.00141      |
------------------------------------------
Eval num_timesteps=6510000, episode_reward=0.87 +/- 0.

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.869      |
| time/                   |            |
|    fps                  | 160        |
|    iterations           | 848        |
|    time_elapsed         | 40976      |
|    total_timesteps      | 6572000    |
| train/                  |            |
|    approx_kl            | 0.00176631 |
|    clip_fraction        | 0.0386     |
|    clip_range           | 0.1        |
|    entropy_loss         | -31.4      |
|    explained_variance   | 0.982      |
|    learning_rate        | 1e-06      |
|    loss                 | 0.0257     |
|    n_updates            | 16940      |
|    policy_gradient_loss | -0.00214   |
|    std                  | 0.0543     |
|    value_loss           | 0.00156    |
----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.869        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 857          |
|    time_elapsed         | 41407        |
|    total_timesteps      | 6641750      |
| train/                  |              |
|    approx_kl            | 0.0013936686 |
|    clip_fraction        | 0.0307       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.983        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0274       |
|    n_updates            | 17120        |
|    policy_gradient_loss | -0.00133     |
|    std                  | 0.0543       |
|    value_loss           | 0.00146      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.87         |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 866          |
|    time_elapsed         | 41839        |
|    total_timesteps      | 6711500      |
| train/                  |              |
|    approx_kl            | 0.0014521313 |
|    clip_fraction        | 0.0369       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.983        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0307       |
|    n_updates            | 17300        |
|    policy_gradient_loss | -0.000932    |
|    std                  | 0.0543       |
|    value_loss           | 0.00141      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.869        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 875          |
|    time_elapsed         | 42273        |
|    total_timesteps      | 6781250      |
| train/                  |              |
|    approx_kl            | 0.0014742581 |
|    clip_fraction        | 0.0279       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.984        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0263       |
|    n_updates            | 17480        |
|    policy_gradient_loss | -0.000684    |
|    std                  | 0.0543       |
|    value_loss           | 0.00134      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.87         |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 884          |
|    time_elapsed         | 42708        |
|    total_timesteps      | 6851000      |
| train/                  |              |
|    approx_kl            | 0.0014676003 |
|    clip_fraction        | 0.0285       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.983        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0298       |
|    n_updates            | 17660        |
|    policy_gradient_loss | -0.00187     |
|    std                  | 0.0543       |
|    value_loss           | 0.00133      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.87         |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 893          |
|    time_elapsed         | 43143        |
|    total_timesteps      | 6920750      |
| train/                  |              |
|    approx_kl            | 0.0013312993 |
|    clip_fraction        | 0.03         |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.983        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0284       |
|    n_updates            | 17840        |
|    policy_gradient_loss | -0.000342    |
|    std                  | 0.0543       |
|    value_loss           | 0.00135      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.869        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 902          |
|    time_elapsed         | 43578        |
|    total_timesteps      | 6990500      |
| train/                  |              |
|    approx_kl            | 0.0015529732 |
|    clip_fraction        | 0.0396       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.983        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0276       |
|    n_updates            | 18020        |
|    policy_gradient_loss | -0.00103     |
|    std                  | 0.0543       |
|    value_loss           | 0.00143      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.871        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 911          |
|    time_elapsed         | 44010        |
|    total_timesteps      | 7060250      |
| train/                  |              |
|    approx_kl            | 0.0017520881 |
|    clip_fraction        | 0.0432       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.983        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0235       |
|    n_updates            | 18200        |
|    policy_gradient_loss | -0.00182     |
|    std                  | 0.0543       |
|    value_loss           | 0.0014       |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=7130000, episode_reward=0.87 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.874       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.869       |
| time/                   |             |
|    fps                  | 160         |
|    iterations           | 920         |
|    time_elapsed         | 44442       |
|    total_timesteps      | 7130000     |
| train/                  |             |
|    approx_kl            | 0.001445523 |
|    clip_fraction        | 0.0263      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.983       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0272      |
|    n_updates            | 18380       |
|    policy_gradient_loss | -0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.868        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 929          |
|    time_elapsed         | 44858        |
|    total_timesteps      | 7199750      |
| train/                  |              |
|    approx_kl            | 0.0015269967 |
|    clip_fraction        | 0.0355       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.984        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0246       |
|    n_updates            | 18560        |
|    policy_gradient_loss | -0.00192     |
|    std                  | 0.0543       |
|    value_loss           | 0.00131      |
------------------------------------------
Eval num_timesteps=7207500, episode_reward=0.87 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.87         |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 938          |
|    time_elapsed         | 45291        |
|    total_timesteps      | 7269500      |
| train/                  |              |
|    approx_kl            | 0.0013548669 |
|    clip_fraction        | 0.0361       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.984        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0292       |
|    n_updates            | 18740        |
|    policy_gradient_loss | -0.00212     |
|    std                  | 0.0543       |
|    value_loss           | 0.00129      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.869        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 947          |
|    time_elapsed         | 45723        |
|    total_timesteps      | 7339250      |
| train/                  |              |
|    approx_kl            | 0.0015923462 |
|    clip_fraction        | 0.0494       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.984        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0283       |
|    n_updates            | 18920        |
|    policy_gradient_loss | -0.00132     |
|    std                  | 0.0543       |
|    value_loss           | 0.00125      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.87         |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 956          |
|    time_elapsed         | 46156        |
|    total_timesteps      | 7409000      |
| train/                  |              |
|    approx_kl            | 0.0017546203 |
|    clip_fraction        | 0.0266       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.984        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0229       |
|    n_updates            | 19100        |
|    policy_gradient_loss | -0.00167     |
|    std                  | 0.0542       |
|    value_loss           | 0.00127      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.871        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 965          |
|    time_elapsed         | 46594        |
|    total_timesteps      | 7478750      |
| train/                  |              |
|    approx_kl            | 0.0015419519 |
|    clip_fraction        | 0.0439       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.984        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0255       |
|    n_updates            | 19280        |
|    policy_gradient_loss | -0.00178     |
|    std                  | 0.0542       |
|    value_loss           | 0.00126      |
------------------------------------------
-----------------------------------------
| rollout/  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.87         |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 974          |
|    time_elapsed         | 47029        |
|    total_timesteps      | 7548500      |
| train/                  |              |
|    approx_kl            | 0.0016415922 |
|    clip_fraction        | 0.0509       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.984        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0253       |
|    n_updates            | 19460        |
|    policy_gradient_loss | -0.000787    |
|    std                  | 0.0542       |
|    value_loss           | 0.00124      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.87         |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 983          |
|    time_elapsed         | 47461        |
|    total_timesteps      | 7618250      |
| train/                  |              |
|    approx_kl            | 0.0016527737 |
|    clip_fraction        | 0.0443       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.984        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0237       |
|    n_updates            | 19640        |
|    policy_gradient_loss | 0.000343     |
|    std                  | 0.0542       |
|    value_loss           | 0.00134      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.871        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 992          |
|    time_elapsed         | 47893        |
|    total_timesteps      | 7688000      |
| train/                  |              |
|    approx_kl            | 0.0016875702 |
|    clip_fraction        | 0.0329       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.984        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.027        |
|    n_updates            | 19820        |
|    policy_gradient_loss | -0.000972    |
|    std                  | 0.0542       |
|    value_loss           | 0.00123      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.871        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1001         |
|    time_elapsed         | 48327        |
|    total_timesteps      | 7757750      |
| train/                  |              |
|    approx_kl            | 0.0016520075 |
|    clip_fraction        | 0.0411       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.984        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0252       |
|    n_updates            | 20000        |
|    policy_gradient_loss | -0.00138     |
|    std                  | 0.0542       |
|    value_loss           | 0.00123      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=7827500, episode_reward=0.87 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.872        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.869        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1010         |
|    time_elapsed         | 48760        |
|    total_timesteps      | 7827500      |
| train/                  |              |
|    approx_kl            | 0.0015398158 |
|    clip_fraction        | 0.0514       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.983        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0309       |
|    n_updates            | 20180        |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.871        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1019         |
|    time_elapsed         | 49176        |
|    total_timesteps      | 7897250      |
| train/                  |              |
|    approx_kl            | 0.0016648045 |
|    clip_fraction        | 0.0371       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.984        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0242       |
|    n_updates            | 20360        |
|    policy_gradient_loss | -0.00116     |
|    std                  | 0.0542       |
|    value_loss           | 0.00128      |
------------------------------------------
Eval num_timesteps=7905000, episode_reward=0.87 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.869        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1028         |
|    time_elapsed         | 49608        |
|    total_timesteps      | 7967000      |
| train/                  |              |
|    approx_kl            | 0.0014339909 |
|    clip_fraction        | 0.0404       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.983        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0244       |
|    n_updates            | 20540        |
|    policy_gradient_loss | -0.00143     |
|    std                  | 0.0542       |
|    value_loss           | 0.00135      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.867        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1037         |
|    time_elapsed         | 50040        |
|    total_timesteps      | 8036750      |
| train/                  |              |
|    approx_kl            | 0.0017596867 |
|    clip_fraction        | 0.0296       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.982        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0269       |
|    n_updates            | 20720        |
|    policy_gradient_loss | -0.00343     |
|    std                  | 0.0542       |
|    value_loss           | 0.00138      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.87        |
| time/                   |             |
|    fps                  | 160         |
|    iterations           | 1046        |
|    time_elapsed         | 50469       |
|    total_timesteps      | 8106500     |
| train/                  |             |
|    approx_kl            | 0.001966141 |
|    clip_fraction        | 0.0531      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.982       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.028       |
|    n_updates            | 20900       |
|    policy_gradient_loss | -0.00263    |
|    std                  | 0.0542      |
|    value_loss           | 0.00143     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.867        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1055         |
|    time_elapsed         | 50903        |
|    total_timesteps      | 8176250      |
| train/                  |              |
|    approx_kl            | 0.0014695866 |
|    clip_fraction        | 0.0364       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.982        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0236       |
|    n_updates            | 21080        |
|    policy_gradient_loss | -0.000465    |
|    std                  | 0.0542       |
|    value_loss           | 0.00134      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.87        |
| time/                   |             |
|    fps                  | 160         |
|    iterations           | 1064        |
|    time_elapsed         | 51338       |
|    total_timesteps      | 8246000     |
| train/                  |             |
|    approx_kl            | 0.001601201 |
|    clip_fraction        | 0.0565      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.982       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0275      |
|    n_updates            | 21260       |
|    policy_gradient_loss | -0.00214    |
|    std                  | 0.0542      |
|    value_loss           | 0.00143     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.869        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1073         |
|    time_elapsed         | 51771        |
|    total_timesteps      | 8315750      |
| train/                  |              |
|    approx_kl            | 0.0019433267 |
|    clip_fraction        | 0.057        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.982        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0194       |
|    n_updates            | 21440        |
|    policy_gradient_loss | -0.00142     |
|    std                  | 0.0542       |
|    value_loss           | 0.00143      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.868       |
| time/                   |             |
|    fps                  | 160         |
|    iterations           | 1082        |
|    time_elapsed         | 52206       |
|    total_timesteps      | 8385500     |
| train/                  |             |
|    approx_kl            | 0.001664578 |
|    clip_fraction        | 0.0538      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.4       |
|    explained_variance   | 0.982       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0304      |
|    n_updates            | 21620       |
|    policy_gradient_loss | 0.000153    |
|    std                  | 0.0542      |
|    value_loss           | 0.00137     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.867        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1091         |
|    time_elapsed         | 52639        |
|    total_timesteps      | 8455250      |
| train/                  |              |
|    approx_kl            | 0.0013198503 |
|    clip_fraction        | 0.0274       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.981        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0292       |
|    n_updates            | 21800        |
|    policy_gradient_loss | -0.00143     |
|    std                  | 0.0541       |
|    value_loss           | 0.00146      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=8525000, episode_reward=0.87 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.872        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.865        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1100         |
|    time_elapsed         | 53071        |
|    total_timesteps      | 8525000      |
| train/                  |              |
|    approx_kl            | 0.0012954139 |
|    clip_fraction        | 0.0254       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.98         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0258       |
|    n_updates            | 21980        |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.868        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1109         |
|    time_elapsed         | 53480        |
|    total_timesteps      | 8594750      |
| train/                  |              |
|    approx_kl            | 0.0019402395 |
|    clip_fraction        | 0.0519       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.98         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0211       |
|    n_updates            | 22160        |
|    policy_gradient_loss | -0.00222     |
|    std                  | 0.0541       |
|    value_loss           | 0.00155      |
------------------------------------------
Eval num_timesteps=8602500, episode_reward=0.87 +/- 0.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.865        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1118         |
|    time_elapsed         | 53911        |
|    total_timesteps      | 8664500      |
| train/                  |              |
|    approx_kl            | 0.0014364755 |
|    clip_fraction        | 0.0375       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.4        |
|    explained_variance   | 0.981        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0259       |
|    n_updates            | 22340        |
|    policy_gradient_loss | -0.000753    |
|    std                  | 0.0541       |
|    value_loss           | 0.00147      |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.864       |
| time/                   |             |
|    fps                  | 160         |
|    iterations           | 1127        |
|    time_elapsed         | 54350       |
|    total_timesteps      | 8734250     |
| train/                  |             |
|    approx_kl            | 0.002066302 |
|    clip_fraction        | 0.0587      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.5       |
|    explained_variance   | 0.979       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0279      |
|    n_updates            | 22520       |
|    policy_gradient_loss | -0.00101    |
|    std                  | 0.0541      |
|    value_loss           | 0.0015      |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.865        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1136         |
|    time_elapsed         | 54784        |
|    total_timesteps      | 8804000      |
| train/                  |              |
|    approx_kl            | 0.0016227132 |
|    clip_fraction        | 0.0423       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.5        |
|    explained_variance   | 0.98         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0273       |
|    n_updates            | 22700        |
|    policy_gradient_loss | -0.00112     |
|    std                  | 0.0541       |
|    value_loss           | 0.0015       |
------------------------------------------
------------------------------------------
| rollout/ 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.869       |
| time/                   |             |
|    fps                  | 160         |
|    iterations           | 1145        |
|    time_elapsed         | 55221       |
|    total_timesteps      | 8873750     |
| train/                  |             |
|    approx_kl            | 0.001553393 |
|    clip_fraction        | 0.0527      |
|    clip_range           | 0.1         |
|    entropy_loss         | -31.5       |
|    explained_variance   | 0.979       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0258      |
|    n_updates            | 22880       |
|    policy_gradient_loss | -0.00201    |
|    std                  | 0.0541      |
|    value_loss           | 0.00159     |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.866        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1154         |
|    time_elapsed         | 55655        |
|    total_timesteps      | 8943500      |
| train/                  |              |
|    approx_kl            | 0.0020902602 |
|    clip_fraction        | 0.0626       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.5        |
|    explained_variance   | 0.978        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.03         |
|    n_updates            | 23060        |
|    policy_gradient_loss | -0.00184     |
|    std                  | 0.0541       |
|    value_loss           | 0.00175      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.865        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1163         |
|    time_elapsed         | 56089        |
|    total_timesteps      | 9013250      |
| train/                  |              |
|    approx_kl            | 0.0017916234 |
|    clip_fraction        | 0.0363       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.5        |
|    explained_variance   | 0.978        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0224       |
|    n_updates            | 23240        |
|    policy_gradient_loss | -0.000958    |
|    std                  | 0.0541       |
|    value_loss           | 0.00158      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.863        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1172         |
|    time_elapsed         | 56523        |
|    total_timesteps      | 9083000      |
| train/                  |              |
|    approx_kl            | 0.0014824346 |
|    clip_fraction        | 0.0446       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.5        |
|    explained_variance   | 0.977        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0318       |
|    n_updates            | 23420        |
|    policy_gradient_loss | -0.00211     |
|    std                  | 0.0541       |
|    value_loss           | 0.00182      |
------------------------------------------
------------------------------------------
| rollout/ 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.865        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1181         |
|    time_elapsed         | 56955        |
|    total_timesteps      | 9152750      |
| train/                  |              |
|    approx_kl            | 0.0014482967 |
|    clip_fraction        | 0.0375       |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.5        |
|    explained_variance   | 0.977        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0276       |
|    n_updates            | 23600        |
|    policy_gradient_loss | -0.00146     |
|    std                  | 0.0541       |
|    value_loss           | 0.00166      |
------------------------------------------
------------------------------------------
| rollout/ 

Eval num_timesteps=9222500, episode_reward=0.87 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.87         |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.865        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1190         |
|    time_elapsed         | 57389        |
|    total_timesteps      | 9222500      |
| train/                  |              |
|    approx_kl            | 0.0015363577 |
|    clip_fraction        | 0.032        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.5        |
|    explained_variance   | 0.977        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0263       |
|    n_updates            | 23780        |
|    polic

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.865        |
| time/                   |              |
|    fps                  | 160          |
|    iterations           | 1199         |
|    time_elapsed         | 57800        |
|    total_timesteps      | 9292250      |
| train/                  |              |
|    approx_kl            | 0.0012927786 |
|    clip_fraction        | 0.042        |
|    clip_range           | 0.1          |
|    entropy_loss         | -31.5        |
|    explained_variance   | 0.977        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0263       |
|    n_updates            | 23960        |
|    policy_gradient_loss | -0.00279     |
|    std                  | 0.0541       |
|    value_loss           | 0.00174      |
------------------------------------------
Eval num_timesteps=9300000, episode_reward=0.87 +/- 0.

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in