In [1]:
# Extend the import search path so code kept outside this notebook's folder can
# be imported (presumably the project-local utils / model / ressim_env modules
# used below live there — verify on your machine).
import sys

# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
SPE10_ENVS_DIR = '/data/ad181/RemoteDir/k_variability_in_ressim_env/SPE10_like_envs/'
# Guarded so that re-running this cell does not pile up duplicate entries.
if SPE10_ENVS_DIR not in sys.path:
    sys.path.append(SPE10_ENVS_DIR)

In [2]:
%matplotlib notebook
import copy
import os
import pickle
import time
from typing import Callable

import gym
import matplotlib.pyplot as plt
import numpy as np
from stable_baselines3.common.callbacks import CallbackList
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv
from stable_baselines3.ppo import PPO, MlpPolicy
# from utils.subproc_vec_env import SubprocVecEnv
# from utils.multiprocessing_env import SubprocVecEnv

from model.ressim import Grid
from ressim_env import ResSimEnv_v0, ResSimEnv_v1, ResSimEnv_v3
from utils.custom_eval_callback import CustomEvalCallback
from utils.plot_functions import plot_learning

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
# Run configuration: a default RNG seed and the case tag that is spliced into
# the data/environment file names used by the following cells.
seed, case = 1, '1ph_v1'

In [4]:
# Create the output root and the per-case directory; existing ones are kept.
for out_dir in ('./data', './data/'+case):
    os.makedirs(out_dir, exist_ok=True)

In [5]:
def _load_pickle(path):
    """Return the object unpickled from the file at ``path``.

    NOTE: unpickling can execute arbitrary code, so only load files that come
    from a trusted source (here: files produced by this project).
    """
    # A local handle name is used on purpose: binding the file to ``input``
    # would shadow the builtin for the rest of the notebook.
    with open(path, 'rb') as pkl_file:
        return pickle.load(pkl_file)


# Environment used for training (and for the training-side evaluation callback).
env_train = _load_pickle('../envs_params/env_data/env_'+case+'_train.pkl')

# Evaluation environment: keep only the k_list entries selected by rl_indices.
# Indexing with a list assumes k_list is array-like (e.g. a numpy array) — TODO confirm.
rl_indices = [13,15,4,10,5,0,9,7,1]
env_eval = _load_pickle('../envs_params/env_data/env_'+case+'_eval.pkl')
k_list_rl = env_eval.k_list[rl_indices]
env_eval.set_k(k_list_rl)

# Collection of individual evaluation environments, indexed by position later on.
envs = _load_pickle('../envs_params/env_data/env_list_'+case+'_eval.pkl')

In [6]:
def make_env(env, rank: int, seed: int = 0) -> Callable:
    """
    Utility function for multiprocessed env.

    Builds a zero-argument factory, as expected by vectorized-env wrappers.
    Each call of the factory yields a private, freshly seeded copy of ``env``.

    :param env: environment instance used as a template; must provide ``seed()``
        and be deep-copyable
    :param rank: (int) index of the subprocess; added to ``seed`` so that every
        worker gets a distinct seed
    :param seed: (int) the initial seed for RNG
    :return: (Callable) factory returning the seeded environment copy
    """
    def _init() -> "gym.Env":
        # Copy instead of aliasing: when the factories are called in a single
        # process, all workers would otherwise share (and re-seed) one object,
        # and the caller's template would be mutated as well.
        env_ = copy.deepcopy(env)
        env_.seed(seed + rank)
        return env_
    return _init

In [7]:
# Train one PPO agent per seed. Only seeds 4 and 5 run here: the previous
# `range(1,6)` loop skipped every seed that was not greater than 3.
for seed in range(4, 6):
    print(f'seed {seed}')
    log_dir = './data/'+case+'/seed_'+str(seed)
    os.makedirs(log_dir, exist_ok=True)
    num_cpu = 64
    env_train.seed(seed)
    env_eval.seed(seed)

    # Evaluation callbacks: one on the training env, one per entry of `envs`
    # (first 16), and one on `env_eval`. In the captured log the first
    # evaluations appear at num_timesteps=6400, i.e. eval_freq seems to be
    # counted in vectorized steps (eval_freq * num_cpu timesteps) — confirm
    # against CustomEvalCallback.
    train_callback = CustomEvalCallback(env_train, best_model_save_path=log_dir+'/best_model_train', n_eval_episodes=1,
                         log_path=log_dir+'/results_train', eval_freq=100)
    callback_list = [train_callback]
    for i in range(16):
        envs[i].seed(seed)
        eval_callback = CustomEvalCallback(envs[i], best_model_save_path=log_dir+'/best_model_eval_'+str(i), n_eval_episodes=1,
                         log_path=log_dir+'/results_eval_'+str(i), eval_freq=300)
        callback_list.append(eval_callback)
    eval_callback = CustomEvalCallback(env_eval, best_model_save_path=log_dir+'/best_model_eval', n_eval_episodes=1,
                         log_path=log_dir+'/results_eval', eval_freq=100)
    callback_list.append(eval_callback)
    callback = CallbackList(callback_list)

    # One worker process per copy of the training env, seeded seed+rank.
    env = SubprocVecEnv([make_env(env_train, i, seed) for i in range(num_cpu)])
    try:
        print(f'seed {seed}: model definition ..')
        # Rollout size is n_steps * num_cpu = 4 * 64 = 256 transitions per
        # iteration, optimised for n_epochs=20 passes in mini-batches of 16.
        # NOTE(review): the captured logs show approx_kl of ~0.07-0.5 and
        # clip_fraction of ~0.6-0.8, i.e. very large policy steps per
        # iteration; hyperparameters are left untouched here.
        model = PPO(policy=MlpPolicy,
            env=env,
            learning_rate = 1e-5,
            n_steps = 4,
            batch_size = 16,
            n_epochs = 20,
            gamma = 0.99,
            gae_lambda = 0.95,
            clip_range = 0.2,
            clip_range_vf = None,
            ent_coef = 0.001,
            vf_coef = 0.5,
            max_grad_norm = 0.5,
            use_sde= False,
            create_eval_env= False,
            policy_kwargs = dict(net_arch=[4000,2000,800,300], log_std_init=-1.9),
            verbose = 1,
            seed = seed,
            device = "auto")
        print(f'seed {seed}: learning ..')
        model.learn(total_timesteps=200000, callback=callback)
        model.save(log_dir+'/PPO')
        del model
    finally:
        # Always shut the worker processes down, also on errors or a manual
        # interrupt; otherwise 64 subprocesses per seed are left running.
        env.close()

    # Learning curves for the training and evaluation results of this seed.
    fig = plot_learning(log_dir, case='train')
    fig.savefig(log_dir+'/learn_train.png')
    fig = plot_learning(log_dir, case='eval')
    fig.savefig(log_dir+'/learn_eval.png')

seed 4
seed 4: model definition ..
Using cuda device
seed 4: learning ..




----------------------------
| time/              |     |
|    fps             | 91  |
|    iterations      | 1   |
|    time_elapsed    | 2   |
|    total_timesteps | 256 |
----------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 59        |
|    iterations           | 2         |
|    time_elapsed         | 8         |
|    total_timesteps      | 512       |
| train/                  |           |
|    approx_kl            | 0.2215068 |
|    clip_fraction        | 0.637     |
|    clip_range           | 0.2       |
|    entropy_loss         | 29.8      |
|    explained_variance   | -0.399    |
|    learning_rate        | 1e-05     |
|    loss                 | -0.0902   |
|    n_updates            | 20        |
|    policy_gradient_loss | -0.108    |
|    std                  | 0.15      |
|    value_loss           | 0.0503    |
---------------------------------------
-------------------------------------

----------------------------------------
| time/                   |            |
|    fps                  | 45         |
|    iterations           | 13         |
|    time_elapsed         | 73         |
|    total_timesteps      | 3328       |
| train/                  |            |
|    approx_kl            | 0.06522111 |
|    clip_fraction        | 0.637      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.8       |
|    explained_variance   | 0.852      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.111     |
|    n_updates            | 240        |
|    policy_gradient_loss | -0.102     |
|    std                  | 0.15       |
|    value_loss           | 0.003      |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 44         |
|    iterations           | 14         |
|    time_elapsed         | 80         |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 43         |
|    iterations           | 24         |
|    time_elapsed         | 141        |
|    total_timesteps      | 6144       |
| train/                  |            |
|    approx_kl            | 0.40846217 |
|    clip_fraction        | 0.734      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.8       |
|    explained_variance   | 0.784      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0953    |
|    n_updates            | 460        |
|    policy_gradient_loss | -0.117     |
|    std                  | 0.15       |
|    value_loss           | 0.00467    |
----------------------------------------
Eval num_timesteps=6400, episode_reward=0.64 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=6400, episode_reward=0.64 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
---------

-----------------------------------------
| time/                   |             |
|    fps                  | 38          |
|    iterations           | 35          |
|    time_elapsed         | 230         |
|    total_timesteps      | 8960        |
| train/                  |             |
|    approx_kl            | -0.13796556 |
|    clip_fraction        | 0.68        |
|    clip_range           | 0.2         |
|    entropy_loss         | 29.8        |
|    explained_variance   | 0.93        |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0857     |
|    n_updates            | 680         |
|    policy_gradient_loss | -0.114      |
|    std                  | 0.15        |
|    value_loss           | 0.00557     |
-----------------------------------------
--------------------------------------
| time/                   |          |
|    fps                  | 38       |
|    iterations           | 36       |
|    time_elapsed         | 236      |
|    to

----------------------------------------
| time/                   |            |
|    fps                  | 39         |
|    iterations           | 46         |
|    time_elapsed         | 299        |
|    total_timesteps      | 11776      |
| train/                  |            |
|    approx_kl            | 0.14454782 |
|    clip_fraction        | 0.805      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.8       |
|    explained_variance   | 0.958      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0905    |
|    n_updates            | 900        |
|    policy_gradient_loss | -0.114     |
|    std                  | 0.15       |
|    value_loss           | 0.00233    |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 39        |
|    iterations           | 47        |
|    time_elapsed         | 306       |
|    total_timesteps 

---------------------------------------
| time/                   |           |
|    fps                  | 37        |
|    iterations           | 56        |
|    time_elapsed         | 381       |
|    total_timesteps      | 14336     |
| train/                  |           |
|    approx_kl            | 0.0666971 |
|    clip_fraction        | 0.799     |
|    clip_range           | 0.2       |
|    entropy_loss         | 29.8      |
|    explained_variance   | 0.951     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.0974   |
|    n_updates            | 1100      |
|    policy_gradient_loss | -0.116    |
|    std                  | 0.15      |
|    value_loss           | 0.00206   |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 37         |
|    iterations           | 57         |
|    time_elapsed         | 387        |
|    total_timesteps      | 14592  

---------------------------------------
| time/                   |           |
|    fps                  | 38        |
|    iterations           | 67        |
|    time_elapsed         | 449       |
|    total_timesteps      | 17152     |
| train/                  |           |
|    approx_kl            | 0.2808076 |
|    clip_fraction        | 0.729     |
|    clip_range           | 0.2       |
|    entropy_loss         | 29.8      |
|    explained_variance   | 0.912     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.134    |
|    n_updates            | 1320      |
|    policy_gradient_loss | -0.119    |
|    std                  | 0.15      |
|    value_loss           | 0.00125   |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 38         |
|    iterations           | 68         |
|    time_elapsed         | 455        |
|    total_timesteps      | 17408  

----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 76         |
|    time_elapsed         | 557        |
|    total_timesteps      | 19456      |
| train/                  |            |
|    approx_kl            | 0.20816332 |
|    clip_fraction        | 0.836      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.7       |
|    explained_variance   | 0.944      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.101     |
|    n_updates            | 1500       |
|    policy_gradient_loss | -0.12      |
|    std                  | 0.15       |
|    value_loss           | 0.00658    |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 34           |
|    iterations           | 77           |
|    time_elapsed         | 563          |
|    t

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 87         |
|    time_elapsed         | 623        |
|    total_timesteps      | 22272      |
| train/                  |            |
|    approx_kl            | 0.13385025 |
|    clip_fraction        | 0.726      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.7       |
|    explained_variance   | 0.96       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.11      |
|    n_updates            | 1720       |
|    policy_gradient_loss | -0.105     |
|    std                  | 0.15       |
|    value_loss           | 0.00116    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 88         |
|    time_elapsed         | 629        |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 36         |
|    iterations           | 98         |
|    time_elapsed         | 690        |
|    total_timesteps      | 25088      |
| train/                  |            |
|    approx_kl            | 0.06865987 |
|    clip_fraction        | 0.749      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.7       |
|    explained_variance   | 0.938      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.141     |
|    n_updates            | 1940       |
|    policy_gradient_loss | -0.121     |
|    std                  | 0.15       |
|    value_loss           | 0.00144    |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 36          |
|    iterations           | 99          |
|    time_elapsed         | 696         |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 108         |
|    time_elapsed         | 773         |
|    total_timesteps      | 27648       |
| train/                  |             |
|    approx_kl            | 0.045088816 |
|    clip_fraction        | 0.748       |
|    clip_range           | 0.2         |
|    entropy_loss         | 29.6        |
|    explained_variance   | 0.981       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.117      |
|    n_updates            | 2140        |
|    policy_gradient_loss | -0.118      |
|    std                  | 0.15        |
|    value_loss           | 0.000699    |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 109         |
|    time_elapsed         | 779   

----------------------------------------
| time/                   |            |
|    fps                  | 36         |
|    iterations           | 119        |
|    time_elapsed         | 843        |
|    total_timesteps      | 30464      |
| train/                  |            |
|    approx_kl            | 0.08838774 |
|    clip_fraction        | 0.732      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.6       |
|    explained_variance   | 0.947      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.12      |
|    n_updates            | 2360       |
|    policy_gradient_loss | -0.108     |
|    std                  | 0.15       |
|    value_loss           | 0.00155    |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 36          |
|    iterations           | 120         |
|    time_elapsed         | 850         |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 130        |
|    time_elapsed         | 929        |
|    total_timesteps      | 33280      |
| train/                  |            |
|    approx_kl            | 0.06714458 |
|    clip_fraction        | 0.742      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.5       |
|    explained_variance   | 0.977      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.107     |
|    n_updates            | 2580       |
|    policy_gradient_loss | -0.103     |
|    std                  | 0.15       |
|    value_loss           | 0.00078    |
----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 35            |
|    iterations           | 131           |
|    time_elapsed         | 935           |
|

----------------------------------------
| time/                   |            |
|    fps                  | 36         |
|    iterations           | 141        |
|    time_elapsed         | 995        |
|    total_timesteps      | 36096      |
| train/                  |            |
|    approx_kl            | 0.18386813 |
|    clip_fraction        | 0.826      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.5       |
|    explained_variance   | 0.951      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.106     |
|    n_updates            | 2800       |
|    policy_gradient_loss | -0.119     |
|    std                  | 0.15       |
|    value_loss           | 0.00174    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 36         |
|    iterations           | 142        |
|    time_elapsed         | 1002       |
|    total_times

Eval num_timesteps=38400, episode_reward=0.68 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.63 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.67 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.64 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.67 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 5          |
|    mean_reward          | 0.67       |
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 150        |
|    time_elapsed         | 1105       |
|    total_timesteps      | 38

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 160         |
|    time_elapsed         | 1164        |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.010024436 |
|    clip_fraction        | 0.752       |
|    clip_range           | 0.2         |
|    entropy_loss         | 29.4        |
|    explained_variance   | 0.978       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.113      |
|    n_updates            | 3180        |
|    policy_gradient_loss | -0.113      |
|    std                  | 0.151       |
|    value_loss           | 0.00202     |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 161        |
|    time_elapsed         | 1171      

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 171        |
|    time_elapsed         | 1229       |
|    total_timesteps      | 43776      |
| train/                  |            |
|    approx_kl            | 0.07372416 |
|    clip_fraction        | 0.833      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.4       |
|    explained_variance   | 0.979      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.126     |
|    n_updates            | 3400       |
|    policy_gradient_loss | -0.12      |
|    std                  | 0.151      |
|    value_loss           | 0.00146    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 172        |
|    time_elapsed         | 1235       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 182        |
|    time_elapsed         | 1316       |
|    total_timesteps      | 46592      |
| train/                  |            |
|    approx_kl            | 0.12098867 |
|    clip_fraction        | 0.769      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.3       |
|    explained_variance   | 0.864      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.107     |
|    n_updates            | 3620       |
|    policy_gradient_loss | -0.112     |
|    std                  | 0.151      |
|    value_loss           | 0.00261    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 183        |
|    time_elapsed         | 1322       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 193        |
|    time_elapsed         | 1386       |
|    total_timesteps      | 49408      |
| train/                  |            |
|    approx_kl            | 0.53413355 |
|    clip_fraction        | 0.807      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.3       |
|    explained_variance   | 0.753      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.00573   |
|    n_updates            | 3840       |
|    policy_gradient_loss | -0.116     |
|    std                  | 0.151      |
|    value_loss           | 0.00345    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 194        |
|    time_elapsed         | 1392       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 204        |
|    time_elapsed         | 1475       |
|    total_timesteps      | 52224      |
| train/                  |            |
|    approx_kl            | 0.30405548 |
|    clip_fraction        | 0.796      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.2       |
|    explained_variance   | 0.955      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.132     |
|    n_updates            | 4060       |
|    policy_gradient_loss | -0.123     |
|    std                  | 0.151      |
|    value_loss           | 0.00119    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 205        |
|    time_elapsed         | 1481       |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 215         |
|    time_elapsed         | 1541        |
|    total_timesteps      | 55040       |
| train/                  |             |
|    approx_kl            | -0.12812077 |
|    clip_fraction        | 0.778       |
|    clip_range           | 0.2         |
|    entropy_loss         | 29.2        |
|    explained_variance   | 0.962       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0986     |
|    n_updates            | 4280        |
|    policy_gradient_loss | -0.117      |
|    std                  | 0.151       |
|    value_loss           | 0.00125     |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 216        |
|    time_elapsed         | 1547      

Eval num_timesteps=57600, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=57600, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=57600, episode_reward=0.71 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=57600, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=57600, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=57600, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=57600, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=57600, episode_reward=0.68 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=57600, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=57600, episode_rew

----------------------------------------
| time/                   |            |
|    fps                  | 34         |
|    iterations           | 234        |
|    time_elapsed         | 1712       |
|    total_timesteps      | 59904      |
| train/                  |            |
|    approx_kl            | 0.31703237 |
|    clip_fraction        | 0.806      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29         |
|    explained_variance   | 0.96       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.104     |
|    n_updates            | 4660       |
|    policy_gradient_loss | -0.123     |
|    std                  | 0.151      |
|    value_loss           | 0.000983   |
----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 35            |
|    iterations           | 235           |
|    time_elapsed         | 1718          |
|

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 245        |
|    time_elapsed         | 1778       |
|    total_timesteps      | 62720      |
| train/                  |            |
|    approx_kl            | 0.10795805 |
|    clip_fraction        | 0.784      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29         |
|    explained_variance   | 0.99       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.13      |
|    n_updates            | 4880       |
|    policy_gradient_loss | -0.114     |
|    std                  | 0.152      |
|    value_loss           | 0.000462   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 246        |
|    time_elapsed         | 1784       |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 256       |
|    time_elapsed         | 1863      |
|    total_timesteps      | 65536     |
| train/                  |           |
|    approx_kl            | 0.2609028 |
|    clip_fraction        | 0.842     |
|    clip_range           | 0.2       |
|    entropy_loss         | 28.9      |
|    explained_variance   | 0.967     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.087    |
|    n_updates            | 5100      |
|    policy_gradient_loss | -0.112    |
|    std                  | 0.152     |
|    value_loss           | 0.000602  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 257        |
|    time_elapsed         | 1869       |
|    total_timesteps      | 65792  

------------------------------------------
| time/                   |              |
|    fps                  | 35           |
|    iterations           | 267          |
|    time_elapsed         | 1932         |
|    total_timesteps      | 68352        |
| train/                  |              |
|    approx_kl            | -0.043276075 |
|    clip_fraction        | 0.816        |
|    clip_range           | 0.2          |
|    entropy_loss         | 28.9         |
|    explained_variance   | 0.966        |
|    learning_rate        | 1e-05        |
|    loss                 | -0.112       |
|    n_updates            | 5320         |
|    policy_gradient_loss | -0.137       |
|    std                  | 0.152        |
|    value_loss           | 0.00294      |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 268         |
|    time_elaps

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 278        |
|    time_elapsed         | 2021       |
|    total_timesteps      | 71168      |
| train/                  |            |
|    approx_kl            | 0.13357541 |
|    clip_fraction        | 0.789      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.8       |
|    explained_variance   | 0.981      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.123     |
|    n_updates            | 5540       |
|    policy_gradient_loss | -0.117     |
|    std                  | 0.152      |
|    value_loss           | 0.000721   |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 279         |
|    time_elapsed         | 2027        |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 289        |
|    time_elapsed         | 2089       |
|    total_timesteps      | 73984      |
| train/                  |            |
|    approx_kl            | 0.20366013 |
|    clip_fraction        | 0.786      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.8       |
|    explained_variance   | 0.977      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0263     |
|    n_updates            | 5760       |
|    policy_gradient_loss | -0.116     |
|    std                  | 0.152      |
|    value_loss           | 0.000686   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 290        |
|    time_elapsed         | 2096       |
|    total_times

Eval num_timesteps=76800, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=76800, episode_reward=0.68 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=76800, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=76800, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=76800, episode_reward=0.68 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=76800, episode_reward=0.68 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=76800, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=76800, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=76800, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=76800, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=76800, episode_reward=0.68 +/- 0.00
Episode length: 5.00 +/- 0.00

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 309        |
|    time_elapsed         | 2257       |
|    total_timesteps      | 79104      |
| train/                  |            |
|    approx_kl            | 0.09657071 |
|    clip_fraction        | 0.799      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.6       |
|    explained_variance   | 0.975      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.131     |
|    n_updates            | 6160       |
|    policy_gradient_loss | -0.122     |
|    std                  | 0.152      |
|    value_loss           | 0.00258    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 310        |
|    time_elapsed         | 2264       |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 35           |
|    iterations           | 320          |
|    time_elapsed         | 2326         |
|    total_timesteps      | 81920        |
| train/                  |              |
|    approx_kl            | -0.064924285 |
|    clip_fraction        | 0.801        |
|    clip_range           | 0.2          |
|    entropy_loss         | 28.6         |
|    explained_variance   | 0.972        |
|    learning_rate        | 1e-05        |
|    loss                 | -0.108       |
|    n_updates            | 6380         |
|    policy_gradient_loss | -0.126       |
|    std                  | 0.153        |
|    value_loss           | 0.00127      |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 321        |
|    time_elapsed  

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 331        |
|    time_elapsed         | 2415       |
|    total_timesteps      | 84736      |
| train/                  |            |
|    approx_kl            | 0.20795497 |
|    clip_fraction        | 0.836      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.5       |
|    explained_variance   | 0.972      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.129     |
|    n_updates            | 6600       |
|    policy_gradient_loss | -0.12      |
|    std                  | 0.153      |
|    value_loss           | 0.00116    |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 332       |
|    time_elapsed         | 2421      |
|    total_timesteps 

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 342        |
|    time_elapsed         | 2482       |
|    total_timesteps      | 87552      |
| train/                  |            |
|    approx_kl            | 0.34646508 |
|    clip_fraction        | 0.781      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.4       |
|    explained_variance   | 0.962      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.11      |
|    n_updates            | 6820       |
|    policy_gradient_loss | -0.119     |
|    std                  | 0.153      |
|    value_loss           | 0.000731   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 343        |
|    time_elapsed         | 2489       |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 353         |
|    time_elapsed         | 2566        |
|    total_timesteps      | 90368       |
| train/                  |             |
|    approx_kl            | 0.021805823 |
|    clip_fraction        | 0.807       |
|    clip_range           | 0.2         |
|    entropy_loss         | 28.3        |
|    explained_variance   | 0.949       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.115      |
|    n_updates            | 7040        |
|    policy_gradient_loss | -0.12       |
|    std                  | 0.153       |
|    value_loss           | 0.00106     |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 354         |
|    time_elapsed         | 2573  

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 364        |
|    time_elapsed         | 2634       |
|    total_timesteps      | 93184      |
| train/                  |            |
|    approx_kl            | 0.33013237 |
|    clip_fraction        | 0.765      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.2       |
|    explained_variance   | 0.984      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.1       |
|    n_updates            | 7260       |
|    policy_gradient_loss | -0.11      |
|    std                  | 0.153      |
|    value_loss           | 0.000596   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 365       |
|    time_elapsed         | 2640      |
|    total_timesteps 

Eval num_timesteps=96000, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=96000, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=96000, episode_reward=0.65 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=96000, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=96000, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=96000, episode_reward=0.67 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=96000, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=96000, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=96000, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=96000, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=96000, episode_reward

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 384        |
|    time_elapsed         | 2795       |
|    total_timesteps      | 98304      |
| train/                  |            |
|    approx_kl            | 0.05556583 |
|    clip_fraction        | 0.798      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.1       |
|    explained_variance   | 0.991      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.115     |
|    n_updates            | 7660       |
|    policy_gradient_loss | -0.114     |
|    std                  | 0.154      |
|    value_loss           | 0.000536   |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 385         |
|    time_elapsed         | 2802        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 35           |
|    iterations           | 395          |
|    time_elapsed         | 2865         |
|    total_timesteps      | 101120       |
| train/                  |              |
|    approx_kl            | -0.046591267 |
|    clip_fraction        | 0.784        |
|    clip_range           | 0.2          |
|    entropy_loss         | 28           |
|    explained_variance   | 0.987        |
|    learning_rate        | 1e-05        |
|    loss                 | -0.0273      |
|    n_updates            | 7880         |
|    policy_gradient_loss | -0.112       |
|    std                  | 0.154        |
|    value_loss           | 0.000564     |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 396        |
|    time_elapsed  

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 406        |
|    time_elapsed         | 2951       |
|    total_timesteps      | 103936     |
| train/                  |            |
|    approx_kl            | 0.27267003 |
|    clip_fraction        | 0.836      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.9       |
|    explained_variance   | 0.985      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0995    |
|    n_updates            | 8100       |
|    policy_gradient_loss | -0.118     |
|    std                  | 0.154      |
|    value_loss           | 0.00106    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 407        |
|    time_elapsed         | 2957       |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 417         |
|    time_elapsed         | 3017        |
|    total_timesteps      | 106752      |
| train/                  |             |
|    approx_kl            | -0.07460098 |
|    clip_fraction        | 0.822       |
|    clip_range           | 0.2         |
|    entropy_loss         | 27.8        |
|    explained_variance   | 0.966       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.141      |
|    n_updates            | 8320        |
|    policy_gradient_loss | -0.132      |
|    std                  | 0.154       |
|    value_loss           | 0.000778    |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 35            |
|    iterations           | 418           |
|    time_elapsed         

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 427        |
|    time_elapsed         | 3095       |
|    total_timesteps      | 109312     |
| train/                  |            |
|    approx_kl            | 0.36677623 |
|    clip_fraction        | 0.783      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.8       |
|    explained_variance   | 0.943      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.141     |
|    n_updates            | 8520       |
|    policy_gradient_loss | -0.116     |
|    std                  | 0.155      |
|    value_loss           | 0.000805   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 428        |
|    time_elapsed         | 3102       |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 438         |
|    time_elapsed         | 3168        |
|    total_timesteps      | 112128      |
| train/                  |             |
|    approx_kl            | 0.043800373 |
|    clip_fraction        | 0.812       |
|    clip_range           | 0.2         |
|    entropy_loss         | 27.7        |
|    explained_variance   | 0.984       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.122      |
|    n_updates            | 8740        |
|    policy_gradient_loss | -0.125      |
|    std                  | 0.155       |
|    value_loss           | 0.000976    |
-----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 439       |
|    time_elapsed         | 3175      |
| 

---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 449       |
|    time_elapsed         | 3239      |
|    total_timesteps      | 114944    |
| train/                  |           |
|    approx_kl            | 0.1549114 |
|    clip_fraction        | 0.81      |
|    clip_range           | 0.2       |
|    entropy_loss         | 27.5      |
|    explained_variance   | 0.983     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.121    |
|    n_updates            | 8960      |
|    policy_gradient_loss | -0.118    |
|    std                  | 0.155     |
|    value_loss           | 0.00095   |
---------------------------------------
Eval num_timesteps=115200, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=115200, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=11520

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 458         |
|    time_elapsed         | 3332        |
|    total_timesteps      | 117248      |
| train/                  |             |
|    approx_kl            | 0.111488625 |
|    clip_fraction        | 0.783       |
|    clip_range           | 0.2         |
|    entropy_loss         | 27.5        |
|    explained_variance   | 0.976       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.103      |
|    n_updates            | 9140        |
|    policy_gradient_loss | -0.118      |
|    std                  | 0.155       |
|    value_loss           | 0.000488    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 459        |
|    time_elapsed         | 3338      

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 469         |
|    time_elapsed         | 3401        |
|    total_timesteps      | 120064      |
| train/                  |             |
|    approx_kl            | 0.049990293 |
|    clip_fraction        | 0.804       |
|    clip_range           | 0.2         |
|    entropy_loss         | 27.4        |
|    explained_variance   | 0.987       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0863     |
|    n_updates            | 9360        |
|    policy_gradient_loss | -0.117      |
|    std                  | 0.156       |
|    value_loss           | 0.000636    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 470        |
|    time_elapsed         | 3407      

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 479        |
|    time_elapsed         | 3484       |
|    total_timesteps      | 122624     |
| train/                  |            |
|    approx_kl            | 0.09088046 |
|    clip_fraction        | 0.81       |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.3       |
|    explained_variance   | 0.987      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.101     |
|    n_updates            | 9560       |
|    policy_gradient_loss | -0.113     |
|    std                  | 0.156      |
|    value_loss           | 0.00127    |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 480         |
|    time_elapsed         | 3490        |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 490         |
|    time_elapsed         | 3552        |
|    total_timesteps      | 125440      |
| train/                  |             |
|    approx_kl            | 0.042199984 |
|    clip_fraction        | 0.76        |
|    clip_range           | 0.2         |
|    entropy_loss         | 27.2        |
|    explained_variance   | 0.971       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.11       |
|    n_updates            | 9780        |
|    policy_gradient_loss | -0.11       |
|    std                  | 0.156       |
|    value_loss           | 0.000557    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 491        |
|    time_elapsed         | 3558      

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 501         |
|    time_elapsed         | 3640        |
|    total_timesteps      | 128256      |
| train/                  |             |
|    approx_kl            | 0.013574973 |
|    clip_fraction        | 0.852       |
|    clip_range           | 0.2         |
|    entropy_loss         | 27.1        |
|    explained_variance   | 0.987       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0888     |
|    n_updates            | 10000       |
|    policy_gradient_loss | -0.119      |
|    std                  | 0.156       |
|    value_loss           | 0.000514    |
-----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 502       |
|    time_elapsed         | 3646      |
| 

---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 512       |
|    time_elapsed         | 3705      |
|    total_timesteps      | 131072    |
| train/                  |           |
|    approx_kl            | 0.3144818 |
|    clip_fraction        | 0.788     |
|    clip_range           | 0.2       |
|    entropy_loss         | 27        |
|    explained_variance   | 0.974     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.119    |
|    n_updates            | 10220     |
|    policy_gradient_loss | -0.119    |
|    std                  | 0.157     |
|    value_loss           | 0.00057   |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 513        |
|    time_elapsed         | 3711       |
|    total_timesteps      | 131328 

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 523        |
|    time_elapsed         | 3773       |
|    total_timesteps      | 133888     |
| train/                  |            |
|    approx_kl            | 0.16619343 |
|    clip_fraction        | 0.781      |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.9       |
|    explained_variance   | 0.99       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0437    |
|    n_updates            | 10440      |
|    policy_gradient_loss | -0.116     |
|    std                  | 0.157      |
|    value_loss           | 0.00033    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 524        |
|    time_elapsed         | 3779       |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 532         |
|    time_elapsed         | 3873        |
|    total_timesteps      | 136192      |
| train/                  |             |
|    approx_kl            | 0.118719175 |
|    clip_fraction        | 0.786       |
|    clip_range           | 0.2         |
|    entropy_loss         | 26.8        |
|    explained_variance   | 0.986       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.146      |
|    n_updates            | 10620       |
|    policy_gradient_loss | -0.119      |
|    std                  | 0.157       |
|    value_loss           | 0.000546    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 533        |
|    time_elapsed         | 3879      

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 543        |
|    time_elapsed         | 3941       |
|    total_timesteps      | 139008     |
| train/                  |            |
|    approx_kl            | 0.14574952 |
|    clip_fraction        | 0.8        |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.7       |
|    explained_variance   | 0.993      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0703    |
|    n_updates            | 10840      |
|    policy_gradient_loss | -0.123     |
|    std                  | 0.157      |
|    value_loss           | 0.00031    |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 35           |
|    iterations           | 544          |
|    time_elapsed         | 3946         |
|    t

--------------------------------------
| time/                   |          |
|    fps                  | 35       |
|    iterations           | 554      |
|    time_elapsed         | 4027     |
|    total_timesteps      | 141824   |
| train/                  |          |
|    approx_kl            | 0.3344   |
|    clip_fraction        | 0.804    |
|    clip_range           | 0.2      |
|    entropy_loss         | 26.6     |
|    explained_variance   | 0.987    |
|    learning_rate        | 1e-05    |
|    loss                 | -0.105   |
|    n_updates            | 11060    |
|    policy_gradient_loss | -0.117   |
|    std                  | 0.158    |
|    value_loss           | 0.00054  |
--------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 555        |
|    time_elapsed         | 4034       |
|    total_timesteps      | 142080     |
| train/     

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 565        |
|    time_elapsed         | 4092       |
|    total_timesteps      | 144640     |
| train/                  |            |
|    approx_kl            | 0.15592648 |
|    clip_fraction        | 0.796      |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.5       |
|    explained_variance   | 0.989      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.106     |
|    n_updates            | 11280      |
|    policy_gradient_loss | -0.119     |
|    std                  | 0.158      |
|    value_loss           | 0.000615   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 566        |
|    time_elapsed         | 4098       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 576        |
|    time_elapsed         | 4180       |
|    total_timesteps      | 147456     |
| train/                  |            |
|    approx_kl            | 0.05352927 |
|    clip_fraction        | 0.827      |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.5       |
|    explained_variance   | 0.984      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.114     |
|    n_updates            | 11500      |
|    policy_gradient_loss | -0.108     |
|    std                  | 0.158      |
|    value_loss           | 0.000492   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 577        |
|    time_elapsed         | 4187       |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 587         |
|    time_elapsed         | 4249        |
|    total_timesteps      | 150272      |
| train/                  |             |
|    approx_kl            | -0.09547509 |
|    clip_fraction        | 0.838       |
|    clip_range           | 0.2         |
|    entropy_loss         | 26.4        |
|    explained_variance   | 0.978       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0855     |
|    n_updates            | 11720       |
|    policy_gradient_loss | -0.132      |
|    std                  | 0.158       |
|    value_loss           | 0.000541    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 588        |
|    time_elapsed         | 4254      

---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 598       |
|    time_elapsed         | 4320      |
|    total_timesteps      | 153088    |
| train/                  |           |
|    approx_kl            | 0.1274455 |
|    clip_fraction        | 0.788     |
|    clip_range           | 0.2       |
|    entropy_loss         | 26.3      |
|    explained_variance   | 0.987     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.101    |
|    n_updates            | 11940     |
|    policy_gradient_loss | -0.118    |
|    std                  | 0.158     |
|    value_loss           | 0.000484  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 599        |
|    time_elapsed         | 4326       |
|    total_timesteps      | 153344 

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 607        |
|    time_elapsed         | 4414       |
|    total_timesteps      | 155392     |
| train/                  |            |
|    approx_kl            | 0.10109797 |
|    clip_fraction        | 0.81       |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.2       |
|    explained_variance   | 0.978      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0364    |
|    n_updates            | 12120      |
|    policy_gradient_loss | -0.118     |
|    std                  | 0.159      |
|    value_loss           | 0.000564   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 608        |
|    time_elapsed         | 4420       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 618        |
|    time_elapsed         | 4481       |
|    total_timesteps      | 158208     |
| train/                  |            |
|    approx_kl            | 0.22588265 |
|    clip_fraction        | 0.792      |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.1       |
|    explained_variance   | 0.969      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.099     |
|    n_updates            | 12340      |
|    policy_gradient_loss | -0.112     |
|    std                  | 0.159      |
|    value_loss           | 0.000651   |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 619         |
|    time_elapsed         | 4487        |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 629         |
|    time_elapsed         | 4568        |
|    total_timesteps      | 161024      |
| train/                  |             |
|    approx_kl            | 0.022379596 |
|    clip_fraction        | 0.783       |
|    clip_range           | 0.2         |
|    entropy_loss         | 26          |
|    explained_variance   | 0.992       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.131      |
|    n_updates            | 12560       |
|    policy_gradient_loss | -0.112      |
|    std                  | 0.159       |
|    value_loss           | 0.000449    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 630        |
|    time_elapsed         | 4575      

---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 640       |
|    time_elapsed         | 4637      |
|    total_timesteps      | 163840    |
| train/                  |           |
|    approx_kl            | 0.1264329 |
|    clip_fraction        | 0.795     |
|    clip_range           | 0.2       |
|    entropy_loss         | 25.9      |
|    explained_variance   | 0.976     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.0523   |
|    n_updates            | 12780     |
|    policy_gradient_loss | -0.111    |
|    std                  | 0.159     |
|    value_loss           | 0.000531  |
---------------------------------------
--------------------------------------
| time/                   |          |
|    fps                  | 35       |
|    iterations           | 641      |
|    time_elapsed         | 4643     |
|    total_timesteps      | 164096   |
| trai

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 651        |
|    time_elapsed         | 4724       |
|    total_timesteps      | 166656     |
| train/                  |            |
|    approx_kl            | 0.34280813 |
|    clip_fraction        | 0.85       |
|    clip_range           | 0.2        |
|    entropy_loss         | 25.8       |
|    explained_variance   | 0.969      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.113     |
|    n_updates            | 13000      |
|    policy_gradient_loss | -0.114     |
|    std                  | 0.16       |
|    value_loss           | 0.000333   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 652        |
|    time_elapsed         | 4730       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 662        |
|    time_elapsed         | 4789       |
|    total_timesteps      | 169472     |
| train/                  |            |
|    approx_kl            | 0.20404884 |
|    clip_fraction        | 0.78       |
|    clip_range           | 0.2        |
|    entropy_loss         | 25.7       |
|    explained_variance   | 0.984      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.134     |
|    n_updates            | 13220      |
|    policy_gradient_loss | -0.113     |
|    std                  | 0.16       |
|    value_loss           | 0.000312   |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 663         |
|    time_elapsed         | 4796        |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 673        |
|    time_elapsed         | 4857       |
|    total_timesteps      | 172288     |
| train/                  |            |
|    approx_kl            | 0.18191569 |
|    clip_fraction        | 0.787      |
|    clip_range           | 0.2        |
|    entropy_loss         | 25.6       |
|    explained_variance   | 0.993      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0921    |
|    n_updates            | 13440      |
|    policy_gradient_loss | -0.116     |
|    std                  | 0.16       |
|    value_loss           | 0.000306   |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 674         |
|    time_elapsed         | 4864        |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 35            |
|    iterations           | 682           |
|    time_elapsed         | 4955          |
|    total_timesteps      | 174592        |
| train/                  |               |
|    approx_kl            | -0.0014446825 |
|    clip_fraction        | 0.805         |
|    clip_range           | 0.2           |
|    entropy_loss         | 25.5          |
|    explained_variance   | 0.981         |
|    learning_rate        | 1e-05         |
|    loss                 | -0.13         |
|    n_updates            | 13620         |
|    policy_gradient_loss | -0.123        |
|    std                  | 0.16          |
|    value_loss           | 0.00033       |
-------------------------------------------
--------------------------------------
| time/                   |          |
|    fps                  | 35       |
|    iterations           | 683      |
|    tim

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 693        |
|    time_elapsed         | 5021       |
|    total_timesteps      | 177408     |
| train/                  |            |
|    approx_kl            | 0.15493141 |
|    clip_fraction        | 0.785      |
|    clip_range           | 0.2        |
|    entropy_loss         | 25.4       |
|    explained_variance   | 0.992      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.11      |
|    n_updates            | 13840      |
|    policy_gradient_loss | -0.12      |
|    std                  | 0.161      |
|    value_loss           | 0.000344   |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 694         |
|    time_elapsed         | 5027        |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 703        |
|    time_elapsed         | 5102       |
|    total_timesteps      | 179968     |
| train/                  |            |
|    approx_kl            | 0.06890459 |
|    clip_fraction        | 0.788      |
|    clip_range           | 0.2        |
|    entropy_loss         | 25.3       |
|    explained_variance   | 0.992      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.145     |
|    n_updates            | 14040      |
|    policy_gradient_loss | -0.12      |
|    std                  | 0.161      |
|    value_loss           | 0.00033    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 704        |
|    time_elapsed         | 5108       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 714        |
|    time_elapsed         | 5172       |
|    total_timesteps      | 182784     |
| train/                  |            |
|    approx_kl            | 0.14435163 |
|    clip_fraction        | 0.814      |
|    clip_range           | 0.2        |
|    entropy_loss         | 25.2       |
|    explained_variance   | 0.99       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.071     |
|    n_updates            | 14260      |
|    policy_gradient_loss | -0.114     |
|    std                  | 0.161      |
|    value_loss           | 0.00076    |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 35           |
|    iterations           | 715          |
|    time_elapsed         | 5178         |
|    t

Eval num_timesteps=185600, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=185600, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.689        |
| time/                   |              |
|    fps                  | 35           |
|    iterations           | 725          |
|    time_elapsed         | 5257         |
|    total_timesteps      | 185600       |
| train/                  |              |
|    approx_kl            | -0.036305755 |
|    clip_fraction        | 0.78         |
|    clip_range           | 0.2          |
|    entropy_loss         | 25.1         |
|    explained_variance   | 0.984        |
|    learning_rate        | 1e-05        |
|    loss                 | -0.118       |
|    n_updates            | 14480        |
|    policy_gradient_loss | -0.106       |
|    std   

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 735        |
|    time_elapsed         | 5320       |
|    total_timesteps      | 188160     |
| train/                  |            |
|    approx_kl            | 0.29451865 |
|    clip_fraction        | 0.818      |
|    clip_range           | 0.2        |
|    entropy_loss         | 25         |
|    explained_variance   | 0.964      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0996    |
|    n_updates            | 14680      |
|    policy_gradient_loss | -0.121     |
|    std                  | 0.162      |
|    value_loss           | 0.000905   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 736       |
|    time_elapsed         | 5326      |
|    total_timesteps 

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 746        |
|    time_elapsed         | 5387       |
|    total_timesteps      | 190976     |
| train/                  |            |
|    approx_kl            | 0.34587988 |
|    clip_fraction        | 0.836      |
|    clip_range           | 0.2        |
|    entropy_loss         | 24.9       |
|    explained_variance   | 0.989      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0716    |
|    n_updates            | 14900      |
|    policy_gradient_loss | -0.122     |
|    std                  | 0.162      |
|    value_loss           | 0.000257   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 747        |
|    time_elapsed         | 5393       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 755        |
|    time_elapsed         | 5479       |
|    total_timesteps      | 193280     |
| train/                  |            |
|    approx_kl            | 0.14445825 |
|    clip_fraction        | 0.795      |
|    clip_range           | 0.2        |
|    entropy_loss         | 24.8       |
|    explained_variance   | 0.989      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.107     |
|    n_updates            | 15080      |
|    policy_gradient_loss | -0.106     |
|    std                  | 0.162      |
|    value_loss           | 0.00041    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 756        |
|    time_elapsed         | 5485       |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 766       |
|    time_elapsed         | 5545      |
|    total_timesteps      | 196096    |
| train/                  |           |
|    approx_kl            | 0.2529084 |
|    clip_fraction        | 0.844     |
|    clip_range           | 0.2       |
|    entropy_loss         | 24.7      |
|    explained_variance   | 0.985     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.0889   |
|    n_updates            | 15300     |
|    policy_gradient_loss | -0.114    |
|    std                  | 0.162     |
|    value_loss           | 0.000594  |
---------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 767       |
|    time_elapsed         | 5551      |
|    total_timesteps      | 196352    |


----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 777        |
|    time_elapsed         | 5631       |
|    total_timesteps      | 198912     |
| train/                  |            |
|    approx_kl            | 0.25837678 |
|    clip_fraction        | 0.782      |
|    clip_range           | 0.2        |
|    entropy_loss         | 24.6       |
|    explained_variance   | 0.966      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0727    |
|    n_updates            | 15520      |
|    policy_gradient_loss | -0.113     |
|    std                  | 0.163      |
|    value_loss           | 0.000421   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 778        |
|    time_elapsed         | 5637       |
|    total_times

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

seed 5
seed 5: model definition ..
Using cuda device
seed 5: learning ..




----------------------------------------
| time/                   |            |
|    fps                  | 106        |
|    iterations           | 1          |
|    time_elapsed         | 2          |
|    total_timesteps      | 256        |
| train/                  |            |
|    approx_kl            | 0.39190567 |
|    clip_fraction        | 0.837      |
|    clip_range           | 0.2        |
|    entropy_loss         | 24.6       |
|    explained_variance   | 0.961      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.1       |
|    n_updates            | 15640      |
|    policy_gradient_loss | -0.125     |
|    std                  | 0.163      |
|    value_loss           | 0.00058    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 59         |
|    iterations           | 2          |
|    time_elapsed         | 8          |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 44          |
|    iterations           | 12          |
|    time_elapsed         | 68          |
|    total_timesteps      | 3072        |
| train/                  |             |
|    approx_kl            | 0.022736024 |
|    clip_fraction        | 0.595       |
|    clip_range           | 0.2         |
|    entropy_loss         | 29.8        |
|    explained_variance   | 0.477       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0945     |
|    n_updates            | 220         |
|    policy_gradient_loss | -0.0904     |
|    std                  | 0.15        |
|    value_loss           | 0.00956     |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 44           |
|    iterations           | 13           |
|    time_elapsed         | 74

-----------------------------------------
| time/                   |             |
|    fps                  | 43          |
|    iterations           | 23          |
|    time_elapsed         | 135         |
|    total_timesteps      | 5888        |
| train/                  |             |
|    approx_kl            | 0.028257377 |
|    clip_fraction        | 0.666       |
|    clip_range           | 0.2         |
|    entropy_loss         | 29.8        |
|    explained_variance   | 0.966       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.121      |
|    n_updates            | 440         |
|    policy_gradient_loss | -0.112      |
|    std                  | 0.15        |
|    value_loss           | 0.00159     |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 43          |
|    iterations           | 24          |
|    time_elapsed         | 141   

---------------------------------------
| time/                   |           |
|    fps                  | 38        |
|    iterations           | 34        |
|    time_elapsed         | 225       |
|    total_timesteps      | 8704      |
| train/                  |           |
|    approx_kl            | 0.2275356 |
|    clip_fraction        | 0.714     |
|    clip_range           | 0.2       |
|    entropy_loss         | 29.8      |
|    explained_variance   | 0.891     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.12     |
|    n_updates            | 660       |
|    policy_gradient_loss | -0.118    |
|    std                  | 0.15      |
|    value_loss           | 0.00818   |
---------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 38          |
|    iterations           | 35          |
|    time_elapsed         | 231         |
|    total_timesteps      | 89

----------------------------------------
| time/                   |            |
|    fps                  | 39         |
|    iterations           | 45         |
|    time_elapsed         | 292        |
|    total_timesteps      | 11520      |
| train/                  |            |
|    approx_kl            | 0.41326272 |
|    clip_fraction        | 0.763      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.8       |
|    explained_variance   | 0.928      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.121     |
|    n_updates            | 880        |
|    policy_gradient_loss | -0.116     |
|    std                  | 0.15       |
|    value_loss           | 0.00115    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 39         |
|    iterations           | 46         |
|    time_elapsed         | 297        |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 38         |
|    iterations           | 55         |
|    time_elapsed         | 370        |
|    total_timesteps      | 14080      |
| train/                  |            |
|    approx_kl            | 0.05713274 |
|    clip_fraction        | 0.702      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.7       |
|    explained_variance   | 0.937      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0959    |
|    n_updates            | 1080       |
|    policy_gradient_loss | -0.104     |
|    std                  | 0.15       |
|    value_loss           | 0.00148    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 38         |
|    iterations           | 56         |
|    time_elapsed         | 376        |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 38         |
|    iterations           | 66         |
|    time_elapsed         | 437        |
|    total_timesteps      | 16896      |
| train/                  |            |
|    approx_kl            | 0.11474283 |
|    clip_fraction        | 0.797      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.7       |
|    explained_variance   | 0.975      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0735    |
|    n_updates            | 1300       |
|    policy_gradient_loss | -0.11      |
|    std                  | 0.15       |
|    value_loss           | 0.00336    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 38         |
|    iterations           | 67         |
|    time_elapsed         | 443        |
|    total_times

Eval num_timesteps=19200, episode_reward=0.65 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=19200, episode_reward=0.68 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=19200, episode_reward=0.60 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=19200, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=19200, episode_reward=0.63 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=19200, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.656       |
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 75          |
|    time_elapsed         | 534         |
|    total_timesteps  

---------------------------------------
| time/                   |           |
|    fps                  | 36        |
|    iterations           | 85        |
|    time_elapsed         | 596       |
|    total_timesteps      | 21760     |
| train/                  |           |
|    approx_kl            | 0.2620875 |
|    clip_fraction        | 0.737     |
|    clip_range           | 0.2       |
|    entropy_loss         | 29.7      |
|    explained_variance   | 0.976     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.11     |
|    n_updates            | 1680      |
|    policy_gradient_loss | -0.11     |
|    std                  | 0.15      |
|    value_loss           | 0.000785  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 36         |
|    iterations           | 86         |
|    time_elapsed         | 602        |
|    total_timesteps      | 22016  

----------------------------------------
| time/                   |            |
|    fps                  | 37         |
|    iterations           | 96         |
|    time_elapsed         | 661        |
|    total_timesteps      | 24576      |
| train/                  |            |
|    approx_kl            | 0.27938068 |
|    clip_fraction        | 0.819      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.6       |
|    explained_variance   | 0.95       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0639    |
|    n_updates            | 1900       |
|    policy_gradient_loss | -0.115     |
|    std                  | 0.15       |
|    value_loss           | 0.00206    |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 37          |
|    iterations           | 97          |
|    time_elapsed         | 667         |
|    total_

---------------------------------------
| time/                   |           |
|    fps                  | 36        |
|    iterations           | 106       |
|    time_elapsed         | 743       |
|    total_timesteps      | 27136     |
| train/                  |           |
|    approx_kl            | 0.3310215 |
|    clip_fraction        | 0.809     |
|    clip_range           | 0.2       |
|    entropy_loss         | 29.6      |
|    explained_variance   | 0.953     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.0465   |
|    n_updates            | 2100      |
|    policy_gradient_loss | -0.104    |
|    std                  | 0.15      |
|    value_loss           | 0.00206   |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 36         |
|    iterations           | 107        |
|    time_elapsed         | 750        |
|    total_timesteps      | 27392  

----------------------------------------
| time/                   |            |
|    fps                  | 36         |
|    iterations           | 117        |
|    time_elapsed         | 810        |
|    total_timesteps      | 29952      |
| train/                  |            |
|    approx_kl            | 0.08621453 |
|    clip_fraction        | 0.753      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.5       |
|    explained_variance   | 0.973      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.119     |
|    n_updates            | 2320       |
|    policy_gradient_loss | -0.112     |
|    std                  | 0.15       |
|    value_loss           | 0.00127    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 37         |
|    iterations           | 118        |
|    time_elapsed         | 816        |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 36         |
|    iterations           | 128        |
|    time_elapsed         | 897        |
|    total_timesteps      | 32768      |
| train/                  |            |
|    approx_kl            | 0.14088902 |
|    clip_fraction        | 0.756      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.5       |
|    explained_variance   | 0.983      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0774    |
|    n_updates            | 2540       |
|    policy_gradient_loss | -0.118     |
|    std                  | 0.15       |
|    value_loss           | 0.00069    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 36         |
|    iterations           | 129        |
|    time_elapsed         | 903        |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 36         |
|    iterations           | 139        |
|    time_elapsed         | 964        |
|    total_timesteps      | 35584      |
| train/                  |            |
|    approx_kl            | 0.08378169 |
|    clip_fraction        | 0.733      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.5       |
|    explained_variance   | 0.968      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.00838   |
|    n_updates            | 2760       |
|    policy_gradient_loss | -0.117     |
|    std                  | 0.15       |
|    value_loss           | 0.00142    |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 36          |
|    iterations           | 140         |
|    time_elapsed         | 970         |
|    total_

Eval num_timesteps=38400, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.67 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=38400, episode_reward=0.67 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=38400, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.67 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.68 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.68 +/- 0.00
Episode length: 5.00 +/- 0

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 158        |
|    time_elapsed         | 1136       |
|    total_timesteps      | 40448      |
| train/                  |            |
|    approx_kl            | 0.13873515 |
|    clip_fraction        | 0.78       |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.4       |
|    explained_variance   | 0.989      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.117     |
|    n_updates            | 3140       |
|    policy_gradient_loss | -0.117     |
|    std                  | 0.151      |
|    value_loss           | 0.000383   |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 159         |
|    time_elapsed         | 1142        |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 169         |
|    time_elapsed         | 1205        |
|    total_timesteps      | 43264       |
| train/                  |             |
|    approx_kl            | 0.045049135 |
|    clip_fraction        | 0.735       |
|    clip_range           | 0.2         |
|    entropy_loss         | 29.3        |
|    explained_variance   | 0.971       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0631     |
|    n_updates            | 3360        |
|    policy_gradient_loss | -0.109      |
|    std                  | 0.151       |
|    value_loss           | 0.00155     |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 170         |
|    time_elapsed         | 1211  

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 180        |
|    time_elapsed         | 1290       |
|    total_timesteps      | 46080      |
| train/                  |            |
|    approx_kl            | 0.11597881 |
|    clip_fraction        | 0.774      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.3       |
|    explained_variance   | 0.986      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0857    |
|    n_updates            | 3580       |
|    policy_gradient_loss | -0.105     |
|    std                  | 0.151      |
|    value_loss           | 0.000726   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 181       |
|    time_elapsed         | 1297      |
|    total_timesteps 

--------------------------------------
| time/                   |          |
|    fps                  | 35       |
|    iterations           | 191      |
|    time_elapsed         | 1359     |
|    total_timesteps      | 48896    |
| train/                  |          |
|    approx_kl            | 0.285002 |
|    clip_fraction        | 0.822    |
|    clip_range           | 0.2      |
|    entropy_loss         | 29.2     |
|    explained_variance   | 0.973    |
|    learning_rate        | 1e-05    |
|    loss                 | -0.107   |
|    n_updates            | 3800     |
|    policy_gradient_loss | -0.115   |
|    std                  | 0.151    |
|    value_loss           | 0.000891 |
--------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 192        |
|    time_elapsed         | 1366       |
|    total_timesteps      | 49152      |
| train/     

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 202        |
|    time_elapsed         | 1446       |
|    total_timesteps      | 51712      |
| train/                  |            |
|    approx_kl            | 0.45119727 |
|    clip_fraction        | 0.798      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.2       |
|    explained_variance   | 0.822      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.115     |
|    n_updates            | 4020       |
|    policy_gradient_loss | -0.121     |
|    std                  | 0.151      |
|    value_loss           | 0.00137    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 203        |
|    time_elapsed         | 1452       |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 36        |
|    iterations           | 213       |
|    time_elapsed         | 1512      |
|    total_timesteps      | 54528     |
| train/                  |           |
|    approx_kl            | 0.3257838 |
|    clip_fraction        | 0.758     |
|    clip_range           | 0.2       |
|    entropy_loss         | 29.1      |
|    explained_variance   | 0.791     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.0991   |
|    n_updates            | 4240      |
|    policy_gradient_loss | -0.119    |
|    std                  | 0.151     |
|    value_loss           | 0.00221   |
---------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 36        |
|    iterations           | 214       |
|    time_elapsed         | 1518      |
|    total_timesteps      | 54784     |


----------------------------------------
| time/                   |            |
|    fps                  | 36         |
|    iterations           | 224        |
|    time_elapsed         | 1578       |
|    total_timesteps      | 57344      |
| train/                  |            |
|    approx_kl            | 0.10947141 |
|    clip_fraction        | 0.79       |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.1       |
|    explained_variance   | 0.987      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.116     |
|    n_updates            | 4460       |
|    policy_gradient_loss | -0.113     |
|    std                  | 0.151      |
|    value_loss           | 0.000723   |
----------------------------------------
Eval num_timesteps=57600, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=57600, episode_reward=0.68 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval nu

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 233        |
|    time_elapsed         | 1684       |
|    total_timesteps      | 59648      |
| train/                  |            |
|    approx_kl            | 0.10882126 |
|    clip_fraction        | 0.796      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29         |
|    explained_variance   | 0.982      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0115    |
|    n_updates            | 4640       |
|    policy_gradient_loss | -0.115     |
|    std                  | 0.152      |
|    value_loss           | 0.000634   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 234        |
|    time_elapsed         | 1690       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 244        |
|    time_elapsed         | 1751       |
|    total_timesteps      | 62464      |
| train/                  |            |
|    approx_kl            | 0.09916749 |
|    clip_fraction        | 0.791      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.9       |
|    explained_variance   | 0.986      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0635    |
|    n_updates            | 4860       |
|    policy_gradient_loss | -0.12      |
|    std                  | 0.152      |
|    value_loss           | 0.00111    |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 245       |
|    time_elapsed         | 1757      |
|    total_timesteps 

---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 255       |
|    time_elapsed         | 1840      |
|    total_timesteps      | 65280     |
| train/                  |           |
|    approx_kl            | 0.2256909 |
|    clip_fraction        | 0.782     |
|    clip_range           | 0.2       |
|    entropy_loss         | 28.8      |
|    explained_variance   | 0.979     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.115    |
|    n_updates            | 5080      |
|    policy_gradient_loss | -0.103    |
|    std                  | 0.152     |
|    value_loss           | 0.000506  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 256        |
|    time_elapsed         | 1847       |
|    total_timesteps      | 65536  

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 266         |
|    time_elapsed         | 1910        |
|    total_timesteps      | 68096       |
| train/                  |             |
|    approx_kl            | 0.122331426 |
|    clip_fraction        | 0.846       |
|    clip_range           | 0.2         |
|    entropy_loss         | 28.8        |
|    explained_variance   | 0.988       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0988     |
|    n_updates            | 5300        |
|    policy_gradient_loss | -0.121      |
|    std                  | 0.152       |
|    value_loss           | 0.00104     |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 267         |
|    time_elapsed         | 1915  

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 277        |
|    time_elapsed         | 1999       |
|    total_timesteps      | 70912      |
| train/                  |            |
|    approx_kl            | 0.06707946 |
|    clip_fraction        | 0.806      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.6       |
|    explained_variance   | 0.969      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0362    |
|    n_updates            | 5520       |
|    policy_gradient_loss | -0.122     |
|    std                  | 0.152      |
|    value_loss           | 0.00137    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 278        |
|    time_elapsed         | 2005       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 288        |
|    time_elapsed         | 2065       |
|    total_timesteps      | 73728      |
| train/                  |            |
|    approx_kl            | 0.14941832 |
|    clip_fraction        | 0.78       |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.6       |
|    explained_variance   | 0.982      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.108     |
|    n_updates            | 5740       |
|    policy_gradient_loss | -0.116     |
|    std                  | 0.153      |
|    value_loss           | 0.000637   |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 289         |
|    time_elapsed         | 2071        |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 299        |
|    time_elapsed         | 2135       |
|    total_timesteps      | 76544      |
| train/                  |            |
|    approx_kl            | 0.20737857 |
|    clip_fraction        | 0.813      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.5       |
|    explained_variance   | 0.982      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.131     |
|    n_updates            | 5960       |
|    policy_gradient_loss | -0.121     |
|    std                  | 0.153      |
|    value_loss           | 0.000711   |
----------------------------------------
Eval num_timesteps=76800, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=76800, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval nu

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 307         |
|    time_elapsed         | 2238        |
|    total_timesteps      | 78592       |
| train/                  |             |
|    approx_kl            | -0.21385662 |
|    clip_fraction        | 0.802       |
|    clip_range           | 0.2         |
|    entropy_loss         | 28.5        |
|    explained_variance   | 0.965       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.139      |
|    n_updates            | 6120        |
|    policy_gradient_loss | -0.119      |
|    std                  | 0.153       |
|    value_loss           | 0.000535    |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 35           |
|    iterations           | 308          |
|    time_elapsed         | 22

---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 318       |
|    time_elapsed         | 2302      |
|    total_timesteps      | 81408     |
| train/                  |           |
|    approx_kl            | 0.1515973 |
|    clip_fraction        | 0.787     |
|    clip_range           | 0.2       |
|    entropy_loss         | 28.4      |
|    explained_variance   | 0.984     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.12     |
|    n_updates            | 6340      |
|    policy_gradient_loss | -0.121    |
|    std                  | 0.153     |
|    value_loss           | 0.000402  |
---------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 319         |
|    time_elapsed         | 2309        |
|    total_timesteps      | 81

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 328        |
|    time_elapsed         | 2385       |
|    total_timesteps      | 83968      |
| train/                  |            |
|    approx_kl            | 0.09125418 |
|    clip_fraction        | 0.774      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.3       |
|    explained_variance   | 0.992      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.131     |
|    n_updates            | 6540       |
|    policy_gradient_loss | -0.115     |
|    std                  | 0.153      |
|    value_loss           | 0.000422   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 329        |
|    time_elapsed         | 2391       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 339        |
|    time_elapsed         | 2451       |
|    total_timesteps      | 86784      |
| train/                  |            |
|    approx_kl            | 0.21688978 |
|    clip_fraction        | 0.793      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.2       |
|    explained_variance   | 0.977      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0752    |
|    n_updates            | 6760       |
|    policy_gradient_loss | -0.12      |
|    std                  | 0.154      |
|    value_loss           | 0.00116    |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 340         |
|    time_elapsed         | 2457        |
|    total_

Eval num_timesteps=89600, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=89600, episode_reward=0.68 +/- 0.00
Episode length: 5.00 +/- 0.00
----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 5          |
|    mean_reward          | 0.685      |
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 350        |
|    time_elapsed         | 2541       |
|    total_timesteps      | 89600      |
| train/                  |            |
|    approx_kl            | -0.0436559 |
|    clip_fraction        | 0.8        |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.1       |
|    explained_variance   | 0.985      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.126     |
|    n_updates            | 6980       |
|    policy_gradient_loss | -0.124     |
|    std                  | 0.154      |
|    value

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 361        |
|    time_elapsed         | 2607       |
|    total_timesteps      | 92416      |
| train/                  |            |
|    approx_kl            | 0.19515085 |
|    clip_fraction        | 0.828      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28         |
|    explained_variance   | 0.982      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0766    |
|    n_updates            | 7200       |
|    policy_gradient_loss | -0.11      |
|    std                  | 0.154      |
|    value_loss           | 0.000591   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 362        |
|    time_elapsed         | 2613       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 372        |
|    time_elapsed         | 2674       |
|    total_timesteps      | 95232      |
| train/                  |            |
|    approx_kl            | 0.11055969 |
|    clip_fraction        | 0.803      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.9       |
|    explained_variance   | 0.977      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.111     |
|    n_updates            | 7420       |
|    policy_gradient_loss | -0.121     |
|    std                  | 0.154      |
|    value_loss           | 0.000667   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 373        |
|    time_elapsed         | 2680       |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 381       |
|    time_elapsed         | 2778      |
|    total_timesteps      | 97536     |
| train/                  |           |
|    approx_kl            | 0.2269158 |
|    clip_fraction        | 0.828     |
|    clip_range           | 0.2       |
|    entropy_loss         | 27.8      |
|    explained_variance   | 0.989     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.067    |
|    n_updates            | 7600      |
|    policy_gradient_loss | -0.117    |
|    std                  | 0.154     |
|    value_loss           | 0.000587  |
---------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 382       |
|    time_elapsed         | 2784      |
|    total_timesteps      | 97792     |


---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 392       |
|    time_elapsed         | 2844      |
|    total_timesteps      | 100352    |
| train/                  |           |
|    approx_kl            | 0.3154637 |
|    clip_fraction        | 0.795     |
|    clip_range           | 0.2       |
|    entropy_loss         | 27.8      |
|    explained_variance   | 0.973     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.106    |
|    n_updates            | 7820      |
|    policy_gradient_loss | -0.12     |
|    std                  | 0.155     |
|    value_loss           | 0.000455  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 393        |
|    time_elapsed         | 2849       |
|    total_timesteps      | 100608 

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 403        |
|    time_elapsed         | 2929       |
|    total_timesteps      | 103168     |
| train/                  |            |
|    approx_kl            | 0.19198194 |
|    clip_fraction        | 0.78       |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.7       |
|    explained_variance   | 0.984      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.109     |
|    n_updates            | 8040       |
|    policy_gradient_loss | -0.112     |
|    std                  | 0.155      |
|    value_loss           | 0.000521   |
----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 35            |
|    iterations           | 404           |
|    time_elapsed         | 2935          |
|

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 414        |
|    time_elapsed         | 3000       |
|    total_timesteps      | 105984     |
| train/                  |            |
|    approx_kl            | 0.24517259 |
|    clip_fraction        | 0.775      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.6       |
|    explained_variance   | 0.989      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.141     |
|    n_updates            | 8260       |
|    policy_gradient_loss | -0.113     |
|    std                  | 0.155      |
|    value_loss           | 0.000659   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 415       |
|    time_elapsed         | 3006      |
|    total_timesteps 

Eval num_timesteps=108800, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=108800, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 5          |
|    mean_reward          | 0.691      |
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 425        |
|    time_elapsed         | 3084       |
|    total_timesteps      | 108800     |
| train/                  |            |
|    approx_kl            | 0.17951009 |
|    clip_fraction        | 0.795      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.5       |
|    explained_variance   | 0.988      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0832    |
|    n_updates            | 8480       |
|    policy_gradient_loss | -0.117     |
|    std                  |

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 436        |
|    time_elapsed         | 3148       |
|    total_timesteps      | 111616     |
| train/                  |            |
|    approx_kl            | 0.40599138 |
|    clip_fraction        | 0.849      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.4       |
|    explained_variance   | 0.937      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.116     |
|    n_updates            | 8700       |
|    policy_gradient_loss | -0.121     |
|    std                  | 0.156      |
|    value_loss           | 0.000814   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 437        |
|    time_elapsed         | 3154       |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 447         |
|    time_elapsed         | 3216        |
|    total_timesteps      | 114432      |
| train/                  |             |
|    approx_kl            | 0.077517554 |
|    clip_fraction        | 0.803       |
|    clip_range           | 0.2         |
|    entropy_loss         | 27.3        |
|    explained_variance   | 0.988       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0989     |
|    n_updates            | 8920        |
|    policy_gradient_loss | -0.12       |
|    std                  | 0.156       |
|    value_loss           | 0.000544    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 448        |
|    time_elapsed         | 3223      

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 456        |
|    time_elapsed         | 3323       |
|    total_timesteps      | 116736     |
| train/                  |            |
|    approx_kl            | 0.27359477 |
|    clip_fraction        | 0.84       |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.2       |
|    explained_variance   | 0.978      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.119     |
|    n_updates            | 9100       |
|    policy_gradient_loss | -0.11      |
|    std                  | 0.156      |
|    value_loss           | 0.000801   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 457        |
|    time_elapsed         | 3330       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 467        |
|    time_elapsed         | 3394       |
|    total_timesteps      | 119552     |
| train/                  |            |
|    approx_kl            | 0.16482382 |
|    clip_fraction        | 0.79       |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.2       |
|    explained_variance   | 0.968      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.107     |
|    n_updates            | 9320       |
|    policy_gradient_loss | -0.114     |
|    std                  | 0.156      |
|    value_loss           | 0.000437   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 468        |
|    time_elapsed         | 3400       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 477        |
|    time_elapsed         | 3474       |
|    total_timesteps      | 122112     |
| train/                  |            |
|    approx_kl            | 0.12657866 |
|    clip_fraction        | 0.784      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.1       |
|    explained_variance   | 0.972      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.118     |
|    n_updates            | 9520       |
|    policy_gradient_loss | -0.116     |
|    std                  | 0.156      |
|    value_loss           | 0.00058    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 478        |
|    time_elapsed         | 3480       |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 488         |
|    time_elapsed         | 3540        |
|    total_timesteps      | 124928      |
| train/                  |             |
|    approx_kl            | 0.095550954 |
|    clip_fraction        | 0.792       |
|    clip_range           | 0.2         |
|    entropy_loss         | 27          |
|    explained_variance   | 0.988       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0859     |
|    n_updates            | 9740        |
|    policy_gradient_loss | -0.114      |
|    std                  | 0.156       |
|    value_loss           | 0.000433    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 489        |
|    time_elapsed         | 3546      

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 499         |
|    time_elapsed         | 3611        |
|    total_timesteps      | 127744      |
| train/                  |             |
|    approx_kl            | 0.008832468 |
|    clip_fraction        | 0.788       |
|    clip_range           | 0.2         |
|    entropy_loss         | 27          |
|    explained_variance   | 0.989       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0806     |
|    n_updates            | 9960        |
|    policy_gradient_loss | -0.108      |
|    std                  | 0.157       |
|    value_loss           | 0.000652    |
-----------------------------------------
Eval num_timesteps=128000, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=128000, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +/- 0.00
------------------------------

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 510        |
|    time_elapsed         | 3695       |
|    total_timesteps      | 130560     |
| train/                  |            |
|    approx_kl            | 0.15644723 |
|    clip_fraction        | 0.795      |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.8       |
|    explained_variance   | 0.987      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0872    |
|    n_updates            | 10180      |
|    policy_gradient_loss | -0.112     |
|    std                  | 0.157      |
|    value_loss           | 0.000555   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 511        |
|    time_elapsed         | 3701       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 521        |
|    time_elapsed         | 3763       |
|    total_timesteps      | 133376     |
| train/                  |            |
|    approx_kl            | 0.24577539 |
|    clip_fraction        | 0.847      |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.8       |
|    explained_variance   | 0.986      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.124     |
|    n_updates            | 10400      |
|    policy_gradient_loss | -0.118     |
|    std                  | 0.157      |
|    value_loss           | 0.000386   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 522        |
|    time_elapsed         | 3769       |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 530         |
|    time_elapsed         | 3852        |
|    total_timesteps      | 135680      |
| train/                  |             |
|    approx_kl            | -0.09095208 |
|    clip_fraction        | 0.797       |
|    clip_range           | 0.2         |
|    entropy_loss         | 26.7        |
|    explained_variance   | 0.988       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.11       |
|    n_updates            | 10580       |
|    policy_gradient_loss | -0.114      |
|    std                  | 0.157       |
|    value_loss           | 0.000438    |
-----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 531       |
|    time_elapsed         | 3857      |
| 

------------------------------------------
| time/                   |              |
|    fps                  | 35           |
|    iterations           | 541          |
|    time_elapsed         | 3917         |
|    total_timesteps      | 138496       |
| train/                  |              |
|    approx_kl            | -0.015386451 |
|    clip_fraction        | 0.835        |
|    clip_range           | 0.2          |
|    entropy_loss         | 26.6         |
|    explained_variance   | 0.979        |
|    learning_rate        | 1e-05        |
|    loss                 | -0.122       |
|    n_updates            | 10800        |
|    policy_gradient_loss | -0.112       |
|    std                  | 0.158        |
|    value_loss           | 0.000588     |
------------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 542       |
|    time_elapsed      

------------------------------------------
| time/                   |              |
|    fps                  | 35           |
|    iterations           | 552          |
|    time_elapsed         | 4000         |
|    total_timesteps      | 141312       |
| train/                  |              |
|    approx_kl            | -0.011048995 |
|    clip_fraction        | 0.84         |
|    clip_range           | 0.2          |
|    entropy_loss         | 26.5         |
|    explained_variance   | 0.978        |
|    learning_rate        | 1e-05        |
|    loss                 | -0.103       |
|    n_updates            | 11020        |
|    policy_gradient_loss | -0.121       |
|    std                  | 0.158        |
|    value_loss           | 0.000583     |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 553        |
|    time_elapsed  

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 563        |
|    time_elapsed         | 4065       |
|    total_timesteps      | 144128     |
| train/                  |            |
|    approx_kl            | 0.17604095 |
|    clip_fraction        | 0.806      |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.4       |
|    explained_variance   | 0.983      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0839    |
|    n_updates            | 11240      |
|    policy_gradient_loss | -0.126     |
|    std                  | 0.158      |
|    value_loss           | 0.000449   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 564        |
|    time_elapsed         | 4070       |
|    total_times

-------------------------------------------
| time/                   |               |
|    fps                  | 35            |
|    iterations           | 574           |
|    time_elapsed         | 4131          |
|    total_timesteps      | 146944        |
| train/                  |               |
|    approx_kl            | 0.00093695894 |
|    clip_fraction        | 0.793         |
|    clip_range           | 0.2           |
|    entropy_loss         | 26.3          |
|    explained_variance   | 0.993         |
|    learning_rate        | 1e-05         |
|    loss                 | -0.111        |
|    n_updates            | 11460         |
|    policy_gradient_loss | -0.112        |
|    std                  | 0.158         |
|    value_loss           | 0.000322      |
-------------------------------------------
Eval num_timesteps=147200, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=147200, episode_reward=0.69 +/- 0.00
Episode length: 5.00 +

---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 585       |
|    time_elapsed         | 4212      |
|    total_timesteps      | 149760    |
| train/                  |           |
|    approx_kl            | 0.2308096 |
|    clip_fraction        | 0.829     |
|    clip_range           | 0.2       |
|    entropy_loss         | 26.2      |
|    explained_variance   | 0.982     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.107    |
|    n_updates            | 11680     |
|    policy_gradient_loss | -0.119    |
|    std                  | 0.159     |
|    value_loss           | 0.000593  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 586        |
|    time_elapsed         | 4218       |
|    total_timesteps      | 150016 

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 596        |
|    time_elapsed         | 4277       |
|    total_timesteps      | 152576     |
| train/                  |            |
|    approx_kl            | 0.41672182 |
|    clip_fraction        | 0.824      |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.1       |
|    explained_variance   | 0.943      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0582    |
|    n_updates            | 11900      |
|    policy_gradient_loss | -0.0981    |
|    std                  | 0.159      |
|    value_loss           | 0.000728   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 597        |
|    time_elapsed         | 4283       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 605        |
|    time_elapsed         | 4377       |
|    total_timesteps      | 154880     |
| train/                  |            |
|    approx_kl            | 0.43404585 |
|    clip_fraction        | 0.808      |
|    clip_range           | 0.2        |
|    entropy_loss         | 26         |
|    explained_variance   | 0.974      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.142     |
|    n_updates            | 12080      |
|    policy_gradient_loss | -0.124     |
|    std                  | 0.159      |
|    value_loss           | 0.000547   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 606        |
|    time_elapsed         | 4383       |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 616         |
|    time_elapsed         | 4445        |
|    total_timesteps      | 157696      |
| train/                  |             |
|    approx_kl            | 0.056014877 |
|    clip_fraction        | 0.832       |
|    clip_range           | 0.2         |
|    entropy_loss         | 25.9        |
|    explained_variance   | 0.99        |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0743     |
|    n_updates            | 12300       |
|    policy_gradient_loss | -0.118      |
|    std                  | 0.159       |
|    value_loss           | 0.000447    |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 617         |
|    time_elapsed         | 4452  

-----------------------------------------
| time/                   |             |
|    fps                  | 35          |
|    iterations           | 627         |
|    time_elapsed         | 4531        |
|    total_timesteps      | 160512      |
| train/                  |             |
|    approx_kl            | -0.28595835 |
|    clip_fraction        | 0.796       |
|    clip_range           | 0.2         |
|    entropy_loss         | 25.8        |
|    explained_variance   | 0.953       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.12       |
|    n_updates            | 12520       |
|    policy_gradient_loss | -0.118      |
|    std                  | 0.16        |
|    value_loss           | 0.000698    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 628        |
|    time_elapsed         | 4537      

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 638        |
|    time_elapsed         | 4597       |
|    total_timesteps      | 163328     |
| train/                  |            |
|    approx_kl            | 0.11910209 |
|    clip_fraction        | 0.799      |
|    clip_range           | 0.2        |
|    entropy_loss         | 25.7       |
|    explained_variance   | 0.991      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.111     |
|    n_updates            | 12740      |
|    policy_gradient_loss | -0.122     |
|    std                  | 0.16       |
|    value_loss           | 0.000343   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 639        |
|    time_elapsed         | 4603       |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 35           |
|    iterations           | 649          |
|    time_elapsed         | 4664         |
|    total_timesteps      | 166144       |
| train/                  |              |
|    approx_kl            | -0.031322453 |
|    clip_fraction        | 0.78         |
|    clip_range           | 0.2          |
|    entropy_loss         | 25.6         |
|    explained_variance   | 0.994        |
|    learning_rate        | 1e-05        |
|    loss                 | -0.117       |
|    n_updates            | 12960        |
|    policy_gradient_loss | -0.112       |
|    std                  | 0.16         |
|    value_loss           | 0.000287     |
------------------------------------------
Eval num_timesteps=166400, episode_reward=0.70 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=166400, episode_reward=0.68 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 660        |
|    time_elapsed         | 4747       |
|    total_timesteps      | 168960     |
| train/                  |            |
|    approx_kl            | 0.17652562 |
|    clip_fraction        | 0.817      |
|    clip_range           | 0.2        |
|    entropy_loss         | 25.5       |
|    explained_variance   | 0.99       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.115     |
|    n_updates            | 13180      |
|    policy_gradient_loss | -0.112     |
|    std                  | 0.16       |
|    value_loss           | 0.000511   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 661        |
|    time_elapsed         | 4753       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 671        |
|    time_elapsed         | 4812       |
|    total_timesteps      | 171776     |
| train/                  |            |
|    approx_kl            | 0.17573434 |
|    clip_fraction        | 0.839      |
|    clip_range           | 0.2        |
|    entropy_loss         | 25.4       |
|    explained_variance   | 0.989      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.121     |
|    n_updates            | 13400      |
|    policy_gradient_loss | -0.116     |
|    std                  | 0.161      |
|    value_loss           | 0.000366   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 672        |
|    time_elapsed         | 4818       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 680        |
|    time_elapsed         | 4901       |
|    total_timesteps      | 174080     |
| train/                  |            |
|    approx_kl            | 0.20015258 |
|    clip_fraction        | 0.816      |
|    clip_range           | 0.2        |
|    entropy_loss         | 25.3       |
|    explained_variance   | 0.991      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.142     |
|    n_updates            | 13580      |
|    policy_gradient_loss | -0.122     |
|    std                  | 0.161      |
|    value_loss           | 0.000359   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 681        |
|    time_elapsed         | 4907       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 691        |
|    time_elapsed         | 4966       |
|    total_timesteps      | 176896     |
| train/                  |            |
|    approx_kl            | 0.12394015 |
|    clip_fraction        | 0.822      |
|    clip_range           | 0.2        |
|    entropy_loss         | 25.2       |
|    explained_variance   | 0.99       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0853    |
|    n_updates            | 13800      |
|    policy_gradient_loss | -0.11      |
|    std                  | 0.161      |
|    value_loss           | 0.000429   |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 35           |
|    iterations           | 692          |
|    time_elapsed         | 4972         |
|    t

---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 701       |
|    time_elapsed         | 5042      |
|    total_timesteps      | 179456    |
| train/                  |           |
|    approx_kl            | 0.5278703 |
|    clip_fraction        | 0.854     |
|    clip_range           | 0.2       |
|    entropy_loss         | 25.1      |
|    explained_variance   | 0.972     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.101    |
|    n_updates            | 14000     |
|    policy_gradient_loss | -0.115    |
|    std                  | 0.161     |
|    value_loss           | 0.000353  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 702        |
|    time_elapsed         | 5048       |
|    total_timesteps      | 179712 

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 712        |
|    time_elapsed         | 5107       |
|    total_timesteps      | 182272     |
| train/                  |            |
|    approx_kl            | 0.34269246 |
|    clip_fraction        | 0.78       |
|    clip_range           | 0.2        |
|    entropy_loss         | 25         |
|    explained_variance   | 0.978      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0895    |
|    n_updates            | 14220      |
|    policy_gradient_loss | -0.113     |
|    std                  | 0.162      |
|    value_loss           | 0.000286   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 713        |
|    time_elapsed         | 5113       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 723        |
|    time_elapsed         | 5172       |
|    total_timesteps      | 185088     |
| train/                  |            |
|    approx_kl            | 0.26357388 |
|    clip_fraction        | 0.821      |
|    clip_range           | 0.2        |
|    entropy_loss         | 24.9       |
|    explained_variance   | 0.991      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.14      |
|    n_updates            | 14440      |
|    policy_gradient_loss | -0.124     |
|    std                  | 0.162      |
|    value_loss           | 0.000343   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 724        |
|    time_elapsed         | 5178       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 734        |
|    time_elapsed         | 5258       |
|    total_timesteps      | 187904     |
| train/                  |            |
|    approx_kl            | 0.09375471 |
|    clip_fraction        | 0.824      |
|    clip_range           | 0.2        |
|    entropy_loss         | 24.8       |
|    explained_variance   | 0.993      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.11      |
|    n_updates            | 14660      |
|    policy_gradient_loss | -0.113     |
|    std                  | 0.162      |
|    value_loss           | 0.000375   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 735        |
|    time_elapsed         | 5265       |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 745       |
|    time_elapsed         | 5328      |
|    total_timesteps      | 190720    |
| train/                  |           |
|    approx_kl            | 0.3803712 |
|    clip_fraction        | 0.801     |
|    clip_range           | 0.2       |
|    entropy_loss         | 24.7      |
|    explained_variance   | 0.989     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.0994   |
|    n_updates            | 14880     |
|    policy_gradient_loss | -0.106    |
|    std                  | 0.162     |
|    value_loss           | 0.000447  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 746        |
|    time_elapsed         | 5334       |
|    total_timesteps      | 190976 

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 754        |
|    time_elapsed         | 5414       |
|    total_timesteps      | 193024     |
| train/                  |            |
|    approx_kl            | 0.20562297 |
|    clip_fraction        | 0.762      |
|    clip_range           | 0.2        |
|    entropy_loss         | 24.6       |
|    explained_variance   | 0.991      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.12      |
|    n_updates            | 15060      |
|    policy_gradient_loss | -0.104     |
|    std                  | 0.163      |
|    value_loss           | 0.000405   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 755       |
|    time_elapsed         | 5420      |
|    total_timesteps 

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 765        |
|    time_elapsed         | 5481       |
|    total_timesteps      | 195840     |
| train/                  |            |
|    approx_kl            | 0.26744556 |
|    clip_fraction        | 0.796      |
|    clip_range           | 0.2        |
|    entropy_loss         | 24.5       |
|    explained_variance   | 0.993      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.112     |
|    n_updates            | 15280      |
|    policy_gradient_loss | -0.115     |
|    std                  | 0.163      |
|    value_loss           | 0.000368   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 766       |
|    time_elapsed         | 5487      |
|    total_timesteps 

----------------------------------------
| time/                   |            |
|    fps                  | 35         |
|    iterations           | 776        |
|    time_elapsed         | 5563       |
|    total_timesteps      | 198656     |
| train/                  |            |
|    approx_kl            | 0.10516917 |
|    clip_fraction        | 0.854      |
|    clip_range           | 0.2        |
|    entropy_loss         | 24.4       |
|    explained_variance   | 0.986      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0918    |
|    n_updates            | 15500      |
|    policy_gradient_loss | -0.108     |
|    std                  | 0.163      |
|    value_loss           | 0.000269   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 35        |
|    iterations           | 777       |
|    time_elapsed         | 5570      |
|    total_timesteps 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>