In [1]:
# Make code that lives outside this notebook's directory importable
# (presumably the project's utils/, model/ and ressim_env modules — TODO confirm).
import os
import sys

# The location can be overridden with the SPE10_ENVS_DIR environment variable;
# the default is the original hard-coded path, so existing setups keep working.
_envs_dir = os.environ.get('SPE10_ENVS_DIR',
                           '/data/ad181/RemoteDir/k_variability_in_ressim_env/SPE10_like_envs/')
# Guard against piling up duplicate entries when the cell is re-run.
if _envs_dir not in sys.path:
    sys.path.append(_envs_dir)

In [2]:
%matplotlib notebook
import numpy as np
import time
import pickle
import os
import matplotlib.pyplot as plt

import gym
from stable_baselines3.ppo import PPO, MlpPolicy
# from utils.subproc_vec_env import SubprocVecEnv
# from utils.multiprocessing_env import SubprocVecEnv
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv
from stable_baselines3.common.callbacks import CallbackList
from utils.custom_eval_callback import CustomEvalCallback
from typing import Callable
from utils.plot_functions import plot_learning

from model.ressim import Grid
from ressim_env import ResSimEnv_v0, ResSimEnv_v1, ResSimEnv_v3

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
# Run configuration: default RNG seed and the case tag that is spliced into the
# data / environment file paths below. Note that the training loop later rebinds
# `seed` as its loop variable.
seed, case = 1, '2ph_v1'

In [4]:
# Ensure the output root and the per-case sub-directory exist (no error if present).
for out_dir in ('./data', './data/'+case):
    os.makedirs(out_dir, exist_ok=True)

In [5]:
# Restore the pre-built training and evaluation environments for this case.
# The handles are deliberately not called `input`, so the builtin stays usable.
# NOTE: pickle.load can execute arbitrary code — only load files from a trusted source.
# NOTE(review): `env_train` loaded here is rebuilt from scratch in the next cell;
# confirm whether this first load is still needed.
with open('../envs_params/env_data/env_'+case+'_train.pkl', 'rb') as train_file:
    env_train = pickle.load(train_file)

with open('../envs_params/env_data/env_'+case+'_eval_rl.pkl', 'rb') as eval_file:
    env_eval = pickle.load(eval_file)

In [6]:
# ---- domain properties ----
nx = ny = 61
lx = ly = 1200*0.3048  # 1200 ft
grid = Grid(nx=nx, ny=ny, lx=lx, ly=ly)

# Permeability file is exponentiated and rescaled
# (presumably log-millidarcy -> m^2, going by the names — TODO confirm).
md_m2_conv = 1/1.01325e+15
k_train = md_m2_conv*np.exp(np.load('../envs_params/k_data/k_log_md_train.npy'))
phi = np.full(grid.shape, 0.2)
s_wir = s_oir = 0.2

# ---- fluid properties ----
mu_w, mu_o = 3e-4, 3e-3  # 0.3 cp, 3.0 cp
mobility = 'quadratic'

# ---- time steps ----
dt, nstep, terminal_step = 0.5, 50, 5
total_time = nstep*terminal_step*dt  # 50 * 5 * 0.5 = 125 (days, per the original note)

# ---- initial conditions ----
ooip = grid.lx * grid.ly * phi[0,0] * (1 - s_wir - s_oir)  # original oil in place
fraction = 0.4
Q = fraction*ooip/total_time
# Source term: every second cell of the first column gets a positive rate and the
# matching cell of the last column the negated rate.
# NOTE(review): with nx = 61, round(grid.nx/2) evaluates to 30 in Python 3, while
# `::2` picks 31 rows if that axis has 61 entries, so the per-cell rates may not
# sum exactly to Q — confirm this is intended.
q = np.zeros(grid.shape)
q[::2,0] = Q/round(grid.nx/2)
q[::2,-1] = -q[::2,0]
s = np.full(grid.shape, s_wir)

env_train = ResSimEnv_v3(
    grid, k_train, phi, s_wir, s_oir,   # domain properties
    mu_w, mu_o, mobility,               # fluid properties
    dt, nstep, terminal_step,           # timesteps
    q, s,
)

In [7]:
def make_env(env, rank: int, seed: int = 0) -> Callable:
    """
    Utility function for multiprocessed env.

    Returns a zero-argument factory, as expected by vectorized-env wrappers.
    Each call of the factory yields an independent copy of ``env`` seeded with
    ``seed + rank``, so workers never share (or re-seed) the same object even
    when the factories are executed in a single process.

    :param env: environment instance used as the template for the worker
    :param rank: (int) index of the subprocess
    :param seed: (int) the initial seed for RNG
    :return: (Callable) factory that builds the seeded environment
    """
    def _init() -> "gym.Env":
        # Local import keeps this helper self-contained.
        import copy
        # Copy so the template passed in is left untouched and unseeded.
        env_ = copy.deepcopy(env)
        env_.seed(seed + rank)
        return env_
    return _init

In [8]:
# Train one PPO agent per seed and save the model, evaluation logs and learning curves
# under ./data/<case>/seed_<seed>.
# Seeds 1-6 are skipped, so only seed 7 runs (presumably the others were trained
# earlier — TODO confirm).
for seed in range(1,8):
    if seed <= 6:
        continue
    print(f'seed {seed}')
    log_dir = './data/'+case+'/seed_'+str(seed)
    os.makedirs(log_dir, exist_ok=True)
    num_cpu = 64
    env_train.seed(seed)
    env_eval.seed(seed)
    # The same callback class tracks the policy on the training env and on the
    # evaluation env. The run log shows evaluations at multiples of 6400 timesteps.
    train_callback = CustomEvalCallback(env_train, best_model_save_path=log_dir+'/best_model_train',
                                        n_eval_episodes=1, log_path=log_dir+'/results_train', eval_freq=100)
    eval_callback = CustomEvalCallback(env_eval, best_model_save_path=log_dir+'/best_model_eval',
                                       n_eval_episodes=1, log_path=log_dir+'/results_eval', eval_freq=100)
    callback = CallbackList([train_callback, eval_callback])
    env = SubprocVecEnv([make_env(env_train, i, seed) for i in range(num_cpu)])
    try:
        print(f'seed {seed}: model definition ..')
        # 64 envs x n_steps=4 gives 256 transitions per rollout (the log advances
        # total_timesteps in steps of 256).
        # NOTE(review): the log reports approx_kl often above 0.1 and clip_fraction
        # around 0.7-0.8; consider `target_kl` or fewer epochs. Hyperparameters are
        # left unchanged here to keep runs comparable.
        model = PPO(policy=MlpPolicy,
                    env=env,
                    learning_rate=1e-5,
                    n_steps=4,
                    batch_size=16,
                    n_epochs=20,
                    gamma=0.99,
                    gae_lambda=0.95,
                    clip_range=0.2,
                    clip_range_vf=None,
                    ent_coef=0.001,
                    vf_coef=0.5,
                    max_grad_norm=0.5,
                    use_sde=False,
                    create_eval_env=False,
                    policy_kwargs=dict(net_arch=[4000,2000,800,300], log_std_init=-1.9),
                    verbose=1,
                    seed=seed,
                    device="auto")
        print(f'seed {seed}: learning ..')
        model.learn(total_timesteps=400000, callback=callback)
        model.save(log_dir+'/PPO')
        del model
    finally:
        # Always shut down the worker processes, even if training fails;
        # otherwise every run leaves its subprocesses behind.
        env.close()
    # Learning curves built from the logs written under log_dir.
    for split in ('train', 'eval'):
        fig = plot_learning(log_dir, case=split)
        fig.savefig(log_dir+'/learn_'+split+'.png')

seed 7
seed 7: model definition ..
Using cuda device
seed 7: learning ..




----------------------------
| time/              |     |
|    fps             | 32  |
|    iterations      | 1   |
|    time_elapsed    | 7   |
|    total_timesteps | 256 |
----------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 21         |
|    iterations           | 2          |
|    time_elapsed         | 24         |
|    total_timesteps      | 512        |
| train/                  |            |
|    approx_kl            | 0.16350397 |
|    clip_fraction        | 0.667      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.8       |
|    explained_variance   | -0.25      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.00871    |
|    n_updates            | 20         |
|    policy_gradient_loss | -0.0968    |
|    std                  | 0.15       |
|    value_loss           | 0.0181     |
----------------------------------------
------------------

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 13         |
|    time_elapsed         | 204        |
|    total_timesteps      | 3328       |
| train/                  |            |
|    approx_kl            | 0.07460416 |
|    clip_fraction        | 0.684      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.8       |
|    explained_variance   | 0.939      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.076     |
|    n_updates            | 240        |
|    policy_gradient_loss | -0.104     |
|    std                  | 0.15       |
|    value_loss           | 0.00128    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 14         |
|    time_elapsed         | 221        |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 15        |
|    iterations           | 24        |
|    time_elapsed         | 391       |
|    total_timesteps      | 6144      |
| train/                  |           |
|    approx_kl            | 0.1705826 |
|    clip_fraction        | 0.738     |
|    clip_range           | 0.2       |
|    entropy_loss         | 29.8      |
|    explained_variance   | 0.629     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.0871   |
|    n_updates            | 460       |
|    policy_gradient_loss | -0.108    |
|    std                  | 0.15      |
|    value_loss           | 0.00714   |
---------------------------------------
Eval num_timesteps=6400, episode_reward=0.33 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=6400, episode_reward=0.33 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
----------------------------

----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 35         |
|    time_elapsed         | 624        |
|    total_timesteps      | 8960       |
| train/                  |            |
|    approx_kl            | 0.06619315 |
|    clip_fraction        | 0.737      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.8       |
|    explained_variance   | -0.0736    |
|    learning_rate        | 1e-05      |
|    loss                 | -0.121     |
|    n_updates            | 680        |
|    policy_gradient_loss | -0.0965    |
|    std                  | 0.15       |
|    value_loss           | 0.0117     |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 36         |
|    time_elapsed         | 641        |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 46         |
|    time_elapsed         | 808        |
|    total_timesteps      | 11776      |
| train/                  |            |
|    approx_kl            | 0.21563068 |
|    clip_fraction        | 0.748      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.8       |
|    explained_variance   | 0.751      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0784    |
|    n_updates            | 900        |
|    policy_gradient_loss | -0.103     |
|    std                  | 0.15       |
|    value_loss           | 0.00123    |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 14        |
|    iterations           | 47        |
|    time_elapsed         | 825       |
|    total_timesteps 

----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 57         |
|    time_elapsed         | 1040       |
|    total_timesteps      | 14592      |
| train/                  |            |
|    approx_kl            | 0.14754276 |
|    clip_fraction        | 0.768      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.7       |
|    explained_variance   | 0.924      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.105     |
|    n_updates            | 1120       |
|    policy_gradient_loss | -0.108     |
|    std                  | 0.15       |
|    value_loss           | 0.00496    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 58         |
|    time_elapsed         | 1057       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 68         |
|    time_elapsed         | 1230       |
|    total_timesteps      | 17408      |
| train/                  |            |
|    approx_kl            | 0.14272004 |
|    clip_fraction        | 0.743      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.7       |
|    explained_variance   | 0.922      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0787    |
|    n_updates            | 1340       |
|    policy_gradient_loss | -0.111     |
|    std                  | 0.15       |
|    value_loss           | 0.00124    |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 14           |
|    iterations           | 69           |
|    time_elapsed         | 1247         |
|    t

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 79         |
|    time_elapsed         | 1462       |
|    total_timesteps      | 20224      |
| train/                  |            |
|    approx_kl            | 0.16592157 |
|    clip_fraction        | 0.781      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.7       |
|    explained_variance   | 0.942      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.119     |
|    n_updates            | 1560       |
|    policy_gradient_loss | -0.109     |
|    std                  | 0.15       |
|    value_loss           | 0.00116    |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 13          |
|    iterations           | 80          |
|    time_elapsed         | 1480        |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 90         |
|    time_elapsed         | 1651       |
|    total_timesteps      | 23040      |
| train/                  |            |
|    approx_kl            | 0.27530497 |
|    clip_fraction        | 0.773      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.7       |
|    explained_variance   | 0.776      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0276    |
|    n_updates            | 1780       |
|    policy_gradient_loss | -0.108     |
|    std                  | 0.15       |
|    value_loss           | 0.00172    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 91         |
|    time_elapsed         | 1667       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 101        |
|    time_elapsed         | 1887       |
|    total_timesteps      | 25856      |
| train/                  |            |
|    approx_kl            | 0.11411619 |
|    clip_fraction        | 0.753      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.6       |
|    explained_variance   | 0.893      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.075     |
|    n_updates            | 2000       |
|    policy_gradient_loss | -0.0957    |
|    std                  | 0.15       |
|    value_loss           | 0.000604   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 102        |
|    time_elapsed         | 1904       |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 13        |
|    iterations           | 112       |
|    time_elapsed         | 2076      |
|    total_timesteps      | 28672     |
| train/                  |           |
|    approx_kl            | 0.5731381 |
|    clip_fraction        | 0.8       |
|    clip_range           | 0.2       |
|    entropy_loss         | 29.6      |
|    explained_variance   | -0.557    |
|    learning_rate        | 1e-05     |
|    loss                 | 0.0322    |
|    n_updates            | 2220      |
|    policy_gradient_loss | -0.0793   |
|    std                  | 0.15      |
|    value_loss           | 0.00163   |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 113        |
|    time_elapsed         | 2093       |
|    total_timesteps      | 28928  

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 123        |
|    time_elapsed         | 2265       |
|    total_timesteps      | 31488      |
| train/                  |            |
|    approx_kl            | 0.14209446 |
|    clip_fraction        | 0.761      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.5       |
|    explained_variance   | 0.943      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0656    |
|    n_updates            | 2440       |
|    policy_gradient_loss | -0.0935    |
|    std                  | 0.15       |
|    value_loss           | 0.000773   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 13        |
|    iterations           | 124       |
|    time_elapsed         | 2282      |
|    total_timesteps 

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 134        |
|    time_elapsed         | 2498       |
|    total_timesteps      | 34304      |
| train/                  |            |
|    approx_kl            | 0.26707503 |
|    clip_fraction        | 0.795      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.5       |
|    explained_variance   | 0.907      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0888    |
|    n_updates            | 2660       |
|    policy_gradient_loss | -0.112     |
|    std                  | 0.15       |
|    value_loss           | 0.000797   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 135        |
|    time_elapsed         | 2515       |
|    total_times

--------------------------------------
| time/                   |          |
|    fps                  | 13       |
|    iterations           | 145      |
|    time_elapsed         | 2687     |
|    total_timesteps      | 37120    |
| train/                  |          |
|    approx_kl            | 0.252909 |
|    clip_fraction        | 0.766    |
|    clip_range           | 0.2      |
|    entropy_loss         | 29.5     |
|    explained_variance   | 0.941    |
|    learning_rate        | 1e-05    |
|    loss                 | -0.0721  |
|    n_updates            | 2880     |
|    policy_gradient_loss | -0.0931  |
|    std                  | 0.15     |
|    value_loss           | 0.000472 |
--------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 13            |
|    iterations           | 146           |
|    time_elapsed         | 2704          |
|    total_timesteps      | 37376      

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 156        |
|    time_elapsed         | 2921       |
|    total_timesteps      | 39936      |
| train/                  |            |
|    approx_kl            | 0.25031447 |
|    clip_fraction        | 0.787      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.4       |
|    explained_variance   | 0.913      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0603    |
|    n_updates            | 3100       |
|    policy_gradient_loss | -0.0981    |
|    std                  | 0.151      |
|    value_loss           | 0.00087    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 157        |
|    time_elapsed         | 2938       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 167        |
|    time_elapsed         | 3111       |
|    total_timesteps      | 42752      |
| train/                  |            |
|    approx_kl            | 0.22810352 |
|    clip_fraction        | 0.771      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.4       |
|    explained_variance   | 0.946      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0609    |
|    n_updates            | 3320       |
|    policy_gradient_loss | -0.103     |
|    std                  | 0.151      |
|    value_loss           | 0.000666   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 13        |
|    iterations           | 168       |
|    time_elapsed         | 3129      |
|    total_timesteps 

--------------------------------------
| time/                   |          |
|    fps                  | 13       |
|    iterations           | 178      |
|    time_elapsed         | 3349     |
|    total_timesteps      | 45568    |
| train/                  |          |
|    approx_kl            | 0.288625 |
|    clip_fraction        | 0.806    |
|    clip_range           | 0.2      |
|    entropy_loss         | 29.3     |
|    explained_variance   | 0.61     |
|    learning_rate        | 1e-05    |
|    loss                 | -0.0933  |
|    n_updates            | 3540     |
|    policy_gradient_loss | -0.114   |
|    std                  | 0.151    |
|    value_loss           | 0.00448  |
--------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 179        |
|    time_elapsed         | 3366       |
|    total_timesteps      | 45824      |
| train/     

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 189        |
|    time_elapsed         | 3539       |
|    total_timesteps      | 48384      |
| train/                  |            |
|    approx_kl            | 0.17788228 |
|    clip_fraction        | 0.773      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.3       |
|    explained_variance   | 0.878      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.044      |
|    n_updates            | 3760       |
|    policy_gradient_loss | -0.107     |
|    std                  | 0.151      |
|    value_loss           | 0.00169    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 190        |
|    time_elapsed         | 3556       |
|    total_times

Eval num_timesteps=51200, episode_reward=0.37 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=51200, episode_reward=0.35 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 5          |
|    mean_reward          | 0.348      |
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 200        |
|    time_elapsed         | 3777       |
|    total_timesteps      | 51200      |
| train/                  |            |
|    approx_kl            | 0.20853761 |
|    clip_fraction        | 0.814      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.2       |
|    explained_variance   | 0.981      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.131     |
|    n_updates            | 3980       |
|    policy_gradient_loss | -0.108     |
|    std                  | 0

-----------------------------------------
| time/                   |             |
|    fps                  | 13          |
|    iterations           | 211         |
|    time_elapsed         | 3967        |
|    total_timesteps      | 54016       |
| train/                  |             |
|    approx_kl            | 0.009623416 |
|    clip_fraction        | 0.753       |
|    clip_range           | 0.2         |
|    entropy_loss         | 29.2        |
|    explained_variance   | 0.854       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.142      |
|    n_updates            | 4200        |
|    policy_gradient_loss | -0.0956     |
|    std                  | 0.151       |
|    value_loss           | 0.000752    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 212        |
|    time_elapsed         | 3984      

-------------------------------------------
| time/                   |               |
|    fps                  | 13            |
|    iterations           | 222           |
|    time_elapsed         | 4158          |
|    total_timesteps      | 56832         |
| train/                  |               |
|    approx_kl            | 0.00034524128 |
|    clip_fraction        | 0.774         |
|    clip_range           | 0.2           |
|    entropy_loss         | 29.1          |
|    explained_variance   | 0.869         |
|    learning_rate        | 1e-05         |
|    loss                 | -0.118        |
|    n_updates            | 4420          |
|    policy_gradient_loss | -0.108        |
|    std                  | 0.151         |
|    value_loss           | 0.000986      |
-------------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 13        |
|    iterations           | 223       |
|   

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 233        |
|    time_elapsed         | 4396       |
|    total_timesteps      | 59648      |
| train/                  |            |
|    approx_kl            | 0.24687855 |
|    clip_fraction        | 0.8        |
|    clip_range           | 0.2        |
|    entropy_loss         | 29.1       |
|    explained_variance   | 0.933      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0766    |
|    n_updates            | 4640       |
|    policy_gradient_loss | -0.0966    |
|    std                  | 0.151      |
|    value_loss           | 0.00129    |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 13          |
|    iterations           | 234         |
|    time_elapsed         | 4413        |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 244        |
|    time_elapsed         | 4584       |
|    total_timesteps      | 62464      |
| train/                  |            |
|    approx_kl            | 0.27377942 |
|    clip_fraction        | 0.777      |
|    clip_range           | 0.2        |
|    entropy_loss         | 29         |
|    explained_variance   | 0.818      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0929    |
|    n_updates            | 4860       |
|    policy_gradient_loss | -0.084     |
|    std                  | 0.152      |
|    value_loss           | 0.00114    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 245        |
|    time_elapsed         | 4601       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 255        |
|    time_elapsed         | 4819       |
|    total_timesteps      | 65280      |
| train/                  |            |
|    approx_kl            | 0.17047158 |
|    clip_fraction        | 0.799      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.9       |
|    explained_variance   | -0.0354    |
|    learning_rate        | 1e-05      |
|    loss                 | -0.138     |
|    n_updates            | 5080       |
|    policy_gradient_loss | -0.115     |
|    std                  | 0.152      |
|    value_loss           | 0.00381    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 256        |
|    time_elapsed         | 4836       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 266        |
|    time_elapsed         | 5009       |
|    total_timesteps      | 68096      |
| train/                  |            |
|    approx_kl            | 0.16587478 |
|    clip_fraction        | 0.812      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.9       |
|    explained_variance   | 0.862      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.104     |
|    n_updates            | 5300       |
|    policy_gradient_loss | -0.108     |
|    std                  | 0.152      |
|    value_loss           | 0.000578   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 267        |
|    time_elapsed         | 5027       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 277        |
|    time_elapsed         | 5249       |
|    total_timesteps      | 70912      |
| train/                  |            |
|    approx_kl            | 0.22151828 |
|    clip_fraction        | 0.802      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.8       |
|    explained_variance   | 0.876      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.116     |
|    n_updates            | 5520       |
|    policy_gradient_loss | -0.11      |
|    std                  | 0.152      |
|    value_loss           | 0.00294    |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 13        |
|    iterations           | 278       |
|    time_elapsed         | 5267      |
|    total_timesteps 

-----------------------------------------
| time/                   |             |
|    fps                  | 13          |
|    iterations           | 288         |
|    time_elapsed         | 5441        |
|    total_timesteps      | 73728       |
| train/                  |             |
|    approx_kl            | 0.090424776 |
|    clip_fraction        | 0.771       |
|    clip_range           | 0.2         |
|    entropy_loss         | 28.7        |
|    explained_variance   | 0.923       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0682     |
|    n_updates            | 5740        |
|    policy_gradient_loss | -0.0969     |
|    std                  | 0.152       |
|    value_loss           | 0.000757    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 289        |
|    time_elapsed         | 5458      

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 299        |
|    time_elapsed         | 5627       |
|    total_timesteps      | 76544      |
| train/                  |            |
|    approx_kl            | 0.11733567 |
|    clip_fraction        | 0.79       |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.7       |
|    explained_variance   | 0.966      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.119     |
|    n_updates            | 5960       |
|    policy_gradient_loss | -0.108     |
|    std                  | 0.152      |
|    value_loss           | 0.000537   |
----------------------------------------
Eval num_timesteps=76800, episode_reward=0.37 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=76800, episode_reward=0.36 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 310        |
|    time_elapsed         | 5864       |
|    total_timesteps      | 79360      |
| train/                  |            |
|    approx_kl            | 0.30256417 |
|    clip_fraction        | 0.788      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.6       |
|    explained_variance   | 0.918      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.111     |
|    n_updates            | 6180       |
|    policy_gradient_loss | -0.0936    |
|    std                  | 0.152      |
|    value_loss           | 0.00189    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 311        |
|    time_elapsed         | 5881       |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 13           |
|    iterations           | 321          |
|    time_elapsed         | 6053         |
|    total_timesteps      | 82176        |
| train/                  |              |
|    approx_kl            | -0.021751814 |
|    clip_fraction        | 0.79         |
|    clip_range           | 0.2          |
|    entropy_loss         | 28.6         |
|    explained_variance   | 0.901        |
|    learning_rate        | 1e-05        |
|    loss                 | -0.0167      |
|    n_updates            | 6400         |
|    policy_gradient_loss | -0.108       |
|    std                  | 0.153        |
|    value_loss           | 0.000686     |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 322        |
|    time_elapsed  

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 332        |
|    time_elapsed         | 6288       |
|    total_timesteps      | 84992      |
| train/                  |            |
|    approx_kl            | 0.25231656 |
|    clip_fraction        | 0.828      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.5       |
|    explained_variance   | 0.948      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0866    |
|    n_updates            | 6620       |
|    policy_gradient_loss | -0.111     |
|    std                  | 0.153      |
|    value_loss           | 0.000605   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 333        |
|    time_elapsed         | 6305       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 343        |
|    time_elapsed         | 6479       |
|    total_timesteps      | 87808      |
| train/                  |            |
|    approx_kl            | 0.10508439 |
|    clip_fraction        | 0.797      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.4       |
|    explained_variance   | 0.881      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0952    |
|    n_updates            | 6840       |
|    policy_gradient_loss | -0.1       |
|    std                  | 0.153      |
|    value_loss           | 0.000845   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 13        |
|    iterations           | 344       |
|    time_elapsed         | 6496      |
|    total_timesteps 

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 354        |
|    time_elapsed         | 6717       |
|    total_timesteps      | 90624      |
| train/                  |            |
|    approx_kl            | 0.14876288 |
|    clip_fraction        | 0.837      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.3       |
|    explained_variance   | 0.969      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.104     |
|    n_updates            | 7060       |
|    policy_gradient_loss | -0.119     |
|    std                  | 0.153      |
|    value_loss           | 0.000382   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 355        |
|    time_elapsed         | 6733       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 365        |
|    time_elapsed         | 6907       |
|    total_timesteps      | 93440      |
| train/                  |            |
|    approx_kl            | 0.23946783 |
|    clip_fraction        | 0.79       |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.3       |
|    explained_variance   | -0.169     |
|    learning_rate        | 1e-05      |
|    loss                 | -0.116     |
|    n_updates            | 7280       |
|    policy_gradient_loss | -0.111     |
|    std                  | 0.153      |
|    value_loss           | 0.00214    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 366        |
|    time_elapsed         | 6924       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 376        |
|    time_elapsed         | 7144       |
|    total_timesteps      | 96256      |
| train/                  |            |
|    approx_kl            | 0.11450009 |
|    clip_fraction        | 0.805      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.2       |
|    explained_variance   | 0.927      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0991    |
|    n_updates            | 7500       |
|    policy_gradient_loss | -0.109     |
|    std                  | 0.154      |
|    value_loss           | 0.00066    |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 13          |
|    iterations           | 377         |
|    time_elapsed         | 7161        |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 387        |
|    time_elapsed         | 7319       |
|    total_timesteps      | 99072      |
| train/                  |            |
|    approx_kl            | 0.30908102 |
|    clip_fraction        | 0.791      |
|    clip_range           | 0.2        |
|    entropy_loss         | 28.1       |
|    explained_variance   | 0.871      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0619    |
|    n_updates            | 7720       |
|    policy_gradient_loss | -0.1       |
|    std                  | 0.154      |
|    value_loss           | 0.000629   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 388        |
|    time_elapsed         | 7331       |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 13        |
|    iterations           | 398       |
|    time_elapsed         | 7445      |
|    total_timesteps      | 101888    |
| train/                  |           |
|    approx_kl            | 0.1891697 |
|    clip_fraction        | 0.802     |
|    clip_range           | 0.2       |
|    entropy_loss         | 28.1      |
|    explained_variance   | 0.924     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.0815   |
|    n_updates            | 7940      |
|    policy_gradient_loss | -0.112    |
|    std                  | 0.154     |
|    value_loss           | 0.00143   |
---------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 13        |
|    iterations           | 399       |
|    time_elapsed         | 7457      |
|    total_timesteps      | 102144    |


----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 409        |
|    time_elapsed         | 7615       |
|    total_timesteps      | 104704     |
| train/                  |            |
|    approx_kl            | 0.26762992 |
|    clip_fraction        | 0.81       |
|    clip_range           | 0.2        |
|    entropy_loss         | 28         |
|    explained_variance   | 0.962      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.112     |
|    n_updates            | 8160       |
|    policy_gradient_loss | -0.112     |
|    std                  | 0.154      |
|    value_loss           | 0.000725   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 13        |
|    iterations           | 410       |
|    time_elapsed         | 7627      |
|    total_timesteps 

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 420        |
|    time_elapsed         | 7741       |
|    total_timesteps      | 107520     |
| train/                  |            |
|    approx_kl            | 0.17735396 |
|    clip_fraction        | 0.785      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.9       |
|    explained_variance   | 0.977      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0912    |
|    n_updates            | 8380       |
|    policy_gradient_loss | -0.0996    |
|    std                  | 0.154      |
|    value_loss           | 0.00321    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 421        |
|    time_elapsed         | 7752       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 431        |
|    time_elapsed         | 7913       |
|    total_timesteps      | 110336     |
| train/                  |            |
|    approx_kl            | 0.18027958 |
|    clip_fraction        | 0.793      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.9       |
|    explained_variance   | 0.965      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.065     |
|    n_updates            | 8600       |
|    policy_gradient_loss | -0.101     |
|    std                  | 0.154      |
|    value_loss           | 0.000409   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 13         |
|    iterations           | 432        |
|    time_elapsed         | 7925       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 442        |
|    time_elapsed         | 8038       |
|    total_timesteps      | 113152     |
| train/                  |            |
|    approx_kl            | 0.36426154 |
|    clip_fraction        | 0.812      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.8       |
|    explained_variance   | 0.732      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0927    |
|    n_updates            | 8820       |
|    policy_gradient_loss | -0.102     |
|    std                  | 0.155      |
|    value_loss           | 0.00103    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 443        |
|    time_elapsed         | 8049       |
|    total_times

--------------------------------------
| time/                   |          |
|    fps                  | 14       |
|    iterations           | 453      |
|    time_elapsed         | 8210     |
|    total_timesteps      | 115968   |
| train/                  |          |
|    approx_kl            | 0.132917 |
|    clip_fraction        | 0.812    |
|    clip_range           | 0.2      |
|    entropy_loss         | 27.7     |
|    explained_variance   | 0.955    |
|    learning_rate        | 1e-05    |
|    loss                 | -0.105   |
|    n_updates            | 9040     |
|    policy_gradient_loss | -0.11    |
|    std                  | 0.155    |
|    value_loss           | 0.00055  |
--------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 454        |
|    time_elapsed         | 8222       |
|    total_timesteps      | 116224     |
| train/     

----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 464        |
|    time_elapsed         | 8334       |
|    total_timesteps      | 118784     |
| train/                  |            |
|    approx_kl            | 0.18470629 |
|    clip_fraction        | 0.814      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.7       |
|    explained_variance   | 0.938      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.134     |
|    n_updates            | 9260       |
|    policy_gradient_loss | -0.111     |
|    std                  | 0.155      |
|    value_loss           | 0.00112    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 465        |
|    time_elapsed         | 8345       |
|    total_times

Eval num_timesteps=121600, episode_reward=0.37 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=121600, episode_reward=0.36 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.356       |
| time/                   |             |
|    fps                  | 14          |
|    iterations           | 475         |
|    time_elapsed         | 8506        |
|    total_timesteps      | 121600      |
| train/                  |             |
|    approx_kl            | 0.030523673 |
|    clip_fraction        | 0.786       |
|    clip_range           | 0.2         |
|    entropy_loss         | 27.6        |
|    explained_variance   | 0.859       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.126      |
|    n_updates            | 9480        |
|    policy_gradient_loss | -0.111      |
|    std                  | 0.

----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 486        |
|    time_elapsed         | 8630       |
|    total_timesteps      | 124416     |
| train/                  |            |
|    approx_kl            | 0.09837567 |
|    clip_fraction        | 0.793      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.5       |
|    explained_variance   | 0.929      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.112     |
|    n_updates            | 9700       |
|    policy_gradient_loss | -0.105     |
|    std                  | 0.155      |
|    value_loss           | 0.000514   |
----------------------------------------
--------------------------------------
| time/                   |          |
|    fps                  | 14       |
|    iterations           | 487      |
|    time_elapsed         | 8642     |
|    total_timesteps      

----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 497        |
|    time_elapsed         | 8756       |
|    total_timesteps      | 127232     |
| train/                  |            |
|    approx_kl            | 0.22818911 |
|    clip_fraction        | 0.783      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.4       |
|    explained_variance   | 0.886      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.12      |
|    n_updates            | 9920       |
|    policy_gradient_loss | -0.0911    |
|    std                  | 0.155      |
|    value_loss           | 0.000775   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 498        |
|    time_elapsed         | 8767       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 508        |
|    time_elapsed         | 8926       |
|    total_timesteps      | 130048     |
| train/                  |            |
|    approx_kl            | 0.16695085 |
|    clip_fraction        | 0.792      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.4       |
|    explained_variance   | 0.954      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.115     |
|    n_updates            | 10140      |
|    policy_gradient_loss | -0.115     |
|    std                  | 0.156      |
|    value_loss           | 0.000466   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 509        |
|    time_elapsed         | 8938       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 519        |
|    time_elapsed         | 9050       |
|    total_timesteps      | 132864     |
| train/                  |            |
|    approx_kl            | 0.07548567 |
|    clip_fraction        | 0.801      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.3       |
|    explained_variance   | 0.962      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.117     |
|    n_updates            | 10360      |
|    policy_gradient_loss | -0.118     |
|    std                  | 0.156      |
|    value_loss           | 0.0006     |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 520        |
|    time_elapsed         | 9061       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 530        |
|    time_elapsed         | 9220       |
|    total_timesteps      | 135680     |
| train/                  |            |
|    approx_kl            | 0.29311335 |
|    clip_fraction        | 0.8        |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.2       |
|    explained_variance   | 0.961      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0684    |
|    n_updates            | 10580      |
|    policy_gradient_loss | -0.0987    |
|    std                  | 0.156      |
|    value_loss           | 0.00095    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 531        |
|    time_elapsed         | 9232       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 541        |
|    time_elapsed         | 9344       |
|    total_timesteps      | 138496     |
| train/                  |            |
|    approx_kl            | 0.19868484 |
|    clip_fraction        | 0.827      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27.2       |
|    explained_variance   | 0.934      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.08      |
|    n_updates            | 10800      |
|    policy_gradient_loss | -0.113     |
|    std                  | 0.156      |
|    value_loss           | 0.000682   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 542        |
|    time_elapsed         | 9355       |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 14        |
|    iterations           | 552       |
|    time_elapsed         | 9516      |
|    total_timesteps      | 141312    |
| train/                  |           |
|    approx_kl            | 0.1728884 |
|    clip_fraction        | 0.813     |
|    clip_range           | 0.2       |
|    entropy_loss         | 27        |
|    explained_variance   | 0.943     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.1      |
|    n_updates            | 11020     |
|    policy_gradient_loss | -0.117    |
|    std                  | 0.156     |
|    value_loss           | 0.000682  |
---------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 14        |
|    iterations           | 553       |
|    time_elapsed         | 9527      |
|    total_timesteps      | 141568    |


----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 563        |
|    time_elapsed         | 9640       |
|    total_timesteps      | 144128     |
| train/                  |            |
|    approx_kl            | 0.19168107 |
|    clip_fraction        | 0.813      |
|    clip_range           | 0.2        |
|    entropy_loss         | 27         |
|    explained_variance   | 0.943      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.124     |
|    n_updates            | 11240      |
|    policy_gradient_loss | -0.117     |
|    std                  | 0.157      |
|    value_loss           | 0.000899   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 14         |
|    iterations           | 564        |
|    time_elapsed         | 9651       |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 15        |
|    iterations           | 574       |
|    time_elapsed         | 9767      |
|    total_timesteps      | 146944    |
| train/                  |           |
|    approx_kl            | 0.1849665 |
|    clip_fraction        | 0.834     |
|    clip_range           | 0.2       |
|    entropy_loss         | 26.8      |
|    explained_variance   | 0.955     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.125    |
|    n_updates            | 11460     |
|    policy_gradient_loss | -0.121    |
|    std                  | 0.157     |
|    value_loss           | 0.000622  |
---------------------------------------
Eval num_timesteps=147200, episode_reward=0.38 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=147200, episode_reward=0.36 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------

---------------------------------------
| time/                   |           |
|    fps                  | 15        |
|    iterations           | 585       |
|    time_elapsed         | 9940      |
|    total_timesteps      | 149760    |
| train/                  |           |
|    approx_kl            | 0.3796449 |
|    clip_fraction        | 0.787     |
|    clip_range           | 0.2       |
|    entropy_loss         | 26.8      |
|    explained_variance   | 0.879     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.0371   |
|    n_updates            | 11680     |
|    policy_gradient_loss | -0.0951   |
|    std                  | 0.157     |
|    value_loss           | 0.00097   |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 586        |
|    time_elapsed         | 9951       |
|    total_timesteps      | 150016 

----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 596        |
|    time_elapsed         | 10063      |
|    total_timesteps      | 152576     |
| train/                  |            |
|    approx_kl            | 0.45118803 |
|    clip_fraction        | 0.843      |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.7       |
|    explained_variance   | 0.757      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.124     |
|    n_updates            | 11900      |
|    policy_gradient_loss | -0.115     |
|    std                  | 0.157      |
|    value_loss           | 0.000815   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 597        |
|    time_elapsed         | 10074      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 607        |
|    time_elapsed         | 10236      |
|    total_timesteps      | 155392     |
| train/                  |            |
|    approx_kl            | 0.24484603 |
|    clip_fraction        | 0.812      |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.6       |
|    explained_variance   | 0.935      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.129     |
|    n_updates            | 12120      |
|    policy_gradient_loss | -0.111     |
|    std                  | 0.158      |
|    value_loss           | 0.000448   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 608        |
|    time_elapsed         | 10247      |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 15        |
|    iterations           | 618       |
|    time_elapsed         | 10359     |
|    total_timesteps      | 158208    |
| train/                  |           |
|    approx_kl            | 0.1932354 |
|    clip_fraction        | 0.83      |
|    clip_range           | 0.2       |
|    entropy_loss         | 26.5      |
|    explained_variance   | 0.953     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.127    |
|    n_updates            | 12340     |
|    policy_gradient_loss | -0.121    |
|    std                  | 0.158     |
|    value_loss           | 0.000645  |
---------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 15        |
|    iterations           | 619       |
|    time_elapsed         | 10370     |
|    total_timesteps      | 158464    |


----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 629        |
|    time_elapsed         | 10529      |
|    total_timesteps      | 161024     |
| train/                  |            |
|    approx_kl            | 0.19267005 |
|    clip_fraction        | 0.82       |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.4       |
|    explained_variance   | 0.965      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0664    |
|    n_updates            | 12560      |
|    policy_gradient_loss | -0.113     |
|    std                  | 0.158      |
|    value_loss           | 0.000517   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 630        |
|    time_elapsed         | 10540      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 640        |
|    time_elapsed         | 10653      |
|    total_timesteps      | 163840     |
| train/                  |            |
|    approx_kl            | 0.23866138 |
|    clip_fraction        | 0.782      |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.3       |
|    explained_variance   | 0.97       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0968    |
|    n_updates            | 12780      |
|    policy_gradient_loss | -0.0941    |
|    std                  | 0.158      |
|    value_loss           | 0.000378   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 15        |
|    iterations           | 641       |
|    time_elapsed         | 10664     |
|    total_timesteps 

----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 651        |
|    time_elapsed         | 10821      |
|    total_timesteps      | 166656     |
| train/                  |            |
|    approx_kl            | 0.28887594 |
|    clip_fraction        | 0.816      |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.2       |
|    explained_variance   | 0.937      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0691    |
|    n_updates            | 13000      |
|    policy_gradient_loss | -0.112     |
|    std                  | 0.159      |
|    value_loss           | 0.000644   |
----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 15           |
|    iterations           | 652          |
|    time_elapsed         | 10832        |
|    t

-----------------------------------------
| time/                   |             |
|    fps                  | 15          |
|    iterations           | 662         |
|    time_elapsed         | 10943       |
|    total_timesteps      | 169472      |
| train/                  |             |
|    approx_kl            | 0.078801304 |
|    clip_fraction        | 0.817       |
|    clip_range           | 0.2         |
|    entropy_loss         | 26.1        |
|    explained_variance   | 0.949       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0923     |
|    n_updates            | 13220       |
|    policy_gradient_loss | -0.113      |
|    std                  | 0.159       |
|    value_loss           | 0.000401    |
-----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 15        |
|    iterations           | 663       |
|    time_elapsed         | 10954     |
| 

----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 673        |
|    time_elapsed         | 11068      |
|    total_timesteps      | 172288     |
| train/                  |            |
|    approx_kl            | 0.20781803 |
|    clip_fraction        | 0.837      |
|    clip_range           | 0.2        |
|    entropy_loss         | 26.1       |
|    explained_variance   | 0.962      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.125     |
|    n_updates            | 13440      |
|    policy_gradient_loss | -0.121     |
|    std                  | 0.159      |
|    value_loss           | 0.00204    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 674        |
|    time_elapsed         | 11080      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 684        |
|    time_elapsed         | 11241      |
|    total_timesteps      | 175104     |
| train/                  |            |
|    approx_kl            | 0.24995928 |
|    clip_fraction        | 0.806      |
|    clip_range           | 0.2        |
|    entropy_loss         | 26         |
|    explained_variance   | 0.961      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.11      |
|    n_updates            | 13660      |
|    policy_gradient_loss | -0.104     |
|    std                  | 0.159      |
|    value_loss           | 0.000712   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 685        |
|    time_elapsed         | 11252      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 695        |
|    time_elapsed         | 11367      |
|    total_timesteps      | 177920     |
| train/                  |            |
|    approx_kl            | 0.13546139 |
|    clip_fraction        | 0.834      |
|    clip_range           | 0.2        |
|    entropy_loss         | 25.9       |
|    explained_variance   | 0.965      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.136     |
|    n_updates            | 13880      |
|    policy_gradient_loss | -0.127     |
|    std                  | 0.159      |
|    value_loss           | 0.000757   |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 15          |
|    iterations           | 696         |
|    time_elapsed         | 11378       |
|    total_

---------------------------------------
| time/                   |           |
|    fps                  | 15        |
|    iterations           | 706       |
|    time_elapsed         | 11541     |
|    total_timesteps      | 180736    |
| train/                  |           |
|    approx_kl            | 0.1300108 |
|    clip_fraction        | 0.828     |
|    clip_range           | 0.2       |
|    entropy_loss         | 25.8      |
|    explained_variance   | 0.952     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.051    |
|    n_updates            | 14100     |
|    policy_gradient_loss | -0.106    |
|    std                  | 0.16      |
|    value_loss           | 0.000903  |
---------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 15          |
|    iterations           | 707         |
|    time_elapsed         | 11552       |
|    total_timesteps      | 18

------------------------------------------
| time/                   |              |
|    fps                  | 15           |
|    iterations           | 717          |
|    time_elapsed         | 11666        |
|    total_timesteps      | 183552       |
| train/                  |              |
|    approx_kl            | -0.081610024 |
|    clip_fraction        | 0.801        |
|    clip_range           | 0.2          |
|    entropy_loss         | 25.7         |
|    explained_variance   | 0.838        |
|    learning_rate        | 1e-05        |
|    loss                 | -0.101       |
|    n_updates            | 14320        |
|    policy_gradient_loss | -0.118       |
|    std                  | 0.16         |
|    value_loss           | 0.00103      |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 718        |
|    time_elapsed  

----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 728        |
|    time_elapsed         | 11841      |
|    total_timesteps      | 186368     |
| train/                  |            |
|    approx_kl            | 0.15852812 |
|    clip_fraction        | 0.782      |
|    clip_range           | 0.2        |
|    entropy_loss         | 25.6       |
|    explained_variance   | 0.962      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.114     |
|    n_updates            | 14540      |
|    policy_gradient_loss | -0.109     |
|    std                  | 0.16       |
|    value_loss           | 0.00118    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 729        |
|    time_elapsed         | 11853      |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 15          |
|    iterations           | 739         |
|    time_elapsed         | 11973       |
|    total_timesteps      | 189184      |
| train/                  |             |
|    approx_kl            | 0.093927965 |
|    clip_fraction        | 0.812       |
|    clip_range           | 0.2         |
|    entropy_loss         | 25.5        |
|    explained_variance   | 0.979       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.135      |
|    n_updates            | 14760       |
|    policy_gradient_loss | -0.113      |
|    std                  | 0.16        |
|    value_loss           | 0.000435    |
-----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 15        |
|    iterations           | 740       |
|    time_elapsed         | 11985     |
| 

Eval num_timesteps=192000, episode_reward=0.38 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=192000, episode_reward=0.36 +/- 0.00
Episode length: 5.00 +/- 0.00
--------------------------------------
| eval/                   |          |
|    mean_ep_length       | 5        |
|    mean_reward          | 0.363    |
| time/                   |          |
|    fps                  | 15       |
|    iterations           | 750      |
|    time_elapsed         | 12145    |
|    total_timesteps      | 192000   |
| train/                  |          |
|    approx_kl            | 0.281761 |
|    clip_fraction        | 0.809    |
|    clip_range           | 0.2      |
|    entropy_loss         | 25.4     |
|    explained_variance   | 0.958    |
|    learning_rate        | 1e-05    |
|    loss                 | -0.109   |
|    n_updates            | 14980    |
|    policy_gradient_loss | -0.105   |
|    std                  | 0.161    |
|    value_loss           | 0.000776 |
---------

----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 761        |
|    time_elapsed         | 12272      |
|    total_timesteps      | 194816     |
| train/                  |            |
|    approx_kl            | 0.15151633 |
|    clip_fraction        | 0.83       |
|    clip_range           | 0.2        |
|    entropy_loss         | 25.3       |
|    explained_variance   | 0.924      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.101     |
|    n_updates            | 15200      |
|    policy_gradient_loss | -0.112     |
|    std                  | 0.161      |
|    value_loss           | 0.000492   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 762        |
|    time_elapsed         | 12284      |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 15          |
|    iterations           | 772         |
|    time_elapsed         | 12398       |
|    total_timesteps      | 197632      |
| train/                  |             |
|    approx_kl            | 0.083433226 |
|    clip_fraction        | 0.779       |
|    clip_range           | 0.2         |
|    entropy_loss         | 25.2        |
|    explained_variance   | 0.969       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.117      |
|    n_updates            | 15420       |
|    policy_gradient_loss | -0.0968     |
|    std                  | 0.161       |
|    value_loss           | 0.000291    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 773        |
|    time_elapsed         | 12410     

---------------------------------------
| time/                   |           |
|    fps                  | 15        |
|    iterations           | 783       |
|    time_elapsed         | 12572     |
|    total_timesteps      | 200448    |
| train/                  |           |
|    approx_kl            | 0.1865316 |
|    clip_fraction        | 0.788     |
|    clip_range           | 0.2       |
|    entropy_loss         | 25.1      |
|    explained_variance   | 0.932     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.0952   |
|    n_updates            | 15640     |
|    policy_gradient_loss | -0.0978   |
|    std                  | 0.162     |
|    value_loss           | 0.000547  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 15         |
|    iterations           | 784        |
|    time_elapsed         | 12584      |
|    total_timesteps      | 200704 

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 794        |
|    time_elapsed         | 12702      |
|    total_timesteps      | 203264     |
| train/                  |            |
|    approx_kl            | 0.20644969 |
|    clip_fraction        | 0.825      |
|    clip_range           | 0.2        |
|    entropy_loss         | 25         |
|    explained_variance   | 0.976      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0982    |
|    n_updates            | 15860      |
|    policy_gradient_loss | -0.115     |
|    std                  | 0.162      |
|    value_loss           | 0.000403   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 795        |
|    time_elapsed         | 12714      |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 15        |
|    iterations           | 805       |
|    time_elapsed         | 12882     |
|    total_timesteps      | 206080    |
| train/                  |           |
|    approx_kl            | 0.1613608 |
|    clip_fraction        | 0.802     |
|    clip_range           | 0.2       |
|    entropy_loss         | 24.9      |
|    explained_variance   | 0.986     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.104    |
|    n_updates            | 16080     |
|    policy_gradient_loss | -0.112    |
|    std                  | 0.162     |
|    value_loss           | 0.000263  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 806        |
|    time_elapsed         | 12894      |
|    total_timesteps      | 206336 

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 816        |
|    time_elapsed         | 13009      |
|    total_timesteps      | 208896     |
| train/                  |            |
|    approx_kl            | 0.22569011 |
|    clip_fraction        | 0.804      |
|    clip_range           | 0.2        |
|    entropy_loss         | 24.8       |
|    explained_variance   | 0.936      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.00759   |
|    n_updates            | 16300      |
|    policy_gradient_loss | -0.101     |
|    std                  | 0.162      |
|    value_loss           | 0.000402   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 817        |
|    time_elapsed         | 13020      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 827        |
|    time_elapsed         | 13185      |
|    total_timesteps      | 211712     |
| train/                  |            |
|    approx_kl            | 0.22347122 |
|    clip_fraction        | 0.82       |
|    clip_range           | 0.2        |
|    entropy_loss         | 24.6       |
|    explained_variance   | 0.954      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0558    |
|    n_updates            | 16520      |
|    policy_gradient_loss | -0.116     |
|    std                  | 0.163      |
|    value_loss           | 0.000425   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 828        |
|    time_elapsed         | 13196      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 838        |
|    time_elapsed         | 13311      |
|    total_timesteps      | 214528     |
| train/                  |            |
|    approx_kl            | 0.24062017 |
|    clip_fraction        | 0.806      |
|    clip_range           | 0.2        |
|    entropy_loss         | 24.6       |
|    explained_variance   | 0.963      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.115     |
|    n_updates            | 16740      |
|    policy_gradient_loss | -0.107     |
|    std                  | 0.163      |
|    value_loss           | 0.000385   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 839        |
|    time_elapsed         | 13323      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 849        |
|    time_elapsed         | 13437      |
|    total_timesteps      | 217344     |
| train/                  |            |
|    approx_kl            | 0.36705363 |
|    clip_fraction        | 0.804      |
|    clip_range           | 0.2        |
|    entropy_loss         | 24.5       |
|    explained_variance   | 0.799      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0631    |
|    n_updates            | 16960      |
|    policy_gradient_loss | -0.0986    |
|    std                  | 0.163      |
|    value_loss           | 0.00278    |
----------------------------------------
Eval num_timesteps=217600, episode_reward=0.38 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=217600, episode_reward=0.37 +/- 0.00
Episode length: 5.00 +/- 0.00
----------------------------------------
| eval/ 

-----------------------------------------
| time/                   |             |
|    fps                  | 16          |
|    iterations           | 860         |
|    time_elapsed         | 13616       |
|    total_timesteps      | 220160      |
| train/                  |             |
|    approx_kl            | 0.113164894 |
|    clip_fraction        | 0.838       |
|    clip_range           | 0.2         |
|    entropy_loss         | 24.4        |
|    explained_variance   | 0.979       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0852     |
|    n_updates            | 17180       |
|    policy_gradient_loss | -0.123      |
|    std                  | 0.163       |
|    value_loss           | 0.000258    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 861        |
|    time_elapsed         | 13628     

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 871        |
|    time_elapsed         | 13745      |
|    total_timesteps      | 222976     |
| train/                  |            |
|    approx_kl            | 0.21067268 |
|    clip_fraction        | 0.824      |
|    clip_range           | 0.2        |
|    entropy_loss         | 24.2       |
|    explained_variance   | 0.974      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.107     |
|    n_updates            | 17400      |
|    policy_gradient_loss | -0.115     |
|    std                  | 0.164      |
|    value_loss           | 0.000277   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 872        |
|    time_elapsed         | 13757      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 882        |
|    time_elapsed         | 13922      |
|    total_timesteps      | 225792     |
| train/                  |            |
|    approx_kl            | 0.25091207 |
|    clip_fraction        | 0.84       |
|    clip_range           | 0.2        |
|    entropy_loss         | 24.1       |
|    explained_variance   | 0.919      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.134     |
|    n_updates            | 17620      |
|    policy_gradient_loss | -0.12      |
|    std                  | 0.164      |
|    value_loss           | 0.000719   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 16        |
|    iterations           | 883       |
|    time_elapsed         | 13934     |
|    total_timesteps 

--------------------------------------
| time/                   |          |
|    fps                  | 16       |
|    iterations           | 893      |
|    time_elapsed         | 14054    |
|    total_timesteps      | 228608   |
| train/                  |          |
|    approx_kl            | 0.323188 |
|    clip_fraction        | 0.83     |
|    clip_range           | 0.2      |
|    entropy_loss         | 24       |
|    explained_variance   | 0.944    |
|    learning_rate        | 1e-05    |
|    loss                 | -0.0815  |
|    n_updates            | 17840    |
|    policy_gradient_loss | -0.115   |
|    std                  | 0.164    |
|    value_loss           | 0.000579 |
--------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 894        |
|    time_elapsed         | 14066      |
|    total_timesteps      | 228864     |
| train/     

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 904        |
|    time_elapsed         | 14231      |
|    total_timesteps      | 231424     |
| train/                  |            |
|    approx_kl            | 0.17198205 |
|    clip_fraction        | 0.799      |
|    clip_range           | 0.2        |
|    entropy_loss         | 23.9       |
|    explained_variance   | 0.987      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.111     |
|    n_updates            | 18060      |
|    policy_gradient_loss | -0.109     |
|    std                  | 0.165      |
|    value_loss           | 0.000297   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 905        |
|    time_elapsed         | 14243      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 915        |
|    time_elapsed         | 14363      |
|    total_timesteps      | 234240     |
| train/                  |            |
|    approx_kl            | 0.10272388 |
|    clip_fraction        | 0.832      |
|    clip_range           | 0.2        |
|    entropy_loss         | 23.8       |
|    explained_variance   | 0.968      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.134     |
|    n_updates            | 18280      |
|    policy_gradient_loss | -0.121     |
|    std                  | 0.165      |
|    value_loss           | 0.000377   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 916        |
|    time_elapsed         | 14374      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 926        |
|    time_elapsed         | 14537      |
|    total_timesteps      | 237056     |
| train/                  |            |
|    approx_kl            | 0.23158191 |
|    clip_fraction        | 0.804      |
|    clip_range           | 0.2        |
|    entropy_loss         | 23.7       |
|    explained_variance   | 0.96       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0832    |
|    n_updates            | 18500      |
|    policy_gradient_loss | -0.105     |
|    std                  | 0.165      |
|    value_loss           | 0.000267   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 927        |
|    time_elapsed         | 14548      |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 16        |
|    iterations           | 937       |
|    time_elapsed         | 14662     |
|    total_timesteps      | 239872    |
| train/                  |           |
|    approx_kl            | 0.2383406 |
|    clip_fraction        | 0.795     |
|    clip_range           | 0.2       |
|    entropy_loss         | 23.6      |
|    explained_variance   | 0.954     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.107    |
|    n_updates            | 18720     |
|    policy_gradient_loss | -0.106    |
|    std                  | 0.165     |
|    value_loss           | 0.000317  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 938        |
|    time_elapsed         | 14674      |
|    total_timesteps      | 240128 

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 948        |
|    time_elapsed         | 14791      |
|    total_timesteps      | 242688     |
| train/                  |            |
|    approx_kl            | 0.24885836 |
|    clip_fraction        | 0.811      |
|    clip_range           | 0.2        |
|    entropy_loss         | 23.5       |
|    explained_variance   | 0.97       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.107     |
|    n_updates            | 18940      |
|    policy_gradient_loss | -0.118     |
|    std                  | 0.166      |
|    value_loss           | 0.000381   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 949        |
|    time_elapsed         | 14803      |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 16          |
|    iterations           | 959         |
|    time_elapsed         | 14969       |
|    total_timesteps      | 245504      |
| train/                  |             |
|    approx_kl            | 0.046179127 |
|    clip_fraction        | 0.795       |
|    clip_range           | 0.2         |
|    entropy_loss         | 23.3        |
|    explained_variance   | 0.966       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0776     |
|    n_updates            | 19160       |
|    policy_gradient_loss | -0.105      |
|    std                  | 0.166       |
|    value_loss           | 0.000521    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 960        |
|    time_elapsed         | 14981     

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 970        |
|    time_elapsed         | 15100      |
|    total_timesteps      | 248320     |
| train/                  |            |
|    approx_kl            | 0.18550178 |
|    clip_fraction        | 0.814      |
|    clip_range           | 0.2        |
|    entropy_loss         | 23.2       |
|    explained_variance   | 0.967      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.111     |
|    n_updates            | 19380      |
|    policy_gradient_loss | -0.118     |
|    std                  | 0.166      |
|    value_loss           | 0.000395   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 971        |
|    time_elapsed         | 15111      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 981        |
|    time_elapsed         | 15271      |
|    total_timesteps      | 251136     |
| train/                  |            |
|    approx_kl            | 0.31066686 |
|    clip_fraction        | 0.826      |
|    clip_range           | 0.2        |
|    entropy_loss         | 23.2       |
|    explained_variance   | 0.9        |
|    learning_rate        | 1e-05      |
|    loss                 | -0.106     |
|    n_updates            | 19600      |
|    policy_gradient_loss | -0.102     |
|    std                  | 0.167      |
|    value_loss           | 0.00046    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 982        |
|    time_elapsed         | 15283      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 992        |
|    time_elapsed         | 15401      |
|    total_timesteps      | 253952     |
| train/                  |            |
|    approx_kl            | 0.16973855 |
|    clip_fraction        | 0.837      |
|    clip_range           | 0.2        |
|    entropy_loss         | 23         |
|    explained_variance   | 0.958      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.128     |
|    n_updates            | 19820      |
|    policy_gradient_loss | -0.12      |
|    std                  | 0.167      |
|    value_loss           | 0.000386   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 993        |
|    time_elapsed         | 15412      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1003       |
|    time_elapsed         | 15572      |
|    total_timesteps      | 256768     |
| train/                  |            |
|    approx_kl            | 0.17408827 |
|    clip_fraction        | 0.84       |
|    clip_range           | 0.2        |
|    entropy_loss         | 22.9       |
|    explained_variance   | 0.949      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.13      |
|    n_updates            | 20040      |
|    policy_gradient_loss | -0.124     |
|    std                  | 0.167      |
|    value_loss           | 0.000962   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1004       |
|    time_elapsed         | 15583      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1014       |
|    time_elapsed         | 15694      |
|    total_timesteps      | 259584     |
| train/                  |            |
|    approx_kl            | 0.19950664 |
|    clip_fraction        | 0.818      |
|    clip_range           | 0.2        |
|    entropy_loss         | 22.8       |
|    explained_variance   | 0.969      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0863    |
|    n_updates            | 20260      |
|    policy_gradient_loss | -0.115     |
|    std                  | 0.168      |
|    value_loss           | 0.000466   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1015       |
|    time_elapsed         | 15706      |
|    total_times

Eval num_timesteps=262400, episode_reward=0.38 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=262400, episode_reward=0.37 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.371       |
| time/                   |             |
|    fps                  | 16          |
|    iterations           | 1025        |
|    time_elapsed         | 15871       |
|    total_timesteps      | 262400      |
| train/                  |             |
|    approx_kl            | 0.084383994 |
|    clip_fraction        | 0.809       |
|    clip_range           | 0.2         |
|    entropy_loss         | 22.7        |
|    explained_variance   | 0.973       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.113      |
|    n_updates            | 20480       |
|    policy_gradient_loss | -0.116      |
|    std                  | 0.

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1036       |
|    time_elapsed         | 15992      |
|    total_timesteps      | 265216     |
| train/                  |            |
|    approx_kl            | 0.19454777 |
|    clip_fraction        | 0.821      |
|    clip_range           | 0.2        |
|    entropy_loss         | 22.6       |
|    explained_variance   | 0.949      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.111     |
|    n_updates            | 20700      |
|    policy_gradient_loss | -0.101     |
|    std                  | 0.168      |
|    value_loss           | 0.000281   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1037       |
|    time_elapsed         | 16002      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1047       |
|    time_elapsed         | 16116      |
|    total_timesteps      | 268032     |
| train/                  |            |
|    approx_kl            | 0.17376232 |
|    clip_fraction        | 0.809      |
|    clip_range           | 0.2        |
|    entropy_loss         | 22.5       |
|    explained_variance   | 0.95       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0536    |
|    n_updates            | 20920      |
|    policy_gradient_loss | -0.11      |
|    std                  | 0.168      |
|    value_loss           | 0.000636   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1048       |
|    time_elapsed         | 16128      |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 16          |
|    iterations           | 1058        |
|    time_elapsed         | 16295       |
|    total_timesteps      | 270848      |
| train/                  |             |
|    approx_kl            | 0.115275726 |
|    clip_fraction        | 0.816       |
|    clip_range           | 0.2         |
|    entropy_loss         | 22.4        |
|    explained_variance   | 0.975       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0897     |
|    n_updates            | 21140       |
|    policy_gradient_loss | -0.111      |
|    std                  | 0.169       |
|    value_loss           | 0.000543    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1059       |
|    time_elapsed         | 16308     

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1069       |
|    time_elapsed         | 16420      |
|    total_timesteps      | 273664     |
| train/                  |            |
|    approx_kl            | 0.07097697 |
|    clip_fraction        | 0.812      |
|    clip_range           | 0.2        |
|    entropy_loss         | 22.3       |
|    explained_variance   | 0.967      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0979    |
|    n_updates            | 21360      |
|    policy_gradient_loss | -0.113     |
|    std                  | 0.169      |
|    value_loss           | 0.000523   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 16        |
|    iterations           | 1070      |
|    time_elapsed         | 16431     |
|    total_timesteps 

-----------------------------------------
| time/                   |             |
|    fps                  | 16          |
|    iterations           | 1080        |
|    time_elapsed         | 16599       |
|    total_timesteps      | 276480      |
| train/                  |             |
|    approx_kl            | 0.036416516 |
|    clip_fraction        | 0.826       |
|    clip_range           | 0.2         |
|    entropy_loss         | 22.2        |
|    explained_variance   | 0.972       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.114      |
|    n_updates            | 21580       |
|    policy_gradient_loss | -0.113      |
|    std                  | 0.169       |
|    value_loss           | 0.000444    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1081       |
|    time_elapsed         | 16610     

-----------------------------------------
| time/                   |             |
|    fps                  | 16          |
|    iterations           | 1091        |
|    time_elapsed         | 16731       |
|    total_timesteps      | 279296      |
| train/                  |             |
|    approx_kl            | 0.009416059 |
|    clip_fraction        | 0.822       |
|    clip_range           | 0.2         |
|    entropy_loss         | 22.1        |
|    explained_variance   | 0.947       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.128      |
|    n_updates            | 21800       |
|    policy_gradient_loss | -0.112      |
|    std                  | 0.169       |
|    value_loss           | 0.000696    |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 16          |
|    iterations           | 1092        |
|    time_elapsed         | 16743 

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1102       |
|    time_elapsed         | 16904      |
|    total_timesteps      | 282112     |
| train/                  |            |
|    approx_kl            | 0.13371006 |
|    clip_fraction        | 0.842      |
|    clip_range           | 0.2        |
|    entropy_loss         | 22         |
|    explained_variance   | 0.969      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.143     |
|    n_updates            | 22020      |
|    policy_gradient_loss | -0.122     |
|    std                  | 0.17       |
|    value_loss           | 0.000552   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1103       |
|    time_elapsed         | 16916      |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 16        |
|    iterations           | 1113      |
|    time_elapsed         | 17034     |
|    total_timesteps      | 284928    |
| train/                  |           |
|    approx_kl            | 0.2193568 |
|    clip_fraction        | 0.829     |
|    clip_range           | 0.2       |
|    entropy_loss         | 21.9      |
|    explained_variance   | 0.946     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.126    |
|    n_updates            | 22240     |
|    policy_gradient_loss | -0.113    |
|    std                  | 0.17      |
|    value_loss           | 0.000842  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1114       |
|    time_elapsed         | 17046      |
|    total_timesteps      | 285184 

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1124       |
|    time_elapsed         | 17164      |
|    total_timesteps      | 287744     |
| train/                  |            |
|    approx_kl            | 0.16355824 |
|    clip_fraction        | 0.815      |
|    clip_range           | 0.2        |
|    entropy_loss         | 21.8       |
|    explained_variance   | 0.98       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.09      |
|    n_updates            | 22460      |
|    policy_gradient_loss | -0.114     |
|    std                  | 0.17       |
|    value_loss           | 0.000385   |
----------------------------------------
Eval num_timesteps=288000, episode_reward=0.38 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=288000, episode_reward=0.37 +/- 0.00
Episode length: 5.00 +/- 0.00
----------------------------------------
| eval/ 

---------------------------------------
| time/                   |           |
|    fps                  | 16        |
|    iterations           | 1135      |
|    time_elapsed         | 17334     |
|    total_timesteps      | 290560    |
| train/                  |           |
|    approx_kl            | 0.1623109 |
|    clip_fraction        | 0.813     |
|    clip_range           | 0.2       |
|    entropy_loss         | 21.7      |
|    explained_variance   | 0.975     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.0757   |
|    n_updates            | 22680     |
|    policy_gradient_loss | -0.109    |
|    std                  | 0.171     |
|    value_loss           | 0.000312  |
---------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 16          |
|    iterations           | 1136        |
|    time_elapsed         | 17345       |
|    total_timesteps      | 29

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1146       |
|    time_elapsed         | 17459      |
|    total_timesteps      | 293376     |
| train/                  |            |
|    approx_kl            | 0.24157967 |
|    clip_fraction        | 0.815      |
|    clip_range           | 0.2        |
|    entropy_loss         | 21.6       |
|    explained_variance   | 0.908      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0829    |
|    n_updates            | 22900      |
|    policy_gradient_loss | -0.102     |
|    std                  | 0.171      |
|    value_loss           | 0.000324   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1147       |
|    time_elapsed         | 17471      |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 16        |
|    iterations           | 1157      |
|    time_elapsed         | 17631     |
|    total_timesteps      | 296192    |
| train/                  |           |
|    approx_kl            | 0.2856367 |
|    clip_fraction        | 0.8       |
|    clip_range           | 0.2       |
|    entropy_loss         | 21.5      |
|    explained_variance   | 0.949     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.121    |
|    n_updates            | 23120     |
|    policy_gradient_loss | -0.101    |
|    std                  | 0.171     |
|    value_loss           | 0.000393  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1158       |
|    time_elapsed         | 17643      |
|    total_timesteps      | 296448 

-----------------------------------------
| time/                   |             |
|    fps                  | 16          |
|    iterations           | 1168        |
|    time_elapsed         | 17761       |
|    total_timesteps      | 299008      |
| train/                  |             |
|    approx_kl            | 0.024664745 |
|    clip_fraction        | 0.822       |
|    clip_range           | 0.2         |
|    entropy_loss         | 21.4        |
|    explained_variance   | 0.941       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0287     |
|    n_updates            | 23340       |
|    policy_gradient_loss | -0.126      |
|    std                  | 0.171       |
|    value_loss           | 0.000711    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1169       |
|    time_elapsed         | 17772     

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1179       |
|    time_elapsed         | 17941      |
|    total_timesteps      | 301824     |
| train/                  |            |
|    approx_kl            | 0.14333174 |
|    clip_fraction        | 0.837      |
|    clip_range           | 0.2        |
|    entropy_loss         | 21.3       |
|    explained_variance   | 0.972      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.108     |
|    n_updates            | 23560      |
|    policy_gradient_loss | -0.115     |
|    std                  | 0.172      |
|    value_loss           | 0.000291   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1180       |
|    time_elapsed         | 17953      |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 16        |
|    iterations           | 1190      |
|    time_elapsed         | 18069     |
|    total_timesteps      | 304640    |
| train/                  |           |
|    approx_kl            | 0.3187293 |
|    clip_fraction        | 0.815     |
|    clip_range           | 0.2       |
|    entropy_loss         | 21.1      |
|    explained_variance   | 0.952     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.114    |
|    n_updates            | 23780     |
|    policy_gradient_loss | -0.11     |
|    std                  | 0.172     |
|    value_loss           | 0.000451  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1191       |
|    time_elapsed         | 18080      |
|    total_timesteps      | 304896 

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1201       |
|    time_elapsed         | 18246      |
|    total_timesteps      | 307456     |
| train/                  |            |
|    approx_kl            | 0.26889086 |
|    clip_fraction        | 0.827      |
|    clip_range           | 0.2        |
|    entropy_loss         | 21         |
|    explained_variance   | 0.929      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.111     |
|    n_updates            | 24000      |
|    policy_gradient_loss | -0.103     |
|    std                  | 0.172      |
|    value_loss           | 0.000361   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1202       |
|    time_elapsed         | 18258      |
|    total_times

---------------------------------------
| time/                   |           |
|    fps                  | 16        |
|    iterations           | 1212      |
|    time_elapsed         | 18377     |
|    total_timesteps      | 310272    |
| train/                  |           |
|    approx_kl            | 0.1641128 |
|    clip_fraction        | 0.828     |
|    clip_range           | 0.2       |
|    entropy_loss         | 20.9      |
|    explained_variance   | 0.973     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.0964   |
|    n_updates            | 24220     |
|    policy_gradient_loss | -0.118    |
|    std                  | 0.173     |
|    value_loss           | 0.000306  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1213       |
|    time_elapsed         | 18389      |
|    total_timesteps      | 310528 

---------------------------------------
| time/                   |           |
|    fps                  | 16        |
|    iterations           | 1223      |
|    time_elapsed         | 18508     |
|    total_timesteps      | 313088    |
| train/                  |           |
|    approx_kl            | 0.0682466 |
|    clip_fraction        | 0.795     |
|    clip_range           | 0.2       |
|    entropy_loss         | 20.8      |
|    explained_variance   | 0.965     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.129    |
|    n_updates            | 24440     |
|    policy_gradient_loss | -0.113    |
|    std                  | 0.173     |
|    value_loss           | 0.000418  |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1224       |
|    time_elapsed         | 18520      |
|    total_timesteps      | 313344 

-----------------------------------------
| time/                   |             |
|    fps                  | 16          |
|    iterations           | 1234        |
|    time_elapsed         | 18690       |
|    total_timesteps      | 315904      |
| train/                  |             |
|    approx_kl            | 0.108974695 |
|    clip_fraction        | 0.829       |
|    clip_range           | 0.2         |
|    entropy_loss         | 20.6        |
|    explained_variance   | 0.974       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.124      |
|    n_updates            | 24660       |
|    policy_gradient_loss | -0.117      |
|    std                  | 0.173       |
|    value_loss           | 0.000323    |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1235       |
|    time_elapsed         | 18701     

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1245       |
|    time_elapsed         | 18821      |
|    total_timesteps      | 318720     |
| train/                  |            |
|    approx_kl            | 0.18660995 |
|    clip_fraction        | 0.82       |
|    clip_range           | 0.2        |
|    entropy_loss         | 20.5       |
|    explained_variance   | 0.979      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0916    |
|    n_updates            | 24880      |
|    policy_gradient_loss | -0.116     |
|    std                  | 0.174      |
|    value_loss           | 0.000314   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 16        |
|    iterations           | 1246      |
|    time_elapsed         | 18832     |
|    total_timesteps 

-----------------------------------------
| time/                   |             |
|    fps                  | 16          |
|    iterations           | 1256        |
|    time_elapsed         | 18997       |
|    total_timesteps      | 321536      |
| train/                  |             |
|    approx_kl            | 0.111630775 |
|    clip_fraction        | 0.824       |
|    clip_range           | 0.2         |
|    entropy_loss         | 20.4        |
|    explained_variance   | 0.956       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0811     |
|    n_updates            | 25100       |
|    policy_gradient_loss | -0.106      |
|    std                  | 0.174       |
|    value_loss           | 0.00034     |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1257       |
|    time_elapsed         | 19009     

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1267       |
|    time_elapsed         | 19130      |
|    total_timesteps      | 324352     |
| train/                  |            |
|    approx_kl            | 0.08659922 |
|    clip_fraction        | 0.839      |
|    clip_range           | 0.2        |
|    entropy_loss         | 20.3       |
|    explained_variance   | 0.975      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.128     |
|    n_updates            | 25320      |
|    policy_gradient_loss | -0.12      |
|    std                  | 0.175      |
|    value_loss           | 0.000229   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1268       |
|    time_elapsed         | 19142      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1278       |
|    time_elapsed         | 19309      |
|    total_timesteps      | 327168     |
| train/                  |            |
|    approx_kl            | 0.13576329 |
|    clip_fraction        | 0.817      |
|    clip_range           | 0.2        |
|    entropy_loss         | 20.2       |
|    explained_variance   | 0.978      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0715    |
|    n_updates            | 25540      |
|    policy_gradient_loss | -0.111     |
|    std                  | 0.175      |
|    value_loss           | 0.000269   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1279       |
|    time_elapsed         | 19322      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1289       |
|    time_elapsed         | 19442      |
|    total_timesteps      | 329984     |
| train/                  |            |
|    approx_kl            | 0.19346665 |
|    clip_fraction        | 0.807      |
|    clip_range           | 0.2        |
|    entropy_loss         | 20         |
|    explained_variance   | 0.978      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.115     |
|    n_updates            | 25760      |
|    policy_gradient_loss | -0.11      |
|    std                  | 0.175      |
|    value_loss           | 0.000403   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1290       |
|    time_elapsed         | 19454      |
|    total_times

Eval num_timesteps=332800, episode_reward=0.38 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=332800, episode_reward=0.38 +/- 0.00
Episode length: 5.00 +/- 0.00
---------------------------------------
| eval/                   |           |
|    mean_ep_length       | 5         |
|    mean_reward          | 0.375     |
| time/                   |           |
|    fps                  | 16        |
|    iterations           | 1300      |
|    time_elapsed         | 19623     |
|    total_timesteps      | 332800    |
| train/                  |           |
|    approx_kl            | 0.2254889 |
|    clip_fraction        | 0.797     |
|    clip_range           | 0.2       |
|    entropy_loss         | 19.9      |
|    explained_variance   | 0.979     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.0996   |
|    n_updates            | 25980     |
|    policy_gradient_loss | -0.107    |
|    std                  | 0.176     |
|    value_loss           | 

----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1311       |
|    time_elapsed         | 19753      |
|    total_timesteps      | 335616     |
| train/                  |            |
|    approx_kl            | 0.21386227 |
|    clip_fraction        | 0.845      |
|    clip_range           | 0.2        |
|    entropy_loss         | 19.8       |
|    explained_variance   | 0.957      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.123     |
|    n_updates            | 26200      |
|    policy_gradient_loss | -0.116     |
|    std                  | 0.176      |
|    value_loss           | 0.000305   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 16         |
|    iterations           | 1312       |
|    time_elapsed         | 19765      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1322       |
|    time_elapsed         | 19886      |
|    total_timesteps      | 338432     |
| train/                  |            |
|    approx_kl            | 0.09272714 |
|    clip_fraction        | 0.791      |
|    clip_range           | 0.2        |
|    entropy_loss         | 19.7       |
|    explained_variance   | 0.929      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0564    |
|    n_updates            | 26420      |
|    policy_gradient_loss | -0.109     |
|    std                  | 0.176      |
|    value_loss           | 0.000427   |
----------------------------------------
--------------------------------------
| time/                   |          |
|    fps                  | 17       |
|    iterations           | 1323     |
|    time_elapsed         | 19899    |
|    total_timesteps      

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1333       |
|    time_elapsed         | 20062      |
|    total_timesteps      | 341248     |
| train/                  |            |
|    approx_kl            | 0.10274041 |
|    clip_fraction        | 0.804      |
|    clip_range           | 0.2        |
|    entropy_loss         | 19.6       |
|    explained_variance   | 0.975      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0857    |
|    n_updates            | 26640      |
|    policy_gradient_loss | -0.115     |
|    std                  | 0.177      |
|    value_loss           | 0.000297   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1334       |
|    time_elapsed         | 20074      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1344       |
|    time_elapsed         | 20189      |
|    total_timesteps      | 344064     |
| train/                  |            |
|    approx_kl            | 0.18954909 |
|    clip_fraction        | 0.832      |
|    clip_range           | 0.2        |
|    entropy_loss         | 19.4       |
|    explained_variance   | 0.974      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.119     |
|    n_updates            | 26860      |
|    policy_gradient_loss | -0.119     |
|    std                  | 0.177      |
|    value_loss           | 0.000416   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1345       |
|    time_elapsed         | 20201      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1355       |
|    time_elapsed         | 20367      |
|    total_timesteps      | 346880     |
| train/                  |            |
|    approx_kl            | 0.13666503 |
|    clip_fraction        | 0.795      |
|    clip_range           | 0.2        |
|    entropy_loss         | 19.3       |
|    explained_variance   | 0.974      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0143    |
|    n_updates            | 27080      |
|    policy_gradient_loss | -0.11      |
|    std                  | 0.177      |
|    value_loss           | 0.000296   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1356       |
|    time_elapsed         | 20378      |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 17          |
|    iterations           | 1366        |
|    time_elapsed         | 20497       |
|    total_timesteps      | 349696      |
| train/                  |             |
|    approx_kl            | 0.063927256 |
|    clip_fraction        | 0.823       |
|    clip_range           | 0.2         |
|    entropy_loss         | 19.2        |
|    explained_variance   | 0.976       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.112      |
|    n_updates            | 27300       |
|    policy_gradient_loss | -0.106      |
|    std                  | 0.178       |
|    value_loss           | 0.000212    |
-----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 17        |
|    iterations           | 1367      |
|    time_elapsed         | 20509     |
| 

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1377       |
|    time_elapsed         | 20672      |
|    total_timesteps      | 352512     |
| train/                  |            |
|    approx_kl            | 0.21730463 |
|    clip_fraction        | 0.835      |
|    clip_range           | 0.2        |
|    entropy_loss         | 19.1       |
|    explained_variance   | 0.968      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0788    |
|    n_updates            | 27520      |
|    policy_gradient_loss | -0.119     |
|    std                  | 0.178      |
|    value_loss           | 0.000367   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1378       |
|    time_elapsed         | 20683      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1388       |
|    time_elapsed         | 20798      |
|    total_timesteps      | 355328     |
| train/                  |            |
|    approx_kl            | 0.18638077 |
|    clip_fraction        | 0.83       |
|    clip_range           | 0.2        |
|    entropy_loss         | 19         |
|    explained_variance   | 0.951      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.146     |
|    n_updates            | 27740      |
|    policy_gradient_loss | -0.12      |
|    std                  | 0.178      |
|    value_loss           | 0.00056    |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 17        |
|    iterations           | 1389      |
|    time_elapsed         | 20810     |
|    total_timesteps 

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1399       |
|    time_elapsed         | 20925      |
|    total_timesteps      | 358144     |
| train/                  |            |
|    approx_kl            | 0.24730667 |
|    clip_fraction        | 0.807      |
|    clip_range           | 0.2        |
|    entropy_loss         | 18.9       |
|    explained_variance   | 0.975      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.15      |
|    n_updates            | 27960      |
|    policy_gradient_loss | -0.11      |
|    std                  | 0.179      |
|    value_loss           | 0.000421   |
----------------------------------------
Eval num_timesteps=358400, episode_reward=0.38 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=358400, episode_reward=0.38 +/- 0.00
Episode length: 5.00 +/- 0.00
----------------------------------------
| eval/ 

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1410       |
|    time_elapsed         | 21101      |
|    total_timesteps      | 360960     |
| train/                  |            |
|    approx_kl            | 0.17195486 |
|    clip_fraction        | 0.781      |
|    clip_range           | 0.2        |
|    entropy_loss         | 18.8       |
|    explained_variance   | 0.966      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0775    |
|    n_updates            | 28180      |
|    policy_gradient_loss | -0.101     |
|    std                  | 0.179      |
|    value_loss           | 0.000349   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1411       |
|    time_elapsed         | 21112      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1421       |
|    time_elapsed         | 21226      |
|    total_timesteps      | 363776     |
| train/                  |            |
|    approx_kl            | 0.04235985 |
|    clip_fraction        | 0.812      |
|    clip_range           | 0.2        |
|    entropy_loss         | 18.7       |
|    explained_variance   | 0.967      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.128     |
|    n_updates            | 28400      |
|    policy_gradient_loss | -0.12      |
|    std                  | 0.179      |
|    value_loss           | 0.000269   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1422       |
|    time_elapsed         | 21237      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1432       |
|    time_elapsed         | 21399      |
|    total_timesteps      | 366592     |
| train/                  |            |
|    approx_kl            | 0.20990457 |
|    clip_fraction        | 0.823      |
|    clip_range           | 0.2        |
|    entropy_loss         | 18.6       |
|    explained_variance   | 0.969      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.155     |
|    n_updates            | 28620      |
|    policy_gradient_loss | -0.122     |
|    std                  | 0.179      |
|    value_loss           | 0.000234   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1433       |
|    time_elapsed         | 21411      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1443       |
|    time_elapsed         | 21525      |
|    total_timesteps      | 369408     |
| train/                  |            |
|    approx_kl            | 0.09427226 |
|    clip_fraction        | 0.809      |
|    clip_range           | 0.2        |
|    entropy_loss         | 18.4       |
|    explained_variance   | 0.972      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.125     |
|    n_updates            | 28840      |
|    policy_gradient_loss | -0.115     |
|    std                  | 0.18       |
|    value_loss           | 0.000302   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1444       |
|    time_elapsed         | 21536      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1454       |
|    time_elapsed         | 21703      |
|    total_timesteps      | 372224     |
| train/                  |            |
|    approx_kl            | 0.15165688 |
|    clip_fraction        | 0.801      |
|    clip_range           | 0.2        |
|    entropy_loss         | 18.3       |
|    explained_variance   | 0.984      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0758    |
|    n_updates            | 29060      |
|    policy_gradient_loss | -0.105     |
|    std                  | 0.18       |
|    value_loss           | 0.000236   |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 17          |
|    iterations           | 1455        |
|    time_elapsed         | 21714       |
|    total_

---------------------------------------
| time/                   |           |
|    fps                  | 17        |
|    iterations           | 1465      |
|    time_elapsed         | 21828     |
|    total_timesteps      | 375040    |
| train/                  |           |
|    approx_kl            | 0.2388404 |
|    clip_fraction        | 0.791     |
|    clip_range           | 0.2       |
|    entropy_loss         | 18.2      |
|    explained_variance   | 0.973     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.123    |
|    n_updates            | 29280     |
|    policy_gradient_loss | -0.103    |
|    std                  | 0.18      |
|    value_loss           | 0.000297  |
---------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 17          |
|    iterations           | 1466        |
|    time_elapsed         | 21840       |
|    total_timesteps      | 37

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1476       |
|    time_elapsed         | 22008      |
|    total_timesteps      | 377856     |
| train/                  |            |
|    approx_kl            | 0.03676594 |
|    clip_fraction        | 0.805      |
|    clip_range           | 0.2        |
|    entropy_loss         | 18.1       |
|    explained_variance   | 0.969      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.11      |
|    n_updates            | 29500      |
|    policy_gradient_loss | -0.109     |
|    std                  | 0.181      |
|    value_loss           | 0.000253   |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 17          |
|    iterations           | 1477        |
|    time_elapsed         | 22020       |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1487       |
|    time_elapsed         | 22140      |
|    total_timesteps      | 380672     |
| train/                  |            |
|    approx_kl            | 0.25888705 |
|    clip_fraction        | 0.791      |
|    clip_range           | 0.2        |
|    entropy_loss         | 17.9       |
|    explained_variance   | 0.92       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.146     |
|    n_updates            | 29720      |
|    policy_gradient_loss | -0.113     |
|    std                  | 0.181      |
|    value_loss           | 0.000435   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1488       |
|    time_elapsed         | 22152      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1498       |
|    time_elapsed         | 22272      |
|    total_timesteps      | 383488     |
| train/                  |            |
|    approx_kl            | 0.21501023 |
|    clip_fraction        | 0.817      |
|    clip_range           | 0.2        |
|    entropy_loss         | 17.8       |
|    explained_variance   | 0.967      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.128     |
|    n_updates            | 29940      |
|    policy_gradient_loss | -0.123     |
|    std                  | 0.182      |
|    value_loss           | 0.000424   |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 17        |
|    iterations           | 1499      |
|    time_elapsed         | 22285     |
|    total_timesteps 

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1509       |
|    time_elapsed         | 22455      |
|    total_timesteps      | 386304     |
| train/                  |            |
|    approx_kl            | 0.09270718 |
|    clip_fraction        | 0.812      |
|    clip_range           | 0.2        |
|    entropy_loss         | 17.7       |
|    explained_variance   | 0.979      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.131     |
|    n_updates            | 30160      |
|    policy_gradient_loss | -0.118     |
|    std                  | 0.182      |
|    value_loss           | 0.000264   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1510       |
|    time_elapsed         | 22468      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1520       |
|    time_elapsed         | 22589      |
|    total_timesteps      | 389120     |
| train/                  |            |
|    approx_kl            | 0.36965972 |
|    clip_fraction        | 0.787      |
|    clip_range           | 0.2        |
|    entropy_loss         | 17.6       |
|    explained_variance   | 0.956      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.111     |
|    n_updates            | 30380      |
|    policy_gradient_loss | -0.0973    |
|    std                  | 0.182      |
|    value_loss           | 0.000328   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1521       |
|    time_elapsed         | 22601      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1531       |
|    time_elapsed         | 22770      |
|    total_timesteps      | 391936     |
| train/                  |            |
|    approx_kl            | 0.26840097 |
|    clip_fraction        | 0.83       |
|    clip_range           | 0.2        |
|    entropy_loss         | 17.5       |
|    explained_variance   | 0.98       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0997    |
|    n_updates            | 30600      |
|    policy_gradient_loss | -0.111     |
|    std                  | 0.183      |
|    value_loss           | 0.000191   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1532       |
|    time_elapsed         | 22781      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1542       |
|    time_elapsed         | 22901      |
|    total_timesteps      | 394752     |
| train/                  |            |
|    approx_kl            | 0.07506109 |
|    clip_fraction        | 0.826      |
|    clip_range           | 0.2        |
|    entropy_loss         | 17.3       |
|    explained_variance   | 0.969      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0712    |
|    n_updates            | 30820      |
|    policy_gradient_loss | -0.123     |
|    std                  | 0.183      |
|    value_loss           | 0.000265   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1543       |
|    time_elapsed         | 22914      |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1553       |
|    time_elapsed         | 23079      |
|    total_timesteps      | 397568     |
| train/                  |            |
|    approx_kl            | 0.16724804 |
|    clip_fraction        | 0.817      |
|    clip_range           | 0.2        |
|    entropy_loss         | 17.2       |
|    explained_variance   | 0.973      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0837    |
|    n_updates            | 31040      |
|    policy_gradient_loss | -0.122     |
|    std                  | 0.183      |
|    value_loss           | 0.000328   |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 17         |
|    iterations           | 1554       |
|    time_elapsed         | 23091      |
|    total_times

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [9]:
# Close the environment once the training output above has been produced.
# NOTE(review): `env` is created in an earlier cell that is not visible here; presumably it is the
# vectorised training environment, in which case close() also shuts down its workers — confirm.
env.close()

In [10]:
# action = [0.001, 1., 1., 0.48112956, 0.001, 0.001, 0.001, 1., 1., 0.001, 0.001, 0.001, 1., 1., 0.01334454, 0.77358127, 0.001, 0.001, 0.001, 1., 0.001, 0.001, 0.001, 0.001, 1.,    0.001, 1.,    1.,    1.,    0.001,  0.001, 1.,    1.,    0.001, 1.,  0.001, 0.001, 0.001, 0.001, 0.001, 1.,    1., 1.,    1.,    1.,    0.001, 0.001, 0.001,  0.001, 0.001, 1.,    0.001, 0.001, 1., 0.001, 1.,    0.001, 1.,    0.001, 1., 1.,0.001]
# self = env_eval

# # convert input array into producer/injector 
# inj_flow = action[:self.n_inj] / np.sum(action[:self.n_inj])
# inj_flow = self.Q * inj_flow
# prod_flow = action[self.n_inj:] / np.sum(action[self.n_inj:])
# prod_flow = -self.Q * prod_flow

# assert np.sum(inj_flow)>0, 'Invalid action: zero injector flow'
# assert np.sum(prod_flow)<0, 'Invalid action: zero producer flow'

# # add producer/injector flow values
# q = np.zeros(self.grid.shape)
# for i,(x,y) in enumerate( zip(self.i_x, self.i_y) ):
#     q[x,y] = inj_flow[i]

# for i,(x,y) in enumerate( zip(self.p_x, self.p_y) ):
#     q[x,y] = prod_flow[i]
    
# # if np.abs(np.sum(q)) < self.tol:
# #     q[3,3] = q[3,3] - np.sum(q) # adjust the unbalanced source term at an arbitrary location in the field to correct for precision error

In [11]:
# np.abs(np.sum(q))