In [1]:
import os
import gym
from IPython import display
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines.common.vec_env import DummyVecEnv
from utils.ppo import PPO
from utils.models import Policy

In [2]:
# Restrict CUDA to a single GPU for this kernel: devices are ordered by
# PCI bus id and only device index 0 is made visible.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

In [3]:
# Base location for run artifacts: the process's current working directory.
# NOTE: the configuration cell further down rebinds LOGS to the
# per-environment, per-run subdirectory.
LOGS = os.getcwd()

In [4]:
def makedirs(path):
    """Create the directory ``path`` (and any missing parents) if needed.

    Calling this for a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
        OSError: If the directory cannot be created (e.g. permissions).
    """
    # exist_ok=True folds the existence check into the creation call, which
    # avoids the check-then-create race of a separate os.path.exists() test
    # and surfaces the case where ``path`` is an existing regular file.
    os.makedirs(path, exist_ok=True)

In [5]:
# --- Experiment configuration ------------------------------------------------
env_name = 'Pendulum-v0'     # Gym environment id handed to gym.make
run_id = 1                   # index used to name this run's output directory
n_steps = 250                # forwarded to PPO as n_steps
total_timesteps = 1000000    # forwarded to model.learn
n_cpu = 4                    # number of environment factories given to SubprocVecEnv

# --- Output locations ---------------------------------------------------------
# Build the run directory from the working directory instead of from the
# previous value of LOGS: the old self-referential join nested
# '<env_name>/run<id>' inside itself every time this cell was re-executed.
LOGS = os.path.join(os.getcwd(), env_name, 'run{}'.format(run_id))
makedirs(LOGS)
tb_log = os.path.join(LOGS, 'tb')                # passed to PPO as tensorboard_log
makedirs(tb_log)
model_dir = os.path.join(LOGS, 'models')         # parent of the save_file path given to learn()
makedirs(model_dir)
final_model_dir = os.path.join(LOGS, 'model')    # path given to model.save(); not created here

In [6]:
# One environment factory per worker; the list of callables is handed to
# SubprocVecEnv to build the vectorized environment.
env_fns = [lambda: gym.make(env_name) for _ in range(n_cpu)]
env = SubprocVecEnv(env_fns)

# Build the PPO model on the vectorized environment, logging to tb_log.
model = PPO(
    Policy,
    env,
    n_steps=n_steps,
    tensorboard_log=tb_log,
    verbose=1,
    full_tensorboard_log=True,
)

# Train. The meaning of save_file is defined in utils.ppo
# (presumably a checkpoint path written during training -- TODO confirm).
save_path = os.path.join(model_dir, 'model')
model.learn(total_timesteps, env, save_file=save_path)

# Persist the trained model, then drop the in-memory reference so that any
# later use has to go through loading it back from disk.
model.save(final_model_dir)
del model

INFO:tensorflow:Summary name model/pi_fc0/w:0 is illegal; using model/pi_fc0/w_0 instead.
INFO:tensorflow:Summary name model/pi_fc0/b:0 is illegal; using model/pi_fc0/b_0 instead.
INFO:tensorflow:Summary name model/vf_fc0/w:0 is illegal; using model/vf_fc0/w_0 instead.
INFO:tensorflow:Summary name model/vf_fc0/b:0 is illegal; using model/vf_fc0/b_0 instead.
INFO:tensorflow:Summary name model/pi_fc1/w:0 is illegal; using model/pi_fc1/w_0 instead.
INFO:tensorflow:Summary name model/pi_fc1/b:0 is illegal; using model/pi_fc1/b_0 instead.
INFO:tensorflow:Summary name model/vf_fc1/w:0 is illegal; using model/vf_fc1/w_0 instead.
INFO:tensorflow:Summary name model/vf_fc1/b:0 is illegal; using model/vf_fc1/b_0 instead.
INFO:tensorflow:Summary name model/vf/w:0 is illegal; using model/vf/w_0 instead.
INFO:tensorflow:Summary name model/vf/b:0 is illegal; using model/vf/b_0 instead.
INFO:tensorflow:Summary name model/pi/w:0 is illegal; using model/pi/w_0 instead.
INFO:tensorflow:Summary name model

--------------------------------------
| approxkl           | 3.4624583e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.0348        |
| fps                | 953           |
| nupdates           | 12            |
| policy_entropy     | 1.429468      |
| policy_loss        | 0.0001518665  |
| serial_timesteps   | 3000          |
| time_elapsed       | 12.4          |
| total_timesteps    | 12096         |
| value_loss         | 3227.9985     |
--------------------------------------
---------------------------------------
| approxkl           | 3.929981e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.0498         |
| fps                | 930            |
| nupdates           | 13             |
| policy_entropy     | 1.4293364      |
| policy_loss        | -4.7711954e-05 |
| serial_timesteps   | 3250           |
| time_elapsed       | 13.4           |
| total_timesteps    | 13104          |
| value_loss         | 4663.3228      |
-------------

--------------------------------------
| approxkl           | 5.3413896e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.083         |
| fps                | 940           |
| nupdates           | 29            |
| policy_entropy     | 1.4381051     |
| policy_loss        | -2.084315e-05 |
| serial_timesteps   | 7250          |
| time_elapsed       | 30            |
| total_timesteps    | 29232         |
| value_loss         | 4045.2612     |
--------------------------------------
-------------------------------------
| approxkl           | 2.243118e-07 |
| clipfrac           | 0.0          |
| explained_variance | 0.0495       |
| fps                | 954          |
| nupdates           | 30           |
| policy_entropy     | 1.4383502    |
| policy_loss        | 8.148521e-06 |
| serial_timesteps   | 7500         |
| time_elapsed       | 31           |
| total_timesteps    | 30240        |
| value_loss         | 3468.83      |
-------------------------------------

--------------------------------------
| approxkl           | 4.3566342e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.0825        |
| fps                | 965           |
| nupdates           | 46            |
| policy_entropy     | 1.4552053     |
| policy_loss        | 1.2952562e-05 |
| serial_timesteps   | 11500         |
| time_elapsed       | 47.5          |
| total_timesteps    | 46368         |
| value_loss         | 4660.9795     |
--------------------------------------
---------------------------------------
| approxkl           | 6.0266466e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.0716         |
| fps                | 982            |
| nupdates           | 47             |
| policy_entropy     | 1.4584583      |
| policy_loss        | -0.00014844647 |
| serial_timesteps   | 11750          |
| time_elapsed       | 48.6           |
| total_timesteps    | 47376          |
| value_loss         | 5552.3447      |
-------------

--------------------------------------
| approxkl           | 1.1782247e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.0617        |
| fps                | 937           |
| nupdates           | 63            |
| policy_entropy     | 1.4637526     |
| policy_loss        | 4.4247423e-05 |
| serial_timesteps   | 15750         |
| time_elapsed       | 65            |
| total_timesteps    | 63504         |
| value_loss         | 3973.4956     |
--------------------------------------
--------------------------------------
| approxkl           | 1.6240701e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.0437        |
| fps                | 943           |
| nupdates           | 64            |
| policy_entropy     | 1.4637041     |
| policy_loss        | -4.791258e-05 |
| serial_timesteps   | 16000         |
| time_elapsed       | 66.1          |
| total_timesteps    | 64512         |
| value_loss         | 4177.723      |
-------------------------

---------------------------------------
| approxkl           | 1.461462e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.0537         |
| fps                | 979            |
| nupdates           | 80             |
| policy_entropy     | 1.4692733      |
| policy_loss        | -4.4896493e-05 |
| serial_timesteps   | 20000          |
| time_elapsed       | 82.8           |
| total_timesteps    | 80640          |
| value_loss         | 3996.306       |
---------------------------------------
---------------------------------------
| approxkl           | 1.6696484e-07  |
| clipfrac           | 0.0            |
| explained_variance | 0.0469         |
| fps                | 986            |
| nupdates           | 81             |
| policy_entropy     | 1.469126       |
| policy_loss        | -1.5297383e-05 |
| serial_timesteps   | 20250          |
| time_elapsed       | 83.8           |
| total_timesteps    | 81648          |
| value_loss         | 5780.85        |


-------------------------------------
| approxkl           | 0.0007570097 |
| clipfrac           | 0.00075      |
| explained_variance | 0.0207       |
| fps                | 933          |
| nupdates           | 96           |
| policy_entropy     | 1.4694961    |
| policy_loss        | 0.0024053038 |
| serial_timesteps   | 24000        |
| time_elapsed       | 99.3         |
| total_timesteps    | 96768        |
| value_loss         | 2678.7913    |
-------------------------------------
---------------------------------------
| approxkl           | 2.6239211e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.0242         |
| fps                | 1005           |
| nupdates           | 97             |
| policy_entropy     | 1.4702423      |
| policy_loss        | 0.000116006704 |
| serial_timesteps   | 24250          |
| time_elapsed       | 100            |
| total_timesteps    | 97776          |
| value_loss         | 4241.1157      |
--------------------------

--------------------------------------
| approxkl           | 1.3714362e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.0159        |
| fps                | 957           |
| nupdates           | 113           |
| policy_entropy     | 1.4715304     |
| policy_loss        | -2.794026e-06 |
| serial_timesteps   | 28250         |
| time_elapsed       | 117           |
| total_timesteps    | 113904        |
| value_loss         | 4646.6196     |
--------------------------------------
----------------------------------------
| approxkl           | 1.1831749e-05   |
| clipfrac           | 0.0             |
| explained_variance | 0.0164          |
| fps                | 984             |
| nupdates           | 114             |
| policy_entropy     | 1.4715917       |
| policy_loss        | -0.000105159255 |
| serial_timesteps   | 28500           |
| time_elapsed       | 118             |
| total_timesteps    | 114912          |
| value_loss         | 3945.5474       |
-

---------------------------------------
| approxkl           | 7.211231e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.00741        |
| fps                | 977            |
| nupdates           | 130            |
| policy_entropy     | 1.476626       |
| policy_loss        | -0.00029920661 |
| serial_timesteps   | 32500          |
| time_elapsed       | 135            |
| total_timesteps    | 131040         |
| value_loss         | 2882.3174      |
---------------------------------------
---------------------------------------
| approxkl           | 1.2587045e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.00888        |
| fps                | 955            |
| nupdates           | 131            |
| policy_entropy     | 1.4767109      |
| policy_loss        | -1.4437021e-05 |
| serial_timesteps   | 32750          |
| time_elapsed       | 136            |
| total_timesteps    | 132048         |
| value_loss         | 4103.7275      |


--------------------------------------
| approxkl           | 3.1370155e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.00608       |
| fps                | 991           |
| nupdates           | 147           |
| policy_entropy     | 1.475718      |
| policy_loss        | 4.6551857e-05 |
| serial_timesteps   | 36750         |
| time_elapsed       | 153           |
| total_timesteps    | 148176        |
| value_loss         | 3498.7961     |
--------------------------------------
--------------------------------------
| approxkl           | 3.3193749e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.00461       |
| fps                | 962           |
| nupdates           | 148           |
| policy_entropy     | 1.4755436     |
| policy_loss        | 7.3116826e-05 |
| serial_timesteps   | 37000         |
| time_elapsed       | 154           |
| total_timesteps    | 149184        |
| value_loss         | 2833.5        |
-------------------------

--------------------------------------
| approxkl           | 1.3280785e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.00386       |
| fps                | 949           |
| nupdates           | 163           |
| policy_entropy     | 1.4744583     |
| policy_loss        | 1.1936459e-05 |
| serial_timesteps   | 40750         |
| time_elapsed       | 169           |
| total_timesteps    | 164304        |
| value_loss         | 4011.842      |
--------------------------------------
--------------------------------------
| approxkl           | 1.7842863e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.00128       |
| fps                | 968           |
| nupdates           | 164           |
| policy_entropy     | 1.4743823     |
| policy_loss        | -3.406225e-05 |
| serial_timesteps   | 41000         |
| time_elapsed       | 170           |
| total_timesteps    | 165312        |
| value_loss         | 2842.1565     |
-------------------------

---------------------------------------
| approxkl           | 3.7419554e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.00248        |
| fps                | 971            |
| nupdates           | 180            |
| policy_entropy     | 1.4743599      |
| policy_loss        | -6.7236056e-06 |
| serial_timesteps   | 45000          |
| time_elapsed       | 187            |
| total_timesteps    | 181440         |
| value_loss         | 3962.102       |
---------------------------------------
---------------------------------------
| approxkl           | 1.070626e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.00218        |
| fps                | 977            |
| nupdates           | 181            |
| policy_entropy     | 1.4743975      |
| policy_loss        | -4.0184732e-05 |
| serial_timesteps   | 45250          |
| time_elapsed       | 188            |
| total_timesteps    | 182448         |
| value_loss         | 3924.567       |


--------------------------------------
| approxkl           | 4.1604733e-07 |
| clipfrac           | 0.0           |
| explained_variance | 0.00209       |
| fps                | 1014          |
| nupdates           | 197           |
| policy_entropy     | 1.4735582     |
| policy_loss        | 5.1754123e-06 |
| serial_timesteps   | 49250         |
| time_elapsed       | 204           |
| total_timesteps    | 198576        |
| value_loss         | 3027.1772     |
--------------------------------------
--------------------------------------
| approxkl           | 2.2791677e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.00177       |
| fps                | 961           |
| nupdates           | 198           |
| policy_entropy     | 1.4735905     |
| policy_loss        | 0.00010862364 |
| serial_timesteps   | 49500         |
| time_elapsed       | 205           |
| total_timesteps    | 199584        |
| value_loss         | 3176.6343     |
-------------------------

--------------------------------------
| approxkl           | 0.000100352   |
| clipfrac           | 0.0           |
| explained_variance | 0.00166       |
| fps                | 981           |
| nupdates           | 214           |
| policy_entropy     | 1.4763342     |
| policy_loss        | -0.0003949151 |
| serial_timesteps   | 53500         |
| time_elapsed       | 221           |
| total_timesteps    | 215712        |
| value_loss         | 3234.0945     |
--------------------------------------
---------------------------------------
| approxkl           | 8.732189e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.00154        |
| fps                | 1029           |
| nupdates           | 215            |
| policy_entropy     | 1.4764723      |
| policy_loss        | -0.00030740455 |
| serial_timesteps   | 53750          |
| time_elapsed       | 222            |
| total_timesteps    | 216720         |
| value_loss         | 3516.8213      |
-------------

--------------------------------------
| approxkl           | 5.9411573e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.00114       |
| fps                | 1038          |
| nupdates           | 230           |
| policy_entropy     | 1.4769571     |
| policy_loss        | 1.8791558e-05 |
| serial_timesteps   | 57500         |
| time_elapsed       | 237           |
| total_timesteps    | 231840        |
| value_loss         | 3048.3748     |
--------------------------------------
--------------------------------------
| approxkl           | 7.756029e-07  |
| clipfrac           | 0.0           |
| explained_variance | 0.00095       |
| fps                | 1048          |
| nupdates           | 231           |
| policy_entropy     | 1.4770904     |
| policy_loss        | 1.2455911e-05 |
| serial_timesteps   | 57750         |
| time_elapsed       | 238           |
| total_timesteps    | 232848        |
| value_loss         | 3393.9163     |
-------------------------

--------------------------------------
| approxkl           | 2.7353602e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.000775      |
| fps                | 947           |
| nupdates           | 246           |
| policy_entropy     | 1.4804817     |
| policy_loss        | 8.151958e-05  |
| serial_timesteps   | 61500         |
| time_elapsed       | 254           |
| total_timesteps    | 247968        |
| value_loss         | 3106.873      |
--------------------------------------
--------------------------------------
| approxkl           | 2.8440675e-07 |
| clipfrac           | 0.0           |
| explained_variance | 0.000784      |
| fps                | 962           |
| nupdates           | 247           |
| policy_entropy     | 1.480721      |
| policy_loss        | 1.5804202e-05 |
| serial_timesteps   | 61750         |
| time_elapsed       | 255           |
| total_timesteps    | 248976        |
| value_loss         | 5167.2036     |
-------------------------

--------------------------------------
| approxkl           | 1.086359e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.000809      |
| fps                | 1005          |
| nupdates           | 262           |
| policy_entropy     | 1.4773715     |
| policy_loss        | 3.5309185e-06 |
| serial_timesteps   | 65500         |
| time_elapsed       | 270           |
| total_timesteps    | 264096        |
| value_loss         | 3678.444      |
--------------------------------------
--------------------------------------
| approxkl           | 2.4142682e-07 |
| clipfrac           | 0.0           |
| explained_variance | 0.000741      |
| fps                | 957           |
| nupdates           | 263           |
| policy_entropy     | 1.4775013     |
| policy_loss        | -9.356887e-06 |
| serial_timesteps   | 65750         |
| time_elapsed       | 271           |
| total_timesteps    | 265104        |
| value_loss         | 2373.2957     |
-------------------------

----------------------------------------
| approxkl           | 4.52724e-06     |
| clipfrac           | 0.0             |
| explained_variance | 0.000541        |
| fps                | 1005            |
| nupdates           | 279             |
| policy_entropy     | 1.4812129       |
| policy_loss        | -0.000107131804 |
| serial_timesteps   | 69750           |
| time_elapsed       | 288             |
| total_timesteps    | 281232          |
| value_loss         | 3423.1794       |
----------------------------------------
--------------------------------------
| approxkl           | 0.0001139017  |
| clipfrac           | 0.0           |
| explained_variance | 0.000352      |
| fps                | 979           |
| nupdates           | 280           |
| policy_entropy     | 1.4810375     |
| policy_loss        | 2.7588474e-05 |
| serial_timesteps   | 70000         |
| time_elapsed       | 289           |
| total_timesteps    | 282240        |
| value_loss         | 3637.287      |

--------------------------------------
| approxkl           | 1.4123255e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.000346      |
| fps                | 1009          |
| nupdates           | 296           |
| policy_entropy     | 1.4834081     |
| policy_loss        | -7.562998e-05 |
| serial_timesteps   | 74000         |
| time_elapsed       | 305           |
| total_timesteps    | 298368        |
| value_loss         | 3405.029      |
--------------------------------------
---------------------------------------
| approxkl           | 7.255934e-07   |
| clipfrac           | 0.0            |
| explained_variance | 0.000403       |
| fps                | 1063           |
| nupdates           | 297            |
| policy_entropy     | 1.4834228      |
| policy_loss        | -6.4575164e-05 |
| serial_timesteps   | 74250          |
| time_elapsed       | 306            |
| total_timesteps    | 299376         |
| value_loss         | 4122.0396      |
-------------

--------------------------------------
| approxkl           | 7.1305094e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.000417      |
| fps                | 949           |
| nupdates           | 313           |
| policy_entropy     | 1.4831209     |
| policy_loss        | 1.3223588e-05 |
| serial_timesteps   | 78250         |
| time_elapsed       | 323           |
| total_timesteps    | 315504        |
| value_loss         | 2237.8901     |
--------------------------------------
--------------------------------------
| approxkl           | 1.2133312e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.000394      |
| fps                | 969           |
| nupdates           | 314           |
| policy_entropy     | 1.4831569     |
| policy_loss        | -0.0002606598 |
| serial_timesteps   | 78500         |
| time_elapsed       | 324           |
| total_timesteps    | 316512        |
| value_loss         | 2446.3467     |
-------------------------

--------------------------------------
| approxkl           | 0.00023411159 |
| clipfrac           | 0.0           |
| explained_variance | 0.000376      |
| fps                | 1009          |
| nupdates           | 330           |
| policy_entropy     | 1.4833485     |
| policy_loss        | 0.00034770177 |
| serial_timesteps   | 82500         |
| time_elapsed       | 341           |
| total_timesteps    | 332640        |
| value_loss         | 3125.9863     |
--------------------------------------
---------------------------------------
| approxkl           | 8.0294685e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.000418       |
| fps                | 996            |
| nupdates           | 331            |
| policy_entropy     | 1.48366        |
| policy_loss        | -6.0676095e-05 |
| serial_timesteps   | 82750          |
| time_elapsed       | 342            |
| total_timesteps    | 333648         |
| value_loss         | 2269.7837      |
-------------

---------------------------------------
| approxkl           | 1.7112023e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.000281       |
| fps                | 992            |
| nupdates           | 346            |
| policy_entropy     | 1.4843607      |
| policy_loss        | 1.21216835e-05 |
| serial_timesteps   | 86500          |
| time_elapsed       | 357            |
| total_timesteps    | 348768         |
| value_loss         | 2715.4565      |
---------------------------------------
--------------------------------------
| approxkl           | 1.6590293e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.000315      |
| fps                | 934           |
| nupdates           | 347           |
| policy_entropy     | 1.4844205     |
| policy_loss        | 4.058181e-06  |
| serial_timesteps   | 86750         |
| time_elapsed       | 358           |
| total_timesteps    | 349776        |
| value_loss         | 3854.1284     |
------------

--------------------------------------
| approxkl           | 3.9557526e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.000215      |
| fps                | 1015          |
| nupdates           | 362           |
| policy_entropy     | 1.4850566     |
| policy_loss        | -8.857473e-05 |
| serial_timesteps   | 90500         |
| time_elapsed       | 374           |
| total_timesteps    | 364896        |
| value_loss         | 3270.194      |
--------------------------------------
--------------------------------------
| approxkl           | 3.0415051e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.000215      |
| fps                | 977           |
| nupdates           | 363           |
| policy_entropy     | 1.4849474     |
| policy_loss        | -3.174034e-05 |
| serial_timesteps   | 90750         |
| time_elapsed       | 375           |
| total_timesteps    | 365904        |
| value_loss         | 2993.5178     |
-------------------------

---------------------------------------
| approxkl           | 1.4357995e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.000208       |
| fps                | 981            |
| nupdates           | 379            |
| policy_entropy     | 1.4831455      |
| policy_loss        | -6.1845494e-05 |
| serial_timesteps   | 94750          |
| time_elapsed       | 392            |
| total_timesteps    | 382032         |
| value_loss         | 3023.7744      |
---------------------------------------
--------------------------------------
| approxkl           | 5.2126336e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.000205      |
| fps                | 926           |
| nupdates           | 380           |
| policy_entropy     | 1.4833815     |
| policy_loss        | -4.495358e-05 |
| serial_timesteps   | 95000         |
| time_elapsed       | 393           |
| total_timesteps    | 383040        |
| value_loss         | 2775.287      |
------------

---------------------------------------
| approxkl           | 1.1249133e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.000197       |
| fps                | 958            |
| nupdates           | 395            |
| policy_entropy     | 1.482946       |
| policy_loss        | -4.6946643e-05 |
| serial_timesteps   | 98750          |
| time_elapsed       | 408            |
| total_timesteps    | 398160         |
| value_loss         | 2728.7676      |
---------------------------------------
---------------------------------------
| approxkl           | 4.190758e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.000139       |
| fps                | 929            |
| nupdates           | 396            |
| policy_entropy     | 1.4830505      |
| policy_loss        | -0.00057988835 |
| serial_timesteps   | 99000          |
| time_elapsed       | 409            |
| total_timesteps    | 399168         |
| value_loss         | 2940.265       |


--------------------------------------
| approxkl           | 0.00015665447 |
| clipfrac           | 0.0           |
| explained_variance | 8.81e-05      |
| fps                | 977           |
| nupdates           | 412           |
| policy_entropy     | 1.4814187     |
| policy_loss        | -0.0001812434 |
| serial_timesteps   | 103000        |
| time_elapsed       | 426           |
| total_timesteps    | 415296        |
| value_loss         | 2848.0945     |
--------------------------------------
--------------------------------------
| approxkl           | 3.4749607e-06 |
| clipfrac           | 0.0           |
| explained_variance | 5.02e-05      |
| fps                | 1029          |
| nupdates           | 413           |
| policy_entropy     | 1.4814022     |
| policy_loss        | 3.0193149e-05 |
| serial_timesteps   | 103250        |
| time_elapsed       | 427           |
| total_timesteps    | 416304        |
| value_loss         | 4777.605      |
-------------------------

---------------------------------------
| approxkl           | 3.6664428e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.0001         |
| fps                | 990            |
| nupdates           | 429            |
| policy_entropy     | 1.4841807      |
| policy_loss        | -1.3828131e-05 |
| serial_timesteps   | 107250         |
| time_elapsed       | 444            |
| total_timesteps    | 432432         |
| value_loss         | 3259.5642      |
---------------------------------------
---------------------------------------
| approxkl           | 1.900681e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.000122       |
| fps                | 943            |
| nupdates           | 430            |
| policy_entropy     | 1.4851865      |
| policy_loss        | -0.00011280664 |
| serial_timesteps   | 107500         |
| time_elapsed       | 445            |
| total_timesteps    | 433440         |
| value_loss         | 2963.1597      |


--------------------------------------
| approxkl           | 1.2367707e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.000126      |
| fps                | 938           |
| nupdates           | 445           |
| policy_entropy     | 1.4877266     |
| policy_loss        | 1.1540591e-05 |
| serial_timesteps   | 111250        |
| time_elapsed       | 460           |
| total_timesteps    | 448560        |
| value_loss         | 1749.5874     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00013005793 |
| clipfrac           | 0.0           |
| explained_variance | 9.79e-05      |
| fps                | 918           |
| nupdates           | 446           |
| policy_entropy     | 1.4878541     |
| policy_loss        | -5.974291e-05 |
| serial_timesteps   | 111500        |
| time_elapsed       | 461           |
| total_timesteps    | 449568        |
| value_loss         | 3393.735      |
-------------------------

--------------------------------------
| approxkl           | 1.7086068e-05 |
| clipfrac           | 0.0           |
| explained_variance | 9.58e-05      |
| fps                | 1022          |
| nupdates           | 462           |
| policy_entropy     | 1.4861611     |
| policy_loss        | 3.139895e-05  |
| serial_timesteps   | 115500        |
| time_elapsed       | 478           |
| total_timesteps    | 465696        |
| value_loss         | 1927.4841     |
--------------------------------------
---------------------------------------
| approxkl           | 7.20898e-06    |
| clipfrac           | 0.0            |
| explained_variance | 0.000109       |
| fps                | 969            |
| nupdates           | 463            |
| policy_entropy     | 1.4862896      |
| policy_loss        | -0.00016467716 |
| serial_timesteps   | 115750         |
| time_elapsed       | 479            |
| total_timesteps    | 466704         |
| value_loss         | 2135.287       |
-------------

--------------------------------------
| approxkl           | 0.00020002818 |
| clipfrac           | 0.0           |
| explained_variance | 8.09e-05      |
| fps                | 914           |
| nupdates           | 478           |
| policy_entropy     | 1.4896834     |
| policy_loss        | 0.00025921356 |
| serial_timesteps   | 119500        |
| time_elapsed       | 494           |
| total_timesteps    | 481824        |
| value_loss         | 2287.4968     |
--------------------------------------
---------------------------------------
| approxkl           | 9.191999e-06   |
| clipfrac           | 0.0            |
| explained_variance | 3.98e-05       |
| fps                | 964            |
| nupdates           | 479            |
| policy_entropy     | 1.4900192      |
| policy_loss        | -0.00015192696 |
| serial_timesteps   | 119750         |
| time_elapsed       | 495            |
| total_timesteps    | 482832         |
| value_loss         | 3375.7202      |
-------------

--------------------------------------
| approxkl           | 3.4526151e-06 |
| clipfrac           | 0.0           |
| explained_variance | 2.58e-05      |
| fps                | 921           |
| nupdates           | 495           |
| policy_entropy     | 1.4918336     |
| policy_loss        | -9.247806e-05 |
| serial_timesteps   | 123750        |
| time_elapsed       | 512           |
| total_timesteps    | 498960        |
| value_loss         | 3239.684      |
--------------------------------------
--------------------------------------
| approxkl           | 0.00014154759 |
| clipfrac           | 0.0           |
| explained_variance | 1e-05         |
| fps                | 949           |
| nupdates           | 496           |
| policy_entropy     | 1.4914273     |
| policy_loss        | -0.0006312949 |
| serial_timesteps   | 124000        |
| time_elapsed       | 513           |
| total_timesteps    | 499968        |
| value_loss         | 3517.638      |
-------------------------

--------------------------------------
| approxkl           | 2.2476183e-06 |
| clipfrac           | 0.0           |
| explained_variance | 3.09e-05      |
| fps                | 1006          |
| nupdates           | 512           |
| policy_entropy     | 1.4977428     |
| policy_loss        | 4.343078e-05  |
| serial_timesteps   | 128000        |
| time_elapsed       | 530           |
| total_timesteps    | 516096        |
| value_loss         | 2848.5508     |
--------------------------------------
-------------------------------------
| approxkl           | 8.938541e-06 |
| clipfrac           | 0.0          |
| explained_variance | 1.23e-05     |
| fps                | 966          |
| nupdates           | 513          |
| policy_entropy     | 1.4982085    |
| policy_loss        | -9.25467e-05 |
| serial_timesteps   | 128250       |
| time_elapsed       | 531          |
| total_timesteps    | 517104       |
| value_loss         | 3723.6772    |
-------------------------------------

---------------------------------------
| approxkl           | 1.3135324e-05  |
| clipfrac           | 0.0            |
| explained_variance | 6.53e-05       |
| fps                | 889            |
| nupdates           | 529            |
| policy_entropy     | 1.501202       |
| policy_loss        | -5.6028453e-05 |
| serial_timesteps   | 132250         |
| time_elapsed       | 548            |
| total_timesteps    | 533232         |
| value_loss         | 1951.738       |
---------------------------------------
--------------------------------------
| approxkl           | 3.072755e-05  |
| clipfrac           | 0.0           |
| explained_variance | 5.95e-05      |
| fps                | 957           |
| nupdates           | 530           |
| policy_entropy     | 1.5009626     |
| policy_loss        | -7.295543e-05 |
| serial_timesteps   | 132500        |
| time_elapsed       | 549           |
| total_timesteps    | 534240        |
| value_loss         | 2017.5265     |
------------

--------------------------------------
| approxkl           | 0.00014432713 |
| clipfrac           | 0.0           |
| explained_variance | 4.39e-05      |
| fps                | 988           |
| nupdates           | 546           |
| policy_entropy     | 1.5016012     |
| policy_loss        | 0.00056981185 |
| serial_timesteps   | 136500        |
| time_elapsed       | 565           |
| total_timesteps    | 550368        |
| value_loss         | 1866.4133     |
--------------------------------------
---------------------------------------
| approxkl           | 1.2816825e-05  |
| clipfrac           | 0.0            |
| explained_variance | 5.33e-05       |
| fps                | 1026           |
| nupdates           | 547            |
| policy_entropy     | 1.501421       |
| policy_loss        | -0.00025767204 |
| serial_timesteps   | 136750         |
| time_elapsed       | 566            |
| total_timesteps    | 551376         |
| value_loss         | 2374.7375      |
-------------

---------------------------------------
| approxkl           | 2.9635044e-06  |
| clipfrac           | 0.0            |
| explained_variance | 1.18e-05       |
| fps                | 958            |
| nupdates           | 563            |
| policy_entropy     | 1.5011559      |
| policy_loss        | -3.2518506e-05 |
| serial_timesteps   | 140750         |
| time_elapsed       | 583            |
| total_timesteps    | 567504         |
| value_loss         | 2929.413       |
---------------------------------------
---------------------------------------
| approxkl           | 1.6981521e-05  |
| clipfrac           | 0.0            |
| explained_variance | 8.7e-06        |
| fps                | 934            |
| nupdates           | 564            |
| policy_entropy     | 1.5011438      |
| policy_loss        | -5.3697317e-05 |
| serial_timesteps   | 141000         |
| time_elapsed       | 584            |
| total_timesteps    | 568512         |
| value_loss         | 3527.5825      |


---------------------------------------
| approxkl           | 2.0581014e-05  |
| clipfrac           | 0.0            |
| explained_variance | 1.44e-05       |
| fps                | 976            |
| nupdates           | 580            |
| policy_entropy     | 1.505113       |
| policy_loss        | -0.00021726753 |
| serial_timesteps   | 145000         |
| time_elapsed       | 600            |
| total_timesteps    | 584640         |
| value_loss         | 2314.6335      |
---------------------------------------
---------------------------------------
| approxkl           | 3.3944166e-06  |
| clipfrac           | 0.0            |
| explained_variance | 3.29e-05       |
| fps                | 930            |
| nupdates           | 581            |
| policy_entropy     | 1.5052896      |
| policy_loss        | -4.4183555e-05 |
| serial_timesteps   | 145250         |
| time_elapsed       | 601            |
| total_timesteps    | 585648         |
| value_loss         | 2860.272       |


--------------------------------------
| approxkl           | 4.7923945e-06 |
| clipfrac           | 0.0           |
| explained_variance | 1.88e-05      |
| fps                | 922           |
| nupdates           | 596           |
| policy_entropy     | 1.5051509     |
| policy_loss        | 1.882283e-06  |
| serial_timesteps   | 149000        |
| time_elapsed       | 617           |
| total_timesteps    | 600768        |
| value_loss         | 1963.9313     |
--------------------------------------
---------------------------------------
| approxkl           | 8.583316e-06   |
| clipfrac           | 0.0            |
| explained_variance | 2.58e-05       |
| fps                | 994            |
| nupdates           | 597            |
| policy_entropy     | 1.5051373      |
| policy_loss        | -5.6768156e-05 |
| serial_timesteps   | 149250         |
| time_elapsed       | 618            |
| total_timesteps    | 601776         |
| value_loss         | 3069.28        |
-------------

--------------------------------------
| approxkl           | 0.00016380026 |
| clipfrac           | 0.0           |
| explained_variance | 2.34e-05      |
| fps                | 954           |
| nupdates           | 612           |
| policy_entropy     | 1.5084498     |
| policy_loss        | 0.0001501264  |
| serial_timesteps   | 153000        |
| time_elapsed       | 634           |
| total_timesteps    | 616896        |
| value_loss         | 2081.384      |
--------------------------------------
--------------------------------------
| approxkl           | 3.2730443e-06 |
| clipfrac           | 0.0           |
| explained_variance | 2.71e-05      |
| fps                | 925           |
| nupdates           | 613           |
| policy_entropy     | 1.508018      |
| policy_loss        | -4.642923e-05 |
| serial_timesteps   | 153250        |
| time_elapsed       | 635           |
| total_timesteps    | 617904        |
| value_loss         | 2678.832      |
-------------------------

--------------------------------------
| approxkl           | 5.46212e-06   |
| clipfrac           | 0.0           |
| explained_variance | 1.91e-05      |
| fps                | 943           |
| nupdates           | 629           |
| policy_entropy     | 1.503009      |
| policy_loss        | -4.974215e-05 |
| serial_timesteps   | 157250        |
| time_elapsed       | 652           |
| total_timesteps    | 634032        |
| value_loss         | 1914.1555     |
--------------------------------------
---------------------------------------
| approxkl           | 1.8026358e-05  |
| clipfrac           | 0.0            |
| explained_variance | 2.01e-05       |
| fps                | 942            |
| nupdates           | 630            |
| policy_entropy     | 1.5022187      |
| policy_loss        | -0.00015427588 |
| serial_timesteps   | 157500         |
| time_elapsed       | 653            |
| total_timesteps    | 635040         |
| value_loss         | 2042.3589      |
-------------

---------------------------------------
| approxkl           | 0.0003213606   |
| clipfrac           | 0.00025        |
| explained_variance | 1.58e-05       |
| fps                | 988            |
| nupdates           | 646            |
| policy_entropy     | 1.4999183      |
| policy_loss        | -0.00069846946 |
| serial_timesteps   | 161500         |
| time_elapsed       | 670            |
| total_timesteps    | 651168         |
| value_loss         | 2241.4763      |
---------------------------------------
--------------------------------------
| approxkl           | 2.0905129e-05 |
| clipfrac           | 0.0           |
| explained_variance | 1.34e-05      |
| fps                | 968           |
| nupdates           | 647           |
| policy_entropy     | 1.5005624     |
| policy_loss        | -3.888781e-05 |
| serial_timesteps   | 161750        |
| time_elapsed       | 671           |
| total_timesteps    | 652176        |
| value_loss         | 2409.1382     |
------------

---------------------------------------
| approxkl           | 1.7542601e-05  |
| clipfrac           | 0.0            |
| explained_variance | 1.6e-05        |
| fps                | 940            |
| nupdates           | 663            |
| policy_entropy     | 1.4990413      |
| policy_loss        | -0.00021585503 |
| serial_timesteps   | 165750         |
| time_elapsed       | 687            |
| total_timesteps    | 668304         |
| value_loss         | 1741.6793      |
---------------------------------------
--------------------------------------
| approxkl           | 7.045778e-05  |
| clipfrac           | 0.0           |
| explained_variance | 1.12e-05      |
| fps                | 933           |
| nupdates           | 664           |
| policy_entropy     | 1.4985695     |
| policy_loss        | -9.524255e-05 |
| serial_timesteps   | 166000        |
| time_elapsed       | 688           |
| total_timesteps    | 669312        |
| value_loss         | 1894.6396     |
------------

--------------------------------------
| approxkl           | 6.1822786e-05 |
| clipfrac           | 0.0           |
| explained_variance | 1.45e-05      |
| fps                | 989           |
| nupdates           | 679           |
| policy_entropy     | 1.4947226     |
| policy_loss        | -6.006543e-05 |
| serial_timesteps   | 169750        |
| time_elapsed       | 703           |
| total_timesteps    | 684432        |
| value_loss         | 1981.0846     |
--------------------------------------
--------------------------------------
| approxkl           | 3.6669328e-05 |
| clipfrac           | 0.0           |
| explained_variance | 9e-06         |
| fps                | 982           |
| nupdates           | 680           |
| policy_entropy     | 1.4958414     |
| policy_loss        | 0.0003445766  |
| serial_timesteps   | 170000        |
| time_elapsed       | 704           |
| total_timesteps    | 685440        |
| value_loss         | 2349.6702     |
-------------------------

---------------------------------------
| approxkl           | 0.00018143561  |
| clipfrac           | 0.0            |
| explained_variance | 6.56e-06       |
| fps                | 1016           |
| nupdates           | 696            |
| policy_entropy     | 1.5061342      |
| policy_loss        | -0.00043162925 |
| serial_timesteps   | 174000         |
| time_elapsed       | 721            |
| total_timesteps    | 701568         |
| value_loss         | 2181.1704      |
---------------------------------------
---------------------------------------
| approxkl           | 2.2455515e-05  |
| clipfrac           | 0.0            |
| explained_variance | 1.18e-05       |
| fps                | 985            |
| nupdates           | 697            |
| policy_entropy     | 1.5063666      |
| policy_loss        | -0.00015452779 |
| serial_timesteps   | 174250         |
| time_elapsed       | 722            |
| total_timesteps    | 702576         |
| value_loss         | 1987.6864      |


---------------------------------------
| approxkl           | 3.9479422e-05  |
| clipfrac           | 0.0            |
| explained_variance | 5.19e-06       |
| fps                | 1039           |
| nupdates           | 712            |
| policy_entropy     | 1.5053697      |
| policy_loss        | -5.5443015e-05 |
| serial_timesteps   | 178000         |
| time_elapsed       | 736            |
| total_timesteps    | 717696         |
| value_loss         | 1917.7769      |
---------------------------------------
---------------------------------------
| approxkl           | 1.5734547e-06  |
| clipfrac           | 0.0            |
| explained_variance | 1.03e-05       |
| fps                | 984            |
| nupdates           | 713            |
| policy_entropy     | 1.5052701      |
| policy_loss        | -1.1674405e-05 |
| serial_timesteps   | 178250         |
| time_elapsed       | 737            |
| total_timesteps    | 718704         |
| value_loss         | 2398.534       |


--------------------------------------
| approxkl           | 0.0001243927  |
| clipfrac           | 0.0           |
| explained_variance | 5.07e-06      |
| fps                | 994           |
| nupdates           | 728           |
| policy_entropy     | 1.5055391     |
| policy_loss        | -0.0009130303 |
| serial_timesteps   | 182000        |
| time_elapsed       | 752           |
| total_timesteps    | 733824        |
| value_loss         | 2135.0947     |
--------------------------------------
---------------------------------------
| approxkl           | 3.552326e-05   |
| clipfrac           | 0.0            |
| explained_variance | 9.6e-06        |
| fps                | 993            |
| nupdates           | 729            |
| policy_entropy     | 1.5052606      |
| policy_loss        | -0.00010828278 |
| serial_timesteps   | 182250         |
| time_elapsed       | 753            |
| total_timesteps    | 734832         |
| value_loss         | 2019.654       |
-------------

---------------------------------------
| approxkl           | 0.000112069094 |
| clipfrac           | 0.0            |
| explained_variance | 2.56e-06       |
| fps                | 991            |
| nupdates           | 744            |
| policy_entropy     | 1.5045389      |
| policy_loss        | 3.6788308e-05  |
| serial_timesteps   | 186000         |
| time_elapsed       | 768            |
| total_timesteps    | 749952         |
| value_loss         | 1726.1729      |
---------------------------------------
---------------------------------------
| approxkl           | 4.250869e-06   |
| clipfrac           | 0.0            |
| explained_variance | 4.95e-06       |
| fps                | 950            |
| nupdates           | 745            |
| policy_entropy     | 1.5046786      |
| policy_loss        | -4.9302464e-05 |
| serial_timesteps   | 186250         |
| time_elapsed       | 769            |
| total_timesteps    | 750960         |
| value_loss         | 1981.3634      |


---------------------------------------
| approxkl           | 0.00017979069  |
| clipfrac           | 0.0            |
| explained_variance | 2.5e-06        |
| fps                | 994            |
| nupdates           | 760            |
| policy_entropy     | 1.5060493      |
| policy_loss        | -0.00095258147 |
| serial_timesteps   | 190000         |
| time_elapsed       | 784            |
| total_timesteps    | 766080         |
| value_loss         | 2130.9617      |
---------------------------------------
---------------------------------------
| approxkl           | 2.0987683e-05  |
| clipfrac           | 0.0            |
| explained_variance | 5.3e-06        |
| fps                | 1015           |
| nupdates           | 761            |
| policy_entropy     | 1.5062917      |
| policy_loss        | -3.8128106e-05 |
| serial_timesteps   | 190250         |
| time_elapsed       | 785            |
| total_timesteps    | 767088         |
| value_loss         | 1983.7284      |


-------------------------------------
| approxkl           | 4.841745e-05 |
| clipfrac           | 0.0          |
| explained_variance | 6.32e-06     |
| fps                | 1096         |
| nupdates           | 777          |
| policy_entropy     | 1.5078701    |
| policy_loss        | 0.0002536014 |
| serial_timesteps   | 194250       |
| time_elapsed       | 801          |
| total_timesteps    | 783216       |
| value_loss         | 2273.912     |
-------------------------------------
---------------------------------------
| approxkl           | 1.8578701e-05  |
| clipfrac           | 0.0            |
| explained_variance | 6.08e-06       |
| fps                | 1047           |
| nupdates           | 778            |
| policy_entropy     | 1.5081608      |
| policy_loss        | -0.00013880632 |
| serial_timesteps   | 194500         |
| time_elapsed       | 802            |
| total_timesteps    | 784224         |
| value_loss         | 2153.2861      |
--------------------------

---------------------------------------
| approxkl           | 2.2503258e-05  |
| clipfrac           | 0.0            |
| explained_variance | 3.16e-06       |
| fps                | 1024           |
| nupdates           | 793            |
| policy_entropy     | 1.5096908      |
| policy_loss        | -0.00023625727 |
| serial_timesteps   | 198250         |
| time_elapsed       | 817            |
| total_timesteps    | 799344         |
| value_loss         | 2438.2627      |
---------------------------------------
---------------------------------------
| approxkl           | 9.09157e-05    |
| clipfrac           | 0.0            |
| explained_variance | 1.19e-06       |
| fps                | 1024           |
| nupdates           | 794            |
| policy_entropy     | 1.5091347      |
| policy_loss        | -0.00040597882 |
| serial_timesteps   | 198500         |
| time_elapsed       | 818            |
| total_timesteps    | 800352         |
| value_loss         | 2474.0024      |


--------------------------------------
| approxkl           | 0.0004060148  |
| clipfrac           | 0.0           |
| explained_variance | 5.13e-06      |
| fps                | 1003          |
| nupdates           | 809           |
| policy_entropy     | 1.5093254     |
| policy_loss        | -0.0015407015 |
| serial_timesteps   | 202250        |
| time_elapsed       | 833           |
| total_timesteps    | 815472        |
| value_loss         | 1996.7172     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0013374562  |
| clipfrac           | 0.0065        |
| explained_variance | 4.77e-06      |
| fps                | 1039          |
| nupdates           | 810           |
| policy_entropy     | 1.5097101     |
| policy_loss        | -0.0018697904 |
| serial_timesteps   | 202500        |
| time_elapsed       | 834           |
| total_timesteps    | 816480        |
| value_loss         | 1349.3448     |
-------------------------

---------------------------------------
| approxkl           | 7.96046e-06    |
| clipfrac           | 0.0            |
| explained_variance | 1.55e-06       |
| fps                | 962            |
| nupdates           | 826            |
| policy_entropy     | 1.5106088      |
| policy_loss        | -0.00018790139 |
| serial_timesteps   | 206500         |
| time_elapsed       | 849            |
| total_timesteps    | 832608         |
| value_loss         | 2268.6829      |
---------------------------------------
---------------------------------------
| approxkl           | 3.377058e-05   |
| clipfrac           | 0.0            |
| explained_variance | 2.44e-06       |
| fps                | 1022           |
| nupdates           | 827            |
| policy_entropy     | 1.5106452      |
| policy_loss        | -0.00046685158 |
| serial_timesteps   | 206750         |
| time_elapsed       | 850            |
| total_timesteps    | 833616         |
| value_loss         | 2612.7944      |


--------------------------------------
| approxkl           | 0.00031615893 |
| clipfrac           | 0.0           |
| explained_variance | 4.35e-06      |
| fps                | 1145          |
| nupdates           | 842           |
| policy_entropy     | 1.5144103     |
| policy_loss        | 4.4487882e-05 |
| serial_timesteps   | 210500        |
| time_elapsed       | 864           |
| total_timesteps    | 848736        |
| value_loss         | 2096.506      |
--------------------------------------
---------------------------------------
| approxkl           | 5.408823e-05   |
| clipfrac           | 0.0            |
| explained_variance | 3.4e-06        |
| fps                | 1123           |
| nupdates           | 843            |
| policy_entropy     | 1.514732       |
| policy_loss        | -0.00025354212 |
| serial_timesteps   | 210750         |
| time_elapsed       | 865            |
| total_timesteps    | 849744         |
| value_loss         | 1877.3142      |
-------------

---------------------------------------
| approxkl           | 4.95992e-05    |
| clipfrac           | 0.0            |
| explained_variance | 1.97e-06       |
| fps                | 1067           |
| nupdates           | 859            |
| policy_entropy     | 1.5176445      |
| policy_loss        | -0.00020642116 |
| serial_timesteps   | 214750         |
| time_elapsed       | 880            |
| total_timesteps    | 865872         |
| value_loss         | 1666.6987      |
---------------------------------------
--------------------------------------
| approxkl           | 8.4778796e-05 |
| clipfrac           | 0.0           |
| explained_variance | 2.15e-06      |
| fps                | 1111          |
| nupdates           | 860           |
| policy_entropy     | 1.5170217     |
| policy_loss        | -7.388845e-05 |
| serial_timesteps   | 215000        |
| time_elapsed       | 881           |
| total_timesteps    | 866880        |
| value_loss         | 1910.5503     |
------------

--------------------------------------
| approxkl           | 0.00062887085 |
| clipfrac           | 0.00175       |
| explained_variance | 2.8e-06       |
| fps                | 1051          |
| nupdates           | 876           |
| policy_entropy     | 1.514912      |
| policy_loss        | -0.000963111  |
| serial_timesteps   | 219000        |
| time_elapsed       | 896           |
| total_timesteps    | 883008        |
| value_loss         | 1838.5283     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0009889866  |
| clipfrac           | 0.00425       |
| explained_variance | 5.84e-06      |
| fps                | 1116          |
| nupdates           | 877           |
| policy_entropy     | 1.5148435     |
| policy_loss        | -0.0019332479 |
| serial_timesteps   | 219250        |
| time_elapsed       | 897           |
| total_timesteps    | 884016        |
| value_loss         | 1460.4908     |
-------------------------

---------------------------------------
| approxkl           | 1.2059212e-05  |
| clipfrac           | 0.0            |
| explained_variance | 4.29e-06       |
| fps                | 1056           |
| nupdates           | 893            |
| policy_entropy     | 1.5126482      |
| policy_loss        | -5.8512214e-05 |
| serial_timesteps   | 223250         |
| time_elapsed       | 911            |
| total_timesteps    | 900144         |
| value_loss         | 1667.3579      |
---------------------------------------
---------------------------------------
| approxkl           | 0.001256       |
| clipfrac           | 0.0050000004   |
| explained_variance | 6.91e-06       |
| fps                | 1055           |
| nupdates           | 894            |
| policy_entropy     | 1.5114124      |
| policy_loss        | -6.0303573e-06 |
| serial_timesteps   | 223500         |
| time_elapsed       | 912            |
| total_timesteps    | 901152         |
| value_loss         | 1590.8645      |


---------------------------------------
| approxkl           | 5.5697248e-05  |
| clipfrac           | 0.0            |
| explained_variance | 3.22e-06       |
| fps                | 1089           |
| nupdates           | 910            |
| policy_entropy     | 1.5034955      |
| policy_loss        | -0.00024009438 |
| serial_timesteps   | 227500         |
| time_elapsed       | 927            |
| total_timesteps    | 917280         |
| value_loss         | 1784.1874      |
---------------------------------------
--------------------------------------
| approxkl           | 5.2939627e-05 |
| clipfrac           | 0.0           |
| explained_variance | 4.65e-06      |
| fps                | 1085          |
| nupdates           | 911           |
| policy_entropy     | 1.5037183     |
| policy_loss        | -0.0004910456 |
| serial_timesteps   | 227750        |
| time_elapsed       | 928           |
| total_timesteps    | 918288        |
| value_loss         | 1570.3054     |
------------

---------------------------------------
| approxkl           | 2.392048e-06   |
| clipfrac           | 0.0            |
| explained_variance | 1.07e-06       |
| fps                | 1082           |
| nupdates           | 926            |
| policy_entropy     | 1.5056841      |
| policy_loss        | -1.8806548e-05 |
| serial_timesteps   | 231500         |
| time_elapsed       | 942            |
| total_timesteps    | 933408         |
| value_loss         | 2419.9404      |
---------------------------------------
---------------------------------------
| approxkl           | 4.7305362e-05  |
| clipfrac           | 0.0            |
| explained_variance | 3.34e-06       |
| fps                | 1143           |
| nupdates           | 927            |
| policy_entropy     | 1.5056611      |
| policy_loss        | -0.00055463467 |
| serial_timesteps   | 231750         |
| time_elapsed       | 943            |
| total_timesteps    | 934416         |
| value_loss         | 2115.715       |


--------------------------------------
| approxkl           | 0.00010113459 |
| clipfrac           | 0.0           |
| explained_variance | 2.5e-06       |
| fps                | 1106          |
| nupdates           | 942           |
| policy_entropy     | 1.5066661     |
| policy_loss        | -0.0010447499 |
| serial_timesteps   | 235500        |
| time_elapsed       | 956           |
| total_timesteps    | 949536        |
| value_loss         | 1718.3097     |
--------------------------------------
--------------------------------------
| approxkl           | 7.2000665e-05 |
| clipfrac           | 0.0           |
| explained_variance | 3.93e-06      |
| fps                | 1096          |
| nupdates           | 943           |
| policy_entropy     | 1.5069662     |
| policy_loss        | 4.7133162e-05 |
| serial_timesteps   | 235750        |
| time_elapsed       | 957           |
| total_timesteps    | 950544        |
| value_loss         | 1882.3658     |
-------------------------

--------------------------------------
| approxkl           | 0.00022512488 |
| clipfrac           | 0.0           |
| explained_variance | -0.00552      |
| fps                | 1121          |
| nupdates           | 959           |
| policy_entropy     | 1.5111995     |
| policy_loss        | 0.00076145    |
| serial_timesteps   | 239750        |
| time_elapsed       | 972           |
| total_timesteps    | 966672        |
| value_loss         | 1776.7804     |
--------------------------------------
-------------------------------------
| approxkl           | 0.0009744242 |
| clipfrac           | 0.0039999997 |
| explained_variance | 0.00206      |
| fps                | 1084         |
| nupdates           | 960          |
| policy_entropy     | 1.5115327    |
| policy_loss        | -0.001492139 |
| serial_timesteps   | 240000       |
| time_elapsed       | 973          |
| total_timesteps    | 967680       |
| value_loss         | 2394.3596    |
-------------------------------------

---------------------------------------
| approxkl           | 0.00014740096  |
| clipfrac           | 0.0            |
| explained_variance | 0.0212         |
| fps                | 1108           |
| nupdates           | 975            |
| policy_entropy     | 1.508676       |
| policy_loss        | -0.00091115624 |
| serial_timesteps   | 243750         |
| time_elapsed       | 986            |
| total_timesteps    | 982800         |
| value_loss         | 2000.6178      |
---------------------------------------
--------------------------------------
| approxkl           | 0.00073519885 |
| clipfrac           | 0.00275       |
| explained_variance | 0.00681       |
| fps                | 1092          |
| nupdates           | 976           |
| policy_entropy     | 1.5086424     |
| policy_loss        | -0.001546359  |
| serial_timesteps   | 244000        |
| time_elapsed       | 987           |
| total_timesteps    | 983808        |
| value_loss         | 2473.9988     |
------------

--------------------------------------
| approxkl           | 0.0009148503  |
| clipfrac           | 0.0045        |
| explained_variance | 0.00785       |
| fps                | 1125          |
| nupdates           | 992           |
| policy_entropy     | 1.5111074     |
| policy_loss        | -0.0008455517 |
| serial_timesteps   | 248000        |
| time_elapsed       | 1e+03         |
| total_timesteps    | 999936        |
| value_loss         | 2202.671      |
--------------------------------------
---------------------------------------
| approxkl           | 9.6926844e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.00144        |
| fps                | 1056           |
| nupdates           | 993            |
| policy_entropy     | 1.5119182      |
| policy_loss        | -7.0805763e-06 |
| serial_timesteps   | 248250         |
| time_elapsed       | 1e+03          |
| total_timesteps    | 1000944        |
| value_loss         | 1941.9663      |
-------------

In [None]:
from stable_baselines.common.policies import FeedForwardPolicy

In [None]:
# Reload the agent saved by the training cell. Training instantiated the
# project-local `PPO` class (from utils.ppo); `PPO2` is not imported in the
# visible cells of this notebook, so `PPO2.load` fails with NameError on a
# fresh kernel.
# NOTE(review): assumes utils.ppo.PPO exposes a `load` classmethod compatible
# with the `save` call used during training — confirm.
model = PPO.load(final_model_dir)

# Enjoy trained agent
env = gym.make(env_name)
obs = env.reset()
try:
    # Runs until the kernel is interrupted.
    while True:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()
        if dones:
            # Begin a new episode rather than stepping a finished environment.
            obs = env.reset()
except KeyboardInterrupt:
    # Interrupting the kernel is the expected way to stop the demo.
    pass
finally:
    # Always release the environment (and its render window).
    env.close()