In [1]:
import os
import gym
from IPython import display
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines.common.vec_env import DummyVecEnv
from utils.ppo import PPO
from utils.models import Policy

In [2]:
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [3]:
LOGS = os.getcwd()

In [4]:
def makedirs(path):
    if not os.path.exists(path):
        os.makedirs(path)

In [5]:
env_name = 'MountainCarContinuous-v0'
run_id = 1
n_steps = 250
total_timesteps = 1000000

LOGS = os.path.join(LOGS, env_name, 'run{}'.format(run_id))
makedirs(LOGS)
tb_log = os.path.join(LOGS, 'tb')
makedirs(tb_log)
model_dir = os.path.join(LOGS, 'models')
makedirs(model_dir)
final_model_dir = os.path.join(LOGS, 'model')
n_cpu = 4

In [6]:
env = SubprocVecEnv([lambda: gym.make(env_name) for i in range(n_cpu)])

model = PPO(Policy, env, n_steps=n_steps, tensorboard_log=tb_log, verbose=1, full_tensorboard_log=True)
model.learn(total_timesteps, env, save_file=os.path.join(model_dir, 'model'))
model.save(final_model_dir)
del model # remove to demonstrate saving and loading

INFO:tensorflow:Summary name model/pi_fc0/w:0 is illegal; using model/pi_fc0/w_0 instead.
INFO:tensorflow:Summary name model/pi_fc0/b:0 is illegal; using model/pi_fc0/b_0 instead.
INFO:tensorflow:Summary name model/vf_fc0/w:0 is illegal; using model/vf_fc0/w_0 instead.
INFO:tensorflow:Summary name model/vf_fc0/b:0 is illegal; using model/vf_fc0/b_0 instead.
INFO:tensorflow:Summary name model/pi_fc1/w:0 is illegal; using model/pi_fc1/w_0 instead.
INFO:tensorflow:Summary name model/pi_fc1/b:0 is illegal; using model/pi_fc1/b_0 instead.
INFO:tensorflow:Summary name model/vf_fc1/w:0 is illegal; using model/vf_fc1/w_0 instead.
INFO:tensorflow:Summary name model/vf_fc1/b:0 is illegal; using model/vf_fc1/b_0 instead.
INFO:tensorflow:Summary name model/vf/w:0 is illegal; using model/vf/w_0 instead.
INFO:tensorflow:Summary name model/vf/b:0 is illegal; using model/vf/b_0 instead.
INFO:tensorflow:Summary name model/pi/w:0 is illegal; using model/pi/w_0 instead.
INFO:tensorflow:Summary name model

--------------------------------------
| approxkl           | 0.00013409875 |
| clipfrac           | 0.0           |
| explained_variance | -0.678        |
| fps                | 1096          |
| nupdates           | 12            |
| policy_entropy     | 1.3922168     |
| policy_loss        | 0.00029280435 |
| serial_timesteps   | 3000          |
| time_elapsed       | 10.9          |
| total_timesteps    | 12096         |
| value_loss         | 0.2138084     |
--------------------------------------
---------------------------------------
| approxkl           | 7.961658e-05   |
| clipfrac           | 0.0            |
| explained_variance | -0.596         |
| fps                | 1022           |
| nupdates           | 13             |
| policy_entropy     | 1.3919744      |
| policy_loss        | -0.00027413017 |
| serial_timesteps   | 3250           |
| time_elapsed       | 11.8           |
| total_timesteps    | 13104          |
| value_loss         | 0.12668937     |
-------------

--------------------------------------
| approxkl           | 4.4274697e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.0983        |
| fps                | 1036          |
| nupdates           | 28            |
| policy_entropy     | 1.3623475     |
| policy_loss        | 4.124376e-05  |
| serial_timesteps   | 7000          |
| time_elapsed       | 25.7          |
| total_timesteps    | 28224         |
| value_loss         | 0.250048      |
--------------------------------------
--------------------------------------
| approxkl           | 5.34986e-05   |
| clipfrac           | 0.0           |
| explained_variance | -0.914        |
| fps                | 1046          |
| nupdates           | 29            |
| policy_entropy     | 1.3596253     |
| policy_loss        | -0.0005518995 |
| serial_timesteps   | 7250          |
| time_elapsed       | 26.7          |
| total_timesteps    | 29232         |
| value_loss         | 0.021274276   |
-------------------------

--------------------------------------
| approxkl           | 0.0003274093  |
| clipfrac           | 0.0           |
| explained_variance | 0.002         |
| fps                | 1107          |
| nupdates           | 44            |
| policy_entropy     | 1.3068849     |
| policy_loss        | -0.0004101209 |
| serial_timesteps   | 11000         |
| time_elapsed       | 40.8          |
| total_timesteps    | 44352         |
| value_loss         | 0.23629628    |
--------------------------------------
---------------------------------------
| approxkl           | 3.2568543e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.275         |
| fps                | 1087           |
| nupdates           | 45             |
| policy_entropy     | 1.3053087      |
| policy_loss        | -0.00045985042 |
| serial_timesteps   | 11250          |
| time_elapsed       | 41.7           |
| total_timesteps    | 45360          |
| value_loss         | 0.011458207    |
-------------

--------------------------------------
| approxkl           | 2.498443e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.0138        |
| fps                | 1116          |
| nupdates           | 60            |
| policy_entropy     | 1.2322708     |
| policy_loss        | 0.00011959973 |
| serial_timesteps   | 15000         |
| time_elapsed       | 55.6          |
| total_timesteps    | 60480         |
| value_loss         | 45.91621      |
--------------------------------------
--------------------------------------
| approxkl           | 9.32257e-06   |
| clipfrac           | 0.0           |
| explained_variance | -0.169        |
| fps                | 1017          |
| nupdates           | 61            |
| policy_entropy     | 1.2303951     |
| policy_loss        | -0.0006043467 |
| serial_timesteps   | 15250         |
| time_elapsed       | 56.5          |
| total_timesteps    | 61488         |
| value_loss         | 0.010532533   |
-------------------------

-------------------------------------
| approxkl           | 6.970368e-05 |
| clipfrac           | 0.0          |
| explained_variance | -0.128       |
| fps                | 1119         |
| nupdates           | 77           |
| policy_entropy     | 1.168732     |
| policy_loss        | -0.001076065 |
| serial_timesteps   | 19250        |
| time_elapsed       | 71.7         |
| total_timesteps    | 77616        |
| value_loss         | 0.0071364553 |
-------------------------------------
--------------------------------------
| approxkl           | 2.1442134e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.154        |
| fps                | 1119          |
| nupdates           | 78            |
| policy_entropy     | 1.1631868     |
| policy_loss        | -0.0012003391 |
| serial_timesteps   | 19500         |
| time_elapsed       | 72.6          |
| total_timesteps    | 78624         |
| value_loss         | 0.008783837   |
--------------------------------------

--------------------------------------
| approxkl           | 4.8405826e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.264        |
| fps                | 1049          |
| nupdates           | 94            |
| policy_entropy     | 1.0951107     |
| policy_loss        | -0.0012162388 |
| serial_timesteps   | 23500         |
| time_elapsed       | 87.4          |
| total_timesteps    | 94752         |
| value_loss         | 0.005569847   |
--------------------------------------
---------------------------------------
| approxkl           | 9.705831e-06   |
| clipfrac           | 0.0            |
| explained_variance | -0.08          |
| fps                | 1018           |
| nupdates           | 95             |
| policy_entropy     | 1.0900573      |
| policy_loss        | -0.00036653026 |
| serial_timesteps   | 23750          |
| time_elapsed       | 88.4           |
| total_timesteps    | 95760          |
| value_loss         | 0.06346244     |
-------------

--------------------------------------
| approxkl           | 9.580263e-06  |
| clipfrac           | 0.0           |
| explained_variance | -0.0184       |
| fps                | 1039          |
| nupdates           | 110           |
| policy_entropy     | 1.0357068     |
| policy_loss        | -0.0009998562 |
| serial_timesteps   | 27500         |
| time_elapsed       | 103           |
| total_timesteps    | 110880        |
| value_loss         | 0.0065146512  |
--------------------------------------
---------------------------------------
| approxkl           | 5.2363052e-06  |
| clipfrac           | 0.0            |
| explained_variance | -0.0393        |
| fps                | 1027           |
| nupdates           | 111            |
| policy_entropy     | 1.0307649      |
| policy_loss        | -0.00022777401 |
| serial_timesteps   | 27750          |
| time_elapsed       | 104            |
| total_timesteps    | 111888         |
| value_loss         | 0.051435173    |
-------------

--------------------------------------
| approxkl           | 2.1613414e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.00827       |
| fps                | 1152          |
| nupdates           | 126           |
| policy_entropy     | 0.9710978     |
| policy_loss        | -0.0013989084 |
| serial_timesteps   | 31500         |
| time_elapsed       | 117           |
| total_timesteps    | 127008        |
| value_loss         | 0.006410154   |
--------------------------------------
--------------------------------------
| approxkl           | 9.839781e-06  |
| clipfrac           | 0.0           |
| explained_variance | 0.00792       |
| fps                | 1172          |
| nupdates           | 127           |
| policy_entropy     | 0.9655444     |
| policy_loss        | -0.0003918501 |
| serial_timesteps   | 31750         |
| time_elapsed       | 118           |
| total_timesteps    | 128016        |
| value_loss         | 0.0489365     |
-------------------------

--------------------------------------
| approxkl           | 1.2011603e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.00638       |
| fps                | 994           |
| nupdates           | 142           |
| policy_entropy     | 0.90920436    |
| policy_loss        | -0.0012090479 |
| serial_timesteps   | 35500         |
| time_elapsed       | 132           |
| total_timesteps    | 143136        |
| value_loss         | 0.0042621978  |
--------------------------------------
---------------------------------------
| approxkl           | 1.3415525e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.0162        |
| fps                | 996            |
| nupdates           | 143            |
| policy_entropy     | 0.90398896     |
| policy_loss        | -0.00046372047 |
| serial_timesteps   | 35750          |
| time_elapsed       | 133            |
| total_timesteps    | 144144         |
| value_loss         | 0.039941926    |
-------------

--------------------------------------
| approxkl           | 3.521145e-05  |
| clipfrac           | 0.0           |
| explained_variance | -0.0005       |
| fps                | 998           |
| nupdates           | 159           |
| policy_entropy     | 0.8439486     |
| policy_loss        | -0.0006585348 |
| serial_timesteps   | 39750         |
| time_elapsed       | 149           |
| total_timesteps    | 160272        |
| value_loss         | 0.026935663   |
--------------------------------------
---------------------------------------
| approxkl           | 4.4726836e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.00667       |
| fps                | 991            |
| nupdates           | 160            |
| policy_entropy     | 0.8417362      |
| policy_loss        | -0.00025556795 |
| serial_timesteps   | 40000          |
| time_elapsed       | 150            |
| total_timesteps    | 161280         |
| value_loss         | 0.08394533     |
-------------

--------------------------------------
| approxkl           | 2.3778869e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.00792       |
| fps                | 1020          |
| nupdates           | 176           |
| policy_entropy     | 0.7826803     |
| policy_loss        | -3.613135e-05 |
| serial_timesteps   | 44000         |
| time_elapsed       | 166           |
| total_timesteps    | 177408        |
| value_loss         | 0.066330716   |
--------------------------------------
--------------------------------------
| approxkl           | 4.1039326e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.0105       |
| fps                | 993           |
| nupdates           | 177           |
| policy_entropy     | 0.7807936     |
| policy_loss        | -0.0010163591 |
| serial_timesteps   | 44250         |
| time_elapsed       | 167           |
| total_timesteps    | 178416        |
| value_loss         | 0.004929582   |
-------------------------

--------------------------------------
| approxkl           | 5.8833575e-06 |
| clipfrac           | 0.0           |
| explained_variance | -0.17         |
| fps                | 973           |
| nupdates           | 193           |
| policy_entropy     | 0.7227603     |
| policy_loss        | -0.0005026009 |
| serial_timesteps   | 48250         |
| time_elapsed       | 183           |
| total_timesteps    | 194544        |
| value_loss         | 0.0033182327  |
--------------------------------------
---------------------------------------
| approxkl           | 8.772134e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.0148         |
| fps                | 967            |
| nupdates           | 194            |
| policy_entropy     | 0.7184165      |
| policy_loss        | -0.00094315986 |
| serial_timesteps   | 48500          |
| time_elapsed       | 184            |
| total_timesteps    | 195552         |
| value_loss         | 0.0028082484   |
-------------

---------------------------------------
| approxkl           | 4.3947275e-06  |
| clipfrac           | 0.0            |
| explained_variance | -0.0697        |
| fps                | 985            |
| nupdates           | 209            |
| policy_entropy     | 0.6640358      |
| policy_loss        | -0.00069659547 |
| serial_timesteps   | 52250          |
| time_elapsed       | 199            |
| total_timesteps    | 210672         |
| value_loss         | 0.0027587358   |
---------------------------------------
---------------------------------------
| approxkl           | 1.34689735e-05 |
| clipfrac           | 0.0            |
| explained_variance | -0.0822        |
| fps                | 1020           |
| nupdates           | 210            |
| policy_entropy     | 0.6593151      |
| policy_loss        | -0.0011349177  |
| serial_timesteps   | 52500          |
| time_elapsed       | 200            |
| total_timesteps    | 211680         |
| value_loss         | 0.0040021925   |


--------------------------------------
| approxkl           | 1.966278e-05  |
| clipfrac           | 0.0           |
| explained_variance | -0.34         |
| fps                | 981           |
| nupdates           | 225           |
| policy_entropy     | 0.6048838     |
| policy_loss        | -0.0009436711 |
| serial_timesteps   | 56250         |
| time_elapsed       | 216           |
| total_timesteps    | 226800        |
| value_loss         | 0.002613995   |
--------------------------------------
--------------------------------------
| approxkl           | 8.151008e-06  |
| clipfrac           | 0.0           |
| explained_variance | -0.25         |
| fps                | 958           |
| nupdates           | 226           |
| policy_entropy     | 0.60030377    |
| policy_loss        | -0.0010200943 |
| serial_timesteps   | 56500         |
| time_elapsed       | 217           |
| total_timesteps    | 227808        |
| value_loss         | 0.0022102345  |
-------------------------

--------------------------------------
| approxkl           | 2.6129046e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.0185        |
| fps                | 942           |
| nupdates           | 241           |
| policy_entropy     | 0.5459329     |
| policy_loss        | -0.0008136635 |
| serial_timesteps   | 60250         |
| time_elapsed       | 232           |
| total_timesteps    | 242928        |
| value_loss         | 0.0024741772  |
--------------------------------------
--------------------------------------
| approxkl           | 9.721679e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.0259        |
| fps                | 971           |
| nupdates           | 242           |
| policy_entropy     | 0.5413947     |
| policy_loss        | -0.0013479438 |
| serial_timesteps   | 60500         |
| time_elapsed       | 233           |
| total_timesteps    | 243936        |
| value_loss         | 0.0020271512  |
-------------------------

--------------------------------------
| approxkl           | 8.69224e-06   |
| clipfrac           | 0.0           |
| explained_variance | -0.118        |
| fps                | 962           |
| nupdates           | 258           |
| policy_entropy     | 0.48365784    |
| policy_loss        | -0.0010453156 |
| serial_timesteps   | 64500         |
| time_elapsed       | 249           |
| total_timesteps    | 260064        |
| value_loss         | 0.0016101119  |
--------------------------------------
---------------------------------------
| approxkl           | 2.399393e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.00595        |
| fps                | 974            |
| nupdates           | 259            |
| policy_entropy     | 0.47922945     |
| policy_loss        | -0.00058614777 |
| serial_timesteps   | 64750          |
| time_elapsed       | 250            |
| total_timesteps    | 261072         |
| value_loss         | 0.017587058    |
-------------

---------------------------------------
| approxkl           | 6.9904327e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.00172        |
| fps                | 1035           |
| nupdates           | 275            |
| policy_entropy     | 0.42100656     |
| policy_loss        | -0.00028790542 |
| serial_timesteps   | 68750          |
| time_elapsed       | 266            |
| total_timesteps    | 277200         |
| value_loss         | 0.015005831    |
---------------------------------------
---------------------------------------
| approxkl           | 9.686957e-06   |
| clipfrac           | 0.0            |
| explained_variance | -0.0125        |
| fps                | 967            |
| nupdates           | 276            |
| policy_entropy     | 0.41860443     |
| policy_loss        | -0.00018022324 |
| serial_timesteps   | 69000          |
| time_elapsed       | 267            |
| total_timesteps    | 278208         |
| value_loss         | 0.013346184    |


--------------------------------------
| approxkl           | 1.4765133e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.00502      |
| fps                | 1047          |
| nupdates           | 292           |
| policy_entropy     | 0.36412388    |
| policy_loss        | -0.0003641159 |
| serial_timesteps   | 73000         |
| time_elapsed       | 283           |
| total_timesteps    | 294336        |
| value_loss         | 0.01329381    |
--------------------------------------
--------------------------------------
| approxkl           | 1.1774027e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.0206        |
| fps                | 1009          |
| nupdates           | 293           |
| policy_entropy     | 0.36094368    |
| policy_loss        | -0.0008743335 |
| serial_timesteps   | 73250         |
| time_elapsed       | 284           |
| total_timesteps    | 295344        |
| value_loss         | 0.0011318295  |
-------------------------

--------------------------------------
| approxkl           | 1.403853e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.00314       |
| fps                | 993           |
| nupdates           | 309           |
| policy_entropy     | 0.30766854    |
| policy_loss        | -0.0009013333 |
| serial_timesteps   | 77250         |
| time_elapsed       | 300           |
| total_timesteps    | 311472        |
| value_loss         | 0.0009496292  |
--------------------------------------
--------------------------------------
| approxkl           | 1.1505436e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.00677       |
| fps                | 1001          |
| nupdates           | 310           |
| policy_entropy     | 0.30327934    |
| policy_loss        | -0.0010666545 |
| serial_timesteps   | 77500         |
| time_elapsed       | 301           |
| total_timesteps    | 312480        |
| value_loss         | 0.0010366327  |
-------------------------

--------------------------------------
| approxkl           | 9.553652e-06  |
| clipfrac           | 0.0           |
| explained_variance | -0.0461       |
| fps                | 969           |
| nupdates           | 326           |
| policy_entropy     | 0.24547546    |
| policy_loss        | -0.0012196451 |
| serial_timesteps   | 81500         |
| time_elapsed       | 317           |
| total_timesteps    | 328608        |
| value_loss         | 0.00067803025 |
--------------------------------------
---------------------------------------
| approxkl           | 5.2841497e-06  |
| clipfrac           | 0.0            |
| explained_variance | 0.0172         |
| fps                | 967            |
| nupdates           | 327            |
| policy_entropy     | 0.24075243     |
| policy_loss        | -0.00034094488 |
| serial_timesteps   | 81750          |
| time_elapsed       | 318            |
| total_timesteps    | 329616         |
| value_loss         | 0.007981266    |
-------------

--------------------------------------
| approxkl           | 1.3512471e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.0246        |
| fps                | 1015          |
| nupdates           | 342           |
| policy_entropy     | 0.18900347    |
| policy_loss        | -0.0012254748 |
| serial_timesteps   | 85500         |
| time_elapsed       | 333           |
| total_timesteps    | 344736        |
| value_loss         | 0.0005225834  |
--------------------------------------
---------------------------------------
| approxkl           | 4.787009e-06   |
| clipfrac           | 0.0            |
| explained_variance | 0.0398         |
| fps                | 988            |
| nupdates           | 343            |
| policy_entropy     | 0.18482102     |
| policy_loss        | -0.00020209869 |
| serial_timesteps   | 85750          |
| time_elapsed       | 334            |
| total_timesteps    | 345744         |
| value_loss         | 0.006009347    |
-------------

--------------------------------------
| approxkl           | 1.2596392e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.0365        |
| fps                | 998           |
| nupdates           | 358           |
| policy_entropy     | 0.13353094    |
| policy_loss        | -0.0010744937 |
| serial_timesteps   | 89500         |
| time_elapsed       | 349           |
| total_timesteps    | 360864        |
| value_loss         | 0.0004810217  |
--------------------------------------
---------------------------------------
| approxkl           | 2.6636726e-06  |
| clipfrac           | 0.0            |
| explained_variance | -0.0384        |
| fps                | 1047           |
| nupdates           | 359            |
| policy_entropy     | 0.12930372     |
| policy_loss        | -0.00022701992 |
| serial_timesteps   | 89750          |
| time_elapsed       | 350            |
| total_timesteps    | 361872         |
| value_loss         | 0.004980671    |
-------------

--------------------------------------
| approxkl           | 6.799852e-05  |
| clipfrac           | 0.0           |
| explained_variance | -0.0928       |
| fps                | 1035          |
| nupdates           | 374           |
| policy_entropy     | 0.07510652    |
| policy_loss        | -0.0016629584 |
| serial_timesteps   | 93500         |
| time_elapsed       | 365           |
| total_timesteps    | 376992        |
| value_loss         | 0.0006482456  |
--------------------------------------
--------------------------------------
| approxkl           | 6.7866e-05    |
| clipfrac           | 0.0           |
| explained_variance | 0.01          |
| fps                | 1056          |
| nupdates           | 375           |
| policy_entropy     | 0.07049365    |
| policy_loss        | -0.0007083533 |
| serial_timesteps   | 93750         |
| time_elapsed       | 366           |
| total_timesteps    | 378000        |
| value_loss         | 0.004227478   |
-------------------------

--------------------------------------
| approxkl           | 6.663343e-05  |
| clipfrac           | 0.0           |
| explained_variance | -0.049        |
| fps                | 999           |
| nupdates           | 390           |
| policy_entropy     | 0.01832562    |
| policy_loss        | -0.0017590623 |
| serial_timesteps   | 97500         |
| time_elapsed       | 381           |
| total_timesteps    | 393120        |
| value_loss         | 0.00029172216 |
--------------------------------------
---------------------------------------
| approxkl           | 0.00023673964  |
| clipfrac           | 0.0            |
| explained_variance | -0.0192        |
| fps                | 973            |
| nupdates           | 391            |
| policy_entropy     | 0.014450654    |
| policy_loss        | -0.00057695655 |
| serial_timesteps   | 97750          |
| time_elapsed       | 382            |
| total_timesteps    | 394128         |
| value_loss         | 0.0032369972   |
-------------

--------------------------------------
| approxkl           | 1.5935959e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.00907       |
| fps                | 968           |
| nupdates           | 406           |
| policy_entropy     | -0.035086952  |
| policy_loss        | -0.001305053  |
| serial_timesteps   | 101500        |
| time_elapsed       | 396           |
| total_timesteps    | 409248        |
| value_loss         | 0.00025802405 |
--------------------------------------
--------------------------------------
| approxkl           | 3.1751977e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.0185       |
| fps                | 1000          |
| nupdates           | 407           |
| policy_entropy     | -0.039408453  |
| policy_loss        | -0.0004059174 |
| serial_timesteps   | 101750        |
| time_elapsed       | 397           |
| total_timesteps    | 410256        |
| value_loss         | 0.0026992136  |
-------------------------

--------------------------------------
| approxkl           | 1.8806306e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.00303      |
| fps                | 1047          |
| nupdates           | 422           |
| policy_entropy     | -0.08838909   |
| policy_loss        | -0.0010296567 |
| serial_timesteps   | 105500        |
| time_elapsed       | 412           |
| total_timesteps    | 425376        |
| value_loss         | 0.00029795966 |
--------------------------------------
--------------------------------------
| approxkl           | 6.901731e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.00324       |
| fps                | 1127          |
| nupdates           | 423           |
| policy_entropy     | -0.092595756  |
| policy_loss        | -0.0006960058 |
| serial_timesteps   | 105750        |
| time_elapsed       | 413           |
| total_timesteps    | 426384        |
| value_loss         | 0.0020118016  |
-------------------------

--------------------------------------
| approxkl           | 0.00012410118 |
| clipfrac           | 0.0           |
| explained_variance | 0.00594       |
| fps                | 1007          |
| nupdates           | 439           |
| policy_entropy     | -0.14791499   |
| policy_loss        | -0.0007138683 |
| serial_timesteps   | 109750        |
| time_elapsed       | 429           |
| total_timesteps    | 442512        |
| value_loss         | 0.0017163081  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0001327228  |
| clipfrac           | 0.0           |
| explained_variance | -0.0148       |
| fps                | 1025          |
| nupdates           | 440           |
| policy_entropy     | -0.15065736   |
| policy_loss        | -0.0006813606 |
| serial_timesteps   | 110000        |
| time_elapsed       | 430           |
| total_timesteps    | 443520        |
| value_loss         | 0.001527582   |
-------------------------

--------------------------------------
| approxkl           | 1.4526845e-06 |
| clipfrac           | 0.0           |
| explained_variance | 0.00264       |
| fps                | 1002          |
| nupdates           | 456           |
| policy_entropy     | -0.20476255   |
| policy_loss        | -0.000145403  |
| serial_timesteps   | 114000        |
| time_elapsed       | 446           |
| total_timesteps    | 459648        |
| value_loss         | 0.0011834353  |
--------------------------------------
--------------------------------------
| approxkl           | 2.1906944e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.0727       |
| fps                | 1008          |
| nupdates           | 457           |
| policy_entropy     | -0.2072545    |
| policy_loss        | -0.0010849643 |
| serial_timesteps   | 114250        |
| time_elapsed       | 447           |
| total_timesteps    | 460656        |
| value_loss         | 0.00020812197 |
-------------------------

---------------------------------------
| approxkl           | 0.00010122939  |
| clipfrac           | 0.0            |
| explained_variance | 0.00829        |
| fps                | 1027           |
| nupdates           | 473            |
| policy_entropy     | -0.2606537     |
| policy_loss        | -0.00068175455 |
| serial_timesteps   | 118250         |
| time_elapsed       | 463            |
| total_timesteps    | 476784         |
| value_loss         | 7.319355e-05   |
---------------------------------------
--------------------------------------
| approxkl           | 5.311819e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.0381        |
| fps                | 984           |
| nupdates           | 474           |
| policy_entropy     | -0.26487386   |
| policy_loss        | -0.0015868336 |
| serial_timesteps   | 118500        |
| time_elapsed       | 464           |
| total_timesteps    | 477792        |
| value_loss         | 6.523513e-05  |
------------

---------------------------------------
| approxkl           | 5.495788e-06   |
| clipfrac           | 0.0            |
| explained_variance | -0.033         |
| fps                | 1081           |
| nupdates           | 489            |
| policy_entropy     | -0.3180645     |
| policy_loss        | -0.00077739754 |
| serial_timesteps   | 122250         |
| time_elapsed       | 478            |
| total_timesteps    | 492912         |
| value_loss         | 9.537649e-05   |
---------------------------------------
--------------------------------------
| approxkl           | 1.2966109e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.0442       |
| fps                | 1032          |
| nupdates           | 490           |
| policy_entropy     | -0.32269225   |
| policy_loss        | -0.0012872308 |
| serial_timesteps   | 122500        |
| time_elapsed       | 479           |
| total_timesteps    | 493920        |
| value_loss         | 0.00017140049 |
------------

--------------------------------------
| approxkl           | 0.00019783931 |
| clipfrac           | 0.0           |
| explained_variance | -0.034        |
| fps                | 974           |
| nupdates           | 505           |
| policy_entropy     | -0.37220418   |
| policy_loss        | -0.0018798652 |
| serial_timesteps   | 126250        |
| time_elapsed       | 494           |
| total_timesteps    | 509040        |
| value_loss         | 6.840723e-05  |
--------------------------------------
-------------------------------------
| approxkl           | 2.278329e-05 |
| clipfrac           | 0.0          |
| explained_variance | 0.046        |
| fps                | 1027         |
| nupdates           | 506          |
| policy_entropy     | -0.37623504  |
| policy_loss        | -0.000799131 |
| serial_timesteps   | 126500       |
| time_elapsed       | 495          |
| total_timesteps    | 510048       |
| value_loss         | 7.095365e-05 |
-------------------------------------

---------------------------------------
| approxkl           | 9.392026e-06   |
| clipfrac           | 0.0            |
| explained_variance | -0.036         |
| fps                | 1021           |
| nupdates           | 522            |
| policy_entropy     | -0.4292115     |
| policy_loss        | -0.00089526887 |
| serial_timesteps   | 130500         |
| time_elapsed       | 511            |
| total_timesteps    | 526176         |
| value_loss         | 4.9632745e-05  |
---------------------------------------
---------------------------------------
| approxkl           | 2.649828e-05   |
| clipfrac           | 0.0            |
| explained_variance | -0.0088        |
| fps                | 992            |
| nupdates           | 523            |
| policy_entropy     | -0.43343285    |
| policy_loss        | -0.00046550934 |
| serial_timesteps   | 130750         |
| time_elapsed       | 512            |
| total_timesteps    | 527184         |
| value_loss         | 0.00049504626  |


--------------------------------------
| approxkl           | 3.846538e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.00568       |
| fps                | 979           |
| nupdates           | 538           |
| policy_entropy     | -0.48393908   |
| policy_loss        | -0.0012755503 |
| serial_timesteps   | 134500        |
| time_elapsed       | 527           |
| total_timesteps    | 542304        |
| value_loss         | 4.614283e-05  |
--------------------------------------
---------------------------------------
| approxkl           | 3.4518875e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.0173         |
| fps                | 943            |
| nupdates           | 539            |
| policy_entropy     | -0.48825878    |
| policy_loss        | -0.00027495998 |
| serial_timesteps   | 134750         |
| time_elapsed       | 528            |
| total_timesteps    | 543312         |
| value_loss         | 0.0004001754   |
-------------

--------------------------------------
| approxkl           | 1.284999e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.00881       |
| fps                | 1003          |
| nupdates           | 554           |
| policy_entropy     | -0.5368097    |
| policy_loss        | -0.0011309896 |
| serial_timesteps   | 138500        |
| time_elapsed       | 543           |
| total_timesteps    | 558432        |
| value_loss         | 3.1406227e-05 |
--------------------------------------
---------------------------------------
| approxkl           | 5.3350337e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.00588       |
| fps                | 996            |
| nupdates           | 555            |
| policy_entropy     | -0.54092485    |
| policy_loss        | -0.00041077958 |
| serial_timesteps   | 138750         |
| time_elapsed       | 544            |
| total_timesteps    | 559440         |
| value_loss         | 0.00033237916  |
-------------

--------------------------------------
| approxkl           | 8.914465e-06  |
| clipfrac           | 0.0           |
| explained_variance | -0.0381       |
| fps                | 1039          |
| nupdates           | 570           |
| policy_entropy     | -0.59012103   |
| policy_loss        | -0.0011067089 |
| serial_timesteps   | 142500        |
| time_elapsed       | 559           |
| total_timesteps    | 574560        |
| value_loss         | 2.8568906e-05 |
--------------------------------------
--------------------------------------
| approxkl           | 6.8032923e-06 |
| clipfrac           | 0.0           |
| explained_variance | -0.00684      |
| fps                | 957           |
| nupdates           | 571           |
| policy_entropy     | -0.5945012    |
| policy_loss        | -0.0004031076 |
| serial_timesteps   | 142750        |
| time_elapsed       | 560           |
| total_timesteps    | 575568        |
| value_loss         | 0.00029063353 |
-------------------------

--------------------------------------
| approxkl           | 0.00020360839 |
| clipfrac           | 0.0           |
| explained_variance | 0.00821       |
| fps                | 1005          |
| nupdates           | 586           |
| policy_entropy     | -0.6440298    |
| policy_loss        | -0.0019372992 |
| serial_timesteps   | 146500        |
| time_elapsed       | 574           |
| total_timesteps    | 590688        |
| value_loss         | 2.269446e-05  |
--------------------------------------
---------------------------------------
| approxkl           | 1.807701e-05   |
| clipfrac           | 0.0            |
| explained_variance | -0.0131        |
| fps                | 1041           |
| nupdates           | 587            |
| policy_entropy     | -0.6480146     |
| policy_loss        | -0.00025261016 |
| serial_timesteps   | 146750         |
| time_elapsed       | 575            |
| total_timesteps    | 591696         |
| value_loss         | 0.00022795702  |
-------------

--------------------------------------
| approxkl           | 8.398565e-06  |
| clipfrac           | 0.0           |
| explained_variance | 0.0637        |
| fps                | 1019          |
| nupdates           | 602           |
| policy_entropy     | -0.69843596   |
| policy_loss        | -0.0010164022 |
| serial_timesteps   | 150500        |
| time_elapsed       | 590           |
| total_timesteps    | 606816        |
| value_loss         | 2.0243973e-05 |
--------------------------------------
---------------------------------------
| approxkl           | 1.7832243e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.009          |
| fps                | 991            |
| nupdates           | 603            |
| policy_entropy     | -0.70250285    |
| policy_loss        | -0.00037204984 |
| serial_timesteps   | 150750         |
| time_elapsed       | 591            |
| total_timesteps    | 607824         |
| value_loss         | 0.00017399395  |
-------------

--------------------------------------
| approxkl           | 0.00024054913 |
| clipfrac           | 0.0           |
| explained_variance | -0.028        |
| fps                | 955           |
| nupdates           | 618           |
| policy_entropy     | -0.7494705    |
| policy_loss        | -0.002191721  |
| serial_timesteps   | 154500        |
| time_elapsed       | 606           |
| total_timesteps    | 622944        |
| value_loss         | 1.9520268e-05 |
--------------------------------------
--------------------------------------
| approxkl           | 0.00046122455 |
| clipfrac           | 0.0           |
| explained_variance | 0.00485       |
| fps                | 1066          |
| nupdates           | 619           |
| policy_entropy     | -0.7532563    |
| policy_loss        | 0.0006172818  |
| serial_timesteps   | 154750        |
| time_elapsed       | 607           |
| total_timesteps    | 623952        |
| value_loss         | 0.00012679856 |
-------------------------

--------------------------------------
| approxkl           | 0.00022065153 |
| clipfrac           | 0.0           |
| explained_variance | -0.00749      |
| fps                | 1019          |
| nupdates           | 635           |
| policy_entropy     | -0.8029768    |
| policy_loss        | -0.0003365555 |
| serial_timesteps   | 158750        |
| time_elapsed       | 623           |
| total_timesteps    | 640080        |
| value_loss         | 0.00012664581 |
--------------------------------------
---------------------------------------
| approxkl           | 2.1207012e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.00647        |
| fps                | 969            |
| nupdates           | 636            |
| policy_entropy     | -0.80599004    |
| policy_loss        | -0.00010423699 |
| serial_timesteps   | 159000         |
| time_elapsed       | 624            |
| total_timesteps    | 641088         |
| value_loss         | 0.000116178664 |
-------------

---------------------------------------
| approxkl           | 4.3157434e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.0136        |
| fps                | 977            |
| nupdates           | 651            |
| policy_entropy     | -0.84917104    |
| policy_loss        | -9.987372e-05  |
| serial_timesteps   | 162750         |
| time_elapsed       | 639            |
| total_timesteps    | 656208         |
| value_loss         | 0.000109344684 |
---------------------------------------
--------------------------------------
| approxkl           | 7.3808624e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.0112       |
| fps                | 1033          |
| nupdates           | 652           |
| policy_entropy     | -0.8516544    |
| policy_loss        | -0.000547639  |
| serial_timesteps   | 163000        |
| time_elapsed       | 640           |
| total_timesteps    | 657216        |
| value_loss         | 0.00010836651 |
------------

--------------------------------------
| approxkl           | 4.0502737e-06 |
| clipfrac           | 0.0           |
| explained_variance | 4.11e-05      |
| fps                | 953           |
| nupdates           | 668           |
| policy_entropy     | -0.90262973   |
| policy_loss        | -8.87841e-05  |
| serial_timesteps   | 167000        |
| time_elapsed       | 656           |
| total_timesteps    | 673344        |
| value_loss         | 7.968969e-05  |
--------------------------------------
---------------------------------------
| approxkl           | 7.05438e-06    |
| clipfrac           | 0.0            |
| explained_variance | 0.0146         |
| fps                | 1032           |
| nupdates           | 669            |
| policy_entropy     | -0.9048386     |
| policy_loss        | -0.00059212546 |
| serial_timesteps   | 167250         |
| time_elapsed       | 657            |
| total_timesteps    | 674352         |
| value_loss         | 5.6625604e-06  |
-------------

--------------------------------------
| approxkl           | 2.847188e-06  |
| clipfrac           | 0.0           |
| explained_variance | 0.0347        |
| fps                | 983           |
| nupdates           | 685           |
| policy_entropy     | -0.9553966    |
| policy_loss        | -0.0006206626 |
| serial_timesteps   | 171250        |
| time_elapsed       | 673           |
| total_timesteps    | 690480        |
| value_loss         | 8.214842e-06  |
--------------------------------------
--------------------------------------
| approxkl           | 1.2287267e-05 |
| clipfrac           | 0.0           |
| explained_variance | 0.00261       |
| fps                | 982           |
| nupdates           | 686           |
| policy_entropy     | -0.9591349    |
| policy_loss        | -0.0009147455 |
| serial_timesteps   | 171500        |
| time_elapsed       | 674           |
| total_timesteps    | 691488        |
| value_loss         | 4.775978e-06  |
-------------------------

---------------------------------------
| approxkl           | 3.4508805e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.0103        |
| fps                | 1082           |
| nupdates           | 702            |
| policy_entropy     | -1.0123086     |
| policy_loss        | -0.00089674187 |
| serial_timesteps   | 175500         |
| time_elapsed       | 690            |
| total_timesteps    | 707616         |
| value_loss         | 9.573334e-06   |
---------------------------------------
--------------------------------------
| approxkl           | 0.00012268449 |
| clipfrac           | 0.0           |
| explained_variance | 0.0179        |
| fps                | 1026          |
| nupdates           | 703           |
| policy_entropy     | -1.0163138    |
| policy_loss        | -0.0008254586 |
| serial_timesteps   | 175750        |
| time_elapsed       | 690           |
| total_timesteps    | 708624        |
| value_loss         | 5.0028266e-05 |
------------

---------------------------------------
| approxkl           | 9.683156e-05   |
| clipfrac           | 0.0            |
| explained_variance | -0.258         |
| fps                | 1041           |
| nupdates           | 718            |
| policy_entropy     | -1.0648394     |
| policy_loss        | -0.00092396897 |
| serial_timesteps   | 179500         |
| time_elapsed       | 705            |
| total_timesteps    | 723744         |
| value_loss         | 4.3988516e-06  |
---------------------------------------
--------------------------------------
| approxkl           | 0.0006499819  |
| clipfrac           | 0.0           |
| explained_variance | -0.0135       |
| fps                | 1004          |
| nupdates           | 719           |
| policy_entropy     | -1.0684223    |
| policy_loss        | -0.0013733184 |
| serial_timesteps   | 179750        |
| time_elapsed       | 706           |
| total_timesteps    | 724752        |
| value_loss         | 3.973817e-05  |
------------

--------------------------------------
| approxkl           | 0.0006990861  |
| clipfrac           | 0.0005        |
| explained_variance | 0.0176        |
| fps                | 983           |
| nupdates           | 734           |
| policy_entropy     | -1.1141055    |
| policy_loss        | -0.0028021645 |
| serial_timesteps   | 183500        |
| time_elapsed       | 721           |
| total_timesteps    | 739872        |
| value_loss         | 3.2395565e-06 |
--------------------------------------
---------------------------------------
| approxkl           | 0.00014661616  |
| clipfrac           | 0.0            |
| explained_variance | -0.0196        |
| fps                | 984            |
| nupdates           | 735            |
| policy_entropy     | -1.1179278     |
| policy_loss        | -0.00033946647 |
| serial_timesteps   | 183750         |
| time_elapsed       | 723            |
| total_timesteps    | 740880         |
| value_loss         | 2.6913252e-05  |
-------------

--------------------------------------
| approxkl           | 0.00011673126 |
| clipfrac           | 0.0           |
| explained_variance | 0.0759        |
| fps                | 978           |
| nupdates           | 750           |
| policy_entropy     | -1.1665115    |
| policy_loss        | -0.0013704735 |
| serial_timesteps   | 187500        |
| time_elapsed       | 738           |
| total_timesteps    | 756000        |
| value_loss         | 3.869787e-06  |
--------------------------------------
--------------------------------------
| approxkl           | 1.2389453e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.0066       |
| fps                | 1012          |
| nupdates           | 751           |
| policy_entropy     | -1.170866     |
| policy_loss        | -0.0002859414 |
| serial_timesteps   | 187750        |
| time_elapsed       | 739           |
| total_timesteps    | 757008        |
| value_loss         | 2.5231137e-05 |
-------------------------

--------------------------------------
| approxkl           | 1.82114e-05   |
| clipfrac           | 0.0           |
| explained_variance | -0.0574       |
| fps                | 962           |
| nupdates           | 766           |
| policy_entropy     | -1.2180457    |
| policy_loss        | -0.0007579294 |
| serial_timesteps   | 191500        |
| time_elapsed       | 753           |
| total_timesteps    | 772128        |
| value_loss         | 1.8405734e-06 |
--------------------------------------
--------------------------------------
| approxkl           | 4.375123e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.00458       |
| fps                | 1008          |
| nupdates           | 767           |
| policy_entropy     | -1.2216666    |
| policy_loss        | -0.0004900841 |
| serial_timesteps   | 191750        |
| time_elapsed       | 754           |
| total_timesteps    | 773136        |
| value_loss         | 1.5647762e-05 |
-------------------------

---------------------------------------
| approxkl           | 5.733396e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.0196         |
| fps                | 1032           |
| nupdates           | 783            |
| policy_entropy     | -1.2737643     |
| policy_loss        | -0.00015446066 |
| serial_timesteps   | 195750         |
| time_elapsed       | 771            |
| total_timesteps    | 789264         |
| value_loss         | 1.0072691e-05  |
---------------------------------------
---------------------------------------
| approxkl           | 2.680566e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.00664        |
| fps                | 1010           |
| nupdates           | 784            |
| policy_entropy     | -1.2756279     |
| policy_loss        | -0.00027343706 |
| serial_timesteps   | 196000         |
| time_elapsed       | 772            |
| total_timesteps    | 790272         |
| value_loss         | 1.7506625e-05  |


--------------------------------------
| approxkl           | 0.0052329977  |
| clipfrac           | 0.07125       |
| explained_variance | 0.00627       |
| fps                | 983           |
| nupdates           | 800           |
| policy_entropy     | -1.3121966    |
| policy_loss        | -0.0053957636 |
| serial_timesteps   | 200000        |
| time_elapsed       | 788           |
| total_timesteps    | 806400        |
| value_loss         | 1.7125933e-05 |
--------------------------------------
-------------------------------------
| approxkl           | 0.0009893419 |
| clipfrac           | 0.00175      |
| explained_variance | -0.0402      |
| fps                | 1012         |
| nupdates           | 801          |
| policy_entropy     | -1.3144875   |
| policy_loss        | -0.002191648 |
| serial_timesteps   | 200250       |
| time_elapsed       | 789          |
| total_timesteps    | 807408       |
| value_loss         | 1.278934e-06 |
-------------------------------------

--------------------------------------
| approxkl           | 7.0373176e-06 |
| clipfrac           | 0.0           |
| explained_variance | -0.0774       |
| fps                | 1019          |
| nupdates           | 817           |
| policy_entropy     | -1.3585291    |
| policy_loss        | -0.00078386   |
| serial_timesteps   | 204250        |
| time_elapsed       | 805           |
| total_timesteps    | 823536        |
| value_loss         | 7.0883384e-06 |
--------------------------------------
---------------------------------------
| approxkl           | 4.868405e-05   |
| clipfrac           | 0.0            |
| explained_variance | -0.0128        |
| fps                | 978            |
| nupdates           | 818            |
| policy_entropy     | -1.3618877     |
| policy_loss        | -5.3705087e-05 |
| serial_timesteps   | 204500         |
| time_elapsed       | 806            |
| total_timesteps    | 824544         |
| value_loss         | 2.4736511e-05  |
-------------

---------------------------------------
| approxkl           | 4.8281813e-06  |
| clipfrac           | 0.0            |
| explained_variance | -0.0991        |
| fps                | 1002           |
| nupdates           | 833            |
| policy_entropy     | -1.4014493     |
| policy_loss        | -0.00077255536 |
| serial_timesteps   | 208250         |
| time_elapsed       | 821            |
| total_timesteps    | 839664         |
| value_loss         | 8.9752916e-07  |
---------------------------------------
--------------------------------------
| approxkl           | 9.31937e-06   |
| clipfrac           | 0.0           |
| explained_variance | 0.00718       |
| fps                | 1057          |
| nupdates           | 834           |
| policy_entropy     | -1.4057658    |
| policy_loss        | -0.0002654685 |
| serial_timesteps   | 208500        |
| time_elapsed       | 822           |
| total_timesteps    | 840672        |
| value_loss         | 5.1334705e-06 |
------------

---------------------------------------
| approxkl           | 0.0009453941   |
| clipfrac           | 0.00025        |
| explained_variance | 0.0303         |
| fps                | 1031           |
| nupdates           | 850            |
| policy_entropy     | -1.4514732     |
| policy_loss        | -0.00052220304 |
| serial_timesteps   | 212500         |
| time_elapsed       | 837            |
| total_timesteps    | 856800         |
| value_loss         | 4.1533667e-06  |
---------------------------------------
--------------------------------------
| approxkl           | 0.0011038714  |
| clipfrac           | 0.00325       |
| explained_variance | 0.000258      |
| fps                | 1028          |
| nupdates           | 851           |
| policy_entropy     | -1.454141     |
| policy_loss        | -0.0029133223 |
| serial_timesteps   | 212750        |
| time_elapsed       | 838           |
| total_timesteps    | 857808        |
| value_loss         | 5.2649034e-06 |
------------

--------------------------------------
| approxkl           | 9.9460834e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.00942      |
| fps                | 1072          |
| nupdates           | 866           |
| policy_entropy     | -1.5012711    |
| policy_loss        | -0.0009220578 |
| serial_timesteps   | 216500        |
| time_elapsed       | 853           |
| total_timesteps    | 872928        |
| value_loss         | 5.788692e-06  |
--------------------------------------
---------------------------------------
| approxkl           | 7.3496285e-06  |
| clipfrac           | 0.0            |
| explained_variance | -0.0418        |
| fps                | 1110           |
| nupdates           | 867            |
| policy_entropy     | -1.505409      |
| policy_loss        | -0.00036701278 |
| serial_timesteps   | 216750         |
| time_elapsed       | 854            |
| total_timesteps    | 873936         |
| value_loss         | 5.4147954e-06  |
-------------

---------------------------------------
| approxkl           | 5.3228792e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.0145        |
| fps                | 1001           |
| nupdates           | 883            |
| policy_entropy     | -1.5556743     |
| policy_loss        | -0.00041199726 |
| serial_timesteps   | 220750         |
| time_elapsed       | 869            |
| total_timesteps    | 890064         |
| value_loss         | 2.67949e-06    |
---------------------------------------
----------------------------------------
| approxkl           | 8.676061e-06    |
| clipfrac           | 0.0             |
| explained_variance | 0.0356          |
| fps                | 1010            |
| nupdates           | 884             |
| policy_entropy     | -1.5577484      |
| policy_loss        | -0.000110479894 |
| serial_timesteps   | 221000          |
| time_elapsed       | 870             |
| total_timesteps    | 891072          |
| value_loss         | 5.3753

--------------------------------------
| approxkl           | 0.0010845135  |
| clipfrac           | 0.0015        |
| explained_variance | 0.0513        |
| fps                | 994           |
| nupdates           | 899           |
| policy_entropy     | -1.6060238    |
| policy_loss        | -0.0019177425 |
| serial_timesteps   | 224750        |
| time_elapsed       | 884           |
| total_timesteps    | 906192        |
| value_loss         | 2.73843e-06   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00030627876 |
| clipfrac           | 0.0           |
| explained_variance | 0.0232        |
| fps                | 1056          |
| nupdates           | 900           |
| policy_entropy     | -1.6085464    |
| policy_loss        | -0.0007695021 |
| serial_timesteps   | 225000        |
| time_elapsed       | 885           |
| total_timesteps    | 907200        |
| value_loss         | 4.7314e-06    |
-------------------------

---------------------------------------
| approxkl           | 8.505612e-05   |
| clipfrac           | 0.0            |
| explained_variance | 0.0192         |
| fps                | 1070           |
| nupdates           | 916            |
| policy_entropy     | -1.653836      |
| policy_loss        | -0.00092225586 |
| serial_timesteps   | 229000         |
| time_elapsed       | 900            |
| total_timesteps    | 923328         |
| value_loss         | 3.895761e-06   |
---------------------------------------
--------------------------------------
| approxkl           | 0.00022807058 |
| clipfrac           | 0.0           |
| explained_variance | -0.0142       |
| fps                | 1091          |
| nupdates           | 917           |
| policy_entropy     | -1.6564173    |
| policy_loss        | -0.0005769968 |
| serial_timesteps   | 229250        |
| time_elapsed       | 901           |
| total_timesteps    | 924336        |
| value_loss         | 3.10796e-07   |
------------

--------------------------------------
| approxkl           | 0.00022771496 |
| clipfrac           | 0.0           |
| explained_variance | 0.00693       |
| fps                | 1074          |
| nupdates           | 932           |
| policy_entropy     | -1.6974196    |
| policy_loss        | 9.268275e-05  |
| serial_timesteps   | 233000        |
| time_elapsed       | 915           |
| total_timesteps    | 939456        |
| value_loss         | 3.2270136e-06 |
--------------------------------------
--------------------------------------
| approxkl           | 4.204547e-06  |
| clipfrac           | 0.0           |
| explained_variance | -0.0832       |
| fps                | 1025          |
| nupdates           | 933           |
| policy_entropy     | -1.6998729    |
| policy_loss        | -0.0006104429 |
| serial_timesteps   | 233250        |
| time_elapsed       | 916           |
| total_timesteps    | 940464        |
| value_loss         | 2.8409633e-07 |
-------------------------

--------------------------------------
| approxkl           | 0.00022059324 |
| clipfrac           | 0.0           |
| explained_variance | -0.0084       |
| fps                | 1032          |
| nupdates           | 949           |
| policy_entropy     | -1.7440851    |
| policy_loss        | -0.0015625472 |
| serial_timesteps   | 237250        |
| time_elapsed       | 931           |
| total_timesteps    | 956592        |
| value_loss         | 2.7410067e-07 |
--------------------------------------
--------------------------------------
| approxkl           | 0.00038951996 |
| clipfrac           | 0.0005        |
| explained_variance | -0.0732       |
| fps                | 1042          |
| nupdates           | 950           |
| policy_entropy     | -1.7472224    |
| policy_loss        | -0.0014846076 |
| serial_timesteps   | 237500        |
| time_elapsed       | 932           |
| total_timesteps    | 957600        |
| value_loss         | 2.1821256e-06 |
-------------------------

--------------------------------------
| approxkl           | 0.00022672713 |
| clipfrac           | 0.0           |
| explained_variance | 0.013         |
| fps                | 1027          |
| nupdates           | 966           |
| policy_entropy     | -1.7891467    |
| policy_loss        | -0.0005820679 |
| serial_timesteps   | 241500        |
| time_elapsed       | 947           |
| total_timesteps    | 973728        |
| value_loss         | 1.1937735e-05 |
--------------------------------------
---------------------------------------
| approxkl           | 2.2347773e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.022         |
| fps                | 1071           |
| nupdates           | 967            |
| policy_entropy     | -1.7917259     |
| policy_loss        | -0.00027649768 |
| serial_timesteps   | 241750         |
| time_elapsed       | 948            |
| total_timesteps    | 974736         |
| value_loss         | 1.699467e-06   |
-------------

-------------------------------------
| approxkl           | 0.0006665159 |
| clipfrac           | 0.00075      |
| explained_variance | 0.00276      |
| fps                | 1109         |
| nupdates           | 982          |
| policy_entropy     | -1.8371931   |
| policy_loss        | -0.001337591 |
| serial_timesteps   | 245500       |
| time_elapsed       | 962          |
| total_timesteps    | 989856       |
| value_loss         | 1.291048e-06 |
-------------------------------------
---------------------------------------
| approxkl           | 7.043872e-05   |
| clipfrac           | 0.0            |
| explained_variance | -0.00576       |
| fps                | 1069           |
| nupdates           | 983            |
| policy_entropy     | -1.8401947     |
| policy_loss        | -0.00024895513 |
| serial_timesteps   | 245750         |
| time_elapsed       | 963            |
| total_timesteps    | 990864         |
| value_loss         | 1.1393109e-06  |
--------------------------

--------------------------------------
| approxkl           | 0.00096596475 |
| clipfrac           | 0.0019999999  |
| explained_variance | -0.00613      |
| fps                | 994           |
| nupdates           | 999           |
| policy_entropy     | -1.8856413    |
| policy_loss        | -0.0026085603 |
| serial_timesteps   | 249750        |
| time_elapsed       | 978           |
| total_timesteps    | 1006992       |
| value_loss         | 5.0535677e-06 |
--------------------------------------
--------------------------------------
| approxkl           | 0.0008803947  |
| clipfrac           | 0.0           |
| explained_variance | -0.0203       |
| fps                | 1028          |
| nupdates           | 1000          |
| policy_entropy     | -1.8874116    |
| policy_loss        | 4.0823055e-05 |
| serial_timesteps   | 250000        |
| time_elapsed       | 979           |
| total_timesteps    | 1008000       |
| value_loss         | 8.413224e-07  |
-------------------------

In [11]:
model = PPO.load(final_model_dir)

# Enjoy trained agent
env = gym.make(env_name)
obs = env.reset()
dones = False
while not dones:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    print(dones)
    env.render()

Loading a model without an environment, this model cannot be trained until it has a valid environment.
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


ValueError: too many values to unpack (expected 4)