In [1]:
import os
import gym
from IPython import display
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines.common.vec_env import DummyVecEnv
from utils.ppo import PPO
from utils.models import Policy, CNNPolicy

In [2]:
# Select the GPU for this kernel: order CUDA devices by PCI bus id and
# expose only device 0 to libraries that are initialised after this cell.
os.environ.update(
    CUDA_DEVICE_ORDER="PCI_BUS_ID",
    CUDA_VISIBLE_DEVICES="0",
)

In [3]:
# Base directory for this notebook's outputs: the current working directory
# of the kernel process at the time this cell runs.
LOGS = os.getcwd()

In [4]:
def makedirs(path):
    """Create the directory `path`, including any missing parent directories.

    The call is a no-op when the directory already exists, so it is safe to
    repeat (for example when a notebook cell is re-run).

    Args:
        path: Filesystem path of the directory to create.

    Returns:
        None.

    Raises:
        FileExistsError: If `path` already exists but is not a directory.
        OSError: If the directory cannot be created for any other reason
            (e.g. insufficient permissions).
    """
    # exist_ok=True lets the OS decide atomically whether the directory is
    # already there, instead of a separate os.path.exists() check that can
    # race with another process creating the same directory.
    os.makedirs(path, exist_ok=True)

In [5]:
# --- Experiment configuration ---------------------------------------------
env_name = 'LunarLanderContinuous-v2'  # Gym environment id handed to gym.make
run_id = 1                 # distinguishes output folders of separate runs
n_steps = 250              # forwarded to PPO as n_steps
total_timesteps = 1000000  # forwarded to model.learn as the training budget
cnn_policy = False         # True selects CNNPolicy, False selects Policy

# --- Output locations -------------------------------------------------------
# Derive the run directory from the working directory rather than from the
# previous value of LOGS: the old `LOGS = os.path.join(LOGS, ...)` form nested
# another `<env_name>/run<id>` level every time this cell was re-executed.
LOGS = os.path.join(os.getcwd(), env_name, 'run{}'.format(run_id))
makedirs(LOGS)
tb_log = os.path.join(LOGS, 'tb')          # passed to PPO as tensorboard_log
makedirs(tb_log)
model_dir = os.path.join(LOGS, 'models')   # parent of the save_file path given to learn()
makedirs(model_dir)
# Path given to model.save() after training; deliberately not created here.
final_model_dir = os.path.join(LOGS, 'model')
n_cpu = 4  # number of environment factories given to SubprocVecEnv

In [6]:
# Vectorised environment built from n_cpu zero-argument factories, each of
# which creates a fresh instance of `env_name`.
env = SubprocVecEnv([lambda: gym.make(env_name) for _ in range(n_cpu)])

# Choose the policy class (and the matching banner) once, then build the
# model with a single constructor call shared by both variants.
if cnn_policy:
    policy_cls, banner = CNNPolicy, 'Using CNN policy network'
else:
    policy_cls, banner = Policy, 'Using MLP policy network'
print(banner)
model = PPO(
    policy_cls,
    env,
    n_steps=n_steps,
    tensorboard_log=tb_log,
    verbose=1,
    full_tensorboard_log=True,
)

# save_file is an argument of the project-local PPO.learn; presumably the
# path used for checkpoints written during training -- confirm in utils/ppo.py.
checkpoint_path = os.path.join(model_dir, 'model')
model.learn(total_timesteps, env, save_file=checkpoint_path)
model.save(final_model_dir)
del model  # drop the in-memory model to demonstrate saving and loading

Using MLP policy network
INFO:tensorflow:Summary name model/pi_fc0/w:0 is illegal; using model/pi_fc0/w_0 instead.
INFO:tensorflow:Summary name model/pi_fc0/b:0 is illegal; using model/pi_fc0/b_0 instead.
INFO:tensorflow:Summary name model/vf_fc0/w:0 is illegal; using model/vf_fc0/w_0 instead.
INFO:tensorflow:Summary name model/vf_fc0/b:0 is illegal; using model/vf_fc0/b_0 instead.
INFO:tensorflow:Summary name model/pi_fc1/w:0 is illegal; using model/pi_fc1/w_0 instead.
INFO:tensorflow:Summary name model/pi_fc1/b:0 is illegal; using model/pi_fc1/b_0 instead.
INFO:tensorflow:Summary name model/vf_fc1/w:0 is illegal; using model/vf_fc1/w_0 instead.
INFO:tensorflow:Summary name model/vf_fc1/b:0 is illegal; using model/vf_fc1/b_0 instead.
INFO:tensorflow:Summary name model/vf/w:0 is illegal; using model/vf/w_0 instead.
INFO:tensorflow:Summary name model/vf/b:0 is illegal; using model/vf/b_0 instead.
INFO:tensorflow:Summary name model/pi/w:0 is illegal; using model/pi/w_0 instead.
INFO:tens

--------------------------------------
| approxkl           | 6.547676e-05  |
| clipfrac           | 0.0           |
| explained_variance | -0.0266       |
| fps                | 863           |
| nupdates           | 12            |
| policy_entropy     | 2.8451319     |
| policy_loss        | 0.00045908216 |
| serial_timesteps   | 3000          |
| time_elapsed       | 14.2          |
| total_timesteps    | 12096         |
| value_loss         | 613.09344     |
--------------------------------------
--------------------------------------
| approxkl           | 2.0207011e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.0146       |
| fps                | 814           |
| nupdates           | 13            |
| policy_entropy     | 2.842715      |
| policy_loss        | -0.0005128372 |
| serial_timesteps   | 3250          |
| time_elapsed       | 15.4          |
| total_timesteps    | 13104         |
| value_loss         | 343.76205     |
-------------------------

---------------------------------------
| approxkl           | 1.7470093e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.00923       |
| fps                | 852            |
| nupdates           | 28             |
| policy_entropy     | 2.8534067      |
| policy_loss        | -0.00035596418 |
| serial_timesteps   | 7000           |
| time_elapsed       | 33.4           |
| total_timesteps    | 28224          |
| value_loss         | 291.19534      |
---------------------------------------
---------------------------------------
| approxkl           | 2.0160594e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.0201        |
| fps                | 789            |
| nupdates           | 29             |
| policy_entropy     | 2.8531022      |
| policy_loss        | -0.00016061719 |
| serial_timesteps   | 7250           |
| time_elapsed       | 34.6           |
| total_timesteps    | 29232          |
| value_loss         | 444.83185      |


---------------------------------------
| approxkl           | 2.3114259e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.000354      |
| fps                | 804            |
| nupdates           | 44             |
| policy_entropy     | 2.8561544      |
| policy_loss        | -0.00046216996 |
| serial_timesteps   | 11000          |
| time_elapsed       | 52.9           |
| total_timesteps    | 44352          |
| value_loss         | 309.61893      |
---------------------------------------
--------------------------------------
| approxkl           | 2.1378582e-05 |
| clipfrac           | 0.0           |
| explained_variance | -0.0295       |
| fps                | 879           |
| nupdates           | 45            |
| policy_entropy     | 2.8566282     |
| policy_loss        | -0.0001002972 |
| serial_timesteps   | 11250         |
| time_elapsed       | 54.1          |
| total_timesteps    | 45360         |
| value_loss         | 412.80466     |
------------

--------------------------------------
| approxkl           | 3.658375e-05  |
| clipfrac           | 0.0           |
| explained_variance | 0.038         |
| fps                | 609           |
| nupdates           | 61            |
| policy_entropy     | 2.853183      |
| policy_loss        | -0.0002678751 |
| serial_timesteps   | 15250         |
| time_elapsed       | 74.6          |
| total_timesteps    | 61488         |
| value_loss         | 257.16235     |
--------------------------------------
---------------------------------------
| approxkl           | 3.4376408e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.00535        |
| fps                | 524            |
| nupdates           | 62             |
| policy_entropy     | 2.852376       |
| policy_loss        | -0.00013489905 |
| serial_timesteps   | 15500          |
| time_elapsed       | 76.2           |
| total_timesteps    | 62496          |
| value_loss         | 324.0538       |
-------------

--------------------------------------
| approxkl           | 0.0006655718  |
| clipfrac           | 0.0034999999  |
| explained_variance | 0.0494        |
| fps                | 581           |
| nupdates           | 78            |
| policy_entropy     | 2.847319      |
| policy_loss        | -0.0033562067 |
| serial_timesteps   | 19500         |
| time_elapsed       | 110           |
| total_timesteps    | 78624         |
| value_loss         | 96.28621      |
--------------------------------------
--------------------------------------
| approxkl           | 0.0014667364  |
| clipfrac           | 0.009750001   |
| explained_variance | 0.0751        |
| fps                | 514           |
| nupdates           | 79            |
| policy_entropy     | 2.8436916     |
| policy_loss        | -0.0035269097 |
| serial_timesteps   | 19750         |
| time_elapsed       | 111           |
| total_timesteps    | 79632         |
| value_loss         | 72.18471      |
-------------------------

---------------------------------------
| approxkl           | 0.000781114    |
| clipfrac           | 0.00075        |
| explained_variance | 0.225          |
| fps                | 534            |
| nupdates           | 95             |
| policy_entropy     | 2.8307528      |
| policy_loss        | -0.00040361087 |
| serial_timesteps   | 23750          |
| time_elapsed       | 143            |
| total_timesteps    | 95760          |
| value_loss         | 46.340958      |
---------------------------------------
--------------------------------------
| approxkl           | 0.0012993449  |
| clipfrac           | 0.0075        |
| explained_variance | 0.312         |
| fps                | 454           |
| nupdates           | 96            |
| policy_entropy     | 2.8287055     |
| policy_loss        | -0.0029781156 |
| serial_timesteps   | 24000         |
| time_elapsed       | 145           |
| total_timesteps    | 96768         |
| value_loss         | 42.668777     |
------------

-------------------------------------
| approxkl           | 0.0048033    |
| clipfrac           | 0.05775      |
| explained_variance | 0.221        |
| fps                | 617          |
| nupdates           | 112          |
| policy_entropy     | 2.8231866    |
| policy_loss        | -0.002926089 |
| serial_timesteps   | 28000        |
| time_elapsed       | 178          |
| total_timesteps    | 112896       |
| value_loss         | 28.780752    |
-------------------------------------
--------------------------------------
| approxkl           | 0.0031656523  |
| clipfrac           | 0.040499996   |
| explained_variance | 0.275         |
| fps                | 549           |
| nupdates           | 113           |
| policy_entropy     | 2.8221395     |
| policy_loss        | -0.0026387135 |
| serial_timesteps   | 28250         |
| time_elapsed       | 180           |
| total_timesteps    | 113904        |
| value_loss         | 40.70146      |
--------------------------------------

--------------------------------------
| approxkl           | 0.002578252   |
| clipfrac           | 0.023000002   |
| explained_variance | 0.115         |
| fps                | 513           |
| nupdates           | 129           |
| policy_entropy     | 2.8056257     |
| policy_loss        | -0.0030105733 |
| serial_timesteps   | 32250         |
| time_elapsed       | 211           |
| total_timesteps    | 130032        |
| value_loss         | 11.633674     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0014317841  |
| clipfrac           | 0.0115        |
| explained_variance | 0.183         |
| fps                | 448           |
| nupdates           | 130           |
| policy_entropy     | 2.8081596     |
| policy_loss        | -0.0031917896 |
| serial_timesteps   | 32500         |
| time_elapsed       | 213           |
| total_timesteps    | 131040        |
| value_loss         | 12.396698     |
-------------------------

--------------------------------------
| approxkl           | 0.012165088   |
| clipfrac           | 0.17525       |
| explained_variance | 0.00194       |
| fps                | 412           |
| nupdates           | 146           |
| policy_entropy     | 2.7860184     |
| policy_loss        | -0.0028526438 |
| serial_timesteps   | 36500         |
| time_elapsed       | 249           |
| total_timesteps    | 147168        |
| value_loss         | 5.9576874     |
--------------------------------------
-------------------------------------
| approxkl           | 0.0066359392 |
| clipfrac           | 0.0885       |
| explained_variance | -0.0442      |
| fps                | 426          |
| nupdates           | 147          |
| policy_entropy     | 2.7936692    |
| policy_loss        | 0.0036990372 |
| serial_timesteps   | 36750        |
| time_elapsed       | 251          |
| total_timesteps    | 148176       |
| value_loss         | 11.513737    |
-------------------------------------

--------------------------------------
| approxkl           | 0.0023374434  |
| clipfrac           | 0.014249998   |
| explained_variance | -0.116        |
| fps                | 461           |
| nupdates           | 163           |
| policy_entropy     | 2.782298      |
| policy_loss        | -0.0020771602 |
| serial_timesteps   | 40750         |
| time_elapsed       | 289           |
| total_timesteps    | 164304        |
| value_loss         | 9.238883      |
--------------------------------------
--------------------------------------
| approxkl           | 0.0050876485  |
| clipfrac           | 0.0665        |
| explained_variance | 0.0587        |
| fps                | 385           |
| nupdates           | 164           |
| policy_entropy     | 2.778172      |
| policy_loss        | -0.0028467744 |
| serial_timesteps   | 41000         |
| time_elapsed       | 291           |
| total_timesteps    | 165312        |
| value_loss         | 5.561512      |
-------------------------

----------------------------------------
| approxkl           | 0.00019441811   |
| clipfrac           | 0.0             |
| explained_variance | 0.163           |
| fps                | 435             |
| nupdates           | 180             |
| policy_entropy     | 2.7598448       |
| policy_loss        | -0.000102671605 |
| serial_timesteps   | 45000           |
| time_elapsed       | 330             |
| total_timesteps    | 181440          |
| value_loss         | 5.492949        |
----------------------------------------
---------------------------------------
| approxkl           | 0.00026038993  |
| clipfrac           | 0.0            |
| explained_variance | 0.0491         |
| fps                | 473            |
| nupdates           | 181            |
| policy_entropy     | 2.7590652      |
| policy_loss        | -0.00081794383 |
| serial_timesteps   | 45250          |
| time_elapsed       | 332            |
| total_timesteps    | 182448         |
| value_loss         | 8.58

--------------------------------------
| approxkl           | 0.0033847135  |
| clipfrac           | 0.037         |
| explained_variance | 0.129         |
| fps                | 436           |
| nupdates           | 197           |
| policy_entropy     | 2.7505946     |
| policy_loss        | -0.0008700784 |
| serial_timesteps   | 49250         |
| time_elapsed       | 370           |
| total_timesteps    | 198576        |
| value_loss         | 5.128424      |
--------------------------------------
--------------------------------------
| approxkl           | 0.0011957729  |
| clipfrac           | 0.00775       |
| explained_variance | 0.109         |
| fps                | 382           |
| nupdates           | 198           |
| policy_entropy     | 2.748002      |
| policy_loss        | -0.0006935919 |
| serial_timesteps   | 49500         |
| time_elapsed       | 372           |
| total_timesteps    | 199584        |
| value_loss         | 3.5040803     |
-------------------------

--------------------------------------
| approxkl           | 0.0013395112  |
| clipfrac           | 0.01025       |
| explained_variance | 0.158         |
| fps                | 439           |
| nupdates           | 214           |
| policy_entropy     | 2.7270722     |
| policy_loss        | -0.0005557098 |
| serial_timesteps   | 53500         |
| time_elapsed       | 412           |
| total_timesteps    | 215712        |
| value_loss         | 5.6828704     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0009207411  |
| clipfrac           | 0.0032499998  |
| explained_variance | 0.244         |
| fps                | 594           |
| nupdates           | 215           |
| policy_entropy     | 2.7257707     |
| policy_loss        | -0.0009106191 |
| serial_timesteps   | 53750         |
| time_elapsed       | 414           |
| total_timesteps    | 216720        |
| value_loss         | 4.7356887     |
-------------------------

-------------------------------------
| approxkl           | 0.00623772   |
| clipfrac           | 0.084249996  |
| explained_variance | 0.497        |
| fps                | 481          |
| nupdates           | 231          |
| policy_entropy     | 2.725231     |
| policy_loss        | -0.004315398 |
| serial_timesteps   | 57750        |
| time_elapsed       | 453          |
| total_timesteps    | 232848       |
| value_loss         | 3.0251899    |
-------------------------------------
-------------------------------------
| approxkl           | 0.0030084203 |
| clipfrac           | 0.030249998  |
| explained_variance | 0.367        |
| fps                | 396          |
| nupdates           | 232          |
| policy_entropy     | 2.7210062    |
| policy_loss        | -0.004121919 |
| serial_timesteps   | 58000        |
| time_elapsed       | 455          |
| total_timesteps    | 233856       |
| value_loss         | 3.5394955    |
-------------------------------------
------------

--------------------------------------
| approxkl           | 0.001887114   |
| clipfrac           | 0.010249999   |
| explained_variance | 0.303         |
| fps                | 446           |
| nupdates           | 248           |
| policy_entropy     | 2.7104146     |
| policy_loss        | -0.0025261496 |
| serial_timesteps   | 62000         |
| time_elapsed       | 494           |
| total_timesteps    | 249984        |
| value_loss         | 2.9210386     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0028771851  |
| clipfrac           | 0.026500002   |
| explained_variance | 0.281         |
| fps                | 344           |
| nupdates           | 249           |
| policy_entropy     | 2.7094676     |
| policy_loss        | -0.0010198489 |
| serial_timesteps   | 62250         |
| time_elapsed       | 496           |
| total_timesteps    | 250992        |
| value_loss         | 4.7597494     |
-------------------------

--------------------------------------
| approxkl           | 0.0030044464  |
| clipfrac           | 0.024         |
| explained_variance | 0.467         |
| fps                | 385           |
| nupdates           | 265           |
| policy_entropy     | 2.7008715     |
| policy_loss        | -0.0014556783 |
| serial_timesteps   | 66250         |
| time_elapsed       | 535           |
| total_timesteps    | 267120        |
| value_loss         | 3.7468703     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0003392329  |
| clipfrac           | 0.0           |
| explained_variance | 0.569         |
| fps                | 351           |
| nupdates           | 266           |
| policy_entropy     | 2.6979814     |
| policy_loss        | -0.0005876223 |
| serial_timesteps   | 66500         |
| time_elapsed       | 538           |
| total_timesteps    | 268128        |
| value_loss         | 4.902916      |
-------------------------

-------------------------------------
| approxkl           | 0.0031322546 |
| clipfrac           | 0.029        |
| explained_variance | 0.729        |
| fps                | 462          |
| nupdates           | 282          |
| policy_entropy     | 2.6851575    |
| policy_loss        | -0.002833228 |
| serial_timesteps   | 70500        |
| time_elapsed       | 575          |
| total_timesteps    | 284256       |
| value_loss         | 4.3568583    |
-------------------------------------
--------------------------------------
| approxkl           | 0.0022310242  |
| clipfrac           | 0.01875       |
| explained_variance | 0.687         |
| fps                | 455           |
| nupdates           | 283           |
| policy_entropy     | 2.684146      |
| policy_loss        | -0.0012225661 |
| serial_timesteps   | 70750         |
| time_elapsed       | 577           |
| total_timesteps    | 285264        |
| value_loss         | 2.847412      |
--------------------------------------

--------------------------------------
| approxkl           | 0.0036082023  |
| clipfrac           | 0.027250001   |
| explained_variance | 0.354         |
| fps                | 462           |
| nupdates           | 299           |
| policy_entropy     | 2.6597128     |
| policy_loss        | 0.00015022351 |
| serial_timesteps   | 74750         |
| time_elapsed       | 618           |
| total_timesteps    | 301392        |
| value_loss         | 7.2191586     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0006514053  |
| clipfrac           | 0.00075       |
| explained_variance | 0.539         |
| fps                | 382           |
| nupdates           | 300           |
| policy_entropy     | 2.6572578     |
| policy_loss        | -0.0012372023 |
| serial_timesteps   | 75000         |
| time_elapsed       | 621           |
| total_timesteps    | 302400        |
| value_loss         | 2.9942946     |
-------------------------

-------------------------------------
| approxkl           | 0.0004757539 |
| clipfrac           | 0.0005       |
| explained_variance | 0.805        |
| fps                | 310          |
| nupdates           | 316          |
| policy_entropy     | 2.6592126    |
| policy_loss        | -0.000845217 |
| serial_timesteps   | 79000        |
| time_elapsed       | 661          |
| total_timesteps    | 318528       |
| value_loss         | 2.7098036    |
-------------------------------------
--------------------------------------
| approxkl           | 0.00087932136 |
| clipfrac           | 0.0077500003  |
| explained_variance | 0.778         |
| fps                | 375           |
| nupdates           | 317           |
| policy_entropy     | 2.6597629     |
| policy_loss        | -0.0016591037 |
| serial_timesteps   | 79250         |
| time_elapsed       | 664           |
| total_timesteps    | 319536        |
| value_loss         | 8.390352      |
--------------------------------------

---------------------------------------
| approxkl           | 0.0003057012   |
| clipfrac           | 0.0            |
| explained_variance | 0.839          |
| fps                | 387            |
| nupdates           | 333            |
| policy_entropy     | 2.6603723      |
| policy_loss        | -0.00050858076 |
| serial_timesteps   | 83250          |
| time_elapsed       | 704            |
| total_timesteps    | 335664         |
| value_loss         | 2.9183366      |
---------------------------------------
--------------------------------------
| approxkl           | 0.0075774686  |
| clipfrac           | 0.103250004   |
| explained_variance | 0.846         |
| fps                | 370           |
| nupdates           | 334           |
| policy_entropy     | 2.6626363     |
| policy_loss        | -0.0065630465 |
| serial_timesteps   | 83500         |
| time_elapsed       | 706           |
| total_timesteps    | 336672        |
| value_loss         | 3.108409      |
------------

--------------------------------------
| approxkl           | 0.0002788495  |
| clipfrac           | 0.0           |
| explained_variance | 0.813         |
| fps                | 446           |
| nupdates           | 350           |
| policy_entropy     | 2.6687953     |
| policy_loss        | -0.0010405546 |
| serial_timesteps   | 87500         |
| time_elapsed       | 745           |
| total_timesteps    | 352800        |
| value_loss         | 6.2283635     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00063666655 |
| clipfrac           | 0.00525       |
| explained_variance | 0.81          |
| fps                | 416           |
| nupdates           | 351           |
| policy_entropy     | 2.6671004     |
| policy_loss        | -0.0019999624 |
| serial_timesteps   | 87750         |
| time_elapsed       | 748           |
| total_timesteps    | 353808        |
| value_loss         | 5.786547      |
-------------------------

---------------------------------------
| approxkl           | 0.00065784575  |
| clipfrac           | 0.0037500001   |
| explained_variance | -0.0103        |
| fps                | 491            |
| nupdates           | 367            |
| policy_entropy     | 2.6658287      |
| policy_loss        | -0.00056079665 |
| serial_timesteps   | 91750          |
| time_elapsed       | 786            |
| total_timesteps    | 369936         |
| value_loss         | 126.83799      |
---------------------------------------
--------------------------------------
| approxkl           | 0.0007176482  |
| clipfrac           | 0.0065        |
| explained_variance | 0.517         |
| fps                | 475           |
| nupdates           | 368           |
| policy_entropy     | 2.6647422     |
| policy_loss        | -0.0031997068 |
| serial_timesteps   | 92000         |
| time_elapsed       | 788           |
| total_timesteps    | 370944        |
| value_loss         | 47.953773     |
------------

--------------------------------------
| approxkl           | 0.007846026   |
| clipfrac           | 0.11350001    |
| explained_variance | 0.858         |
| fps                | 418           |
| nupdates           | 384           |
| policy_entropy     | 2.6581457     |
| policy_loss        | -0.0049125776 |
| serial_timesteps   | 96000         |
| time_elapsed       | 824           |
| total_timesteps    | 387072        |
| value_loss         | 5.399824      |
--------------------------------------
--------------------------------------
| approxkl           | 0.002830909   |
| clipfrac           | 0.026         |
| explained_variance | 0.226         |
| fps                | 445           |
| nupdates           | 385           |
| policy_entropy     | 2.6572206     |
| policy_loss        | -0.0011779706 |
| serial_timesteps   | 96250         |
| time_elapsed       | 826           |
| total_timesteps    | 388080        |
| value_loss         | 70.85906      |
-------------------------

--------------------------------------
| approxkl           | 0.001943962   |
| clipfrac           | 0.0185        |
| explained_variance | 0.89          |
| fps                | 460           |
| nupdates           | 401           |
| policy_entropy     | 2.6576128     |
| policy_loss        | -0.0036820057 |
| serial_timesteps   | 100250        |
| time_elapsed       | 861           |
| total_timesteps    | 404208        |
| value_loss         | 3.3455617     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0036259443  |
| clipfrac           | 0.037         |
| explained_variance | 0.862         |
| fps                | 509           |
| nupdates           | 402           |
| policy_entropy     | 2.6569328     |
| policy_loss        | -0.0036989613 |
| serial_timesteps   | 100500        |
| time_elapsed       | 863           |
| total_timesteps    | 405216        |
| value_loss         | 5.950382      |
-------------------------

--------------------------------------
| approxkl           | 0.0010368826  |
| clipfrac           | 0.00275       |
| explained_variance | 0.278         |
| fps                | 507           |
| nupdates           | 418           |
| policy_entropy     | 2.6708057     |
| policy_loss        | -0.0046389033 |
| serial_timesteps   | 104500        |
| time_elapsed       | 895           |
| total_timesteps    | 421344        |
| value_loss         | 73.30038      |
--------------------------------------
-------------------------------------
| approxkl           | 0.0022336363 |
| clipfrac           | 0.017250001  |
| explained_variance | 0.718        |
| fps                | 478          |
| nupdates           | 419          |
| policy_entropy     | 2.6708565    |
| policy_loss        | -0.00359827  |
| serial_timesteps   | 104750       |
| time_elapsed       | 897          |
| total_timesteps    | 422352       |
| value_loss         | 9.014769     |
-------------------------------------

-------------------------------------
| approxkl           | 0.002419394  |
| clipfrac           | 0.02025      |
| explained_variance | 0.571        |
| fps                | 491          |
| nupdates           | 435          |
| policy_entropy     | 2.667796     |
| policy_loss        | -0.004180924 |
| serial_timesteps   | 108750       |
| time_elapsed       | 930          |
| total_timesteps    | 438480       |
| value_loss         | 44.204082    |
-------------------------------------
--------------------------------------
| approxkl           | 0.0017404212  |
| clipfrac           | 0.013249999   |
| explained_variance | 0.471         |
| fps                | 524           |
| nupdates           | 436           |
| policy_entropy     | 2.6686718     |
| policy_loss        | 0.00018870369 |
| serial_timesteps   | 109000        |
| time_elapsed       | 932           |
| total_timesteps    | 439488        |
| value_loss         | 50.954823     |
--------------------------------------

--------------------------------------
| approxkl           | 0.0006259704  |
| clipfrac           | 0.0039999997  |
| explained_variance | 0.253         |
| fps                | 563           |
| nupdates           | 452           |
| policy_entropy     | 2.6843944     |
| policy_loss        | -0.0027222112 |
| serial_timesteps   | 113000        |
| time_elapsed       | 963           |
| total_timesteps    | 455616        |
| value_loss         | 119.891       |
--------------------------------------
--------------------------------------
| approxkl           | 0.00075298874 |
| clipfrac           | 0.00275       |
| explained_variance | 0.23          |
| fps                | 433           |
| nupdates           | 453           |
| policy_entropy     | 2.685202      |
| policy_loss        | 0.00048857724 |
| serial_timesteps   | 113250        |
| time_elapsed       | 965           |
| total_timesteps    | 456624        |
| value_loss         | 153.77455     |
-------------------------

--------------------------------------
| approxkl           | 0.00018421367 |
| clipfrac           | 0.0           |
| explained_variance | 0.454         |
| fps                | 600           |
| nupdates           | 469           |
| policy_entropy     | 2.7076488     |
| policy_loss        | -0.0006333374 |
| serial_timesteps   | 117250        |
| time_elapsed       | 998           |
| total_timesteps    | 472752        |
| value_loss         | 133.27423     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00026140068 |
| clipfrac           | 0.00025       |
| explained_variance | 0.396         |
| fps                | 576           |
| nupdates           | 470           |
| policy_entropy     | 2.7075713     |
| policy_loss        | 9.882606e-05  |
| serial_timesteps   | 117500        |
| time_elapsed       | 999           |
| total_timesteps    | 473760        |
| value_loss         | 104.61235     |
-------------------------

--------------------------------------
| approxkl           | 0.005239592   |
| clipfrac           | 0.06525       |
| explained_variance | -0.0667       |
| fps                | 434           |
| nupdates           | 486           |
| policy_entropy     | 2.6995804     |
| policy_loss        | -0.0005880135 |
| serial_timesteps   | 121500        |
| time_elapsed       | 1.03e+03      |
| total_timesteps    | 489888        |
| value_loss         | 75.13905      |
--------------------------------------
---------------------------------------
| approxkl           | 0.00456692     |
| clipfrac           | 0.0565         |
| explained_variance | 0.182          |
| fps                | 543            |
| nupdates           | 487            |
| policy_entropy     | 2.6991663      |
| policy_loss        | -0.00019159308 |
| serial_timesteps   | 121750         |
| time_elapsed       | 1.03e+03       |
| total_timesteps    | 490896         |
| value_loss         | 53.690952      |
-------------

--------------------------------------
| approxkl           | 0.00049181184 |
| clipfrac           | 0.0015        |
| explained_variance | 0.354         |
| fps                | 530           |
| nupdates           | 503           |
| policy_entropy     | 2.6950088     |
| policy_loss        | -0.0019521948 |
| serial_timesteps   | 125750        |
| time_elapsed       | 1.07e+03      |
| total_timesteps    | 507024        |
| value_loss         | 143.03973     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0018140188  |
| clipfrac           | 0.01675       |
| explained_variance | 0.554         |
| fps                | 521           |
| nupdates           | 504           |
| policy_entropy     | 2.6963701     |
| policy_loss        | -0.0034533935 |
| serial_timesteps   | 126000        |
| time_elapsed       | 1.07e+03      |
| total_timesteps    | 508032        |
| value_loss         | 17.374958     |
-------------------------

---------------------------------------
| approxkl           | 0.0014243989   |
| clipfrac           | 0.0065         |
| explained_variance | 0.822          |
| fps                | 607            |
| nupdates           | 520            |
| policy_entropy     | 2.7050133      |
| policy_loss        | -0.00020428511 |
| serial_timesteps   | 130000         |
| time_elapsed       | 1.1e+03        |
| total_timesteps    | 524160         |
| value_loss         | 7.890533       |
---------------------------------------
--------------------------------------
| approxkl           | 0.002017554   |
| clipfrac           | 0.02025       |
| explained_variance | 0.0746        |
| fps                | 508           |
| nupdates           | 521           |
| policy_entropy     | 2.7066453     |
| policy_loss        | -0.0014164373 |
| serial_timesteps   | 130250        |
| time_elapsed       | 1.1e+03       |
| total_timesteps    | 525168        |
| value_loss         | 70.62716      |
------------

-------------------------------------
| approxkl           | 0.0019760132 |
| clipfrac           | 0.020750001  |
| explained_variance | 0.519        |
| fps                | 482          |
| nupdates           | 537          |
| policy_entropy     | 2.709366     |
| policy_loss        | -0.003119623 |
| serial_timesteps   | 134250       |
| time_elapsed       | 1.14e+03     |
| total_timesteps    | 541296       |
| value_loss         | 56.381615    |
-------------------------------------
--------------------------------------
| approxkl           | 0.0019006717  |
| clipfrac           | 0.010749999   |
| explained_variance | 0.939         |
| fps                | 474           |
| nupdates           | 538           |
| policy_entropy     | 2.708235      |
| policy_loss        | -0.0026052697 |
| serial_timesteps   | 134500        |
| time_elapsed       | 1.14e+03      |
| total_timesteps    | 542304        |
| value_loss         | 2.1779728     |
--------------------------------------

--------------------------------------
| approxkl           | 0.00089544465 |
| clipfrac           | 0.003         |
| explained_variance | 0.506         |
| fps                | 401           |
| nupdates           | 554           |
| policy_entropy     | 2.698782      |
| policy_loss        | -0.0011780473 |
| serial_timesteps   | 138500        |
| time_elapsed       | 1.17e+03      |
| total_timesteps    | 558432        |
| value_loss         | 48.787598     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00037041053 |
| clipfrac           | 0.00125       |
| explained_variance | 0.345         |
| fps                | 526           |
| nupdates           | 555           |
| policy_entropy     | 2.6983955     |
| policy_loss        | 1.4648169e-05 |
| serial_timesteps   | 138750        |
| time_elapsed       | 1.17e+03      |
| total_timesteps    | 559440        |
| value_loss         | 49.528393     |
-------------------------

--------------------------------------
| approxkl           | 0.002252806   |
| clipfrac           | 0.019         |
| explained_variance | 0.291         |
| fps                | 466           |
| nupdates           | 571           |
| policy_entropy     | 2.6789966     |
| policy_loss        | 0.00010229589 |
| serial_timesteps   | 142750        |
| time_elapsed       | 1.21e+03      |
| total_timesteps    | 575568        |
| value_loss         | 48.87523      |
--------------------------------------
--------------------------------------
| approxkl           | 0.0004996932  |
| clipfrac           | 0.00075       |
| explained_variance | 0.522         |
| fps                | 532           |
| nupdates           | 572           |
| policy_entropy     | 2.6789083     |
| policy_loss        | -0.0021957194 |
| serial_timesteps   | 143000        |
| time_elapsed       | 1.21e+03      |
| total_timesteps    | 576576        |
| value_loss         | 82.4991       |
-------------------------

--------------------------------------
| approxkl           | 0.00062622235 |
| clipfrac           | 0.00175       |
| explained_variance | 0.276         |
| fps                | 488           |
| nupdates           | 588           |
| policy_entropy     | 2.6698256     |
| policy_loss        | -0.0018941946 |
| serial_timesteps   | 147000        |
| time_elapsed       | 1.24e+03      |
| total_timesteps    | 592704        |
| value_loss         | 99.98329      |
--------------------------------------
-------------------------------------
| approxkl           | 0.001357809  |
| clipfrac           | 0.013249999  |
| explained_variance | 0.662        |
| fps                | 579          |
| nupdates           | 589          |
| policy_entropy     | 2.6691897    |
| policy_loss        | -0.002250676 |
| serial_timesteps   | 147250       |
| time_elapsed       | 1.25e+03     |
| total_timesteps    | 593712       |
| value_loss         | 25.834625    |
-------------------------------------

--------------------------------------
| approxkl           | 0.0019828714  |
| clipfrac           | 0.012         |
| explained_variance | 0.376         |
| fps                | 522           |
| nupdates           | 605           |
| policy_entropy     | 2.6867242     |
| policy_loss        | -0.0018659135 |
| serial_timesteps   | 151250        |
| time_elapsed       | 1.28e+03      |
| total_timesteps    | 609840        |
| value_loss         | 97.336174     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0008427009  |
| clipfrac           | 0.00975       |
| explained_variance | 0.381         |
| fps                | 499           |
| nupdates           | 606           |
| policy_entropy     | 2.6880322     |
| policy_loss        | -0.0010975181 |
| serial_timesteps   | 151500        |
| time_elapsed       | 1.29e+03      |
| total_timesteps    | 610848        |
| value_loss         | 49.170807     |
-------------------------

-------------------------------------
| approxkl           | 0.0010693134 |
| clipfrac           | 0.0037500001 |
| explained_variance | 0.502        |
| fps                | 490          |
| nupdates           | 622          |
| policy_entropy     | 2.6934388    |
| policy_loss        | 6.825586e-05 |
| serial_timesteps   | 155500       |
| time_elapsed       | 1.32e+03     |
| total_timesteps    | 626976       |
| value_loss         | 78.524025    |
-------------------------------------
-------------------------------------
| approxkl           | 0.0018607262 |
| clipfrac           | 0.0205       |
| explained_variance | 0.351        |
| fps                | 424          |
| nupdates           | 623          |
| policy_entropy     | 2.6930952    |
| policy_loss        | -0.004810151 |
| serial_timesteps   | 155750       |
| time_elapsed       | 1.32e+03     |
| total_timesteps    | 627984       |
| value_loss         | 46.328064    |
-------------------------------------
------------

--------------------------------------
| approxkl           | 0.00078289653 |
| clipfrac           | 0.0075000003  |
| explained_variance | 0.408         |
| fps                | 456           |
| nupdates           | 639           |
| policy_entropy     | 2.6945896     |
| policy_loss        | -0.002124384  |
| serial_timesteps   | 159750        |
| time_elapsed       | 1.36e+03      |
| total_timesteps    | 644112        |
| value_loss         | 53.217274     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0008438055  |
| clipfrac           | 0.0035        |
| explained_variance | 0.876         |
| fps                | 430           |
| nupdates           | 640           |
| policy_entropy     | 2.6955802     |
| policy_loss        | -0.0008753395 |
| serial_timesteps   | 160000        |
| time_elapsed       | 1.36e+03      |
| total_timesteps    | 645120        |
| value_loss         | 7.6580005     |
-------------------------

-------------------------------------
| approxkl           | 0.0023793818 |
| clipfrac           | 0.02175      |
| explained_variance | 0.277        |
| fps                | 300          |
| nupdates           | 656          |
| policy_entropy     | 2.7102094    |
| policy_loss        | -0.004031792 |
| serial_timesteps   | 164000       |
| time_elapsed       | 1.39e+03     |
| total_timesteps    | 661248       |
| value_loss         | 51.638866    |
-------------------------------------
-------------------------------------
| approxkl           | 0.0023353512 |
| clipfrac           | 0.0225       |
| explained_variance | 0.39         |
| fps                | 348          |
| nupdates           | 657          |
| policy_entropy     | 2.712578     |
| policy_loss        | -0.004287602 |
| serial_timesteps   | 164250       |
| time_elapsed       | 1.4e+03      |
| total_timesteps    | 662256       |
| value_loss         | 52.642826    |
-------------------------------------
------------

--------------------------------------
| approxkl           | 0.0050174193  |
| clipfrac           | 0.062499996   |
| explained_variance | 0.473         |
| fps                | 408           |
| nupdates           | 673           |
| policy_entropy     | 2.7151704     |
| policy_loss        | -0.0030583024 |
| serial_timesteps   | 168250        |
| time_elapsed       | 1.43e+03      |
| total_timesteps    | 678384        |
| value_loss         | 52.13675      |
--------------------------------------
--------------------------------------
| approxkl           | 0.001423146   |
| clipfrac           | 0.0115        |
| explained_variance | 0.861         |
| fps                | 352           |
| nupdates           | 674           |
| policy_entropy     | 2.7142904     |
| policy_loss        | -0.0018021996 |
| serial_timesteps   | 168500        |
| time_elapsed       | 1.43e+03      |
| total_timesteps    | 679392        |
| value_loss         | 8.852719      |
-------------------------

--------------------------------------
| approxkl           | 0.002735175   |
| clipfrac           | 0.036         |
| explained_variance | 0.626         |
| fps                | 431           |
| nupdates           | 690           |
| policy_entropy     | 2.7121496     |
| policy_loss        | -0.0025240732 |
| serial_timesteps   | 172500        |
| time_elapsed       | 1.47e+03      |
| total_timesteps    | 695520        |
| value_loss         | 43.79209      |
--------------------------------------
--------------------------------------
| approxkl           | 0.0035846457  |
| clipfrac           | 0.0435        |
| explained_variance | 0.849         |
| fps                | 568           |
| nupdates           | 691           |
| policy_entropy     | 2.7102103     |
| policy_loss        | -0.0032528113 |
| serial_timesteps   | 172750        |
| time_elapsed       | 1.47e+03      |
| total_timesteps    | 696528        |
| value_loss         | 7.670358      |
-------------------------

--------------------------------------
| approxkl           | 0.0017042094  |
| clipfrac           | 0.014249999   |
| explained_variance | 0.801         |
| fps                | 473           |
| nupdates           | 707           |
| policy_entropy     | 2.7326028     |
| policy_loss        | -0.0032104696 |
| serial_timesteps   | 176750        |
| time_elapsed       | 1.51e+03      |
| total_timesteps    | 712656        |
| value_loss         | 10.485652     |
--------------------------------------
--------------------------------------
| approxkl           | 0.003621627   |
| clipfrac           | 0.031999998   |
| explained_variance | 0.379         |
| fps                | 468           |
| nupdates           | 708           |
| policy_entropy     | 2.7308192     |
| policy_loss        | -0.0010523971 |
| serial_timesteps   | 177000        |
| time_elapsed       | 1.51e+03      |
| total_timesteps    | 713664        |
| value_loss         | 46.6626       |
-------------------------

--------------------------------------
| approxkl           | 0.0013517165  |
| clipfrac           | 0.0085        |
| explained_variance | 0.226         |
| fps                | 468           |
| nupdates           | 724           |
| policy_entropy     | 2.7363374     |
| policy_loss        | 0.00018724723 |
| serial_timesteps   | 181000        |
| time_elapsed       | 1.54e+03      |
| total_timesteps    | 729792        |
| value_loss         | 126.96146     |
--------------------------------------
--------------------------------------
| approxkl           | 0.00081954274 |
| clipfrac           | 0.0069999998  |
| explained_variance | 0.581         |
| fps                | 483           |
| nupdates           | 725           |
| policy_entropy     | 2.7355242     |
| policy_loss        | -0.0018604917 |
| serial_timesteps   | 181250        |
| time_elapsed       | 1.54e+03      |
| total_timesteps    | 730800        |
| value_loss         | 38.69745      |
-------------------------

--------------------------------------
| approxkl           | 0.0021496634  |
| clipfrac           | 0.01475       |
| explained_variance | 0.509         |
| fps                | 530           |
| nupdates           | 741           |
| policy_entropy     | 2.7332728     |
| policy_loss        | -0.0023642452 |
| serial_timesteps   | 185250        |
| time_elapsed       | 1.58e+03      |
| total_timesteps    | 746928        |
| value_loss         | 90.40693      |
--------------------------------------
--------------------------------------
| approxkl           | 0.0014534468  |
| clipfrac           | 0.014250001   |
| explained_variance | 0.461         |
| fps                | 690           |
| nupdates           | 742           |
| policy_entropy     | 2.733051      |
| policy_loss        | -0.0029460865 |
| serial_timesteps   | 185500        |
| time_elapsed       | 1.58e+03      |
| total_timesteps    | 747936        |
| value_loss         | 117.65758     |
-------------------------

--------------------------------------
| approxkl           | 0.0003913673  |
| clipfrac           | 0.0005        |
| explained_variance | 0.508         |
| fps                | 437           |
| nupdates           | 758           |
| policy_entropy     | 2.7469077     |
| policy_loss        | -6.251759e-05 |
| serial_timesteps   | 189500        |
| time_elapsed       | 1.61e+03      |
| total_timesteps    | 764064        |
| value_loss         | 83.165695     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0016203484  |
| clipfrac           | 0.015000001   |
| explained_variance | 0.545         |
| fps                | 429           |
| nupdates           | 759           |
| policy_entropy     | 2.7466614     |
| policy_loss        | -0.0021436897 |
| serial_timesteps   | 189750        |
| time_elapsed       | 1.61e+03      |
| total_timesteps    | 765072        |
| value_loss         | 45.166607     |
-------------------------

---------------------------------------
| approxkl           | 0.0007987698   |
| clipfrac           | 0.00125        |
| explained_variance | 0.54           |
| fps                | 393            |
| nupdates           | 775            |
| policy_entropy     | 2.7550824      |
| policy_loss        | -0.00033333251 |
| serial_timesteps   | 193750         |
| time_elapsed       | 1.64e+03       |
| total_timesteps    | 781200         |
| value_loss         | 51.801544      |
---------------------------------------
-------------------------------------
| approxkl           | 0.0027024702 |
| clipfrac           | 0.035        |
| explained_variance | 0.555        |
| fps                | 414          |
| nupdates           | 776          |
| policy_entropy     | 2.7573907    |
| policy_loss        | -0.003921244 |
| serial_timesteps   | 194000       |
| time_elapsed       | 1.65e+03     |
| total_timesteps    | 782208       |
| value_loss         | 48.82925     |
------------------------

---------------------------------------
| approxkl           | 0.0022034345   |
| clipfrac           | 0.023500001    |
| explained_variance | 0.669          |
| fps                | 392            |
| nupdates           | 792            |
| policy_entropy     | 2.7568452      |
| policy_loss        | -0.00053086615 |
| serial_timesteps   | 198000         |
| time_elapsed       | 1.68e+03       |
| total_timesteps    | 798336         |
| value_loss         | 45.662556      |
---------------------------------------
--------------------------------------
| approxkl           | 0.0021774303  |
| clipfrac           | 0.017250001   |
| explained_variance | 0.553         |
| fps                | 584           |
| nupdates           | 793           |
| policy_entropy     | 2.760314      |
| policy_loss        | -0.0019288087 |
| serial_timesteps   | 198250        |
| time_elapsed       | 1.68e+03      |
| total_timesteps    | 799344        |
| value_loss         | 45.86812      |
------------

---------------------------------------
| approxkl           | 0.0015617341   |
| clipfrac           | 0.01125        |
| explained_variance | 0.481          |
| fps                | 320            |
| nupdates           | 809            |
| policy_entropy     | 2.758541       |
| policy_loss        | -0.00087018625 |
| serial_timesteps   | 202250         |
| time_elapsed       | 1.72e+03       |
| total_timesteps    | 815472         |
| value_loss         | 69.270775      |
---------------------------------------
-------------------------------------
| approxkl           | 0.003609588  |
| clipfrac           | 0.041249998  |
| explained_variance | 0.591        |
| fps                | 606          |
| nupdates           | 810          |
| policy_entropy     | 2.75762      |
| policy_loss        | -0.002969307 |
| serial_timesteps   | 202500       |
| time_elapsed       | 1.72e+03     |
| total_timesteps    | 816480       |
| value_loss         | 53.57906     |
------------------------

---------------------------------------
| approxkl           | 0.00052239955  |
| clipfrac           | 0.0005         |
| explained_variance | 0.507          |
| fps                | 479            |
| nupdates           | 826            |
| policy_entropy     | 2.7632985      |
| policy_loss        | -0.00041571452 |
| serial_timesteps   | 206500         |
| time_elapsed       | 1.76e+03       |
| total_timesteps    | 832608         |
| value_loss         | 37.315655      |
---------------------------------------
---------------------------------------
| approxkl           | 0.00046870732  |
| clipfrac           | 0.0            |
| explained_variance | 0.648          |
| fps                | 357            |
| nupdates           | 827            |
| policy_entropy     | 2.7640564      |
| policy_loss        | -0.00023781262 |
| serial_timesteps   | 206750         |
| time_elapsed       | 1.76e+03       |
| total_timesteps    | 833616         |
| value_loss         | 37.23585       |


--------------------------------------
| approxkl           | 0.00043828887 |
| clipfrac           | 0.0015        |
| explained_variance | 0.486         |
| fps                | 455           |
| nupdates           | 843           |
| policy_entropy     | 2.7576504     |
| policy_loss        | -0.0006292611 |
| serial_timesteps   | 210750        |
| time_elapsed       | 1.8e+03       |
| total_timesteps    | 849744        |
| value_loss         | 37.05457      |
--------------------------------------
--------------------------------------
| approxkl           | 0.00022370732 |
| clipfrac           | 0.0005        |
| explained_variance | 0.385         |
| fps                | 363           |
| nupdates           | 844           |
| policy_entropy     | 2.758264      |
| policy_loss        | -0.0012706226 |
| serial_timesteps   | 211000        |
| time_elapsed       | 1.8e+03       |
| total_timesteps    | 850752        |
| value_loss         | 26.645092     |
-------------------------

--------------------------------------
| approxkl           | 0.00032594052 |
| clipfrac           | 0.00025       |
| explained_variance | 0.591         |
| fps                | 418           |
| nupdates           | 860           |
| policy_entropy     | 2.7540898     |
| policy_loss        | 0.00015438118 |
| serial_timesteps   | 215000        |
| time_elapsed       | 1.84e+03      |
| total_timesteps    | 866880        |
| value_loss         | 33.88799      |
--------------------------------------
--------------------------------------
| approxkl           | 0.00045361702 |
| clipfrac           | 0.00125       |
| explained_variance | 0.5           |
| fps                | 330           |
| nupdates           | 861           |
| policy_entropy     | 2.7559168     |
| policy_loss        | 0.0003917779  |
| serial_timesteps   | 215250        |
| time_elapsed       | 1.84e+03      |
| total_timesteps    | 867888        |
| value_loss         | 33.877693     |
-------------------------

--------------------------------------
| approxkl           | 0.0039496063  |
| clipfrac           | 0.0495        |
| explained_variance | 0.505         |
| fps                | 556           |
| nupdates           | 877           |
| policy_entropy     | 2.758274      |
| policy_loss        | -0.0041024685 |
| serial_timesteps   | 219250        |
| time_elapsed       | 1.88e+03      |
| total_timesteps    | 884016        |
| value_loss         | 36.12472      |
--------------------------------------
-------------------------------------
| approxkl           | 0.0044943905 |
| clipfrac           | 0.0585       |
| explained_variance | 0.765        |
| fps                | 488          |
| nupdates           | 878          |
| policy_entropy     | 2.7591224    |
| policy_loss        | -0.006106658 |
| serial_timesteps   | 219500       |
| time_elapsed       | 1.88e+03     |
| total_timesteps    | 885024       |
| value_loss         | 5.925179     |
-------------------------------------

--------------------------------------
| approxkl           | 0.0007737745  |
| clipfrac           | 0.0019999999  |
| explained_variance | 0.455         |
| fps                | 515           |
| nupdates           | 894           |
| policy_entropy     | 2.7444866     |
| policy_loss        | -0.0016606759 |
| serial_timesteps   | 223500        |
| time_elapsed       | 1.92e+03      |
| total_timesteps    | 901152        |
| value_loss         | 66.753784     |
--------------------------------------
-------------------------------------
| approxkl           | 0.0043687616 |
| clipfrac           | 0.058250003  |
| explained_variance | 0.852        |
| fps                | 551          |
| nupdates           | 895          |
| policy_entropy     | 2.7435927    |
| policy_loss        | -0.002946193 |
| serial_timesteps   | 223750       |
| time_elapsed       | 1.92e+03     |
| total_timesteps    | 902160       |
| value_loss         | 2.607597     |
-------------------------------------

--------------------------------------
| approxkl           | 0.0046149213  |
| clipfrac           | 0.059499998   |
| explained_variance | 0.744         |
| fps                | 441           |
| nupdates           | 911           |
| policy_entropy     | 2.7427542     |
| policy_loss        | -0.0021318805 |
| serial_timesteps   | 227750        |
| time_elapsed       | 1.96e+03      |
| total_timesteps    | 918288        |
| value_loss         | 3.1827533     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0023350725  |
| clipfrac           | 0.018250002   |
| explained_variance | 0.819         |
| fps                | 350           |
| nupdates           | 912           |
| policy_entropy     | 2.737879      |
| policy_loss        | 2.7898219e-05 |
| serial_timesteps   | 228000        |
| time_elapsed       | 1.96e+03      |
| total_timesteps    | 919296        |
| value_loss         | 3.5364087     |
-------------------------

---------------------------------------
| approxkl           | 0.0010168899   |
| clipfrac           | 0.0055000004   |
| explained_variance | 0.707          |
| fps                | 437            |
| nupdates           | 928            |
| policy_entropy     | 2.743082       |
| policy_loss        | -0.00026882396 |
| serial_timesteps   | 232000         |
| time_elapsed       | 2e+03          |
| total_timesteps    | 935424         |
| value_loss         | 4.9384065      |
---------------------------------------
--------------------------------------
| approxkl           | 0.0022510076  |
| clipfrac           | 0.023999998   |
| explained_variance | 0.608         |
| fps                | 381           |
| nupdates           | 929           |
| policy_entropy     | 2.7420998     |
| policy_loss        | -0.0035526892 |
| serial_timesteps   | 232250        |
| time_elapsed       | 2e+03         |
| total_timesteps    | 936432        |
| value_loss         | 17.17155      |
------------

--------------------------------------
| approxkl           | 0.0011259965  |
| clipfrac           | 0.007         |
| explained_variance | 0.701         |
| fps                | 496           |
| nupdates           | 945           |
| policy_entropy     | 2.7395806     |
| policy_loss        | -0.0016912417 |
| serial_timesteps   | 236250        |
| time_elapsed       | 2.04e+03      |
| total_timesteps    | 952560        |
| value_loss         | 8.759916      |
--------------------------------------
-------------------------------------
| approxkl           | 0.0015212712 |
| clipfrac           | 0.01025      |
| explained_variance | 0.786        |
| fps                | 419          |
| nupdates           | 946          |
| policy_entropy     | 2.7398682    |
| policy_loss        | -0.002531251 |
| serial_timesteps   | 236500       |
| time_elapsed       | 2.04e+03     |
| total_timesteps    | 953568       |
| value_loss         | 7.988607     |
-------------------------------------

-------------------------------------
| approxkl           | 0.0008766143 |
| clipfrac           | 0.003        |
| explained_variance | 0.549        |
| fps                | 404          |
| nupdates           | 962          |
| policy_entropy     | 2.7428017    |
| policy_loss        | 0.0015100229 |
| serial_timesteps   | 240500       |
| time_elapsed       | 2.07e+03     |
| total_timesteps    | 969696       |
| value_loss         | 34.97984     |
-------------------------------------
---------------------------------------
| approxkl           | 0.00039445193  |
| clipfrac           | 0.00075        |
| explained_variance | 0.547          |
| fps                | 506            |
| nupdates           | 963            |
| policy_entropy     | 2.742189       |
| policy_loss        | -0.00041970465 |
| serial_timesteps   | 240750         |
| time_elapsed       | 2.07e+03       |
| total_timesteps    | 970704         |
| value_loss         | 45.66147       |
--------------------------

--------------------------------------
| approxkl           | 0.0016139077  |
| clipfrac           | 0.00825       |
| explained_variance | 0.936         |
| fps                | 524           |
| nupdates           | 979           |
| policy_entropy     | 2.7378976     |
| policy_loss        | -0.0007980128 |
| serial_timesteps   | 244750        |
| time_elapsed       | 2.11e+03      |
| total_timesteps    | 986832        |
| value_loss         | 2.483998      |
--------------------------------------
---------------------------------------
| approxkl           | 0.001863884    |
| clipfrac           | 0.01175        |
| explained_variance | 0.846          |
| fps                | 489            |
| nupdates           | 980            |
| policy_entropy     | 2.7358072      |
| policy_loss        | -1.9370185e-05 |
| serial_timesteps   | 245000         |
| time_elapsed       | 2.11e+03       |
| total_timesteps    | 987840         |
| value_loss         | 4.440522       |
-------------

--------------------------------------
| approxkl           | 0.0010673238  |
| clipfrac           | 0.00175       |
| explained_variance | 0.63          |
| fps                | 326           |
| nupdates           | 996           |
| policy_entropy     | 2.7221897     |
| policy_loss        | -0.0015000311 |
| serial_timesteps   | 249000        |
| time_elapsed       | 2.15e+03      |
| total_timesteps    | 1003968       |
| value_loss         | 4.0014925     |
--------------------------------------
--------------------------------------
| approxkl           | 0.0038976986  |
| clipfrac           | 0.047         |
| explained_variance | 0.818         |
| fps                | 404           |
| nupdates           | 997           |
| policy_entropy     | 2.7179835     |
| policy_loss        | -0.0034862643 |
| serial_timesteps   | 249250        |
| time_elapsed       | 2.15e+03      |
| total_timesteps    | 1004976       |
| value_loss         | 7.3807187     |
-------------------------

In [None]:
from stable_baselines.common.policies import FeedForwardPolicy

In [None]:
# Reload the policy written by `model.save(final_model_dir)` in the training cell.
# This notebook only imports the project's `PPO` class (from utils.ppo); the name
# `PPO2` is never imported, so the previous `PPO2.load(...)` raised NameError.
# NOTE(review): assumes utils.ppo.PPO provides a `load` classmethod that pairs
# with the `save` call used during training -- confirm against utils/ppo.py.
model = PPO.load(final_model_dir)

# Enjoy trained agent
env = gym.make(env_name)
obs = env.reset()
try:
    # Runs until the kernel is interrupted.
    while True:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()
        # A plain gym environment must be reset once an episode has ended;
        # stepping it further after `done` is undefined behaviour.
        if dones:
            obs = env.reset()
except KeyboardInterrupt:
    # Interrupting the kernel is the intended way to stop the demo.
    pass
finally:
    # Release the render window / simulator resources even on interrupt or error.
    env.close()