In [1]:
#!/usr/bin/env python3

# Train PPO1 on SlimeVolley using a single CPU.
# Should solve it (beat the existing AI on average over 1000 trials) in 3 hours on a single CPU, within 3M steps.

import os
import gym
import slimevolleygym
from slimevolleygym import SurvivalRewardEnv

from stable_baselines.ppo1 import PPO1
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import logger
from stable_baselines.common.callbacks import EvalCallback

# --- Run configuration --------------------------------------------------------
NUM_TIMESTEPS = int(5e6)  # total_timesteps handed to model.learn()
SEED = 721                # seeds the training env and the PPO1 model
EVAL_SEED = SEED + 1      # distinct seed for the dedicated evaluation env
EVAL_FREQ = 250000        # eval_freq handed to EvalCallback
EVAL_EPISODES = 10  # was 1000
LOGDIR = "ppo1" # moved to zoo afterwards.

logger.configure(folder=LOGDIR)

# Environment used for collecting training rollouts.
env = gym.make("SlimeVolley-v0")
env.seed(SEED)

# Dedicated environment for periodic evaluation. Sharing the training env with
# EvalCallback would let evaluation reset/step that env in the middle of a
# training rollout, so the learner would keep acting on an observation that no
# longer matches the env's real state.
eval_env = gym.make("SlimeVolley-v0")
eval_env.seed(EVAL_SEED)

# take mujoco hyperparams (but doubled timesteps_per_actorbatch to cover more steps.)
# seed=SEED also seeds the model itself; previously only the env was seeded,
# which left the policy initialisation and action sampling unseeded.
model = PPO1(MlpPolicy, env, timesteps_per_actorbatch=4096, clip_param=0.2, entcoeff=0.0, optim_epochs=10,
                 optim_stepsize=3e-4, optim_batchsize=64, gamma=0.99, lam=0.95, schedule='linear', verbose=2,
                 seed=SEED)

# Periodically evaluates the policy on eval_env and writes the best model and
# evaluation logs under LOGDIR.
eval_callback = EvalCallback(eval_env, best_model_save_path=LOGDIR, log_path=LOGDIR, eval_freq=EVAL_FREQ, n_eval_episodes=EVAL_EPISODES)

try:
    model.learn(total_timesteps=NUM_TIMESTEPS, callback=eval_callback)

    model.save(os.path.join(LOGDIR, "final_model")) # probably never get to this point.
finally:
    # Release both environments even if training is interrupted or raises.
    env.close()
    eval_env.close()

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Logging to ppo1




Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where







********** Iteration 0 ************


  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00034 |       0.00000 |       0.06495 |       0.00014 |       2.07931
     -0.00115 |       0.00000 |       0.03890 |       0.00073 |       2.07875
     -0.00244 |       0.00000 |       0.03392 |       0.00185 |       2.07764
     -0.00442 |       0.00000 |       0.03138 |       0.00381 |       2.07571
     -0.00567 |       0.00000 |       0.02938 |       0.00628 |       2.07329
     -0.00661 |       0.00000 |       0.02765 |       0.00690 |       2.07268
     -0.00701 |       0.00000 |       0.02643 |       0.00740 |       2.07219
     -0.00723 |       0.00000 |       0.02581 |       0.00784 |       2.07178
     -0.00759 |       0.00000 |       0.02509 |       0.00711 |       2.07248
     -0.00777 |       0.00000 |       0.02434 |       0.00763 |       2.07197
Evaluating losses...
     -0.00887 |       0.00000 |       0.02400 |       0.00756 |       2.07204
-----------------------------

     -0.00627 |       0.00000 |       0.01360 |       0.00709 |       2.01527
     -0.00645 |       0.00000 |       0.01374 |       0.00676 |       2.01187
Evaluating losses...
     -0.00793 |       0.00000 |       0.01329 |       0.00686 |       2.01351
----------------------------------
| EpLenMean       | 576          |
| EpRewMean       | -4.93        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 42           |
| TimeElapsed     | 21           |
| TimestepsSoFar  | 24576        |
| ev_tdlam_before | 0.849        |
| loss_ent        | 2.0135064    |
| loss_kl         | 0.0068599577 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.007927746 |
| loss_vf_loss    | 0.013289055  |
----------------------------------
********** Iteration 6 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00066 |       0.00000 |       0.01996 |       0.00063 |       2.01830
     -0.00143 |       0.00000 |       0.01928 |  

********** Iteration 11 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00091 |       0.00000 |       0.02920 |       0.00076 |       1.97491
     -0.00186 |       0.00000 |       0.02615 |       0.00167 |       1.97544
     -0.00325 |       0.00000 |       0.02476 |       0.00346 |       1.97717
     -0.00435 |       0.00000 |       0.02356 |       0.00401 |       1.97709
     -0.00555 |       0.00000 |       0.02299 |       0.00404 |       1.97315
     -0.00610 |       0.00000 |       0.02260 |       0.00471 |       1.97436
     -0.00640 |       0.00000 |       0.02212 |       0.00510 |       1.97618
     -0.00646 |       0.00000 |       0.02172 |       0.00500 |       1.97438
     -0.00733 |       0.00000 |       0.02171 |       0.00445 |       1.97330
     -0.00734 |       0.00000 |       0.02122 |       0.00520 |       1.97239
Evaluating losses...
     -0.00923 |       0.00000 |       0.02067 |       0.00494 |       

     -0.01036 |       0.00000 |       0.01988 |       0.00737 |       1.94273
     -0.01084 |       0.00000 |       0.01993 |       0.00762 |       1.94457
Evaluating losses...
     -0.01252 |       0.00000 |       0.01959 |       0.00772 |       1.94193
----------------------------------
| EpLenMean       | 639          |
| EpRewMean       | -4.77        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 109          |
| TimeElapsed     | 63.9         |
| TimestepsSoFar  | 69632        |
| ev_tdlam_before | 0.779        |
| loss_ent        | 1.9419316    |
| loss_kl         | 0.007717775  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012524026 |
| loss_vf_loss    | 0.019585606  |
----------------------------------
********** Iteration 17 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00045 |       0.00000 |       0.01462 |       0.00097 |       1.93863
     -0.00494 |       0.00000 |       0.01343 | 

********** Iteration 22 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00029 |       0.00000 |       0.01752 |       0.00079 |       1.87393
     -0.00524 |       0.00000 |       0.01629 |       0.00353 |       1.85999
     -0.00702 |       0.00000 |       0.01580 |       0.00534 |       1.85875
     -0.00773 |       0.00000 |       0.01550 |       0.00556 |       1.85857
     -0.00875 |       0.00000 |       0.01514 |       0.00636 |       1.85027
     -0.00881 |       0.00000 |       0.01481 |       0.00643 |       1.85525
     -0.00893 |       0.00000 |       0.01483 |       0.00686 |       1.84914
     -0.01050 |       0.00000 |       0.01450 |       0.00656 |       1.85288
     -0.01058 |       0.00000 |       0.01426 |       0.00677 |       1.85124
     -0.01085 |       0.00000 |       0.01437 |       0.00702 |       1.84692
Evaluating losses...
     -0.01276 |       0.00000 |       0.01402 |       0.00743 |       

     -0.01143 |       0.00000 |       0.01279 |       0.00774 |       1.70423
     -0.01210 |       0.00000 |       0.01278 |       0.00735 |       1.70145
Evaluating losses...
     -0.01377 |       0.00000 |       0.01255 |       0.00824 |       1.70700
---------------------------------
| EpLenMean       | 634         |
| EpRewMean       | -4.81       |
| EpThisIter      | 6           |
| EpisodesSoFar   | 182         |
| TimeElapsed     | 105         |
| TimestepsSoFar  | 114688      |
| ev_tdlam_before | 0.836       |
| loss_ent        | 1.707002    |
| loss_kl         | 0.008239976 |
| loss_pol_entpen | 0.0         |
| loss_pol_surr   | -0.01376792 |
| loss_vf_loss    | 0.012551834 |
---------------------------------
********** Iteration 28 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00036 |       0.00000 |       0.02019 |       0.00100 |       1.71112
     -0.00337 |       0.00000 |       0.01870 |       0.00500 

********** Iteration 33 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00052 |       0.00000 |       0.01575 |       0.00180 |       1.63190
     -0.00506 |       0.00000 |       0.01455 |       0.00406 |       1.63779
     -0.00711 |       0.00000 |       0.01406 |       0.00524 |       1.64487
     -0.00888 |       0.00000 |       0.01381 |       0.00589 |       1.64137
     -0.00996 |       0.00000 |       0.01344 |       0.00549 |       1.64227
     -0.01051 |       0.00000 |       0.01306 |       0.00567 |       1.63601
     -0.01157 |       0.00000 |       0.01310 |       0.00571 |       1.64561
     -0.01243 |       0.00000 |       0.01274 |       0.00557 |       1.64121
     -0.01294 |       0.00000 |       0.01259 |       0.00581 |       1.63979
     -0.01368 |       0.00000 |       0.01254 |       0.00632 |       1.64075
Evaluating losses...
     -0.01570 |       0.00000 |       0.01220 |       0.00485 |       

     -0.01095 |       0.00000 |       0.01279 |       0.00855 |       1.50589
     -0.01137 |       0.00000 |       0.01264 |       0.00827 |       1.50619
Evaluating losses...
     -0.01365 |       0.00000 |       0.01218 |       0.00840 |       1.50443
----------------------------------
| EpLenMean       | 650          |
| EpRewMean       | -4.86        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 250          |
| TimeElapsed     | 151          |
| TimestepsSoFar  | 159744       |
| ev_tdlam_before | 0.824        |
| loss_ent        | 1.5044277    |
| loss_kl         | 0.008397078  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013646921 |
| loss_vf_loss    | 0.012179069  |
----------------------------------
********** Iteration 39 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00040 |       0.00000 |       0.01417 |       0.00135 |       1.48939
     -0.00281 |       0.00000 |       0.01319 | 

********** Iteration 44 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00027 |       0.00000 |       0.01601 |       0.00150 |       1.43682
     -0.00490 |       0.00000 |       0.01422 |       0.00297 |       1.43237
     -0.00608 |       0.00000 |       0.01342 |       0.00391 |       1.43195
     -0.00675 |       0.00000 |       0.01307 |       0.00431 |       1.42402
     -0.00758 |       0.00000 |       0.01265 |       0.00453 |       1.41891
     -0.00867 |       0.00000 |       0.01241 |       0.00524 |       1.42504
     -0.00945 |       0.00000 |       0.01214 |       0.00434 |       1.42827
     -0.00951 |       0.00000 |       0.01202 |       0.00565 |       1.42105
     -0.00984 |       0.00000 |       0.01212 |       0.00505 |       1.42311
     -0.01117 |       0.00000 |       0.01188 |       0.00511 |       1.42580
Evaluating losses...
     -0.01256 |       0.00000 |       0.01184 |       0.00521 |       

     -0.01167 |       0.00000 |       0.01101 |       0.00824 |       1.37493
     -0.01191 |       0.00000 |       0.01079 |       0.00682 |       1.37996
Evaluating losses...
     -0.01422 |       0.00000 |       0.01052 |       0.00707 |       1.37929
----------------------------------
| EpLenMean       | 651          |
| EpRewMean       | -4.91        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 320          |
| TimeElapsed     | 191          |
| TimestepsSoFar  | 204800       |
| ev_tdlam_before | 0.838        |
| loss_ent        | 1.3792915    |
| loss_kl         | 0.0070720934 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014220924 |
| loss_vf_loss    | 0.010522015  |
----------------------------------
********** Iteration 50 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00060 |       0.00000 |       0.02042 |       0.00169 |       1.38706
     -0.00359 |       0.00000 |       0.01855 | 

********** Iteration 55 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00195 |       0.00000 |       0.01615 |       0.00096 |       1.31642
     -0.00320 |       0.00000 |       0.01496 |       0.00281 |       1.29552
     -0.00517 |       0.00000 |       0.01439 |       0.00477 |       1.30409
     -0.00643 |       0.00000 |       0.01413 |       0.00502 |       1.30564
     -0.00752 |       0.00000 |       0.01382 |       0.00456 |       1.30066
     -0.00813 |       0.00000 |       0.01360 |       0.00484 |       1.30936
     -0.00965 |       0.00000 |       0.01351 |       0.00522 |       1.29612
     -0.00982 |       0.00000 |       0.01336 |       0.00606 |       1.30444
     -0.01055 |       0.00000 |       0.01313 |       0.00589 |       1.30945
     -0.01152 |       0.00000 |       0.01298 |       0.00573 |       1.30169
Evaluating losses...
     -0.01379 |       0.00000 |       0.01309 |       0.00541 |       

     -0.01074 |       0.00000 |       0.01635 |       0.00458 |       1.30444
     -0.01108 |       0.00000 |       0.01592 |       0.00494 |       1.30386
Evaluating losses...
     -0.01381 |       0.00000 |       0.01551 |       0.00585 |       1.30945
----------------------------------
| EpLenMean       | 665          |
| EpRewMean       | -4.87        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 387          |
| TimeElapsed     | 235          |
| TimestepsSoFar  | 249856       |
| ev_tdlam_before | 0.788        |
| loss_ent        | 1.3094532    |
| loss_kl         | 0.005851471  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013808857 |
| loss_vf_loss    | 0.015509328  |
----------------------------------
********** Iteration 61 ************
Eval num_timesteps=249856, episode_reward=-4.85 +/- 0.38
Episode length: 642.00 +/- 147.82
New best mean reward!
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00082

********** Iteration 66 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00012 |       0.00000 |       0.01730 |       0.00175 |       1.22110
     -0.00260 |       0.00000 |       0.01591 |       0.00358 |       1.21223
     -0.00449 |       0.00000 |       0.01531 |       0.00346 |       1.21139
     -0.00510 |       0.00000 |       0.01482 |       0.00458 |       1.21139
     -0.00650 |       0.00000 |       0.01444 |       0.00482 |       1.20836
     -0.00704 |       0.00000 |       0.01433 |       0.00432 |       1.20485
     -0.00800 |       0.00000 |       0.01409 |       0.00498 |       1.20329
     -0.00885 |       0.00000 |       0.01395 |       0.00512 |       1.20280
     -0.00933 |       0.00000 |       0.01384 |       0.00557 |       1.20422
     -0.00976 |       0.00000 |       0.01360 |       0.00611 |       1.19538
Evaluating losses...
     -0.01212 |       0.00000 |       0.01326 |       0.00586 |       

     -0.01081 |       0.00000 |       0.01384 |       0.00489 |       1.15811
     -0.01135 |       0.00000 |       0.01367 |       0.00483 |       1.16034
Evaluating losses...
     -0.01341 |       0.00000 |       0.01409 |       0.00413 |       1.16493
----------------------------------
| EpLenMean       | 715          |
| EpRewMean       | -4.81        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 448          |
| TimeElapsed     | 315          |
| TimestepsSoFar  | 294912       |
| ev_tdlam_before | 0.82         |
| loss_ent        | 1.1649255    |
| loss_kl         | 0.004125205  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013411194 |
| loss_vf_loss    | 0.014091918  |
----------------------------------
********** Iteration 72 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00034 |       0.00000 |       0.02086 |       0.00098 |       1.17288
     -0.00440 |       0.00000 |       0.01891 | 

********** Iteration 77 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00034 |       0.00000 |       0.01857 |       0.00094 |       1.14004
     -0.00440 |       0.00000 |       0.01669 |       0.00322 |       1.12802
     -0.00645 |       0.00000 |       0.01565 |       0.00327 |       1.12555
     -0.00833 |       0.00000 |       0.01512 |       0.00344 |       1.12379
     -0.00998 |       0.00000 |       0.01463 |       0.00427 |       1.11877
     -0.01188 |       0.00000 |       0.01425 |       0.00438 |       1.12352
     -0.01266 |       0.00000 |       0.01395 |       0.00463 |       1.12684
     -0.01379 |       0.00000 |       0.01367 |       0.00493 |       1.12425
     -0.01451 |       0.00000 |       0.01347 |       0.00544 |       1.12105
     -0.01538 |       0.00000 |       0.01324 |       0.00553 |       1.12840
Evaluating losses...
     -0.01744 |       0.00000 |       0.01278 |       0.00586 |       

     -0.01245 |       0.00000 |       0.00933 |       0.00579 |       1.10719
     -0.01327 |       0.00000 |       0.00922 |       0.00567 |       1.10298
Evaluating losses...
     -0.01468 |       0.00000 |       0.00900 |       0.00581 |       1.10309
---------------------------------
| EpLenMean       | 731         |
| EpRewMean       | -4.8        |
| EpThisIter      | 5           |
| EpisodesSoFar   | 510         |
| TimeElapsed     | 356         |
| TimestepsSoFar  | 339968      |
| ev_tdlam_before | 0.842       |
| loss_ent        | 1.1030927   |
| loss_kl         | 0.005809648 |
| loss_pol_entpen | 0.0         |
| loss_pol_surr   | -0.01468109 |
| loss_vf_loss    | 0.008996162 |
---------------------------------
********** Iteration 83 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00070 |       0.00000 |       0.01771 |       0.00126 |       1.12137
     -0.00474 |       0.00000 |       0.01620 |       0.00330 

********** Iteration 88 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00019 |       0.00000 |       0.01586 |       0.00140 |       1.03101
     -0.00387 |       0.00000 |       0.01422 |       0.00307 |       1.02894
     -0.00611 |       0.00000 |       0.01361 |       0.00408 |       1.03382
     -0.00778 |       0.00000 |       0.01307 |       0.00401 |       1.02391
     -0.00806 |       0.00000 |       0.01270 |       0.00430 |       1.02890
     -0.00959 |       0.00000 |       0.01236 |       0.00427 |       1.02379
     -0.01095 |       0.00000 |       0.01228 |       0.00451 |       1.02568
     -0.01107 |       0.00000 |       0.01199 |       0.00485 |       1.02318
     -0.01210 |       0.00000 |       0.01180 |       0.00499 |       1.02656
     -0.01278 |       0.00000 |       0.01158 |       0.00468 |       1.02244
Evaluating losses...
     -0.01467 |       0.00000 |       0.01118 |       0.00509 |       

     -0.01096 |       0.00000 |       0.01131 |       0.00418 |       0.99165
     -0.01196 |       0.00000 |       0.01102 |       0.00453 |       0.99198
Evaluating losses...
     -0.01388 |       0.00000 |       0.01074 |       0.00460 |       0.99643
----------------------------------
| EpLenMean       | 719          |
| EpRewMean       | -4.86        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 573          |
| TimeElapsed     | 399          |
| TimestepsSoFar  | 385024       |
| ev_tdlam_before | 0.816        |
| loss_ent        | 0.9964347    |
| loss_kl         | 0.004600752  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013875335 |
| loss_vf_loss    | 0.010735738  |
----------------------------------
********** Iteration 94 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00052 |       0.00000 |       0.01581 |       0.00158 |       0.98379
     -0.00448 |       0.00000 |       0.01397 | 

********** Iteration 99 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00021 |       0.00000 |       0.02207 |       0.00157 |       0.93390
     -0.00474 |       0.00000 |       0.01968 |       0.00272 |       0.92522
     -0.00703 |       0.00000 |       0.01849 |       0.00378 |       0.91718
     -0.00857 |       0.00000 |       0.01754 |       0.00395 |       0.90879
     -0.00958 |       0.00000 |       0.01688 |       0.00453 |       0.91470
     -0.01071 |       0.00000 |       0.01635 |       0.00458 |       0.91293
     -0.01180 |       0.00000 |       0.01588 |       0.00538 |       0.90659
     -0.01251 |       0.00000 |       0.01565 |       0.00543 |       0.90989
     -0.01323 |       0.00000 |       0.01535 |       0.00579 |       0.90161
     -0.01448 |       0.00000 |       0.01509 |       0.00587 |       0.90879
Evaluating losses...
     -0.01639 |       0.00000 |       0.01462 |       0.00599 |       

     -0.01141 |       0.00000 |       0.01271 |       0.00488 |       0.98865
     -0.01240 |       0.00000 |       0.01260 |       0.00529 |       0.99108
Evaluating losses...
     -0.01445 |       0.00000 |       0.01253 |       0.00483 |       0.98670
-----------------------------------
| EpLenMean       | 773           |
| EpRewMean       | -4.82         |
| EpThisIter      | 3             |
| EpisodesSoFar   | 626           |
| TimeElapsed     | 442           |
| TimestepsSoFar  | 430080        |
| ev_tdlam_before | 0.734         |
| loss_ent        | 0.9867045     |
| loss_kl         | 0.004833236   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0144545045 |
| loss_vf_loss    | 0.01253274    |
-----------------------------------
********** Iteration 105 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00076 |       0.00000 |       0.01114 |       0.00138 |       0.99411
     -0.00463 |       0.00000 |  

********** Iteration 110 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00065 |       0.00000 |       0.01668 |       0.00106 |       0.93584
     -0.00349 |       0.00000 |       0.01486 |       0.00318 |       0.93748
     -0.00533 |       0.00000 |       0.01393 |       0.00372 |       0.94101
     -0.00718 |       0.00000 |       0.01323 |       0.00456 |       0.93856
     -0.00854 |       0.00000 |       0.01283 |       0.00405 |       0.93419
     -0.00896 |       0.00000 |       0.01268 |       0.00479 |       0.93633
     -0.01032 |       0.00000 |       0.01252 |       0.00486 |       0.93781
     -0.01164 |       0.00000 |       0.01210 |       0.00494 |       0.93771
     -0.01198 |       0.00000 |       0.01189 |       0.00518 |       0.93648
     -0.01281 |       0.00000 |       0.01156 |       0.00496 |       0.93617
Evaluating losses...
     -0.01440 |       0.00000 |       0.01123 |       0.00529 |      

     -0.01038 |       0.00000 |       0.01477 |       0.00301 |       0.90522
     -0.01129 |       0.00000 |       0.01476 |       0.00344 |       0.90295
     -0.01164 |       0.00000 |       0.01446 |       0.00339 |       0.90047
Evaluating losses...
     -0.01405 |       0.00000 |       0.01381 |       0.00321 |       0.90222
----------------------------------
| EpLenMean       | 858          |
| EpRewMean       | -4.83        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 678          |
| TimeElapsed     | 487          |
| TimestepsSoFar  | 475136       |
| ev_tdlam_before | 0.671        |
| loss_ent        | 0.9022175    |
| loss_kl         | 0.0032108729 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014048165 |
| loss_vf_loss    | 0.013812091  |
----------------------------------
********** Iteration 116 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00126 |       0.00000 |       0.01791 |

********** Iteration 121 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00032 |       0.00000 |       0.02056 |       0.00096 |       0.87478
     -0.00496 |       0.00000 |       0.01740 |       0.00234 |       0.87336
     -0.00774 |       0.00000 |       0.01634 |       0.00235 |       0.87215
     -0.00966 |       0.00000 |       0.01577 |       0.00327 |       0.87082
     -0.01113 |       0.00000 |       0.01528 |       0.00299 |       0.86622
     -0.01193 |       0.00000 |       0.01495 |       0.00329 |       0.86205
     -0.01289 |       0.00000 |       0.01469 |       0.00365 |       0.86204
     -0.01354 |       0.00000 |       0.01438 |       0.00388 |       0.86218
     -0.01512 |       0.00000 |       0.01419 |       0.00400 |       0.85903
     -0.01579 |       0.00000 |       0.01394 |       0.00438 |       0.85850
Evaluating losses...
     -0.01763 |       0.00000 |       0.01353 |       0.00439 |      

     -0.01278 |       0.00000 |       0.01040 |       0.00457 |       0.84123
     -0.01409 |       0.00000 |       0.01006 |       0.00518 |       0.84006
     -0.01468 |       0.00000 |       0.00989 |       0.00469 |       0.84115
     -0.01529 |       0.00000 |       0.00968 |       0.00517 |       0.83911
Evaluating losses...
     -0.01742 |       0.00000 |       0.00935 |       0.00571 |       0.84020
----------------------------------
| EpLenMean       | 966          |
| EpRewMean       | -4.83        |
| EpThisIter      | 4            |
| EpisodesSoFar   | 721          |
| TimeElapsed     | 581          |
| TimestepsSoFar  | 520192       |
| ev_tdlam_before | 0.65         |
| loss_ent        | 0.8402006    |
| loss_kl         | 0.005711353  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.017419403 |
| loss_vf_loss    | 0.009352465  |
----------------------------------
********** Iteration 127 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |

********** Iteration 132 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00052 |       0.00000 |       0.01760 |       0.00139 |       0.82767
     -0.00422 |       0.00000 |       0.01593 |       0.00251 |       0.82719
     -0.00587 |       0.00000 |       0.01532 |       0.00239 |       0.83324
     -0.00727 |       0.00000 |       0.01459 |       0.00262 |       0.82842
     -0.00851 |       0.00000 |       0.01430 |       0.00382 |       0.83257
     -0.00960 |       0.00000 |       0.01399 |       0.00378 |       0.84102
     -0.01119 |       0.00000 |       0.01366 |       0.00396 |       0.83743
     -0.01207 |       0.00000 |       0.01342 |       0.00410 |       0.83435
     -0.01323 |       0.00000 |       0.01344 |       0.00463 |       0.83652
     -0.01314 |       0.00000 |       0.01302 |       0.00490 |       0.83796
Evaluating losses...
     -0.01549 |       0.00000 |       0.01265 |       0.00479 |      

     -0.01169 |       0.00000 |       0.00975 |       0.00499 |       0.84127
     -0.01201 |       0.00000 |       0.00957 |       0.00534 |       0.83738
Evaluating losses...
     -0.01386 |       0.00000 |       0.00927 |       0.00496 |       0.83925
---------------------------------
| EpLenMean       | 1.08e+03    |
| EpRewMean       | -4.82       |
| EpThisIter      | 2           |
| EpisodesSoFar   | 756         |
| TimeElapsed     | 624         |
| TimestepsSoFar  | 565248      |
| ev_tdlam_before | 0.674       |
| loss_ent        | 0.83924925  |
| loss_kl         | 0.004962662 |
| loss_pol_entpen | 0.0         |
| loss_pol_surr   | -0.01386252 |
| loss_vf_loss    | 0.009268626 |
---------------------------------
********** Iteration 138 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00085 |       0.00000 |       0.01465 |       0.00144 |       0.82141
     -0.00406 |       0.00000 |       0.01218 |       0.00298

********** Iteration 143 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00196 |       0.00000 |       0.01236 |       0.00110 |       0.79623
     -0.00258 |       0.00000 |       0.01077 |       0.00153 |       0.79000
     -0.00497 |       0.00000 |       0.01023 |       0.00224 |       0.78927
     -0.00617 |       0.00000 |       0.00954 |       0.00302 |       0.78536
     -0.00780 |       0.00000 |       0.00925 |       0.00305 |       0.78767
     -0.00775 |       0.00000 |       0.00920 |       0.00388 |       0.78816
     -0.00946 |       0.00000 |       0.00889 |       0.00351 |       0.78757
     -0.01013 |       0.00000 |       0.00880 |       0.00412 |       0.78743
     -0.01139 |       0.00000 |       0.00850 |       0.00383 |       0.78579
     -0.01113 |       0.00000 |       0.00838 |       0.00384 |       0.78576
Evaluating losses...
     -0.01343 |       0.00000 |       0.00812 |       0.00369 |      

     -0.01289 |       0.00000 |       0.00897 |       0.00460 |       0.79867
     -0.01355 |       0.00000 |       0.00882 |       0.00487 |       0.79974
Evaluating losses...
     -0.01579 |       0.00000 |       0.00860 |       0.00506 |       0.80068
----------------------------------
| EpLenMean       | 1.34e+03     |
| EpRewMean       | -4.77        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 779          |
| TimeElapsed     | 672          |
| TimestepsSoFar  | 610304       |
| ev_tdlam_before | 0.43         |
| loss_ent        | 0.80067736   |
| loss_kl         | 0.005062955  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015789501 |
| loss_vf_loss    | 0.008599863  |
----------------------------------
********** Iteration 149 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00184 |       0.00000 |       0.01437 |       0.00304 |       0.77428
     -0.00436 |       0.00000 |       0.01245 |

********** Iteration 154 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00075 |       0.00000 |       0.01040 |       0.00169 |       0.72404
     -0.00449 |       0.00000 |       0.00915 |       0.00351 |       0.71055
     -0.00723 |       0.00000 |       0.00864 |       0.00261 |       0.71669
     -0.00771 |       0.00000 |       0.00822 |       0.00376 |       0.71353
     -0.00904 |       0.00000 |       0.00807 |       0.00379 |       0.71135
     -0.01003 |       0.00000 |       0.00792 |       0.00395 |       0.71653
     -0.01167 |       0.00000 |       0.00759 |       0.00435 |       0.71264
     -0.01060 |       0.00000 |       0.00745 |       0.00481 |       0.71573
     -0.01236 |       0.00000 |       0.00750 |       0.00426 |       0.71385
     -0.01223 |       0.00000 |       0.00720 |       0.00445 |       0.71553
Evaluating losses...
     -0.01487 |       0.00000 |       0.00699 |       0.00418 |      

     -0.01350 |       0.00000 |       0.00653 |       0.00431 |       0.77100
     -0.01419 |       0.00000 |       0.00636 |       0.00570 |       0.77267
Evaluating losses...
     -0.01628 |       0.00000 |       0.00613 |       0.00582 |       0.77183
----------------------------------
| EpLenMean       | 1.58e+03     |
| EpRewMean       | -4.64        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 798          |
| TimeElapsed     | 708          |
| TimestepsSoFar  | 655360       |
| ev_tdlam_before | 0.56         |
| loss_ent        | 0.7718322    |
| loss_kl         | 0.005821909  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016284628 |
| loss_vf_loss    | 0.0061315238 |
----------------------------------
********** Iteration 160 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00070 |       0.00000 |       0.00973 |       0.00218 |       0.77391
     -0.00249 |       0.00000 |       0.00852 |

********** Iteration 165 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00113 |       0.00000 |       0.00883 |       0.00179 |       0.74131
     -0.00354 |       0.00000 |       0.00770 |       0.00310 |       0.74116
     -0.00468 |       0.00000 |       0.00712 |       0.00394 |       0.73658
     -0.00656 |       0.00000 |       0.00678 |       0.00429 |       0.73939
     -0.00775 |       0.00000 |       0.00648 |       0.00396 |       0.74022
     -0.00866 |       0.00000 |       0.00613 |       0.00390 |       0.73606
     -0.00992 |       0.00000 |       0.00600 |       0.00378 |       0.74251
     -0.01102 |       0.00000 |       0.00591 |       0.00491 |       0.73844
     -0.01154 |       0.00000 |       0.00570 |       0.00535 |       0.73898
     -0.01299 |       0.00000 |       0.00553 |       0.00503 |       0.73790
Evaluating losses...
     -0.01466 |       0.00000 |       0.00533 |       0.00576 |      

     -0.01068 |       0.00000 |       0.00382 |       0.00484 |       0.75185
     -0.01088 |       0.00000 |       0.00379 |       0.00593 |       0.74136
Evaluating losses...
     -0.01321 |       0.00000 |       0.00361 |       0.00485 |       0.74306
----------------------------------
| EpLenMean       | 1.86e+03     |
| EpRewMean       | -4.38        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 814          |
| TimeElapsed     | 750          |
| TimestepsSoFar  | 700416       |
| ev_tdlam_before | 0.583        |
| loss_ent        | 0.7430644    |
| loss_kl         | 0.004851033  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013211417 |
| loss_vf_loss    | 0.0036121416 |
----------------------------------
********** Iteration 171 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00104 |       0.00000 |       0.00803 |       0.00259 |       0.74140
     -0.00206 |       0.00000 |       0.00658 |

********** Iteration 176 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00050 |       0.00000 |       0.01051 |       0.00208 |       0.77956
     -0.00262 |       0.00000 |       0.00879 |       0.00362 |       0.77879
     -0.00593 |       0.00000 |       0.00804 |       0.00316 |       0.77091
     -0.00632 |       0.00000 |       0.00761 |       0.00351 |       0.77669
     -0.00747 |       0.00000 |       0.00736 |       0.00484 |       0.77606
     -0.00886 |       0.00000 |       0.00700 |       0.00463 |       0.77141
     -0.00953 |       0.00000 |       0.00679 |       0.00466 |       0.77696
     -0.01033 |       0.00000 |       0.00662 |       0.00410 |       0.77899
     -0.01123 |       0.00000 |       0.00641 |       0.00503 |       0.77248
     -0.01160 |       0.00000 |       0.00633 |       0.00472 |       0.77647
Evaluating losses...
     -0.01349 |       0.00000 |       0.00605 |       0.00408 |      

     -0.01289 |       0.00000 |       0.00061 |       0.00679 |       0.81361
     -0.01332 |       0.00000 |       0.00060 |       0.00722 |       0.81262
Evaluating losses...
     -0.01589 |       0.00000 |       0.00058 |       0.00818 |       0.81811
-----------------------------------
| EpLenMean       | 2.13e+03      |
| EpRewMean       | -4.08         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 829           |
| TimeElapsed     | 788           |
| TimestepsSoFar  | 745472        |
| ev_tdlam_before | -0.042        |
| loss_ent        | 0.81810504    |
| loss_kl         | 0.008177106   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.01588972   |
| loss_vf_loss    | 0.00057508354 |
-----------------------------------
********** Iteration 182 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00062 |       0.00000 |       0.00370 |       0.00358 |       0.81975
     -0.00488 |       0.00000 |  

********** Iteration 187 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00041 |       0.00000 |       0.00762 |       0.00329 |       0.82365
     -0.00260 |       0.00000 |       0.00671 |       0.00318 |       0.83255
     -0.00433 |       0.00000 |       0.00629 |       0.00455 |       0.83593
     -0.00682 |       0.00000 |       0.00603 |       0.00411 |       0.83757
     -0.00750 |       0.00000 |       0.00587 |       0.00476 |       0.83896
     -0.00975 |       0.00000 |       0.00572 |       0.00476 |       0.84099
     -0.01088 |       0.00000 |       0.00545 |       0.00534 |       0.84684
     -0.01195 |       0.00000 |       0.00548 |       0.00526 |       0.84608
     -0.01284 |       0.00000 |       0.00534 |       0.00522 |       0.84585
     -0.01315 |       0.00000 |       0.00520 |       0.00590 |       0.84806
Evaluating losses...
     -0.01248 |       0.00000 |       0.00519 |       0.00969 |      

     -0.01226 |       0.00000 |       0.00493 |       0.00441 |       0.82237
     -0.01142 |       0.00000 |       0.00476 |       0.00510 |       0.82715
Evaluating losses...
     -0.01356 |       0.00000 |       0.00454 |       0.00460 |       0.82195
----------------------------------
| EpLenMean       | 2.4e+03      |
| EpRewMean       | -3.54        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 844          |
| TimeElapsed     | 915          |
| TimestepsSoFar  | 790528       |
| ev_tdlam_before | 0.559        |
| loss_ent        | 0.8219477    |
| loss_kl         | 0.0045962883 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013563675 |
| loss_vf_loss    | 0.0045351973 |
----------------------------------
********** Iteration 193 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00023 |       0.00000 |       0.00297 |       0.00164 |       0.83666
     -0.00482 |       0.00000 |       0.00189 |

********** Iteration 198 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00137 |       0.00000 |       0.00857 |       0.00360 |       0.83912
     -0.00744 |       0.00000 |       0.00746 |       0.00419 |       0.83851
     -0.00991 |       0.00000 |       0.00672 |       0.00548 |       0.83559
     -0.01081 |       0.00000 |       0.00643 |       0.00672 |       0.84017
     -0.01212 |       0.00000 |       0.00621 |       0.00668 |       0.83734
     -0.01359 |       0.00000 |       0.00602 |       0.00785 |       0.84008
     -0.01448 |       0.00000 |       0.00596 |       0.00726 |       0.84076
     -0.01421 |       0.00000 |       0.00572 |       0.00790 |       0.84144
     -0.01574 |       0.00000 |       0.00564 |       0.00683 |       0.84344
     -0.01576 |       0.00000 |       0.00553 |       0.00761 |       0.84059
Evaluating losses...
     -0.01822 |       0.00000 |       0.00535 |       0.00811 |      

     -0.01076 |       0.00000 |       0.00271 |       0.00541 |       0.88947
     -0.01172 |       0.00000 |       0.00266 |       0.00495 |       0.88707
Evaluating losses...
     -0.01332 |       0.00000 |       0.00252 |       0.00478 |       0.88652
----------------------------------
| EpLenMean       | 2.65e+03     |
| EpRewMean       | -3.11        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 859          |
| TimeElapsed     | 959          |
| TimestepsSoFar  | 835584       |
| ev_tdlam_before | 0.469        |
| loss_ent        | 0.88652116   |
| loss_kl         | 0.0047809137 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013320658 |
| loss_vf_loss    | 0.0025166473 |
----------------------------------
********** Iteration 204 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00037 |       0.00000 |       0.00811 |       0.00244 |       0.83715
     -0.00288 |       0.00000 |       0.00650 |

********** Iteration 209 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00111 |       0.00000 |       0.00117 |       0.00198 |       0.84822
     -0.00428 |       0.00000 |       0.00085 |       0.00315 |       0.85178
     -0.00583 |       0.00000 |       0.00071 |       0.00343 |       0.85768
     -0.00748 |       0.00000 |       0.00062 |       0.00386 |       0.85903
     -0.00943 |       0.00000 |       0.00056 |       0.00458 |       0.85529
     -0.01069 |       0.00000 |       0.00052 |       0.00499 |       0.85469
     -0.01128 |       0.00000 |       0.00050 |       0.00491 |       0.85872
     -0.01238 |       0.00000 |       0.00047 |       0.00578 |       0.85640
     -0.01269 |       0.00000 |       0.00045 |       0.00602 |       0.86118
     -0.01412 |       0.00000 |       0.00042 |       0.00588 |       0.86106
Evaluating losses...
     -0.01547 |       0.00000 |       0.00040 |       0.00615 |      

     -0.01211 |       0.00000 |       0.00293 |       0.00602 |       0.81152
     -0.01219 |       0.00000 |       0.00285 |       0.00606 |       0.80884
Evaluating losses...
     -0.01409 |       0.00000 |       0.00276 |       0.00690 |       0.81025
----------------------------------
| EpLenMean       | 2.82e+03     |
| EpRewMean       | -2.63        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 874          |
| TimeElapsed     | 1e+03        |
| TimestepsSoFar  | 880640       |
| ev_tdlam_before | 0.403        |
| loss_ent        | 0.81024927   |
| loss_kl         | 0.0068973717 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014093197 |
| loss_vf_loss    | 0.002761383  |
----------------------------------
********** Iteration 215 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00275 |       0.00000 |       0.00662 |       0.00513 |       0.80825
     -0.00141 |       0.00000 |       0.00536 |

********** Iteration 220 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00114 |       0.00000 |       0.00853 |       0.00219 |       0.79068
     -0.00320 |       0.00000 |       0.00707 |       0.00369 |       0.79570
     -0.00531 |       0.00000 |       0.00639 |       0.00337 |       0.79728
     -0.00589 |       0.00000 |       0.00613 |       0.00378 |       0.79941
     -0.00658 |       0.00000 |       0.00568 |       0.00607 |       0.80471
     -0.00824 |       0.00000 |       0.00553 |       0.00520 |       0.80288
     -0.00792 |       0.00000 |       0.00541 |       0.00517 |       0.80355
     -0.00920 |       0.00000 |       0.00542 |       0.00535 |       0.80476
     -0.00918 |       0.00000 |       0.00511 |       0.00516 |       0.80479
     -0.01020 |       0.00000 |       0.00517 |       0.00547 |       0.80824
Evaluating losses...
     -0.01180 |       0.00000 |       0.00482 |       0.00511 |      

     -0.01547 |       0.00000 |       0.00351 |       0.00592 |       0.90697
     -0.01665 |       0.00000 |       0.00341 |       0.00568 |       0.90598
Evaluating losses...
     -0.01801 |       0.00000 |       0.00328 |       0.00613 |       0.90336
----------------------------------
| EpLenMean       | 2.9e+03      |
| EpRewMean       | -2.31        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 889          |
| TimeElapsed     | 1.04e+03     |
| TimestepsSoFar  | 925696       |
| ev_tdlam_before | 0.517        |
| loss_ent        | 0.9033645    |
| loss_kl         | 0.006132139  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018006047 |
| loss_vf_loss    | 0.0032797942 |
----------------------------------
********** Iteration 226 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00051 |       0.00000 |       0.00371 |       0.00261 |       0.90152
     -0.00292 |       0.00000 |       0.00328 |

********** Iteration 231 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00051 |       0.00000 |       0.00660 |       0.00275 |       0.92494
     -0.00554 |       0.00000 |       0.00498 |       0.00318 |       0.93163
     -0.00789 |       0.00000 |       0.00461 |       0.00422 |       0.93670
     -0.00929 |       0.00000 |       0.00419 |       0.00383 |       0.93326
     -0.01050 |       0.00000 |       0.00399 |       0.00444 |       0.93510
     -0.01113 |       0.00000 |       0.00372 |       0.00450 |       0.93586
     -0.01161 |       0.00000 |       0.00364 |       0.00592 |       0.93477
     -0.01274 |       0.00000 |       0.00355 |       0.00560 |       0.93510
     -0.01231 |       0.00000 |       0.00343 |       0.00705 |       0.92983
     -0.01382 |       0.00000 |       0.00342 |       0.00554 |       0.93100
Evaluating losses...
     -0.01470 |       0.00000 |       0.00317 |       0.00668 |      

     -0.00951 |       0.00000 |       0.00398 |       0.00574 |       0.89230
     -0.01179 |       0.00000 |       0.00391 |       0.00478 |       0.89929
Evaluating losses...
     -0.01206 |       0.00000 |       0.00371 |       0.00548 |       0.89412
-----------------------------------
| EpLenMean       | 3e+03         |
| EpRewMean       | -1.88         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 904           |
| TimeElapsed     | 1.08e+03      |
| TimestepsSoFar  | 970752        |
| ev_tdlam_before | 0.542         |
| loss_ent        | 0.8941221     |
| loss_kl         | 0.0054790787  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0120572345 |
| loss_vf_loss    | 0.0037146723  |
-----------------------------------
********** Iteration 237 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00053 |       0.00000 |       0.00588 |       0.00295 |       0.91891
     -0.00552 |       0.00000 |  

********** Iteration 242 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00011 |       0.00000 |       0.00288 |       0.00287 |       0.91917
     -0.00387 |       0.00000 |       0.00208 |       0.00337 |       0.91557
     -0.00625 |       0.00000 |       0.00183 |       0.00361 |       0.91570
     -0.00743 |       0.00000 |       0.00162 |       0.00411 |       0.91044
     -0.00851 |       0.00000 |       0.00150 |       0.00444 |       0.90931
     -0.00936 |       0.00000 |       0.00141 |       0.00486 |       0.90996
     -0.01020 |       0.00000 |       0.00135 |       0.00473 |       0.91060
     -0.01099 |       0.00000 |       0.00129 |       0.00504 |       0.91089
     -0.01098 |       0.00000 |       0.00125 |       0.00509 |       0.90943
     -0.01154 |       0.00000 |       0.00122 |       0.00528 |       0.90962
Evaluating losses...
     -0.01128 |       0.00000 |       0.00113 |       0.00825 |      

     -0.01436 |       0.00000 |       0.00383 |       0.00584 |       0.91858
     -0.01533 |       0.00000 |       0.00376 |       0.00572 |       0.91564
     -0.01579 |       0.00000 |       0.00368 |       0.00596 |       0.91531
     -0.01596 |       0.00000 |       0.00372 |       0.00666 |       0.91532
Evaluating losses...
     -0.01697 |       0.00000 |       0.00359 |       0.00656 |       0.91162
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -1.61        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 919          |
| TimeElapsed     | 1.27e+03     |
| TimestepsSoFar  | 1015808      |
| ev_tdlam_before | 0.461        |
| loss_ent        | 0.9116224    |
| loss_kl         | 0.0065586735 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016969869 |
| loss_vf_loss    | 0.0035881535 |
----------------------------------
********** Iteration 248 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |

********** Iteration 253 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00076 |       0.00000 |       0.00591 |       0.00276 |       0.91171
     -0.00491 |       0.00000 |       0.00485 |       0.00492 |       0.91032
     -0.00720 |       0.00000 |       0.00441 |       0.00498 |       0.90504
     -0.00729 |       0.00000 |       0.00418 |       0.00604 |       0.90640
     -0.00836 |       0.00000 |       0.00401 |       0.00547 |       0.90770
     -0.00936 |       0.00000 |       0.00384 |       0.00572 |       0.90819
     -0.00988 |       0.00000 |       0.00371 |       0.00617 |       0.91057
     -0.01023 |       0.00000 |       0.00361 |       0.00631 |       0.90758
     -0.01066 |       0.00000 |       0.00354 |       0.00647 |       0.90870
     -0.01057 |       0.00000 |       0.00347 |       0.00685 |       0.91402
Evaluating losses...
     -0.01192 |       0.00000 |       0.00331 |       0.00800 |      

     -0.00629 |       0.00000 |       0.00226 |       0.00397 |       0.88299
     -0.00626 |       0.00000 |       0.00215 |       0.00448 |       0.88532
Evaluating losses...
     -0.00731 |       0.00000 |       0.00205 |       0.00370 |       0.88225
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -1.45        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 934          |
| TimeElapsed     | 1.31e+03     |
| TimestepsSoFar  | 1060864      |
| ev_tdlam_before | 0.303        |
| loss_ent        | 0.8822528    |
| loss_kl         | 0.0036988102 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.007313541 |
| loss_vf_loss    | 0.0020526138 |
----------------------------------
********** Iteration 259 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00040 |       0.00000 |       0.00269 |       0.00902 |       0.94211
     -0.00413 |       0.00000 |       0.00222 |

********** Iteration 264 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00459 |       0.00000 |       0.00505 |       0.00384 |       0.87547
      0.00032 |       0.00000 |       0.00390 |       0.00841 |       0.85759
     -0.00142 |       0.00000 |       0.00357 |       0.00544 |       0.87077
     -0.00416 |       0.00000 |       0.00341 |       0.00482 |       0.86947
     -0.00643 |       0.00000 |       0.00331 |       0.00482 |       0.87149
     -0.00767 |       0.00000 |       0.00328 |       0.00353 |       0.88170
     -0.00818 |       0.00000 |       0.00315 |       0.00441 |       0.87798
     -0.00914 |       0.00000 |       0.00314 |       0.00458 |       0.88073
     -0.00978 |       0.00000 |       0.00308 |       0.00444 |       0.87960
     -0.00952 |       0.00000 |       0.00300 |       0.00547 |       0.87953
Evaluating losses...
     -0.01072 |       0.00000 |       0.00289 |       0.00568 |      

     -0.00940 |       0.00000 |       0.00045 |       0.00464 |       0.89327
     -0.00903 |       0.00000 |       0.00043 |       0.00456 |       0.89476
Evaluating losses...
     -0.01094 |       0.00000 |       0.00039 |       0.00467 |       0.89506
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -1.27         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 949           |
| TimeElapsed     | 1.35e+03      |
| TimestepsSoFar  | 1105920       |
| ev_tdlam_before | -0.2          |
| loss_ent        | 0.89505666    |
| loss_kl         | 0.0046725944  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0109427385 |
| loss_vf_loss    | 0.00039224722 |
-----------------------------------
********** Iteration 270 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00097 |       0.00000 |       0.00247 |       0.00420 |       0.83844
     -0.00249 |       0.00000 |  

********** Iteration 275 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00037 |       0.00000 |       0.00073 |       0.00271 |       0.92442
     -0.00377 |       0.00000 |       0.00043 |       0.00402 |       0.92316
     -0.00732 |       0.00000 |       0.00036 |       0.00424 |       0.92029
     -0.00746 |       0.00000 |       0.00033 |       0.00509 |       0.91986
     -0.00894 |       0.00000 |       0.00030 |       0.00462 |       0.92479
     -0.00978 |       0.00000 |       0.00028 |       0.00436 |       0.92468
     -0.01032 |       0.00000 |       0.00027 |       0.00436 |       0.92490
     -0.01109 |       0.00000 |       0.00025 |       0.00440 |       0.92631
     -0.01187 |       0.00000 |       0.00025 |       0.00470 |       0.92510
     -0.01203 |       0.00000 |       0.00024 |       0.00536 |       0.92748
Evaluating losses...
     -0.01431 |       0.00000 |       0.00023 |       0.00392 |      

     -0.01131 |       0.00000 |       0.00361 |       0.00641 |       0.89994
     -0.01260 |       0.00000 |       0.00348 |       0.00603 |       0.90539
Evaluating losses...
     -0.01294 |       0.00000 |       0.00332 |       0.00646 |       0.90698
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.98        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 964          |
| TimeElapsed     | 1.39e+03     |
| TimestepsSoFar  | 1150976      |
| ev_tdlam_before | 0.634        |
| loss_ent        | 0.90697694   |
| loss_kl         | 0.0064573986 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012940992 |
| loss_vf_loss    | 0.003322057  |
----------------------------------
********** Iteration 281 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00465 |       0.00000 |       0.00385 |       0.00497 |       0.95482
     -0.00092 |       0.00000 |       0.00320 |

********** Iteration 286 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00078 |       0.00000 |       0.00040 |       0.00223 |       0.88612
     -0.00596 |       0.00000 |       0.00027 |       0.00400 |       0.88820
     -0.00933 |       0.00000 |       0.00023 |       0.00407 |       0.88473
     -0.01087 |       0.00000 |       0.00020 |       0.00426 |       0.88396
     -0.01243 |       0.00000 |       0.00018 |       0.00397 |       0.88940
     -0.01347 |       0.00000 |       0.00017 |       0.00467 |       0.88982
     -0.01497 |       0.00000 |       0.00016 |       0.00446 |       0.88756
     -0.01544 |       0.00000 |       0.00015 |       0.00512 |       0.88858
     -0.01625 |       0.00000 |       0.00014 |       0.00525 |       0.89036
     -0.01715 |       0.00000 |       0.00013 |       0.00533 |       0.89105
Evaluating losses...
     -0.01915 |       0.00000 |       0.00013 |       0.00556 |      

     -0.00860 |       0.00000 |       0.00119 |       0.00563 |       0.92994
     -0.00910 |       0.00000 |       0.00117 |       0.00617 |       0.93158
Evaluating losses...
     -0.01006 |       0.00000 |       0.00114 |       0.00673 |       0.92504
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.87        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 979          |
| TimeElapsed     | 1.43e+03     |
| TimestepsSoFar  | 1196032      |
| ev_tdlam_before | 0.36         |
| loss_ent        | 0.9250413    |
| loss_kl         | 0.006725441  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010064719 |
| loss_vf_loss    | 0.0011425951 |
----------------------------------
********** Iteration 292 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00155 |       0.00000 |       0.00447 |       0.00535 |       0.95316
     -0.00477 |       0.00000 |       0.00346 |

********** Iteration 297 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00071 |       0.00000 |       0.00303 |       0.00370 |       0.92499
     -0.00471 |       0.00000 |       0.00226 |       0.00371 |       0.92085
     -0.00644 |       0.00000 |       0.00210 |       0.00449 |       0.92509
     -0.00675 |       0.00000 |       0.00201 |       0.00476 |       0.92925
     -0.00743 |       0.00000 |       0.00196 |       0.00413 |       0.93063
     -0.00651 |       0.00000 |       0.00192 |       0.00658 |       0.93208
     -0.00851 |       0.00000 |       0.00184 |       0.00470 |       0.93018
     -0.00855 |       0.00000 |       0.00183 |       0.00405 |       0.93755
     -0.00883 |       0.00000 |       0.00180 |       0.00428 |       0.93204
     -0.00990 |       0.00000 |       0.00177 |       0.00335 |       0.93357
Evaluating losses...
     -0.01130 |       0.00000 |       0.00171 |       0.00375 |      

     -0.01499 |       0.00000 |       0.00012 |       0.00530 |       0.98413
     -0.01517 |       0.00000 |       0.00011 |       0.00523 |       0.98107
Evaluating losses...
     -0.01845 |       0.00000 |       0.00011 |       0.00546 |       0.98201
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | -0.68         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 994           |
| TimeElapsed     | 1.47e+03      |
| TimestepsSoFar  | 1241088       |
| ev_tdlam_before | -2.92         |
| loss_ent        | 0.98200583    |
| loss_kl         | 0.0054618563  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.018452443  |
| loss_vf_loss    | 0.00011142315 |
-----------------------------------
********** Iteration 303 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00053 |       0.00000 |       0.00551 |       0.00271 |       0.92674
     -0.00454 |       0.00000 |  

********** Iteration 308 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00140 |       0.00000 |       0.00034 |       0.00204 |       0.97736
     -0.00243 |       0.00000 |       0.00026 |       0.00237 |       0.97285
     -0.00471 |       0.00000 |       0.00021 |       0.00266 |       0.96762
     -0.00644 |       0.00000 |       0.00018 |       0.00251 |       0.96693
     -0.00741 |       0.00000 |       0.00016 |       0.00352 |       0.96310
     -0.00837 |       0.00000 |       0.00014 |       0.00370 |       0.96416
     -0.00908 |       0.00000 |       0.00013 |       0.00389 |       0.96224
     -0.01016 |       0.00000 |       0.00012 |       0.00365 |       0.96749
     -0.01039 |       0.00000 |       0.00012 |       0.00399 |       0.97038
     -0.01128 |       0.00000 |       0.00011 |       0.00395 |       0.96623
Evaluating losses...
     -0.01275 |       0.00000 |       0.00011 |       0.00428 |      

     -0.00404 |       0.00000 |       0.00113 |       0.00349 |       0.93321
     -0.00461 |       0.00000 |       0.00111 |       0.00357 |       0.93176
     -0.00402 |       0.00000 |       0.00109 |       0.00404 |       0.93294
Evaluating losses...
     -0.00563 |       0.00000 |       0.00105 |       0.00378 |       0.93220
-----------------------------------
| EpLenMean       | 3.03e+03      |
| EpRewMean       | -0.5          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1009          |
| TimeElapsed     | 1.68e+03      |
| TimestepsSoFar  | 1286144       |
| ev_tdlam_before | 0.151         |
| loss_ent        | 0.9321971     |
| loss_kl         | 0.0037794993  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0056267604 |
| loss_vf_loss    | 0.0010505494  |
-----------------------------------
********** Iteration 314 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00021 |       0.00000 |  

********** Iteration 319 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00041 |       0.00000 |       0.00356 |       0.00125 |       0.91349
     -0.00110 |       0.00000 |       0.00283 |       0.00148 |       0.91502
     -0.00218 |       0.00000 |       0.00243 |       0.00183 |       0.91457
     -0.00317 |       0.00000 |       0.00226 |       0.00225 |       0.91495
     -0.00332 |       0.00000 |       0.00212 |       0.00289 |       0.91381
     -0.00340 |       0.00000 |       0.00203 |       0.00269 |       0.91419
     -0.00420 |       0.00000 |       0.00196 |       0.00282 |       0.91429
     -0.00420 |       0.00000 |       0.00196 |       0.00392 |       0.91439
     -0.00447 |       0.00000 |       0.00190 |       0.00348 |       0.91203
     -0.00503 |       0.00000 |       0.00186 |       0.00359 |       0.91532
Evaluating losses...
     -0.00608 |       0.00000 |       0.00182 |       0.00388 |      

     -0.01063 |       0.00000 |       0.00013 |       0.00487 |       0.97971
     -0.01141 |       0.00000 |       0.00012 |       0.00487 |       0.97878
Evaluating losses...
     -0.01300 |       0.00000 |       0.00011 |       0.00551 |       0.98303
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.36         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1024          |
| TimeElapsed     | 1.71e+03      |
| TimestepsSoFar  | 1331200       |
| ev_tdlam_before | -3.08         |
| loss_ent        | 0.9830331     |
| loss_kl         | 0.005512692   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0129994955 |
| loss_vf_loss    | 0.00011164323 |
-----------------------------------
********** Iteration 325 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00074 |       0.00000 |       0.00496 |       0.00149 |       0.88825
     -0.00295 |       0.00000 |  

********** Iteration 330 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00074 |       0.00000 |       0.00168 |       0.01518 |       0.95492
     -0.00483 |       0.00000 |       0.00137 |       0.00410 |       0.95473
     -0.00639 |       0.00000 |       0.00117 |       0.00292 |       0.95736
     -0.00678 |       0.00000 |       0.00104 |       0.00359 |       0.95721
     -0.00771 |       0.00000 |       0.00084 |       0.00240 |       0.95654
     -0.00811 |       0.00000 |       0.00077 |       0.00381 |       0.95813
     -0.00825 |       0.00000 |       0.00071 |       0.00278 |       0.95663
     -0.00862 |       0.00000 |       0.00067 |       0.00294 |       0.95549
     -0.00884 |       0.00000 |       0.00063 |       0.00325 |       0.95682
     -0.00893 |       0.00000 |       0.00061 |       0.00274 |       0.95353
Evaluating losses...
     -0.00965 |       0.00000 |       0.00055 |       0.00286 |      

     -0.00881 |       0.00000 |       0.00046 |       0.00505 |       0.94732
     -0.00990 |       0.00000 |       0.00044 |       0.00460 |       0.94745
Evaluating losses...
     -0.01180 |       0.00000 |       0.00043 |       0.00504 |       0.94530
-----------------------------------
| EpLenMean       | 3e+03         |
| EpRewMean       | -0.36         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1039          |
| TimeElapsed     | 1.75e+03      |
| TimestepsSoFar  | 1376256       |
| ev_tdlam_before | -0.267        |
| loss_ent        | 0.9452996     |
| loss_kl         | 0.0050368537  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.011798581  |
| loss_vf_loss    | 0.00043023474 |
-----------------------------------
********** Iteration 336 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00029 |       0.00000 |       0.00427 |       0.00157 |       0.97550
     -0.00499 |       0.00000 |  

********** Iteration 341 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00196 |       0.00000 |       0.00523 |       0.00667 |       0.91114
     -0.00459 |       0.00000 |       0.00425 |       0.00480 |       0.91133
     -0.00662 |       0.00000 |       0.00393 |       0.00279 |       0.90851
     -0.00650 |       0.00000 |       0.00365 |       0.00296 |       0.90843
     -0.00843 |       0.00000 |       0.00352 |       0.00308 |       0.91406
     -0.00905 |       0.00000 |       0.00341 |       0.00305 |       0.91170
     -0.00994 |       0.00000 |       0.00336 |       0.00319 |       0.91268
     -0.01024 |       0.00000 |       0.00326 |       0.00380 |       0.91275
     -0.01122 |       0.00000 |       0.00319 |       0.00352 |       0.91028
     -0.01192 |       0.00000 |       0.00317 |       0.00362 |       0.91149
Evaluating losses...
     -0.01273 |       0.00000 |       0.00302 |       0.00361 |      

     -0.01023 |       0.00000 |       0.00504 |       0.00698 |       0.88191
     -0.01077 |       0.00000 |       0.00500 |       0.00634 |       0.88465
Evaluating losses...
     -0.01214 |       0.00000 |       0.00482 |       0.00636 |       0.88312
----------------------------------
| EpLenMean       | 3e+03        |
| EpRewMean       | -0.39        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1054         |
| TimeElapsed     | 1.79e+03     |
| TimestepsSoFar  | 1421312      |
| ev_tdlam_before | 0.282        |
| loss_ent        | 0.88311595   |
| loss_kl         | 0.0063571623 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012136685 |
| loss_vf_loss    | 0.004818634  |
----------------------------------
********** Iteration 347 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00011 |       0.00000 |       0.00634 |       0.00211 |       0.89385
     -0.00397 |       0.00000 |       0.00522 |

********** Iteration 352 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00262 |       0.00000 |       0.00854 |       0.00399 |       0.91653
     -0.00421 |       0.00000 |       0.00690 |       0.00464 |       0.91748
     -0.00676 |       0.00000 |       0.00646 |       0.00418 |       0.91542
     -0.00741 |       0.00000 |       0.00609 |       0.00556 |       0.91849
     -0.01000 |       0.00000 |       0.00573 |       0.00401 |       0.91372
     -0.01150 |       0.00000 |       0.00556 |       0.00450 |       0.91401
     -0.01150 |       0.00000 |       0.00539 |       0.00445 |       0.91552
     -0.01241 |       0.00000 |       0.00522 |       0.00545 |       0.91847
     -0.01266 |       0.00000 |       0.00510 |       0.00523 |       0.91743
     -0.01384 |       0.00000 |       0.00501 |       0.00528 |       0.91768
Evaluating losses...
     -0.01386 |       0.00000 |       0.00474 |       0.00559 |      

     -0.01044 |       0.00000 |       0.00455 |       0.00349 |       0.88546
     -0.01041 |       0.00000 |       0.00434 |       0.00333 |       0.88595
     -0.01176 |       0.00000 |       0.00427 |       0.00366 |       0.88659
Evaluating losses...
     -0.01188 |       0.00000 |       0.00415 |       0.00504 |       0.88649
----------------------------------
| EpLenMean       | 3e+03        |
| EpRewMean       | -0.27        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1069         |
| TimeElapsed     | 1.83e+03     |
| TimestepsSoFar  | 1466368      |
| ev_tdlam_before | 0.333        |
| loss_ent        | 0.88649154   |
| loss_kl         | 0.0050388216 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.011884383 |
| loss_vf_loss    | 0.0041529355 |
----------------------------------
********** Iteration 358 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00054 |       0.00000 |       0.00128 |

********** Iteration 363 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00035 |       0.00000 |       0.00112 |       0.00121 |       0.94442
     -0.00354 |       0.00000 |       0.00069 |       0.00389 |       0.94474
     -0.00551 |       0.00000 |       0.00057 |       0.00288 |       0.94478
     -0.00718 |       0.00000 |       0.00054 |       0.00313 |       0.94529
     -0.00812 |       0.00000 |       0.00050 |       0.00422 |       0.94173
     -0.00926 |       0.00000 |       0.00049 |       0.00386 |       0.94463
     -0.01024 |       0.00000 |       0.00046 |       0.00399 |       0.94441
     -0.01112 |       0.00000 |       0.00044 |       0.00457 |       0.94482
     -0.01147 |       0.00000 |       0.00043 |       0.00426 |       0.94262
     -0.01159 |       0.00000 |       0.00042 |       0.00496 |       0.94480
Evaluating losses...
     -0.01358 |       0.00000 |       0.00041 |       0.00481 |      

     -0.00631 |       0.00000 |       0.00158 |       0.00543 |       0.89486
     -0.00751 |       0.00000 |       0.00155 |       0.00495 |       0.89541
     -0.00747 |       0.00000 |       0.00155 |       0.00499 |       0.89382
     -0.00806 |       0.00000 |       0.00150 |       0.00495 |       0.89489
Evaluating losses...
     -0.00822 |       0.00000 |       0.00142 |       0.00648 |       0.88994
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.27        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1083         |
| TimeElapsed     | 2.04e+03     |
| TimestepsSoFar  | 1511424      |
| ev_tdlam_before | -0.424       |
| loss_ent        | 0.88994175   |
| loss_kl         | 0.006477022  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008216324 |
| loss_vf_loss    | 0.0014223409 |
----------------------------------
********** Iteration 369 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |

********** Iteration 374 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00016 |       0.00000 |       0.00428 |       0.00340 |       0.89669
     -0.00321 |       0.00000 |       0.00359 |       0.00368 |       0.89529
     -0.00572 |       0.00000 |       0.00322 |       0.00227 |       0.88912
     -0.00689 |       0.00000 |       0.00295 |       0.00277 |       0.89033
     -0.00782 |       0.00000 |       0.00273 |       0.00295 |       0.88902
     -0.00865 |       0.00000 |       0.00257 |       0.00375 |       0.88976
     -0.00873 |       0.00000 |       0.00243 |       0.00425 |       0.88980
     -0.00940 |       0.00000 |       0.00234 |       0.00468 |       0.88929
     -0.01024 |       0.00000 |       0.00225 |       0.00440 |       0.89009
     -0.01120 |       0.00000 |       0.00216 |       0.00444 |       0.88943
Evaluating losses...
     -0.01170 |       0.00000 |       0.00203 |       0.00497 |      

     -0.00948 |       0.00000 |       0.00273 |       0.00405 |       0.91292
     -0.00932 |       0.00000 |       0.00269 |       0.00425 |       0.91108
     -0.01026 |       0.00000 |       0.00260 |       0.00440 |       0.91159
Evaluating losses...
     -0.01080 |       0.00000 |       0.00249 |       0.00578 |       0.91675
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.26        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1098         |
| TimeElapsed     | 2.07e+03     |
| TimestepsSoFar  | 1556480      |
| ev_tdlam_before | 0.4          |
| loss_ent        | 0.9167473    |
| loss_kl         | 0.0057806014 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010798924 |
| loss_vf_loss    | 0.0024926949 |
----------------------------------
********** Iteration 380 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00015 |       0.00000 |       0.00070 |

********** Iteration 385 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00070 |       0.00000 |       0.00163 |       0.00308 |       0.93032
     -0.00402 |       0.00000 |       0.00124 |       0.00317 |       0.92645
     -0.00523 |       0.00000 |       0.00116 |       0.00307 |       0.92367
     -0.00665 |       0.00000 |       0.00103 |       0.00282 |       0.92643
     -0.00771 |       0.00000 |       0.00099 |       0.00401 |       0.92638
     -0.00769 |       0.00000 |       0.00097 |       0.00414 |       0.92775
     -0.00856 |       0.00000 |       0.00093 |       0.00375 |       0.92951
     -0.00921 |       0.00000 |       0.00092 |       0.00374 |       0.92672
     -0.00939 |       0.00000 |       0.00094 |       0.00397 |       0.92889
     -0.01033 |       0.00000 |       0.00092 |       0.00412 |       0.92583
Evaluating losses...
     -0.00994 |       0.00000 |       0.00086 |       0.00638 |      

     -0.00652 |       0.00000 |       0.00154 |       0.00326 |       0.94979
     -0.00677 |       0.00000 |       0.00144 |       0.00349 |       0.95162
     -0.00714 |       0.00000 |       0.00144 |       0.00455 |       0.95247
Evaluating losses...
     -0.00800 |       0.00000 |       0.00134 |       0.00406 |       0.95009
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.2         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1113         |
| TimeElapsed     | 2.12e+03     |
| TimestepsSoFar  | 1601536      |
| ev_tdlam_before | 0.175        |
| loss_ent        | 0.9500928    |
| loss_kl         | 0.0040573534 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008002921 |
| loss_vf_loss    | 0.0013373755 |
----------------------------------
********** Iteration 391 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00137 |       0.00000 |       0.00344 |

********** Iteration 396 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00087 |       0.00000 |       0.00045 |       0.00136 |       0.89618
     -0.00297 |       0.00000 |       0.00022 |       0.00218 |       0.89699
     -0.00420 |       0.00000 |       0.00016 |       0.00289 |       0.89725
     -0.00714 |       0.00000 |       0.00014 |       0.00243 |       0.90176
     -0.00920 |       0.00000 |       0.00012 |       0.00320 |       0.90177
     -0.00990 |       0.00000 |       0.00012 |       0.00292 |       0.90254
     -0.01066 |       0.00000 |       0.00010 |       0.00314 |       0.90316
     -0.01113 |       0.00000 |      9.82e-05 |       0.00335 |       0.90358
     -0.01199 |       0.00000 |      9.15e-05 |       0.00367 |       0.90476
     -0.01244 |       0.00000 |      8.76e-05 |       0.00382 |       0.90448
Evaluating losses...
     -0.01366 |       0.00000 |      8.27e-05 |       0.00446 |      

     -0.00898 |       0.00000 |       0.00192 |       0.00370 |       0.91491
     -0.00933 |       0.00000 |       0.00186 |       0.00388 |       0.91400
Evaluating losses...
     -0.01029 |       0.00000 |       0.00175 |       0.00355 |       0.91659
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.27         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1128          |
| TimeElapsed     | 2.15e+03      |
| TimestepsSoFar  | 1646592       |
| ev_tdlam_before | 0.252         |
| loss_ent        | 0.91658956    |
| loss_kl         | 0.0035546753  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0102938535 |
| loss_vf_loss    | 0.0017518428  |
-----------------------------------
********** Iteration 402 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00347 |       0.00000 |       0.00563 |       0.00415 |       0.86166
     -0.00696 |       0.00000 |  

********** Iteration 407 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00167 |       0.00000 |       0.00525 |       0.00370 |       0.91473
     -0.00439 |       0.00000 |       0.00414 |       0.00298 |       0.91809
     -0.00614 |       0.00000 |       0.00379 |       0.00393 |       0.92064
     -0.00724 |       0.00000 |       0.00363 |       0.00444 |       0.92160
     -0.00849 |       0.00000 |       0.00354 |       0.00391 |       0.92257
     -0.01001 |       0.00000 |       0.00343 |       0.00435 |       0.92131
     -0.01000 |       0.00000 |       0.00335 |       0.00464 |       0.92603
     -0.01002 |       0.00000 |       0.00325 |       0.00577 |       0.92257
     -0.01087 |       0.00000 |       0.00318 |       0.00491 |       0.92536
     -0.01252 |       0.00000 |       0.00314 |       0.00478 |       0.92555
Evaluating losses...
     -0.01262 |       0.00000 |       0.00300 |       0.00611 |      

     -0.00863 |       0.00000 |       0.00060 |       0.00394 |       0.96598
     -0.00945 |       0.00000 |       0.00058 |       0.00316 |       0.96519
Evaluating losses...
     -0.01024 |       0.00000 |       0.00052 |       0.00342 |       0.96520
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | -0.32         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1143          |
| TimeElapsed     | 2.19e+03      |
| TimestepsSoFar  | 1691648       |
| ev_tdlam_before | 0.764         |
| loss_ent        | 0.9652017     |
| loss_kl         | 0.003419214   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.010236289  |
| loss_vf_loss    | 0.00051628024 |
-----------------------------------
********** Iteration 413 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00042 |       0.00000 |       0.00043 |       0.00195 |       0.94962
     -0.00301 |       0.00000 |  

********** Iteration 418 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00022 |       0.00000 |       0.00098 |       0.00250 |       0.94638
     -0.00311 |       0.00000 |       0.00070 |       0.00283 |       0.95278
     -0.00482 |       0.00000 |       0.00065 |       0.00276 |       0.95181
     -0.00550 |       0.00000 |       0.00062 |       0.00345 |       0.95524
     -0.00625 |       0.00000 |       0.00059 |       0.00277 |       0.95431
     -0.00684 |       0.00000 |       0.00056 |       0.00323 |       0.95667
     -0.00762 |       0.00000 |       0.00055 |       0.00349 |       0.95744
     -0.00779 |       0.00000 |       0.00056 |       0.00363 |       0.95730
     -0.00852 |       0.00000 |       0.00053 |       0.00392 |       0.95514
     -0.00896 |       0.00000 |       0.00054 |       0.00355 |       0.95659
Evaluating losses...
     -0.00987 |       0.00000 |       0.00050 |       0.00449 |      

     -0.01332 |       0.00000 |       0.00323 |       0.00496 |       0.92803
     -0.01443 |       0.00000 |       0.00315 |       0.00506 |       0.92763
Evaluating losses...
     -0.01518 |       0.00000 |       0.00298 |       0.00464 |       0.92681
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.3         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1158         |
| TimeElapsed     | 2.23e+03     |
| TimestepsSoFar  | 1736704      |
| ev_tdlam_before | 0.221        |
| loss_ent        | 0.9268092    |
| loss_kl         | 0.004642306  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015175448 |
| loss_vf_loss    | 0.0029823477 |
----------------------------------
********** Iteration 424 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00016 |       0.00000 |       0.00045 |       0.00216 |       0.93163
     -0.00281 |       0.00000 |       0.00030 |

********** Iteration 429 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     2.10e-05 |       0.00000 |       0.00193 |       0.00087 |       0.91982
     -0.00176 |       0.00000 |       0.00154 |       0.00230 |       0.91742
     -0.00245 |       0.00000 |       0.00145 |       0.00232 |       0.91560
     -0.00320 |       0.00000 |       0.00137 |       0.00248 |       0.91529
     -0.00369 |       0.00000 |       0.00134 |       0.00260 |       0.91568
     -0.00395 |       0.00000 |       0.00133 |       0.00241 |       0.91502
     -0.00463 |       0.00000 |       0.00132 |       0.00283 |       0.91327
     -0.00468 |       0.00000 |       0.00128 |       0.00353 |       0.91464
     -0.00494 |       0.00000 |       0.00127 |       0.00336 |       0.91276
     -0.00549 |       0.00000 |       0.00125 |       0.00336 |       0.91448
Evaluating losses...
     -0.00612 |       0.00000 |       0.00125 |       0.00338 |      

     -0.01036 |       0.00000 |       0.00150 |       0.00398 |       0.96245
     -0.01005 |       0.00000 |       0.00152 |       0.00442 |       0.96289
     -0.01118 |       0.00000 |       0.00152 |       0.00394 |       0.96467
Evaluating losses...
     -0.01217 |       0.00000 |       0.00159 |       0.00422 |       0.96255
----------------------------------
| EpLenMean       | 3.03e+03     |
| EpRewMean       | -0.34        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1173         |
| TimeElapsed     | 2.44e+03     |
| TimestepsSoFar  | 1781760      |
| ev_tdlam_before | 0.399        |
| loss_ent        | 0.9625545    |
| loss_kl         | 0.004219005  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012172562 |
| loss_vf_loss    | 0.00158627   |
----------------------------------
********** Iteration 435 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00304 |       0.00000 |       0.00163 |

********** Iteration 440 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00024 |       0.00000 |       0.00045 |       0.00231 |       0.84977
     -0.00440 |       0.00000 |       0.00025 |       0.00280 |       0.84949
     -0.00571 |       0.00000 |       0.00021 |       0.00328 |       0.85088
     -0.00711 |       0.00000 |       0.00018 |       0.00332 |       0.84996
     -0.00822 |       0.00000 |       0.00016 |       0.00379 |       0.85078
     -0.00856 |       0.00000 |       0.00015 |       0.00409 |       0.84803
     -0.01020 |       0.00000 |       0.00014 |       0.00398 |       0.84962
     -0.01103 |       0.00000 |       0.00013 |       0.00407 |       0.84990
     -0.01129 |       0.00000 |       0.00012 |       0.00431 |       0.85052
     -0.01170 |       0.00000 |       0.00012 |       0.00438 |       0.85042
Evaluating losses...
     -0.01295 |       0.00000 |       0.00011 |       0.00506 |      

     -0.00888 |       0.00000 |       0.00156 |       0.00442 |       0.84850
     -0.00995 |       0.00000 |       0.00151 |       0.00311 |       0.84677
     -0.01016 |       0.00000 |       0.00150 |       0.00399 |       0.84930
Evaluating losses...
     -0.01022 |       0.00000 |       0.00139 |       0.00482 |       0.85141
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.28        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1188         |
| TimeElapsed     | 2.48e+03     |
| TimestepsSoFar  | 1826816      |
| ev_tdlam_before | -0.162       |
| loss_ent        | 0.8514051    |
| loss_kl         | 0.0048173848 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01022472  |
| loss_vf_loss    | 0.0013902483 |
----------------------------------
********** Iteration 446 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00108 |       0.00000 |       0.00538 |

********** Iteration 451 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00138 |       0.00000 |      9.80e-05 |       0.00160 |       0.85454
     -0.00195 |       0.00000 |      7.63e-05 |       0.00179 |       0.85108
     -0.00446 |       0.00000 |      6.98e-05 |       0.00224 |       0.85232
     -0.00659 |       0.00000 |      6.37e-05 |       0.00213 |       0.85339
     -0.00758 |       0.00000 |      5.92e-05 |       0.00282 |       0.85284
     -0.00758 |       0.00000 |      5.60e-05 |       0.00267 |       0.85373
     -0.00940 |       0.00000 |      5.19e-05 |       0.00273 |       0.85344
     -0.01018 |       0.00000 |      4.97e-05 |       0.00270 |       0.85414
     -0.01140 |       0.00000 |      4.78e-05 |       0.00313 |       0.85336
     -0.01180 |       0.00000 |      4.59e-05 |       0.00324 |       0.85468
Evaluating losses...
     -0.01344 |       0.00000 |      4.19e-05 |       0.00299 |      

     -0.00882 |       0.00000 |       0.00136 |       0.00324 |       0.89623
     -0.00942 |       0.00000 |       0.00129 |       0.00366 |       0.89465
     -0.00919 |       0.00000 |       0.00124 |       0.00374 |       0.89318
Evaluating losses...
     -0.01091 |       0.00000 |       0.00118 |       0.00299 |       0.89476
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.26         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1203          |
| TimeElapsed     | 2.52e+03      |
| TimestepsSoFar  | 1871872       |
| ev_tdlam_before | 0.553         |
| loss_ent        | 0.89475846    |
| loss_kl         | 0.0029910447  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0109137455 |
| loss_vf_loss    | 0.0011815999  |
-----------------------------------
********** Iteration 457 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00025 |       0.00000 |  

********** Iteration 462 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00158 |       0.00000 |       0.00059 |       0.00135 |       0.83725
     -0.00097 |       0.00000 |       0.00040 |       0.00166 |       0.83574
     -0.00280 |       0.00000 |       0.00035 |       0.00181 |       0.83629
     -0.00513 |       0.00000 |       0.00031 |       0.00167 |       0.83572
     -0.00643 |       0.00000 |       0.00029 |       0.00230 |       0.83834
     -0.00684 |       0.00000 |       0.00027 |       0.00262 |       0.83775
     -0.00809 |       0.00000 |       0.00025 |       0.00264 |       0.83727
     -0.00920 |       0.00000 |       0.00024 |       0.00271 |       0.83773
     -0.00935 |       0.00000 |       0.00023 |       0.00313 |       0.83928
     -0.01013 |       0.00000 |       0.00022 |       0.00307 |       0.83867
Evaluating losses...
     -0.01212 |       0.00000 |       0.00021 |       0.00333 |      

     -0.00834 |       0.00000 |      7.02e-05 |       0.00327 |       0.85218
     -0.00883 |       0.00000 |      6.72e-05 |       0.00306 |       0.85418
     -0.00833 |       0.00000 |      6.63e-05 |       0.00345 |       0.85323
Evaluating losses...
     -0.00954 |       0.00000 |      6.47e-05 |       0.00303 |       0.85296
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.19         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1218          |
| TimeElapsed     | 2.56e+03      |
| TimestepsSoFar  | 1916928       |
| ev_tdlam_before | -1.13         |
| loss_ent        | 0.8529647     |
| loss_kl         | 0.0030279022  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0095428275 |
| loss_vf_loss    | 6.46965e-05   |
-----------------------------------
********** Iteration 468 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00051 |       0.00000 |  

********** Iteration 473 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -7.89e-05 |       0.00000 |       0.00323 |       0.00251 |       0.90493
     -0.00546 |       0.00000 |       0.00249 |       0.00305 |       0.90252
     -0.00700 |       0.00000 |       0.00221 |       0.00304 |       0.90148
     -0.00820 |       0.00000 |       0.00204 |       0.00276 |       0.90146
     -0.00931 |       0.00000 |       0.00190 |       0.00294 |       0.90174
     -0.01032 |       0.00000 |       0.00189 |       0.00284 |       0.89977
     -0.01035 |       0.00000 |       0.00180 |       0.00291 |       0.90055
     -0.01133 |       0.00000 |       0.00174 |       0.00323 |       0.89902
     -0.01137 |       0.00000 |       0.00172 |       0.00388 |       0.89894
     -0.01168 |       0.00000 |       0.00167 |       0.00395 |       0.90104
Evaluating losses...
     -0.01267 |       0.00000 |       0.00166 |       0.00395 |      

     -0.00884 |       0.00000 |       0.00257 |       0.00248 |       0.89612
     -0.00904 |       0.00000 |       0.00249 |       0.00303 |       0.89460
     -0.00954 |       0.00000 |       0.00240 |       0.00306 |       0.89612
Evaluating losses...
     -0.01068 |       0.00000 |       0.00226 |       0.00293 |       0.89749
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.14        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1233         |
| TimeElapsed     | 2.6e+03      |
| TimestepsSoFar  | 1961984      |
| ev_tdlam_before | 0.148        |
| loss_ent        | 0.8974916    |
| loss_kl         | 0.0029332456 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010680863 |
| loss_vf_loss    | 0.0022599306 |
----------------------------------
********** Iteration 479 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00016 |       0.00000 |       0.00288 |

********** Iteration 484 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00087 |       0.00000 |       0.00176 |       0.00126 |       0.87596
     -0.00406 |       0.00000 |       0.00164 |       0.00228 |       0.87111
     -0.00585 |       0.00000 |       0.00153 |       0.00270 |       0.87057
     -0.00605 |       0.00000 |       0.00147 |       0.00360 |       0.86820
     -0.00767 |       0.00000 |       0.00143 |       0.00305 |       0.86768
     -0.00877 |       0.00000 |       0.00144 |       0.00301 |       0.87019
     -0.00898 |       0.00000 |       0.00140 |       0.00338 |       0.86910
     -0.00998 |       0.00000 |       0.00136 |       0.00371 |       0.87069
     -0.01001 |       0.00000 |       0.00134 |       0.00360 |       0.87014
     -0.01071 |       0.00000 |       0.00133 |       0.00410 |       0.87060
Evaluating losses...
     -0.01012 |       0.00000 |       0.00126 |       0.00367 |      

     -0.00774 |       0.00000 |       0.00256 |       0.00333 |       0.83682
     -0.00880 |       0.00000 |       0.00256 |       0.00337 |       0.83906
     -0.00950 |       0.00000 |       0.00248 |       0.00306 |       0.83848
     -0.01002 |       0.00000 |       0.00245 |       0.00343 |       0.83842
Evaluating losses...
     -0.00975 |       0.00000 |       0.00235 |       0.00499 |       0.83776
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.11        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1248         |
| TimeElapsed     | 2.84e+03     |
| TimestepsSoFar  | 2007040      |
| ev_tdlam_before | 0.664        |
| loss_ent        | 0.8377599    |
| loss_kl         | 0.0049860016 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009748218 |
| loss_vf_loss    | 0.002352648  |
----------------------------------
********** Iteration 490 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |

********** Iteration 495 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00020 |       0.00000 |       0.00367 |       0.00082 |       0.87004
     -0.00313 |       0.00000 |       0.00278 |       0.00176 |       0.86727
     -0.00377 |       0.00000 |       0.00238 |       0.00180 |       0.86563
     -0.00446 |       0.00000 |       0.00214 |       0.00183 |       0.86627
     -0.00459 |       0.00000 |       0.00192 |       0.00280 |       0.86645
     -0.00579 |       0.00000 |       0.00179 |       0.00187 |       0.86596
     -0.00581 |       0.00000 |       0.00166 |       0.00186 |       0.86690
     -0.00623 |       0.00000 |       0.00162 |       0.00221 |       0.86736
     -0.00662 |       0.00000 |       0.00159 |       0.00216 |       0.86834
     -0.00714 |       0.00000 |       0.00148 |       0.00227 |       0.86803
Evaluating losses...
     -0.00764 |       0.00000 |       0.00140 |       0.00241 |      

     -0.00773 |       0.00000 |       0.00058 |       0.00320 |       0.85703
     -0.00849 |       0.00000 |       0.00055 |       0.00327 |       0.85819
     -0.00905 |       0.00000 |       0.00055 |       0.00287 |       0.85573
Evaluating losses...
     -0.01001 |       0.00000 |       0.00051 |       0.00271 |       0.85599
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | -0.06         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1263          |
| TimeElapsed     | 2.89e+03      |
| TimestepsSoFar  | 2052096       |
| ev_tdlam_before | 0.338         |
| loss_ent        | 0.8559869     |
| loss_kl         | 0.0027141823  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.010006923  |
| loss_vf_loss    | 0.00051041663 |
-----------------------------------
********** Iteration 501 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00030 |       0.00000 |  

********** Iteration 506 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00123 |       0.00000 |       0.00446 |       0.00365 |       0.86650
     -0.00324 |       0.00000 |       0.00378 |       0.00171 |       0.86583
     -0.00499 |       0.00000 |       0.00351 |       0.00197 |       0.86785
     -0.00609 |       0.00000 |       0.00332 |       0.00202 |       0.86753
     -0.00692 |       0.00000 |       0.00319 |       0.00234 |       0.86894
     -0.00763 |       0.00000 |       0.00314 |       0.00232 |       0.86791
     -0.00855 |       0.00000 |       0.00300 |       0.00255 |       0.87187
     -0.00858 |       0.00000 |       0.00290 |       0.00267 |       0.86963
     -0.00896 |       0.00000 |       0.00283 |       0.00293 |       0.86859
     -0.00901 |       0.00000 |       0.00279 |       0.00341 |       0.87048
Evaluating losses...
     -0.01038 |       0.00000 |       0.00266 |       0.00357 |      

     -0.00839 |       0.00000 |       0.00231 |       0.00352 |       0.88758
     -0.00866 |       0.00000 |       0.00225 |       0.00343 |       0.88714
Evaluating losses...
     -0.01023 |       0.00000 |       0.00216 |       0.00322 |       0.88706
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.13        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1278         |
| TimeElapsed     | 2.93e+03     |
| TimestepsSoFar  | 2097152      |
| ev_tdlam_before | 0.693        |
| loss_ent        | 0.8870608    |
| loss_kl         | 0.0032156268 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010228154 |
| loss_vf_loss    | 0.0021572725 |
----------------------------------
********** Iteration 512 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00121 |       0.00000 |       0.00088 |       0.00091 |       0.88756
     -0.00307 |       0.00000 |       0.00049 |

********** Iteration 517 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00047 |       0.00000 |       0.00277 |       0.00126 |       0.86812
     -0.00244 |       0.00000 |       0.00246 |       0.00190 |       0.86515
     -0.00388 |       0.00000 |       0.00236 |       0.00254 |       0.86413
     -0.00496 |       0.00000 |       0.00229 |       0.00333 |       0.86518
     -0.00630 |       0.00000 |       0.00222 |       0.00304 |       0.86515
     -0.00620 |       0.00000 |       0.00224 |       0.00318 |       0.87012
     -0.00700 |       0.00000 |       0.00219 |       0.00343 |       0.86768
     -0.00733 |       0.00000 |       0.00217 |       0.00368 |       0.87081
     -0.00797 |       0.00000 |       0.00214 |       0.00369 |       0.86971
     -0.00791 |       0.00000 |       0.00210 |       0.00496 |       0.86955
Evaluating losses...
     -0.00904 |       0.00000 |       0.00203 |       0.00402 |      

     -0.00934 |       0.00000 |       0.00032 |       0.00317 |       0.86449
     -0.00963 |       0.00000 |       0.00031 |       0.00351 |       0.86425
Evaluating losses...
     -0.01089 |       0.00000 |       0.00030 |       0.00347 |       0.86429
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.21         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1293          |
| TimeElapsed     | 2.97e+03      |
| TimestepsSoFar  | 2142208       |
| ev_tdlam_before | -1.86         |
| loss_ent        | 0.86429244    |
| loss_kl         | 0.0034723342  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0108884    |
| loss_vf_loss    | 0.00029533927 |
-----------------------------------
********** Iteration 523 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00101 |       0.00000 |       0.00187 |       0.00117 |       0.91537
     -0.00306 |       0.00000 |  

********** Iteration 528 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00113 |       0.00000 |       0.00017 |       0.00237 |       0.92155
     -0.00526 |       0.00000 |       0.00012 |       0.00259 |       0.92114
     -0.00868 |       0.00000 |       0.00010 |       0.00253 |       0.92036
     -0.00997 |       0.00000 |      9.32e-05 |       0.00278 |       0.91996
     -0.01047 |       0.00000 |      8.57e-05 |       0.00312 |       0.91955
     -0.01136 |       0.00000 |      7.86e-05 |       0.00292 |       0.92132
     -0.01189 |       0.00000 |      7.37e-05 |       0.00287 |       0.91945
     -0.01312 |       0.00000 |      6.91e-05 |       0.00287 |       0.91786
     -0.01325 |       0.00000 |      6.68e-05 |       0.00329 |       0.91809
     -0.01388 |       0.00000 |      6.27e-05 |       0.00300 |       0.91701
Evaluating losses...
     -0.01560 |       0.00000 |      7.26e-05 |       0.00325 |      

     -0.01040 |       0.00000 |      8.59e-05 |       0.00328 |       0.82990
     -0.01037 |       0.00000 |      7.98e-05 |       0.00334 |       0.83185
     -0.01159 |       0.00000 |      7.55e-05 |       0.00336 |       0.83227
Evaluating losses...
     -0.01261 |       0.00000 |      7.58e-05 |       0.00385 |       0.83170
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.31        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1308         |
| TimeElapsed     | 3.01e+03     |
| TimestepsSoFar  | 2187264      |
| ev_tdlam_before | -1.81        |
| loss_ent        | 0.83170205   |
| loss_kl         | 0.003851711  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012613836 |
| loss_vf_loss    | 7.577652e-05 |
----------------------------------
********** Iteration 534 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00256 |       0.00000 |       0.00178 |

********** Iteration 539 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00154 |       0.00000 |       0.00483 |       0.00157 |       0.89248
     -0.00328 |       0.00000 |       0.00435 |       0.00177 |       0.89315
     -0.00568 |       0.00000 |       0.00405 |       0.00197 |       0.89249
     -0.00666 |       0.00000 |       0.00392 |       0.00257 |       0.89197
     -0.00730 |       0.00000 |       0.00382 |       0.00282 |       0.89361
     -0.00869 |       0.00000 |       0.00374 |       0.00282 |       0.89359
     -0.00986 |       0.00000 |       0.00363 |       0.00244 |       0.89365
     -0.00972 |       0.00000 |       0.00361 |       0.00298 |       0.89399
     -0.01056 |       0.00000 |       0.00354 |       0.00302 |       0.89458
     -0.01003 |       0.00000 |       0.00348 |       0.00353 |       0.89132
Evaluating losses...
     -0.01270 |       0.00000 |       0.00336 |       0.00335 |      

     -0.00779 |       0.00000 |       0.00182 |       0.00269 |       0.90018
     -0.00860 |       0.00000 |       0.00177 |       0.00306 |       0.90196
     -0.00901 |       0.00000 |       0.00173 |       0.00302 |       0.90352
Evaluating losses...
     -0.00973 |       0.00000 |       0.00166 |       0.00311 |       0.90236
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.34        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1323         |
| TimeElapsed     | 3.06e+03     |
| TimestepsSoFar  | 2232320      |
| ev_tdlam_before | 0.604        |
| loss_ent        | 0.90235525   |
| loss_kl         | 0.0031074663 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009732949 |
| loss_vf_loss    | 0.0016550313 |
----------------------------------
********** Iteration 545 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00083 |       0.00000 |       0.00050 |

********** Iteration 550 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00124 |       0.00000 |       0.00043 |       0.00135 |       0.91340
     -0.00183 |       0.00000 |       0.00026 |       0.00225 |       0.91401
     -0.00322 |       0.00000 |       0.00021 |       0.00239 |       0.91274
     -0.00483 |       0.00000 |       0.00017 |       0.00260 |       0.91388
     -0.00579 |       0.00000 |       0.00015 |       0.00300 |       0.91292
     -0.00693 |       0.00000 |       0.00014 |       0.00251 |       0.91300
     -0.00730 |       0.00000 |       0.00013 |       0.00295 |       0.91455
     -0.00672 |       0.00000 |       0.00012 |       0.00343 |       0.91506
     -0.00786 |       0.00000 |       0.00012 |       0.00339 |       0.91812
     -0.00841 |       0.00000 |       0.00011 |       0.00335 |       0.91564
Evaluating losses...
     -0.00997 |       0.00000 |       0.00011 |       0.00334 |      

     -0.00848 |       0.00000 |       0.00194 |       0.00280 |       0.89406
     -0.00933 |       0.00000 |       0.00183 |       0.00269 |       0.89299
     -0.00943 |       0.00000 |       0.00176 |       0.00304 |       0.89364
Evaluating losses...
     -0.01066 |       0.00000 |       0.00167 |       0.00303 |       0.89339
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.4         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1338         |
| TimeElapsed     | 3.32e+03     |
| TimestepsSoFar  | 2277376      |
| ev_tdlam_before | 0.315        |
| loss_ent        | 0.8933872    |
| loss_kl         | 0.0030328052 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010657509 |
| loss_vf_loss    | 0.0016701595 |
----------------------------------
********** Iteration 556 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00064 |       0.00000 |       0.00360 |

********** Iteration 561 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00199 |       0.00000 |       0.00183 |       0.00092 |       0.86494
     -0.00201 |       0.00000 |       0.00137 |       0.00206 |       0.86424
     -0.00375 |       0.00000 |       0.00117 |       0.00165 |       0.86487
     -0.00508 |       0.00000 |       0.00106 |       0.00165 |       0.86593
     -0.00517 |       0.00000 |       0.00096 |       0.00209 |       0.86468
     -0.00641 |       0.00000 |       0.00092 |       0.00228 |       0.86364
     -0.00663 |       0.00000 |       0.00086 |       0.00244 |       0.86516
     -0.00599 |       0.00000 |       0.00085 |       0.00275 |       0.86327
     -0.00765 |       0.00000 |       0.00084 |       0.00245 |       0.86393
     -0.00786 |       0.00000 |       0.00078 |       0.00252 |       0.86336
Evaluating losses...
     -0.00902 |       0.00000 |       0.00074 |       0.00317 |      

     -0.00592 |       0.00000 |       0.00128 |       0.00218 |       0.94404
     -0.00582 |       0.00000 |       0.00123 |       0.00256 |       0.94506
     -0.00611 |       0.00000 |       0.00125 |       0.00261 |       0.94665
Evaluating losses...
     -0.00729 |       0.00000 |       0.00115 |       0.00240 |       0.94570
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.29         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1353          |
| TimeElapsed     | 3.36e+03      |
| TimestepsSoFar  | 2322432       |
| ev_tdlam_before | 0.452         |
| loss_ent        | 0.94570017    |
| loss_kl         | 0.002403709   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0072915014 |
| loss_vf_loss    | 0.0011467746  |
-----------------------------------
********** Iteration 567 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00108 |       0.00000 |  

********** Iteration 572 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00112 |       0.00000 |       0.00096 |       0.00405 |       0.94913
     -0.00335 |       0.00000 |       0.00064 |       0.00333 |       0.95145
     -0.00498 |       0.00000 |       0.00053 |       0.00171 |       0.95308
     -0.00584 |       0.00000 |       0.00048 |       0.00202 |       0.95560
     -0.00699 |       0.00000 |       0.00046 |       0.00218 |       0.95598
     -0.00760 |       0.00000 |       0.00044 |       0.00216 |       0.95798
     -0.00767 |       0.00000 |       0.00042 |       0.00240 |       0.95802
     -0.00801 |       0.00000 |       0.00042 |       0.00233 |       0.95876
     -0.00859 |       0.00000 |       0.00040 |       0.00237 |       0.95930
     -0.00859 |       0.00000 |       0.00037 |       0.00235 |       0.95942
Evaluating losses...
     -0.00949 |       0.00000 |       0.00036 |       0.00267 |      

     -0.00798 |       0.00000 |       0.00057 |       0.00231 |       0.95395
     -0.00839 |       0.00000 |       0.00054 |       0.00229 |       0.95254
     -0.00862 |       0.00000 |       0.00053 |       0.00253 |       0.95350
Evaluating losses...
     -0.01050 |       0.00000 |       0.00050 |       0.00238 |       0.95231
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.21        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1368         |
| TimeElapsed     | 3.41e+03     |
| TimestepsSoFar  | 2367488      |
| ev_tdlam_before | 0.524        |
| loss_ent        | 0.95230997   |
| loss_kl         | 0.0023756803 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010496303 |
| loss_vf_loss    | 0.0004994885 |
----------------------------------
********** Iteration 578 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00060 |       0.00000 |       0.00364 |

********** Iteration 583 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00023 |       0.00000 |       0.00132 |       0.00104 |       0.97050
     -0.00288 |       0.00000 |       0.00074 |       0.00138 |       0.96949
     -0.00365 |       0.00000 |       0.00057 |       0.00159 |       0.96740
     -0.00482 |       0.00000 |       0.00047 |       0.00172 |       0.96727
     -0.00565 |       0.00000 |       0.00043 |       0.00222 |       0.96824
     -0.00643 |       0.00000 |       0.00039 |       0.00228 |       0.96873
     -0.00668 |       0.00000 |       0.00036 |       0.00234 |       0.96952
     -0.00737 |       0.00000 |       0.00034 |       0.00247 |       0.97228
     -0.00782 |       0.00000 |       0.00032 |       0.00240 |       0.97282
     -0.00822 |       0.00000 |       0.00031 |       0.00260 |       0.97214
Evaluating losses...
     -0.00901 |       0.00000 |       0.00030 |       0.00253 |      

     -0.00832 |       0.00000 |       0.00209 |       0.00284 |       0.89997
     -0.00902 |       0.00000 |       0.00205 |       0.00299 |       0.89944
Evaluating losses...
     -0.01011 |       0.00000 |       0.00198 |       0.00297 |       0.90107
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.12        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1383         |
| TimeElapsed     | 3.45e+03     |
| TimestepsSoFar  | 2412544      |
| ev_tdlam_before | 0.258        |
| loss_ent        | 0.901068     |
| loss_kl         | 0.0029706531 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010113098 |
| loss_vf_loss    | 0.0019778872 |
----------------------------------
********** Iteration 589 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00124 |       0.00000 |       0.00437 |       0.00111 |       0.91190
     -0.00207 |       0.00000 |       0.00363 |

********** Iteration 594 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00029 |       0.00000 |       0.00536 |       0.00075 |       0.90028
     -0.00257 |       0.00000 |       0.00471 |       0.00224 |       0.89696
     -0.00369 |       0.00000 |       0.00439 |       0.00258 |       0.89673
     -0.00564 |       0.00000 |       0.00416 |       0.00221 |       0.89692
     -0.00678 |       0.00000 |       0.00401 |       0.00186 |       0.89867
     -0.00683 |       0.00000 |       0.00395 |       0.00240 |       0.89946
     -0.00764 |       0.00000 |       0.00384 |       0.00247 |       0.89870
     -0.00755 |       0.00000 |       0.00373 |       0.00264 |       0.89974
     -0.00837 |       0.00000 |       0.00369 |       0.00270 |       0.89823
     -0.00899 |       0.00000 |       0.00362 |       0.00236 |       0.90022
Evaluating losses...
     -0.00980 |       0.00000 |       0.00360 |       0.00237 |      

     -0.00750 |       0.00000 |       0.00184 |       0.00159 |       0.90710
     -0.00784 |       0.00000 |       0.00182 |       0.00161 |       0.90834
     -0.00633 |       0.00000 |       0.00180 |       0.00308 |       0.90598
Evaluating losses...
     -0.00598 |       0.00000 |       0.00174 |       0.00360 |       0.90208
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.13         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1398          |
| TimeElapsed     | 3.52e+03      |
| TimestepsSoFar  | 2457600       |
| ev_tdlam_before | 0.544         |
| loss_ent        | 0.9020808     |
| loss_kl         | 0.003595903   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0059754914 |
| loss_vf_loss    | 0.0017431413  |
-----------------------------------
********** Iteration 600 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00125 |       0.00000 |  

********** Iteration 605 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00046 |       0.00000 |       0.00361 |       0.00048 |       0.85448
     -0.00074 |       0.00000 |       0.00291 |       0.00070 |       0.85523
     -0.00103 |       0.00000 |       0.00266 |       0.00098 |       0.85524
     -0.00158 |       0.00000 |       0.00250 |       0.00106 |       0.85460
     -0.00161 |       0.00000 |       0.00231 |       0.00090 |       0.85398
     -0.00189 |       0.00000 |       0.00222 |       0.00115 |       0.85454
     -0.00203 |       0.00000 |       0.00215 |       0.00117 |       0.85410
     -0.00226 |       0.00000 |       0.00211 |       0.00122 |       0.85327
     -0.00253 |       0.00000 |       0.00203 |       0.00127 |       0.85311
     -0.00217 |       0.00000 |       0.00200 |       0.00147 |       0.85232
Evaluating losses...
     -0.00308 |       0.00000 |       0.00188 |       0.00152 |      

     -0.00604 |       0.00000 |       0.00251 |       0.00206 |       0.83680
     -0.00649 |       0.00000 |       0.00244 |       0.00199 |       0.83664
     -0.00662 |       0.00000 |       0.00238 |       0.00203 |       0.83594
     -0.00682 |       0.00000 |       0.00236 |       0.00218 |       0.83583
Evaluating losses...
     -0.00775 |       0.00000 |       0.00224 |       0.00252 |       0.83710
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.1         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1412         |
| TimeElapsed     | 3.81e+03     |
| TimestepsSoFar  | 2502656      |
| ev_tdlam_before | 0.17         |
| loss_ent        | 0.83709943   |
| loss_kl         | 0.0025166704 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.007746931 |
| loss_vf_loss    | 0.0022420087 |
----------------------------------
********** Iteration 611 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |

********** Iteration 616 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00051 |       0.00000 |       0.00200 |       0.00199 |       0.91234
     -0.00327 |       0.00000 |       0.00181 |       0.00080 |       0.91205
     -0.00423 |       0.00000 |       0.00171 |       0.00116 |       0.91159
     -0.00429 |       0.00000 |       0.00164 |       0.00125 |       0.91324
     -0.00520 |       0.00000 |       0.00158 |       0.00248 |       0.91127
     -0.00553 |       0.00000 |       0.00156 |       0.00152 |       0.91244
     -0.00627 |       0.00000 |       0.00150 |       0.00135 |       0.91388
     -0.00627 |       0.00000 |       0.00146 |       0.00155 |       0.91633
     -0.00688 |       0.00000 |       0.00142 |       0.00155 |       0.91582
     -0.00706 |       0.00000 |       0.00142 |       0.00167 |       0.91515
Evaluating losses...
     -0.00751 |       0.00000 |       0.00134 |       0.00137 |      

     -0.00576 |       0.00000 |       0.00122 |       0.00269 |       0.88239
     -0.00654 |       0.00000 |       0.00119 |       0.00274 |       0.88630
     -0.00619 |       0.00000 |       0.00119 |       0.00296 |       0.88623
Evaluating losses...
     -0.00735 |       0.00000 |       0.00115 |       0.00267 |       0.88489
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | -0.07         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1427          |
| TimeElapsed     | 3.86e+03      |
| TimestepsSoFar  | 2547712       |
| ev_tdlam_before | 0.214         |
| loss_ent        | 0.88488847    |
| loss_kl         | 0.0026697265  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0073475316 |
| loss_vf_loss    | 0.001154779   |
-----------------------------------
********** Iteration 622 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00032 |       0.00000 |  

********** Iteration 627 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00018 |       0.00000 |       0.00247 |       0.00062 |       0.87046
     -0.00221 |       0.00000 |       0.00153 |       0.00110 |       0.87435
     -0.00406 |       0.00000 |       0.00133 |       0.00183 |       0.87303
     -0.00503 |       0.00000 |       0.00123 |       0.00197 |       0.87388
     -0.00581 |       0.00000 |       0.00115 |       0.00189 |       0.87456
     -0.00667 |       0.00000 |       0.00106 |       0.00215 |       0.87330
     -0.00697 |       0.00000 |       0.00101 |       0.00229 |       0.87490
     -0.00756 |       0.00000 |       0.00098 |       0.00242 |       0.87546
     -0.00815 |       0.00000 |       0.00093 |       0.00251 |       0.87478
     -0.00819 |       0.00000 |       0.00089 |       0.00272 |       0.87396
Evaluating losses...
     -0.00898 |       0.00000 |       0.00086 |       0.00284 |      

     -0.00671 |       0.00000 |       0.00305 |       0.00310 |       0.89043
     -0.00727 |       0.00000 |       0.00299 |       0.00306 |       0.89192
     -0.00740 |       0.00000 |       0.00296 |       0.00263 |       0.89106
Evaluating losses...
     -0.00828 |       0.00000 |       0.00289 |       0.00302 |       0.89162
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.1         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1442         |
| TimeElapsed     | 3.92e+03     |
| TimestepsSoFar  | 2592768      |
| ev_tdlam_before | 0.698        |
| loss_ent        | 0.89161694   |
| loss_kl         | 0.0030240058 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008281611 |
| loss_vf_loss    | 0.002886981  |
----------------------------------
********** Iteration 633 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00059 |       0.00000 |       0.00357 |

********** Iteration 638 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     2.77e-05 |       0.00000 |       0.00131 |       0.00162 |       0.89070
     -0.00262 |       0.00000 |       0.00093 |       0.00153 |       0.89224
     -0.00322 |       0.00000 |       0.00087 |       0.00141 |       0.89097
     -0.00400 |       0.00000 |       0.00082 |       0.00140 |       0.89119
     -0.00415 |       0.00000 |       0.00082 |       0.00177 |       0.89102
     -0.00480 |       0.00000 |       0.00075 |       0.00162 |       0.89159
     -0.00485 |       0.00000 |       0.00074 |       0.00191 |       0.89100
     -0.00533 |       0.00000 |       0.00071 |       0.00181 |       0.89148
     -0.00573 |       0.00000 |       0.00070 |       0.00219 |       0.89277
     -0.00554 |       0.00000 |       0.00068 |       0.00219 |       0.89010
Evaluating losses...
     -0.00648 |       0.00000 |       0.00064 |       0.00232 |      

     -0.00429 |       0.00000 |       0.00026 |       0.00158 |       0.87686
     -0.00456 |       0.00000 |       0.00028 |       0.00182 |       0.87691
     -0.00469 |       0.00000 |       0.00027 |       0.00175 |       0.87658
Evaluating losses...
     -0.00479 |       0.00000 |       0.00024 |       0.00191 |       0.87467
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.11         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1457          |
| TimeElapsed     | 3.96e+03      |
| TimestepsSoFar  | 2637824       |
| ev_tdlam_before | 0.234         |
| loss_ent        | 0.87466764    |
| loss_kl         | 0.0019104393  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.004786146  |
| loss_vf_loss    | 0.00023595859 |
-----------------------------------
********** Iteration 644 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00064 |       0.00000 |  

********** Iteration 649 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -1.39e-05 |       0.00000 |       0.00034 |       0.00062 |       0.86422
     -0.00308 |       0.00000 |       0.00021 |       0.00127 |       0.86542
     -0.00447 |       0.00000 |       0.00017 |       0.00135 |       0.86412
     -0.00553 |       0.00000 |       0.00015 |       0.00159 |       0.86403
     -0.00611 |       0.00000 |       0.00014 |       0.00174 |       0.86505
     -0.00663 |       0.00000 |       0.00012 |       0.00214 |       0.86523
     -0.00691 |       0.00000 |       0.00011 |       0.00222 |       0.86398
     -0.00773 |       0.00000 |       0.00011 |       0.00210 |       0.86390
     -0.00825 |       0.00000 |       0.00010 |       0.00234 |       0.86340
     -0.00871 |       0.00000 |      9.78e-05 |       0.00237 |       0.86321
Evaluating losses...
     -0.00908 |       0.00000 |      9.41e-05 |       0.00302 |      

     -0.00653 |       0.00000 |       0.00119 |       0.00209 |       0.86827
     -0.00683 |       0.00000 |       0.00114 |       0.00227 |       0.86668
     -0.00688 |       0.00000 |       0.00111 |       0.00238 |       0.86734
Evaluating losses...
     -0.00791 |       0.00000 |       0.00104 |       0.00237 |       0.86530
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.17        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1472         |
| TimeElapsed     | 4.02e+03     |
| TimestepsSoFar  | 2682880      |
| ev_tdlam_before | 0.222        |
| loss_ent        | 0.86529714   |
| loss_kl         | 0.0023661032 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.007913366 |
| loss_vf_loss    | 0.0010391593 |
----------------------------------
********** Iteration 655 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00061 |       0.00000 |       0.00061 |

********** Iteration 660 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00109 |       0.00000 |       0.00020 |       0.00190 |       0.91264
     -0.00164 |       0.00000 |       0.00015 |       0.00279 |       0.91401
     -0.00455 |       0.00000 |       0.00013 |       0.00202 |       0.91476
     -0.00595 |       0.00000 |       0.00012 |       0.00178 |       0.91602
     -0.00663 |       0.00000 |       0.00011 |       0.00202 |       0.91548
     -0.00710 |       0.00000 |       0.00010 |       0.00225 |       0.91508
     -0.00837 |       0.00000 |      9.92e-05 |       0.00208 |       0.91632
     -0.00862 |       0.00000 |      9.52e-05 |       0.00219 |       0.91568
     -0.00880 |       0.00000 |      9.21e-05 |       0.00255 |       0.91537
     -0.00981 |       0.00000 |      8.79e-05 |       0.00235 |       0.91647
Evaluating losses...
     -0.01073 |       0.00000 |      8.53e-05 |       0.00214 |      

     -0.00506 |       0.00000 |       0.00168 |       0.00246 |       0.84603
     -0.00538 |       0.00000 |       0.00168 |       0.00249 |       0.84661
     -0.00577 |       0.00000 |       0.00165 |       0.00218 |       0.84693
Evaluating losses...
     -0.00647 |       0.00000 |       0.00162 |       0.00245 |       0.84734
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.07         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1487          |
| TimeElapsed     | 4.07e+03      |
| TimestepsSoFar  | 2727936       |
| ev_tdlam_before | 0.322         |
| loss_ent        | 0.84733856    |
| loss_kl         | 0.002447849   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0064737243 |
| loss_vf_loss    | 0.0016187979  |
-----------------------------------
********** Iteration 666 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00032 |       0.00000 |  

********** Iteration 671 ************
Eval num_timesteps=2748416, episode_reward=0.30 +/- 0.96
Episode length: 3000.00 +/- 0.00
New best mean reward!
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00011 |       0.00000 |       0.00275 |       0.00061 |       0.84254
     -0.00121 |       0.00000 |       0.00209 |       0.00091 |       0.84138
     -0.00253 |       0.00000 |       0.00185 |       0.00098 |       0.84121
     -0.00310 |       0.00000 |       0.00172 |       0.00127 |       0.84071
     -0.00324 |       0.00000 |       0.00163 |       0.00114 |       0.84160
     -0.00414 |       0.00000 |       0.00157 |       0.00118 |       0.84105
     -0.00406 |       0.00000 |       0.00152 |       0.00149 |       0.83975
     -0.00473 |       0.00000 |       0.00147 |       0.00180 |       0.83992
     -0.00481 |       0.00000 |       0.00144 |       0.00160 |       0.83983
     -0.00532 |       0.00000 |       0.00143 |       0.

     -0.00621 |       0.00000 |       0.00011 |       0.00163 |       0.84457
     -0.00663 |       0.00000 |       0.00010 |       0.00161 |       0.84245
     -0.00721 |       0.00000 |      9.86e-05 |       0.00169 |       0.84276
     -0.00745 |       0.00000 |      9.43e-05 |       0.00191 |       0.84162
Evaluating losses...
     -0.00775 |       0.00000 |      9.23e-05 |       0.00218 |       0.84022
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.06          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1502          |
| TimeElapsed     | 4.34e+03      |
| TimestepsSoFar  | 2772992       |
| ev_tdlam_before | -0.136        |
| loss_ent        | 0.84021604    |
| loss_kl         | 0.0021765733  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0077538146 |
| loss_vf_loss    | 9.2277936e-05 |
-----------------------------------
********** Iteration 677 ************
Optimizing...
     pol_surr |    pol_entpen |  

********** Iteration 682 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -7.91e-05 |       0.00000 |       0.00152 |       0.00040 |       0.85059
     -0.00195 |       0.00000 |       0.00089 |       0.00086 |       0.85216
     -0.00322 |       0.00000 |       0.00077 |       0.00135 |       0.85228
     -0.00299 |       0.00000 |       0.00069 |       0.00134 |       0.85100
     -0.00382 |       0.00000 |       0.00067 |       0.00146 |       0.85129
     -0.00398 |       0.00000 |       0.00064 |       0.00182 |       0.85087
     -0.00444 |       0.00000 |       0.00064 |       0.00163 |       0.85047
     -0.00458 |       0.00000 |       0.00061 |       0.00156 |       0.84948
     -0.00457 |       0.00000 |       0.00061 |       0.00180 |       0.85044
     -0.00480 |       0.00000 |       0.00057 |       0.00160 |       0.85008
Evaluating losses...
     -0.00579 |       0.00000 |       0.00055 |       0.00168 |      

     -0.00708 |       0.00000 |      5.21e-05 |       0.00191 |       0.87608
     -0.00731 |       0.00000 |      5.08e-05 |       0.00243 |       0.87766
     -0.00799 |       0.00000 |      4.74e-05 |       0.00217 |       0.87619
Evaluating losses...
     -0.00898 |       0.00000 |      4.51e-05 |       0.00236 |       0.87988
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.03          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1517          |
| TimeElapsed     | 4.39e+03      |
| TimestepsSoFar  | 2818048       |
| ev_tdlam_before | -0.426        |
| loss_ent        | 0.8798846     |
| loss_kl         | 0.0023573346  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.008977661  |
| loss_vf_loss    | 4.5129287e-05 |
-----------------------------------
********** Iteration 688 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00061 |       0.00000 |  

********** Iteration 693 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00085 |       0.00000 |       0.00072 |       0.00152 |       0.86450
     3.77e-06 |       0.00000 |       0.00053 |       0.00072 |       0.86411
     -0.00183 |       0.00000 |       0.00045 |       0.00073 |       0.86626
     -0.00258 |       0.00000 |       0.00043 |       0.00072 |       0.86682
     -0.00275 |       0.00000 |       0.00040 |       0.00101 |       0.86657
     -0.00362 |       0.00000 |       0.00039 |       0.00098 |       0.86523
     -0.00383 |       0.00000 |       0.00037 |       0.00111 |       0.86564
     -0.00401 |       0.00000 |       0.00036 |       0.00111 |       0.86640
     -0.00404 |       0.00000 |       0.00034 |       0.00151 |       0.86523
     -0.00460 |       0.00000 |       0.00031 |       0.00141 |       0.86599
Evaluating losses...
     -0.00511 |       0.00000 |       0.00031 |       0.00130 |      

     -0.00548 |       0.00000 |       0.00085 |       0.00144 |       0.85523
     -0.00519 |       0.00000 |       0.00084 |       0.00155 |       0.85583
     -0.00609 |       0.00000 |       0.00079 |       0.00150 |       0.85557
Evaluating losses...
     -0.00655 |       0.00000 |       0.00079 |       0.00166 |       0.85486
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.13          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1532          |
| TimeElapsed     | 4.44e+03      |
| TimestepsSoFar  | 2863104       |
| ev_tdlam_before | 0.439         |
| loss_ent        | 0.8548595     |
| loss_kl         | 0.001662695   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0065494757 |
| loss_vf_loss    | 0.0007929924  |
-----------------------------------
********** Iteration 699 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00039 |       0.00000 |  

********** Iteration 704 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00062 |       0.00000 |       0.00143 |       0.00066 |       0.88392
     -0.00199 |       0.00000 |       0.00098 |       0.00222 |       0.88729
     -0.00405 |       0.00000 |       0.00085 |       0.00110 |       0.88879
     -0.00392 |       0.00000 |       0.00080 |       0.00202 |       0.88583
     -0.00475 |       0.00000 |       0.00076 |       0.00181 |       0.88887
     -0.00449 |       0.00000 |       0.00073 |       0.00243 |       0.88676
     -0.00559 |       0.00000 |       0.00070 |       0.00177 |       0.88643
     -0.00623 |       0.00000 |       0.00068 |       0.00191 |       0.88595
     -0.00583 |       0.00000 |       0.00068 |       0.00215 |       0.88502
     -0.00659 |       0.00000 |       0.00064 |       0.00198 |       0.88572
Evaluating losses...
     -0.00764 |       0.00000 |       0.00063 |       0.00185 |      

     -0.00727 |       0.00000 |       0.00154 |       0.00227 |       0.91852
     -0.00781 |       0.00000 |       0.00151 |       0.00204 |       0.91861
     -0.00785 |       0.00000 |       0.00150 |       0.00214 |       0.91868
Evaluating losses...
     -0.00843 |       0.00000 |       0.00141 |       0.00238 |       0.91811
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.08         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1547         |
| TimeElapsed     | 4.49e+03     |
| TimestepsSoFar  | 2908160      |
| ev_tdlam_before | 0.546        |
| loss_ent        | 0.91810864   |
| loss_kl         | 0.0023844822 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008425288 |
| loss_vf_loss    | 0.0014145797 |
----------------------------------
********** Iteration 710 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00082 |       0.00000 |       0.00221 |

********** Iteration 715 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00146 |       0.00000 |       0.00197 |       0.00060 |       0.84818
     -0.00356 |       0.00000 |       0.00171 |       0.00167 |       0.84562
     -0.00482 |       0.00000 |       0.00160 |       0.00138 |       0.84913
     -0.00535 |       0.00000 |       0.00152 |       0.00144 |       0.84831
     -0.00569 |       0.00000 |       0.00145 |       0.00207 |       0.84718
     -0.00570 |       0.00000 |       0.00141 |       0.00169 |       0.84824
     -0.00619 |       0.00000 |       0.00138 |       0.00146 |       0.84818
     -0.00647 |       0.00000 |       0.00133 |       0.00172 |       0.84714
     -0.00592 |       0.00000 |       0.00130 |       0.00200 |       0.84819
     -0.00660 |       0.00000 |       0.00129 |       0.00186 |       0.84745
Evaluating losses...
     -0.00710 |       0.00000 |       0.00124 |       0.00184 |      

     -0.00192 |       0.00000 |       0.00028 |       0.00117 |       0.88250
     -0.00207 |       0.00000 |       0.00030 |       0.00103 |       0.88361
     -0.00219 |       0.00000 |       0.00027 |       0.00115 |       0.88504
Evaluating losses...
     -0.00255 |       0.00000 |       0.00026 |       0.00139 |       0.88332
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.07          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1562          |
| TimeElapsed     | 4.54e+03      |
| TimestepsSoFar  | 2953216       |
| ev_tdlam_before | 0.493         |
| loss_ent        | 0.8833216     |
| loss_kl         | 0.0013923714  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.002553333  |
| loss_vf_loss    | 0.00026207228 |
-----------------------------------
********** Iteration 721 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00014 |       0.00000 |  

********** Iteration 726 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00037 |       0.00000 |       0.00109 |       0.00043 |       0.85827
     -0.00185 |       0.00000 |       0.00093 |       0.00104 |       0.86119
     -0.00233 |       0.00000 |       0.00090 |       0.00074 |       0.85965
     -0.00299 |       0.00000 |       0.00086 |       0.00107 |       0.86042
     -0.00358 |       0.00000 |       0.00083 |       0.00138 |       0.85992
     -0.00391 |       0.00000 |       0.00081 |       0.00134 |       0.85953
     -0.00411 |       0.00000 |       0.00081 |       0.00143 |       0.85972
     -0.00455 |       0.00000 |       0.00079 |       0.00139 |       0.85970
     -0.00482 |       0.00000 |       0.00078 |       0.00135 |       0.85712
     -0.00492 |       0.00000 |       0.00076 |       0.00164 |       0.85836
Evaluating losses...
     -0.00540 |       0.00000 |       0.00075 |       0.00175 |      

     -0.00816 |       0.00000 |      4.54e-05 |       0.00177 |       0.90388
     -0.00802 |       0.00000 |      4.43e-05 |       0.00215 |       0.90470
     -0.00811 |       0.00000 |      4.16e-05 |       0.00190 |       0.90537
Evaluating losses...
     -0.00993 |       0.00000 |      4.26e-05 |       0.00201 |       0.90510
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.1           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1577          |
| TimeElapsed     | 4.6e+03       |
| TimestepsSoFar  | 2998272       |
| ev_tdlam_before | -0.0401       |
| loss_ent        | 0.905102      |
| loss_kl         | 0.0020097655  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.009933309  |
| loss_vf_loss    | 4.2551743e-05 |
-----------------------------------
********** Iteration 732 ************
Eval num_timesteps=2998272, episode_reward=-0.17 +/- 0.88
Episode length: 3000.00 +/- 0.00
Optimizing...
     pol_surr |    p

********** Iteration 737 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00109 |       0.00000 |       0.00016 |       0.00051 |       0.90659
     -0.00150 |       0.00000 |      9.59e-05 |       0.00071 |       0.90880
     -0.00271 |       0.00000 |      7.72e-05 |       0.00095 |       0.90853
     -0.00272 |       0.00000 |      6.79e-05 |       0.00095 |       0.90754
     -0.00472 |       0.00000 |      6.33e-05 |       0.00125 |       0.90726
     -0.00558 |       0.00000 |      5.94e-05 |       0.00118 |       0.90769
     -0.00564 |       0.00000 |      5.50e-05 |       0.00132 |       0.90794
     -0.00632 |       0.00000 |      5.26e-05 |       0.00152 |       0.90806
     -0.00660 |       0.00000 |      5.13e-05 |       0.00159 |       0.90841
     -0.00659 |       0.00000 |      4.92e-05 |       0.00175 |       0.90873
Evaluating losses...
     -0.00765 |       0.00000 |      4.72e-05 |       0.00165 |      

     -0.00485 |       0.00000 |       0.00129 |       0.00145 |       0.89873
     -0.00517 |       0.00000 |       0.00126 |       0.00164 |       0.89853
     -0.00518 |       0.00000 |       0.00122 |       0.00177 |       0.89725
Evaluating losses...
     -0.00550 |       0.00000 |       0.00118 |       0.00205 |       0.89931
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | 0.06         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1592         |
| TimeElapsed     | 4.86e+03     |
| TimestepsSoFar  | 3043328      |
| ev_tdlam_before | 0.0198       |
| loss_ent        | 0.89930934   |
| loss_kl         | 0.0020502342 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.005497744 |
| loss_vf_loss    | 0.0011813365 |
----------------------------------
********** Iteration 743 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00094 |       0.00000 |       0.00032 |

********** Iteration 748 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -7.50e-05 |       0.00000 |       0.00260 |       0.00162 |       0.89150
     -0.00270 |       0.00000 |       0.00179 |       0.00149 |       0.89389
     -0.00363 |       0.00000 |       0.00154 |       0.00136 |       0.89537
     -0.00417 |       0.00000 |       0.00140 |       0.00127 |       0.89285
     -0.00379 |       0.00000 |       0.00134 |       0.00149 |       0.89342
     -0.00452 |       0.00000 |       0.00129 |       0.00113 |       0.89392
     -0.00485 |       0.00000 |       0.00127 |       0.00099 |       0.89420
     -0.00513 |       0.00000 |       0.00124 |       0.00128 |       0.89387
     -0.00561 |       0.00000 |       0.00126 |       0.00110 |       0.89418
     -0.00548 |       0.00000 |       0.00122 |       0.00121 |       0.89391
Evaluating losses...
     -0.00570 |       0.00000 |       0.00123 |       0.00124 |      

     -0.00546 |       0.00000 |       0.00121 |       0.00174 |       0.87459
     -0.00573 |       0.00000 |       0.00118 |       0.00132 |       0.87389
     -0.00577 |       0.00000 |       0.00113 |       0.00147 |       0.87346
Evaluating losses...
     -0.00624 |       0.00000 |       0.00109 |       0.00134 |       0.87394
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.01         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1607         |
| TimeElapsed     | 4.91e+03     |
| TimestepsSoFar  | 3088384      |
| ev_tdlam_before | 0.552        |
| loss_ent        | 0.87394243   |
| loss_kl         | 0.0013408073 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.006241532 |
| loss_vf_loss    | 0.0010902698 |
----------------------------------
********** Iteration 754 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00048 |       0.00000 |       0.00140 |

********** Iteration 759 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00053 |       0.00000 |       0.00381 |       0.00068 |       0.85811
     -0.00104 |       0.00000 |       0.00334 |       0.00118 |       0.85900
     -0.00243 |       0.00000 |       0.00307 |       0.00100 |       0.85721
     -0.00296 |       0.00000 |       0.00293 |       0.00173 |       0.85663
     -0.00321 |       0.00000 |       0.00280 |       0.00244 |       0.85758
     -0.00414 |       0.00000 |       0.00268 |       0.00217 |       0.85708
     -0.00427 |       0.00000 |       0.00261 |       0.00217 |       0.85783
     -0.00459 |       0.00000 |       0.00250 |       0.00223 |       0.85777
     -0.00528 |       0.00000 |       0.00242 |       0.00216 |       0.85695
     -0.00520 |       0.00000 |       0.00239 |       0.00222 |       0.85859
Evaluating losses...
     -0.00598 |       0.00000 |       0.00225 |       0.00189 |      

     -0.00568 |       0.00000 |       0.00012 |       0.00142 |       0.90738
     -0.00606 |       0.00000 |       0.00011 |       0.00157 |       0.90693
     -0.00624 |       0.00000 |       0.00011 |       0.00156 |       0.90807
Evaluating losses...
     -0.00725 |       0.00000 |       0.00011 |       0.00151 |       0.90931
------------------------------------
| EpLenMean       | 3.01e+03       |
| EpRewMean       | -0.07          |
| EpThisIter      | 1              |
| EpisodesSoFar   | 1622           |
| TimeElapsed     | 4.95e+03       |
| TimestepsSoFar  | 3133440        |
| ev_tdlam_before | -1.02          |
| loss_ent        | 0.9093069      |
| loss_kl         | 0.0015061374   |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.0072471336  |
| loss_vf_loss    | 0.000110301124 |
------------------------------------
********** Iteration 765 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00014 |    

********** Iteration 770 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00055 |       0.00000 |       0.00192 |       0.00295 |       0.86491
     -0.00268 |       0.00000 |       0.00167 |       0.00130 |       0.87032
     -0.00388 |       0.00000 |       0.00157 |       0.00108 |       0.87245
     -0.00412 |       0.00000 |       0.00156 |       0.00111 |       0.87399
     -0.00445 |       0.00000 |       0.00148 |       0.00178 |       0.87584
     -0.00488 |       0.00000 |       0.00148 |       0.00146 |       0.87471
     -0.00481 |       0.00000 |       0.00145 |       0.00157 |       0.87474
     -0.00524 |       0.00000 |       0.00144 |       0.00147 |       0.87584
     -0.00540 |       0.00000 |       0.00142 |       0.00158 |       0.87645
     -0.00550 |       0.00000 |       0.00143 |       0.00176 |       0.87602
Evaluating losses...
     -0.00551 |       0.00000 |       0.00135 |       0.00209 |      

     -0.00413 |       0.00000 |       0.00076 |       0.00171 |       0.88430
     -0.00410 |       0.00000 |       0.00072 |       0.00196 |       0.88333
     -0.00501 |       0.00000 |       0.00072 |       0.00190 |       0.88373
Evaluating losses...
     -0.00530 |       0.00000 |       0.00069 |       0.00170 |       0.88348
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.07         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1637          |
| TimeElapsed     | 5e+03         |
| TimestepsSoFar  | 3178496       |
| ev_tdlam_before | 0.58          |
| loss_ent        | 0.8834787     |
| loss_kl         | 0.0017037126  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.005298304  |
| loss_vf_loss    | 0.00068844034 |
-----------------------------------
********** Iteration 776 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -1.01e-05 |       0.00000 |  

********** Iteration 781 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00060 |       0.00000 |      3.30e-05 |       0.00046 |       0.89846
     -0.00179 |       0.00000 |      2.83e-05 |       0.00091 |       0.89663
     -0.00363 |       0.00000 |      2.66e-05 |       0.00091 |       0.89685
     -0.00424 |       0.00000 |      2.49e-05 |       0.00117 |       0.89566
     -0.00532 |       0.00000 |      2.34e-05 |       0.00136 |       0.89567
     -0.00622 |       0.00000 |      2.24e-05 |       0.00113 |       0.89684
     -0.00698 |       0.00000 |      2.14e-05 |       0.00154 |       0.89702
     -0.00693 |       0.00000 |      2.06e-05 |       0.00135 |       0.89732
     -0.00796 |       0.00000 |      2.00e-05 |       0.00139 |       0.89697
     -0.00809 |       0.00000 |      1.94e-05 |       0.00143 |       0.89803
Evaluating losses...
     -0.00965 |       0.00000 |      1.88e-05 |       0.00136 |      

     -0.00371 |       0.00000 |       0.00058 |       0.00101 |       0.88233
     -0.00401 |       0.00000 |       0.00057 |       0.00110 |       0.88236
     -0.00433 |       0.00000 |       0.00056 |       0.00111 |       0.88142
Evaluating losses...
     -0.00494 |       0.00000 |       0.00054 |       0.00111 |       0.88131
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.05         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1652          |
| TimeElapsed     | 5.05e+03      |
| TimestepsSoFar  | 3223552       |
| ev_tdlam_before | 0.682         |
| loss_ent        | 0.88130796    |
| loss_kl         | 0.0011073108  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0049392376 |
| loss_vf_loss    | 0.00054351834 |
-----------------------------------
********** Iteration 787 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00081 |       0.00000 |  

********** Iteration 792 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00012 |       0.00000 |       0.00272 |       0.00023 |       0.88720
     -0.00208 |       0.00000 |       0.00203 |       0.00050 |       0.88602
     -0.00193 |       0.00000 |       0.00198 |       0.00061 |       0.88442
     -0.00169 |       0.00000 |       0.00189 |       0.00071 |       0.88353
     -0.00318 |       0.00000 |       0.00182 |       0.00063 |       0.88442
     -0.00351 |       0.00000 |       0.00181 |       0.00058 |       0.88284
     -0.00410 |       0.00000 |       0.00177 |       0.00077 |       0.88222
     -0.00396 |       0.00000 |       0.00169 |       0.00098 |       0.88259
     -0.00437 |       0.00000 |       0.00169 |       0.00102 |       0.88082
     -0.00476 |       0.00000 |       0.00167 |       0.00101 |       0.88147
Evaluating losses...
     -0.00532 |       0.00000 |       0.00162 |       0.00103 |      

     -0.00345 |       0.00000 |       0.00071 |       0.00096 |       0.84681
     -0.00380 |       0.00000 |       0.00069 |       0.00085 |       0.84596
     -0.00407 |       0.00000 |       0.00067 |       0.00101 |       0.84551
     -0.00432 |       0.00000 |       0.00067 |       0.00103 |       0.84582
Evaluating losses...
     -0.00460 |       0.00000 |       0.00064 |       0.00119 |       0.84535
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | -0.1          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1667          |
| TimeElapsed     | 5.33e+03      |
| TimestepsSoFar  | 3268608       |
| ev_tdlam_before | 0.515         |
| loss_ent        | 0.8453512     |
| loss_kl         | 0.0011907794  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.004604695  |
| loss_vf_loss    | 0.00063884805 |
-----------------------------------
********** Iteration 798 ************
Optimizing...
     pol_surr |    pol_entpen |  

********** Iteration 803 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     9.94e-05 |       0.00000 |       0.00145 |       0.00028 |       0.86870
     -0.00114 |       0.00000 |       0.00076 |       0.00050 |       0.86757
     -0.00181 |       0.00000 |       0.00055 |       0.00057 |       0.86768
     -0.00273 |       0.00000 |       0.00048 |       0.00048 |       0.86789
     -0.00292 |       0.00000 |       0.00045 |       0.00054 |       0.86786
     -0.00317 |       0.00000 |       0.00041 |       0.00075 |       0.86722
     -0.00348 |       0.00000 |       0.00038 |       0.00070 |       0.86750
     -0.00352 |       0.00000 |       0.00038 |       0.00072 |       0.86807
     -0.00374 |       0.00000 |       0.00038 |       0.00079 |       0.86855
     -0.00376 |       0.00000 |       0.00035 |       0.00087 |       0.86898
Evaluating losses...
     -0.00357 |       0.00000 |       0.00033 |       0.00092 |      

     -0.00476 |       0.00000 |       0.00441 |       0.00127 |       0.85127
     -0.00531 |       0.00000 |       0.00435 |       0.00132 |       0.85115
     -0.00510 |       0.00000 |       0.00426 |       0.00136 |       0.85143
Evaluating losses...
     -0.00537 |       0.00000 |       0.00411 |       0.00134 |       0.85208
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.1          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1682          |
| TimeElapsed     | 5.38e+03      |
| TimestepsSoFar  | 3313664       |
| ev_tdlam_before | 0.539         |
| loss_ent        | 0.852085      |
| loss_kl         | 0.001339411   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0053657056 |
| loss_vf_loss    | 0.00410566    |
-----------------------------------
********** Iteration 809 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00117 |       0.00000 |  

********** Iteration 814 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00132 |       0.00000 |       0.00197 |       0.00018 |       0.84427
     -0.00034 |       0.00000 |       0.00180 |       0.00030 |       0.84364
     -0.00090 |       0.00000 |       0.00169 |       0.00052 |       0.84347
     -0.00090 |       0.00000 |       0.00166 |       0.00058 |       0.84283
     -0.00220 |       0.00000 |       0.00166 |       0.00068 |       0.84251
     -0.00212 |       0.00000 |       0.00160 |       0.00077 |       0.84418
     -0.00264 |       0.00000 |       0.00158 |       0.00071 |       0.84419
     -0.00323 |       0.00000 |       0.00159 |       0.00077 |       0.84375
     -0.00213 |       0.00000 |       0.00156 |       0.00082 |       0.84419
     -0.00336 |       0.00000 |       0.00155 |       0.00087 |       0.84436
Evaluating losses...
     -0.00376 |       0.00000 |       0.00150 |       0.00085 |      

     -0.00439 |       0.00000 |       0.00306 |       0.00098 |       0.82591
     -0.00453 |       0.00000 |       0.00298 |       0.00109 |       0.82647
     -0.00482 |       0.00000 |       0.00295 |       0.00118 |       0.82559
Evaluating losses...
     -0.00545 |       0.00000 |       0.00294 |       0.00097 |       0.82552
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.11         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1697          |
| TimeElapsed     | 5.44e+03      |
| TimestepsSoFar  | 3358720       |
| ev_tdlam_before | 0.574         |
| loss_ent        | 0.8255163     |
| loss_kl         | 0.0009650408  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0054507344 |
| loss_vf_loss    | 0.0029386838  |
-----------------------------------
********** Iteration 820 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00045 |       0.00000 |  

********** Iteration 825 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00038 |       0.00000 |       0.00010 |       0.00024 |       0.84021
     -0.00217 |       0.00000 |      7.37e-05 |       0.00051 |       0.84112
     -0.00322 |       0.00000 |      6.24e-05 |       0.00064 |       0.84087
     -0.00423 |       0.00000 |      5.63e-05 |       0.00077 |       0.84148
     -0.00483 |       0.00000 |      5.13e-05 |       0.00079 |       0.84189
     -0.00553 |       0.00000 |      4.81e-05 |       0.00085 |       0.84161
     -0.00557 |       0.00000 |      4.54e-05 |       0.00093 |       0.84114
     -0.00597 |       0.00000 |      4.37e-05 |       0.00098 |       0.84150
     -0.00641 |       0.00000 |      4.11e-05 |       0.00105 |       0.84111
     -0.00657 |       0.00000 |      3.97e-05 |       0.00115 |       0.84179
Evaluating losses...
     -0.00742 |       0.00000 |      3.80e-05 |       0.00132 |      

     -0.00300 |       0.00000 |       0.00105 |       0.00065 |       0.84822
     -0.00329 |       0.00000 |       0.00102 |       0.00082 |       0.84950
     -0.00360 |       0.00000 |       0.00101 |       0.00091 |       0.84980
Evaluating losses...
     -0.00386 |       0.00000 |       0.00095 |       0.00094 |       0.84971
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.06         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1712          |
| TimeElapsed     | 5.52e+03      |
| TimestepsSoFar  | 3403776       |
| ev_tdlam_before | 0.23          |
| loss_ent        | 0.84970546    |
| loss_kl         | 0.00093725295 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0038567614 |
| loss_vf_loss    | 0.00095426606 |
-----------------------------------
********** Iteration 831 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00047 |       0.00000 |  

********** Iteration 836 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00098 |       0.00000 |       0.00623 |       0.00056 |       0.83117
     -0.00102 |       0.00000 |       0.00467 |       0.00085 |       0.83207
     -0.00208 |       0.00000 |       0.00425 |       0.00126 |       0.83117
     -0.00292 |       0.00000 |       0.00401 |       0.00143 |       0.83104
     -0.00282 |       0.00000 |       0.00379 |       0.00171 |       0.83072
     -0.00350 |       0.00000 |       0.00362 |       0.00134 |       0.83105
     -0.00425 |       0.00000 |       0.00355 |       0.00118 |       0.83102
     -0.00469 |       0.00000 |       0.00341 |       0.00145 |       0.83214
     -0.00464 |       0.00000 |       0.00334 |       0.00158 |       0.83141
     -0.00480 |       0.00000 |       0.00328 |       0.00171 |       0.83144
Evaluating losses...
     -0.00566 |       0.00000 |       0.00316 |       0.00168 |      

     -0.00518 |       0.00000 |      7.02e-05 |       0.00082 |       0.83439
     -0.00552 |       0.00000 |      6.66e-05 |       0.00090 |       0.83408
     -0.00567 |       0.00000 |      6.46e-05 |       0.00104 |       0.83453
Evaluating losses...
     -0.00661 |       0.00000 |      6.13e-05 |       0.00097 |       0.83475
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.09         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1727          |
| TimeElapsed     | 5.58e+03      |
| TimestepsSoFar  | 3448832       |
| ev_tdlam_before | -1.67         |
| loss_ent        | 0.83474886    |
| loss_kl         | 0.00096779235 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0066096853 |
| loss_vf_loss    | 6.131526e-05  |
-----------------------------------
********** Iteration 842 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00060 |       0.00000 |  

********** Iteration 847 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00043 |       0.00000 |       0.00108 |       0.00016 |       0.80939
     -0.00074 |       0.00000 |       0.00096 |       0.00027 |       0.80946
     -0.00125 |       0.00000 |       0.00088 |       0.00031 |       0.80853
     -0.00158 |       0.00000 |       0.00090 |       0.00032 |       0.80857
     -0.00146 |       0.00000 |       0.00090 |       0.00030 |       0.80725
     -0.00194 |       0.00000 |       0.00085 |       0.00039 |       0.80838
     -0.00179 |       0.00000 |       0.00082 |       0.00043 |       0.80804
     -0.00192 |       0.00000 |       0.00084 |       0.00044 |       0.80767
     -0.00207 |       0.00000 |       0.00081 |       0.00051 |       0.80813
     -0.00223 |       0.00000 |       0.00083 |       0.00048 |       0.80774
Evaluating losses...
     -0.00254 |       0.00000 |       0.00078 |       0.00045 |      

     -0.00429 |       0.00000 |       0.00195 |       0.00128 |       0.82262
     -0.00463 |       0.00000 |       0.00188 |       0.00150 |       0.82356
     -0.00436 |       0.00000 |       0.00186 |       0.00120 |       0.82310
Evaluating losses...
     -0.00505 |       0.00000 |       0.00177 |       0.00132 |       0.82363
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.05         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1742          |
| TimeElapsed     | 5.65e+03      |
| TimestepsSoFar  | 3493888       |
| ev_tdlam_before | 0.463         |
| loss_ent        | 0.82362777    |
| loss_kl         | 0.0013209328  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0050468585 |
| loss_vf_loss    | 0.001774909   |
-----------------------------------
********** Iteration 853 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00048 |       0.00000 |  

********** Iteration 858 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -1.57e-05 |       0.00000 |       0.00043 |       0.00038 |       0.83772
     -0.00147 |       0.00000 |       0.00027 |       0.00063 |       0.83879
     -0.00213 |       0.00000 |       0.00022 |       0.00074 |       0.83794
     -0.00286 |       0.00000 |       0.00019 |       0.00059 |       0.83740
     -0.00330 |       0.00000 |       0.00017 |       0.00067 |       0.83615
     -0.00383 |       0.00000 |       0.00016 |       0.00068 |       0.83582
     -0.00412 |       0.00000 |       0.00015 |       0.00077 |       0.83574
     -0.00454 |       0.00000 |       0.00014 |       0.00080 |       0.83546
     -0.00471 |       0.00000 |       0.00014 |       0.00085 |       0.83506
     -0.00490 |       0.00000 |       0.00013 |       0.00094 |       0.83520
Evaluating losses...
     -0.00539 |       0.00000 |       0.00012 |       0.00092 |      

     -0.00446 |       0.00000 |       0.00172 |       0.00082 |       0.82001
     -0.00461 |       0.00000 |       0.00169 |       0.00081 |       0.82010
     -0.00486 |       0.00000 |       0.00163 |       0.00077 |       0.81978
Evaluating losses...
     -0.00534 |       0.00000 |       0.00165 |       0.00099 |       0.81943
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | -0.04         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1756          |
| TimeElapsed     | 5.98e+03      |
| TimestepsSoFar  | 3538944       |
| ev_tdlam_before | 0.434         |
| loss_ent        | 0.81943226    |
| loss_kl         | 0.0009946851  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0053425627 |
| loss_vf_loss    | 0.0016538962  |
-----------------------------------
********** Iteration 864 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00016 |       0.00000 |  

********** Iteration 869 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00084 |       0.00000 |      6.20e-05 |       0.00039 |       0.85016
     -0.00090 |       0.00000 |      5.29e-05 |       0.00032 |       0.84987
     -0.00230 |       0.00000 |      4.91e-05 |       0.00057 |       0.84912
     -0.00357 |       0.00000 |      4.64e-05 |       0.00050 |       0.84848
     -0.00391 |       0.00000 |      4.43e-05 |       0.00056 |       0.84865
     -0.00470 |       0.00000 |      4.24e-05 |       0.00057 |       0.84809
     -0.00490 |       0.00000 |      4.09e-05 |       0.00069 |       0.84830
     -0.00509 |       0.00000 |      3.97e-05 |       0.00071 |       0.84889
     -0.00567 |       0.00000 |      3.81e-05 |       0.00073 |       0.84922
     -0.00569 |       0.00000 |      3.69e-05 |       0.00077 |       0.84869
Evaluating losses...
     -0.00655 |       0.00000 |      3.55e-05 |       0.00087 |      

     -0.00416 |       0.00000 |       0.00228 |       0.00071 |       0.81241
     -0.00424 |       0.00000 |       0.00224 |       0.00071 |       0.81225
     -0.00436 |       0.00000 |       0.00218 |       0.00075 |       0.81188
Evaluating losses...
     -0.00487 |       0.00000 |       0.00211 |       0.00068 |       0.81209
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.04          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1772          |
| TimeElapsed     | 6.04e+03      |
| TimestepsSoFar  | 3584000       |
| ev_tdlam_before | 0.524         |
| loss_ent        | 0.8120873     |
| loss_kl         | 0.00068289036 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.004870106  |
| loss_vf_loss    | 0.0021067176  |
-----------------------------------
********** Iteration 875 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00045 |       0.00000 |  

********** Iteration 880 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00024 |       0.00000 |       0.00074 |       0.00027 |       0.79865
     -0.00110 |       0.00000 |       0.00052 |       0.00045 |       0.79788
     -0.00170 |       0.00000 |       0.00043 |       0.00053 |       0.79775
     -0.00236 |       0.00000 |       0.00036 |       0.00053 |       0.79750
     -0.00266 |       0.00000 |       0.00032 |       0.00071 |       0.79835
     -0.00281 |       0.00000 |       0.00027 |       0.00063 |       0.79739
     -0.00312 |       0.00000 |       0.00025 |       0.00070 |       0.79786
     -0.00336 |       0.00000 |       0.00022 |       0.00073 |       0.79745
     -0.00355 |       0.00000 |       0.00020 |       0.00078 |       0.79790
     -0.00380 |       0.00000 |       0.00019 |       0.00077 |       0.79727
Evaluating losses...
     -0.00417 |       0.00000 |       0.00017 |       0.00072 |      

     -0.00438 |       0.00000 |       0.00011 |       0.00054 |       0.76706
     -0.00443 |       0.00000 |       0.00010 |       0.00073 |       0.76755
     -0.00466 |       0.00000 |      9.81e-05 |       0.00076 |       0.76742
Evaluating losses...
     -0.00503 |       0.00000 |      9.31e-05 |       0.00082 |       0.76680
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.07          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1787          |
| TimeElapsed     | 6.09e+03      |
| TimestepsSoFar  | 3629056       |
| ev_tdlam_before | -1.3          |
| loss_ent        | 0.7668001     |
| loss_kl         | 0.0008243061  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0050325217 |
| loss_vf_loss    | 9.311518e-05  |
-----------------------------------
********** Iteration 886 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00092 |       0.00000 |  

********** Iteration 891 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -7.49e-05 |       0.00000 |       0.00132 |       0.00021 |       0.79548
     -0.00077 |       0.00000 |       0.00060 |       0.00033 |       0.79503
     -0.00108 |       0.00000 |       0.00045 |       0.00034 |       0.79461
     -0.00126 |       0.00000 |       0.00041 |       0.00040 |       0.79385
     -0.00125 |       0.00000 |       0.00038 |       0.00054 |       0.79409
     -0.00146 |       0.00000 |       0.00037 |       0.00045 |       0.79370
     -0.00168 |       0.00000 |       0.00037 |       0.00051 |       0.79338
     -0.00161 |       0.00000 |       0.00035 |       0.00069 |       0.79326
     -0.00171 |       0.00000 |       0.00034 |       0.00058 |       0.79319
     -0.00188 |       0.00000 |       0.00035 |       0.00065 |       0.79331
Evaluating losses...
     -0.00205 |       0.00000 |       0.00033 |       0.00064 |      

     -0.00469 |       0.00000 |       0.00013 |       0.00075 |       0.81325
     -0.00515 |       0.00000 |       0.00012 |       0.00068 |       0.81323
     -0.00523 |       0.00000 |       0.00011 |       0.00066 |       0.81389
Evaluating losses...
     -0.00579 |       0.00000 |       0.00011 |       0.00074 |       0.81284
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.07          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1802          |
| TimeElapsed     | 6.14e+03      |
| TimestepsSoFar  | 3674112       |
| ev_tdlam_before | -1.37         |
| loss_ent        | 0.81283975    |
| loss_kl         | 0.000736489   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0057867705 |
| loss_vf_loss    | 0.00010882143 |
-----------------------------------
********** Iteration 897 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00068 |       0.00000 |  

********** Iteration 902 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00022 |       0.00000 |       0.00566 |       0.00074 |       0.78255
     -0.00214 |       0.00000 |       0.00490 |       0.00044 |       0.78186
     -0.00247 |       0.00000 |       0.00449 |       0.00131 |       0.78035
     -0.00348 |       0.00000 |       0.00432 |       0.00067 |       0.78029
     -0.00379 |       0.00000 |       0.00417 |       0.00081 |       0.77914
     -0.00407 |       0.00000 |       0.00407 |       0.00079 |       0.77920
     -0.00460 |       0.00000 |       0.00403 |       0.00075 |       0.77897
     -0.00472 |       0.00000 |       0.00393 |       0.00088 |       0.77882
     -0.00494 |       0.00000 |       0.00388 |       0.00078 |       0.77887
     -0.00521 |       0.00000 |       0.00382 |       0.00089 |       0.77950
Evaluating losses...
     -0.00547 |       0.00000 |       0.00379 |       0.00080 |      

     -0.00278 |       0.00000 |       0.00060 |       0.00058 |       0.75610
     -0.00329 |       0.00000 |       0.00057 |       0.00059 |       0.75626
     -0.00331 |       0.00000 |       0.00054 |       0.00062 |       0.75641
Evaluating losses...
     -0.00375 |       0.00000 |       0.00052 |       0.00072 |       0.75638
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.1           |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1817          |
| TimeElapsed     | 6.19e+03      |
| TimestepsSoFar  | 3719168       |
| ev_tdlam_before | 0.346         |
| loss_ent        | 0.7563847     |
| loss_kl         | 0.0007167118  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0037472863 |
| loss_vf_loss    | 0.0005199696  |
-----------------------------------
********** Iteration 908 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     6.68e-06 |       0.00000 |  

********** Iteration 913 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00016 |       0.00000 |       0.00153 |       0.00020 |       0.83320
     -0.00114 |       0.00000 |       0.00133 |       0.00040 |       0.83351
     -0.00158 |       0.00000 |       0.00124 |       0.00047 |       0.83353
     -0.00186 |       0.00000 |       0.00118 |       0.00048 |       0.83316
     -0.00216 |       0.00000 |       0.00111 |       0.00037 |       0.83393
     -0.00233 |       0.00000 |       0.00105 |       0.00041 |       0.83350
     -0.00246 |       0.00000 |       0.00098 |       0.00041 |       0.83369
     -0.00209 |       0.00000 |       0.00091 |       0.00054 |       0.83305
     -0.00271 |       0.00000 |       0.00086 |       0.00049 |       0.83411
     -0.00286 |       0.00000 |       0.00079 |       0.00045 |       0.83403
Evaluating losses...
     -0.00322 |       0.00000 |       0.00075 |       0.00045 |      

     -0.00413 |       0.00000 |       0.00310 |       0.00075 |       0.75647
     -0.00429 |       0.00000 |       0.00302 |       0.00080 |       0.75648
     -0.00428 |       0.00000 |       0.00293 |       0.00083 |       0.75637
     -0.00441 |       0.00000 |       0.00285 |       0.00083 |       0.75630
Evaluating losses...
     -0.00487 |       0.00000 |       0.00275 |       0.00086 |       0.75604
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.12          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1831          |
| TimeElapsed     | 6.45e+03      |
| TimestepsSoFar  | 3764224       |
| ev_tdlam_before | 0.322         |
| loss_ent        | 0.7560377     |
| loss_kl         | 0.00085925695 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0048684897 |
| loss_vf_loss    | 0.002754372   |
-----------------------------------
********** Iteration 919 ************
Optimizing...
     pol_surr |    pol_entpen |  

********** Iteration 924 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00031 |       0.00000 |       0.00132 |       0.00020 |       0.78773
     -0.00118 |       0.00000 |       0.00089 |       0.00065 |       0.78665
     -0.00160 |       0.00000 |       0.00079 |       0.00060 |       0.78628
     -0.00231 |       0.00000 |       0.00073 |       0.00052 |       0.78677
     -0.00251 |       0.00000 |       0.00068 |       0.00053 |       0.78731
     -0.00283 |       0.00000 |       0.00065 |       0.00048 |       0.78728
     -0.00298 |       0.00000 |       0.00063 |       0.00053 |       0.78752
     -0.00323 |       0.00000 |       0.00060 |       0.00055 |       0.78718
     -0.00335 |       0.00000 |       0.00059 |       0.00064 |       0.78696
     -0.00359 |       0.00000 |       0.00057 |       0.00054 |       0.78767
Evaluating losses...
     -0.00404 |       0.00000 |       0.00056 |       0.00047 |      

     -0.00169 |       0.00000 |       0.00119 |       0.00047 |       0.74583
     -0.00191 |       0.00000 |       0.00116 |       0.00052 |       0.74534
     -0.00224 |       0.00000 |       0.00114 |       0.00053 |       0.74593
Evaluating losses...
     -0.00236 |       0.00000 |       0.00109 |       0.00053 |       0.74615
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.09          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1846          |
| TimeElapsed     | 6.51e+03      |
| TimestepsSoFar  | 3809280       |
| ev_tdlam_before | 0.115         |
| loss_ent        | 0.74615484    |
| loss_kl         | 0.00052760215 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0023643489 |
| loss_vf_loss    | 0.0010877406  |
-----------------------------------
********** Iteration 930 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00070 |       0.00000 |  

********** Iteration 935 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00016 |       0.00000 |       0.00025 |       0.00011 |       0.76411
     -0.00115 |       0.00000 |       0.00018 |       0.00020 |       0.76331
     -0.00179 |       0.00000 |       0.00015 |       0.00024 |       0.76301
     -0.00242 |       0.00000 |       0.00013 |       0.00027 |       0.76334
     -0.00300 |       0.00000 |       0.00011 |       0.00029 |       0.76301
     -0.00352 |       0.00000 |      9.97e-05 |       0.00032 |       0.76314
     -0.00363 |       0.00000 |      9.12e-05 |       0.00038 |       0.76265
     -0.00384 |       0.00000 |      8.32e-05 |       0.00040 |       0.76321
     -0.00399 |       0.00000 |      7.75e-05 |       0.00048 |       0.76278
     -0.00422 |       0.00000 |      7.28e-05 |       0.00046 |       0.76290
Evaluating losses...
     -0.00466 |       0.00000 |      6.96e-05 |       0.00049 |      

     -0.00353 |       0.00000 |      9.57e-05 |       0.00041 |       0.77256
     -0.00391 |       0.00000 |      9.03e-05 |       0.00045 |       0.77237
     -0.00425 |       0.00000 |      8.49e-05 |       0.00044 |       0.77200
Evaluating losses...
     -0.00465 |       0.00000 |      8.17e-05 |       0.00042 |       0.77237
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.11          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1861          |
| TimeElapsed     | 6.55e+03      |
| TimestepsSoFar  | 3854336       |
| ev_tdlam_before | -0.846        |
| loss_ent        | 0.7723666     |
| loss_kl         | 0.00041617567 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.004649059  |
| loss_vf_loss    | 8.172286e-05  |
-----------------------------------
********** Iteration 941 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00068 |       0.00000 |  

********** Iteration 946 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00020 |       0.00000 |       0.00221 |       0.00011 |       0.76058
     -0.00152 |       0.00000 |       0.00195 |       0.00028 |       0.76082
     -0.00203 |       0.00000 |       0.00174 |       0.00026 |       0.76104
     -0.00259 |       0.00000 |       0.00160 |       0.00028 |       0.76055
     -0.00278 |       0.00000 |       0.00149 |       0.00034 |       0.76118
     -0.00288 |       0.00000 |       0.00140 |       0.00041 |       0.76078
     -0.00317 |       0.00000 |       0.00133 |       0.00043 |       0.76119
     -0.00332 |       0.00000 |       0.00124 |       0.00038 |       0.76084
     -0.00321 |       0.00000 |       0.00118 |       0.00041 |       0.76028
     -0.00341 |       0.00000 |       0.00111 |       0.00038 |       0.76033
Evaluating losses...
     -0.00347 |       0.00000 |       0.00106 |       0.00047 |      

     -0.00416 |       0.00000 |       0.00013 |       0.00057 |       0.77898
     -0.00456 |       0.00000 |       0.00012 |       0.00054 |       0.77883
     -0.00501 |       0.00000 |       0.00012 |       0.00053 |       0.77862
Evaluating losses...
     -0.00565 |       0.00000 |       0.00011 |       0.00052 |       0.77814
------------------------------------
| EpLenMean       | 3.01e+03       |
| EpRewMean       | 0.13           |
| EpThisIter      | 1              |
| EpisodesSoFar   | 1876           |
| TimeElapsed     | 6.6e+03        |
| TimestepsSoFar  | 3899392        |
| ev_tdlam_before | 0.115          |
| loss_ent        | 0.77813786     |
| loss_kl         | 0.00051812397  |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.005649334   |
| loss_vf_loss    | 0.000114783805 |
------------------------------------
********** Iteration 952 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00013 |    

********** Iteration 957 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00010 |       0.00000 |       0.00406 |       0.00034 |       0.76049
     -0.00105 |       0.00000 |       0.00291 |       0.00022 |       0.76058
     -0.00166 |       0.00000 |       0.00253 |       0.00033 |       0.76049
     -0.00188 |       0.00000 |       0.00235 |       0.00039 |       0.76020
     -0.00201 |       0.00000 |       0.00223 |       0.00045 |       0.76014
     -0.00225 |       0.00000 |       0.00215 |       0.00040 |       0.76014
     -0.00260 |       0.00000 |       0.00205 |       0.00041 |       0.75993
     -0.00256 |       0.00000 |       0.00203 |       0.00057 |       0.75955
     -0.00249 |       0.00000 |       0.00196 |       0.00055 |       0.75974
     -0.00263 |       0.00000 |       0.00190 |       0.00047 |       0.76013
Evaluating losses...
     -0.00305 |       0.00000 |       0.00185 |       0.00046 |      

     -0.00240 |       0.00000 |       0.00200 |       0.00029 |       0.74022
     -0.00250 |       0.00000 |       0.00196 |       0.00034 |       0.74010
     -0.00269 |       0.00000 |       0.00192 |       0.00034 |       0.73991
Evaluating losses...
     -0.00298 |       0.00000 |       0.00186 |       0.00035 |       0.74029
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.11          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1891          |
| TimeElapsed     | 6.65e+03      |
| TimestepsSoFar  | 3944448       |
| ev_tdlam_before | 0.461         |
| loss_ent        | 0.7402946     |
| loss_kl         | 0.00035247614 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0029806714 |
| loss_vf_loss    | 0.0018561798  |
-----------------------------------
********** Iteration 963 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -2.47e-05 |       0.00000 |  

********** Iteration 968 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00055 |       0.00000 |       0.00272 |       0.00022 |       0.74429
     -0.00062 |       0.00000 |       0.00223 |       0.00027 |       0.74398
     -0.00116 |       0.00000 |       0.00199 |       0.00021 |       0.74367
     -0.00138 |       0.00000 |       0.00185 |       0.00026 |       0.74305
     -0.00160 |       0.00000 |       0.00174 |       0.00032 |       0.74235
     -0.00163 |       0.00000 |       0.00167 |       0.00038 |       0.74252
     -0.00180 |       0.00000 |       0.00159 |       0.00031 |       0.74251
     -0.00196 |       0.00000 |       0.00152 |       0.00035 |       0.74223
     -0.00207 |       0.00000 |       0.00147 |       0.00031 |       0.74213
     -0.00211 |       0.00000 |       0.00142 |       0.00037 |       0.74184
Evaluating losses...
     -0.00230 |       0.00000 |       0.00137 |       0.00042 |      

     -0.00223 |       0.00000 |       0.00182 |       0.00082 |       0.73451
     -0.00226 |       0.00000 |       0.00183 |       0.00070 |       0.73483
     -0.00245 |       0.00000 |       0.00183 |       0.00046 |       0.73520
Evaluating losses...
     -0.00279 |       0.00000 |       0.00180 |       0.00054 |       0.73516
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.19          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1906          |
| TimeElapsed     | 6.7e+03       |
| TimestepsSoFar  | 3989504       |
| ev_tdlam_before | 0.491         |
| loss_ent        | 0.7351635     |
| loss_kl         | 0.0005441246  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0027919128 |
| loss_vf_loss    | 0.001797122   |
-----------------------------------
********** Iteration 974 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00017 |       0.00000 |  

********** Iteration 979 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -7.79e-06 |       0.00000 |       0.00013 |       0.00011 |       0.73591
     -0.00161 |       0.00000 |       0.00011 |       0.00021 |       0.73526
     -0.00230 |       0.00000 |      9.89e-05 |       0.00023 |       0.73574
     -0.00284 |       0.00000 |      9.18e-05 |       0.00025 |       0.73584
     -0.00314 |       0.00000 |      8.61e-05 |       0.00030 |       0.73675
     -0.00358 |       0.00000 |      8.16e-05 |       0.00036 |       0.73626
     -0.00393 |       0.00000 |      7.78e-05 |       0.00032 |       0.73658
     -0.00420 |       0.00000 |      7.46e-05 |       0.00037 |       0.73709
     -0.00432 |       0.00000 |      7.19e-05 |       0.00039 |       0.73688
     -0.00452 |       0.00000 |      6.98e-05 |       0.00042 |       0.73736
Evaluating losses...
     -0.00511 |       0.00000 |      6.72e-05 |       0.00043 |      

     -0.00363 |       0.00000 |      4.40e-05 |       0.00030 |       0.72729
     -0.00393 |       0.00000 |      4.21e-05 |       0.00029 |       0.72701
     -0.00406 |       0.00000 |      4.08e-05 |       0.00032 |       0.72667
Evaluating losses...
     -0.00441 |       0.00000 |      3.97e-05 |       0.00033 |       0.72725
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.25          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1921          |
| TimeElapsed     | 6.96e+03      |
| TimestepsSoFar  | 4034560       |
| ev_tdlam_before | -1.77         |
| loss_ent        | 0.7272481     |
| loss_kl         | 0.00032627748 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0044083456 |
| loss_vf_loss    | 3.969906e-05  |
-----------------------------------
********** Iteration 985 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00025 |       0.00000 |  

********** Iteration 990 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     2.70e-05 |       0.00000 |       0.00029 |       0.00013 |       0.76017
     -0.00124 |       0.00000 |       0.00020 |       0.00025 |       0.76033
     -0.00206 |       0.00000 |       0.00016 |       0.00029 |       0.76055
     -0.00225 |       0.00000 |       0.00014 |       0.00025 |       0.76045
     -0.00276 |       0.00000 |       0.00013 |       0.00028 |       0.76054
     -0.00292 |       0.00000 |       0.00012 |       0.00028 |       0.75999
     -0.00332 |       0.00000 |       0.00011 |       0.00033 |       0.75985
     -0.00344 |       0.00000 |       0.00010 |       0.00036 |       0.75975
     -0.00358 |       0.00000 |      9.74e-05 |       0.00038 |       0.75990
     -0.00374 |       0.00000 |      9.30e-05 |       0.00041 |       0.75991
Evaluating losses...
     -0.00412 |       0.00000 |      9.01e-05 |       0.00042 |      

     -0.00185 |       0.00000 |       0.00123 |       0.00031 |       0.76779
     -0.00197 |       0.00000 |       0.00120 |       0.00035 |       0.76787
     -0.00179 |       0.00000 |       0.00119 |       0.00038 |       0.76820
Evaluating losses...
     -0.00233 |       0.00000 |       0.00115 |       0.00032 |       0.76858
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.24          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1936          |
| TimeElapsed     | 7.01e+03      |
| TimestepsSoFar  | 4079616       |
| ev_tdlam_before | 0.618         |
| loss_ent        | 0.7685819     |
| loss_kl         | 0.00031765428 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0023270156 |
| loss_vf_loss    | 0.0011512205  |
-----------------------------------
********** Iteration 996 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -8.47e-05 |       0.00000 |  

********** Iteration 1001 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     8.91e-05 |       0.00000 |       0.00271 |       0.00017 |       0.73927
     -0.00076 |       0.00000 |       0.00183 |       0.00020 |       0.73978
     -0.00114 |       0.00000 |       0.00163 |       0.00018 |       0.73969
     -0.00137 |       0.00000 |       0.00153 |       0.00025 |       0.74009
     -0.00161 |       0.00000 |       0.00143 |       0.00023 |       0.73987
     -0.00170 |       0.00000 |       0.00138 |       0.00024 |       0.73997
     -0.00180 |       0.00000 |       0.00135 |       0.00025 |       0.73988
     -0.00189 |       0.00000 |       0.00131 |       0.00029 |       0.74019
     -0.00196 |       0.00000 |       0.00129 |       0.00028 |       0.74020
     -0.00207 |       0.00000 |       0.00126 |       0.00032 |       0.74027
Evaluating losses...
     -0.00236 |       0.00000 |       0.00123 |       0.00030 |     

     -0.00242 |       0.00000 |      2.56e-05 |       0.00025 |       0.80021
     -0.00246 |       0.00000 |      2.46e-05 |       0.00027 |       0.80051
     -0.00280 |       0.00000 |      2.40e-05 |       0.00028 |       0.80059
Evaluating losses...
     -0.00322 |       0.00000 |      2.31e-05 |       0.00029 |       0.80048
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.27          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1951          |
| TimeElapsed     | 7.06e+03      |
| TimestepsSoFar  | 4124672       |
| ev_tdlam_before | -1.62         |
| loss_ent        | 0.8004822     |
| loss_kl         | 0.00028575776 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0032152021 |
| loss_vf_loss    | 2.3095545e-05 |
-----------------------------------
********** Iteration 1007 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00112 |       0.00000 | 

********** Iteration 1012 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00078 |       0.00000 |       0.00033 |      9.73e-05 |       0.76843
     -0.00133 |       0.00000 |       0.00024 |       0.00019 |       0.76889
     -0.00172 |       0.00000 |       0.00020 |       0.00016 |       0.76943
     -0.00159 |       0.00000 |       0.00017 |       0.00024 |       0.77083
     -0.00219 |       0.00000 |       0.00015 |       0.00017 |       0.77053
     -0.00232 |       0.00000 |       0.00014 |       0.00018 |       0.77036
     -0.00243 |       0.00000 |       0.00013 |       0.00021 |       0.77017
     -0.00266 |       0.00000 |       0.00012 |       0.00020 |       0.77039
     -0.00268 |       0.00000 |       0.00011 |       0.00026 |       0.77056
     -0.00281 |       0.00000 |       0.00010 |       0.00024 |       0.77104
Evaluating losses...
     -0.00314 |       0.00000 |      9.83e-05 |       0.00024 |     

     -0.00235 |       0.00000 |       0.00102 |       0.00025 |       0.74799
     -0.00228 |       0.00000 |       0.00096 |       0.00026 |       0.74797
     -0.00268 |       0.00000 |       0.00092 |       0.00027 |       0.74830
Evaluating losses...
     -0.00252 |       0.00000 |       0.00088 |       0.00029 |       0.74853
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.24          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1966          |
| TimeElapsed     | 7.1e+03       |
| TimestepsSoFar  | 4169728       |
| ev_tdlam_before | 0.373         |
| loss_ent        | 0.7485291     |
| loss_kl         | 0.0002942472  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0025213887 |
| loss_vf_loss    | 0.00088317815 |
-----------------------------------
********** Iteration 1018 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00014 |       0.00000 | 

********** Iteration 1023 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -8.82e-05 |       0.00000 |       0.00171 |       0.00016 |       0.78034
     -0.00125 |       0.00000 |       0.00159 |       0.00018 |       0.78088
     -0.00151 |       0.00000 |       0.00152 |       0.00016 |       0.78020
     -0.00169 |       0.00000 |       0.00149 |       0.00017 |       0.78004
     -0.00185 |       0.00000 |       0.00145 |       0.00018 |       0.78029
     -0.00188 |       0.00000 |       0.00142 |       0.00020 |       0.77975
     -0.00201 |       0.00000 |       0.00141 |       0.00020 |       0.78012
     -0.00208 |       0.00000 |       0.00140 |       0.00023 |       0.78003
     -0.00215 |       0.00000 |       0.00138 |       0.00023 |       0.78001
     -0.00225 |       0.00000 |       0.00136 |       0.00023 |       0.78006
Evaluating losses...
     -0.00234 |       0.00000 |       0.00135 |       0.00023 |     

     -0.00209 |       0.00000 |       0.00179 |       0.00019 |       0.76889
     -0.00225 |       0.00000 |       0.00175 |       0.00021 |       0.76839
     -0.00218 |       0.00000 |       0.00171 |       0.00021 |       0.76870
Evaluating losses...
     -0.00258 |       0.00000 |       0.00166 |       0.00019 |       0.76855
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.21          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1981          |
| TimeElapsed     | 7.15e+03      |
| TimestepsSoFar  | 4214784       |
| ev_tdlam_before | 0.33          |
| loss_ent        | 0.7685507     |
| loss_kl         | 0.00019130537 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0025772518 |
| loss_vf_loss    | 0.0016554457  |
-----------------------------------
********** Iteration 1029 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00010 |       0.00000 | 

********** Iteration 1034 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00018 |       0.00000 |       0.00031 |       0.00017 |       0.74106
     -0.00055 |       0.00000 |       0.00023 |       0.00014 |       0.74023
     -0.00112 |       0.00000 |       0.00020 |       0.00016 |       0.74008
     -0.00137 |       0.00000 |       0.00017 |       0.00012 |       0.74070
     -0.00163 |       0.00000 |       0.00015 |       0.00015 |       0.74044
     -0.00183 |       0.00000 |       0.00014 |       0.00018 |       0.74035
     -0.00196 |       0.00000 |       0.00013 |       0.00020 |       0.74033
     -0.00200 |       0.00000 |       0.00013 |       0.00022 |       0.74007
     -0.00226 |       0.00000 |       0.00012 |       0.00022 |       0.74044
     -0.00245 |       0.00000 |       0.00011 |       0.00021 |       0.74060
Evaluating losses...
     -0.00262 |       0.00000 |       0.00011 |       0.00023 |     

     -0.00247 |       0.00000 |       0.00014 |       0.00020 |       0.76064
     -0.00258 |       0.00000 |       0.00013 |       0.00017 |       0.76090
     -0.00275 |       0.00000 |       0.00013 |       0.00020 |       0.76063
     -0.00293 |       0.00000 |       0.00012 |       0.00023 |       0.76088
     -0.00307 |       0.00000 |       0.00012 |       0.00024 |       0.76071
Evaluating losses...
     -0.00333 |       0.00000 |       0.00011 |       0.00026 |       0.76049
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.29          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1996          |
| TimeElapsed     | 7.41e+03      |
| TimestepsSoFar  | 4259840       |
| ev_tdlam_before | -0.794        |
| loss_ent        | 0.7604949     |
| loss_kl         | 0.00026428804 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0033304638 |
| loss_vf_loss    | 0.00011381646 |
-----------------------------------
*******

********** Iteration 1045 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00011 |       0.00000 |       0.00025 |      7.21e-05 |       0.75872
     -0.00092 |       0.00000 |       0.00019 |       0.00011 |       0.75857
     -0.00126 |       0.00000 |       0.00016 |       0.00015 |       0.75815
     -0.00153 |       0.00000 |       0.00014 |       0.00014 |       0.75847
     -0.00191 |       0.00000 |       0.00012 |       0.00015 |       0.75810
     -0.00195 |       0.00000 |       0.00011 |       0.00016 |       0.75776
     -0.00189 |       0.00000 |       0.00011 |       0.00023 |       0.75838
     -0.00224 |       0.00000 |       0.00010 |       0.00021 |       0.75754
     -0.00237 |       0.00000 |      9.84e-05 |       0.00024 |       0.75783
     -0.00252 |       0.00000 |      9.47e-05 |       0.00022 |       0.75734
Evaluating losses...
     -0.00279 |       0.00000 |      9.17e-05 |       0.00023 |     

     -0.00133 |       0.00000 |       0.00072 |       0.00016 |       0.74521
     -0.00140 |       0.00000 |       0.00070 |       0.00013 |       0.74525
     -0.00143 |       0.00000 |       0.00069 |       0.00017 |       0.74485
Evaluating losses...
     -0.00145 |       0.00000 |       0.00067 |       0.00018 |       0.74462
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.16          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2011          |
| TimeElapsed     | 7.47e+03      |
| TimestepsSoFar  | 4304896       |
| ev_tdlam_before | 0.478         |
| loss_ent        | 0.74461555    |
| loss_kl         | 0.00017778028 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0014488818 |
| loss_vf_loss    | 0.00067150395 |
-----------------------------------
********** Iteration 1051 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00049 |       0.00000 | 

********** Iteration 1056 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     4.04e-05 |       0.00000 |       0.00086 |      9.43e-05 |       0.78830
     -0.00046 |       0.00000 |       0.00049 |       0.00015 |       0.78765
     -0.00068 |       0.00000 |       0.00040 |       0.00014 |       0.78829
     -0.00108 |       0.00000 |       0.00035 |       0.00014 |       0.78834
     -0.00117 |       0.00000 |       0.00033 |       0.00015 |       0.78831
     -0.00136 |       0.00000 |       0.00031 |       0.00015 |       0.78854
     -0.00146 |       0.00000 |       0.00030 |       0.00015 |       0.78876
     -0.00159 |       0.00000 |       0.00029 |       0.00018 |       0.78900
     -0.00147 |       0.00000 |       0.00028 |       0.00015 |       0.78915
     -0.00175 |       0.00000 |       0.00027 |       0.00019 |       0.78871
Evaluating losses...
     -0.00192 |       0.00000 |       0.00026 |       0.00023 |     

     -0.00202 |       0.00000 |       0.00015 |       0.00016 |       0.75311
     -0.00198 |       0.00000 |       0.00014 |       0.00017 |       0.75320
     -0.00217 |       0.00000 |       0.00014 |       0.00018 |       0.75290
Evaluating losses...
     -0.00259 |       0.00000 |       0.00013 |       0.00017 |       0.75296
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.17          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2026          |
| TimeElapsed     | 7.51e+03      |
| TimestepsSoFar  | 4349952       |
| ev_tdlam_before | -0.915        |
| loss_ent        | 0.7529566     |
| loss_kl         | 0.00016760516 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0025948826 |
| loss_vf_loss    | 0.00013266758 |
-----------------------------------
********** Iteration 1062 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     3.28e-05 |       0.00000 | 

********** Iteration 1067 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00015 |       0.00000 |       0.00203 |      5.76e-05 |       0.74083
     -0.00060 |       0.00000 |       0.00141 |      7.70e-05 |       0.74076
     -0.00089 |       0.00000 |       0.00104 |      7.13e-05 |       0.74081
     -0.00125 |       0.00000 |       0.00081 |      8.81e-05 |       0.74091
     -0.00127 |       0.00000 |       0.00069 |      8.69e-05 |       0.74131
     -0.00144 |       0.00000 |       0.00061 |       0.00010 |       0.74141
     -0.00158 |       0.00000 |       0.00055 |      9.90e-05 |       0.74157
     -0.00164 |       0.00000 |       0.00052 |       0.00010 |       0.74173
     -0.00173 |       0.00000 |       0.00049 |       0.00011 |       0.74158
     -0.00181 |       0.00000 |       0.00047 |       0.00013 |       0.74205
Evaluating losses...
     -0.00198 |       0.00000 |       0.00046 |       0.00012 |     

     -0.00098 |       0.00000 |       0.00064 |       0.00012 |       0.76248
     -0.00109 |       0.00000 |       0.00063 |       0.00011 |       0.76245
     -0.00104 |       0.00000 |       0.00062 |       0.00013 |       0.76269
Evaluating losses...
     -0.00136 |       0.00000 |       0.00060 |       0.00012 |       0.76259
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.19          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2041          |
| TimeElapsed     | 7.56e+03      |
| TimestepsSoFar  | 4395008       |
| ev_tdlam_before | 0.727         |
| loss_ent        | 0.76258963    |
| loss_kl         | 0.00012472183 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0013567319 |
| loss_vf_loss    | 0.0005996862  |
-----------------------------------
********** Iteration 1073 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00019 |       0.00000 | 

********** Iteration 1078 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00015 |       0.00000 |       0.00043 |      2.63e-05 |       0.77770
     -0.00042 |       0.00000 |       0.00028 |      5.85e-05 |       0.77725
     -0.00045 |       0.00000 |       0.00021 |       0.00010 |       0.77682
     -0.00088 |       0.00000 |       0.00017 |      7.08e-05 |       0.77699
     -0.00090 |       0.00000 |       0.00015 |      9.07e-05 |       0.77701
     -0.00097 |       0.00000 |       0.00014 |      9.00e-05 |       0.77696
     -0.00113 |       0.00000 |       0.00013 |      9.95e-05 |       0.77708
     -0.00131 |       0.00000 |       0.00012 |       0.00012 |       0.77693
     -0.00129 |       0.00000 |       0.00011 |       0.00010 |       0.77679
     -0.00137 |       0.00000 |       0.00011 |       0.00011 |       0.77686
Evaluating losses...
     -0.00155 |       0.00000 |       0.00010 |       0.00012 |     

     -0.00145 |       0.00000 |       0.00019 |       0.00011 |       0.71556
     -0.00146 |       0.00000 |       0.00018 |       0.00011 |       0.71525
     -0.00160 |       0.00000 |       0.00017 |       0.00012 |       0.71533
Evaluating losses...
     -0.00181 |       0.00000 |       0.00016 |       0.00012 |       0.71544
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.17          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2056          |
| TimeElapsed     | 7.6e+03       |
| TimestepsSoFar  | 4440064       |
| ev_tdlam_before | -1.86         |
| loss_ent        | 0.7154386     |
| loss_kl         | 0.00012332143 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0018057765 |
| loss_vf_loss    | 0.00015878907 |
-----------------------------------
********** Iteration 1084 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00018 |       0.00000 | 

********** Iteration 1089 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00011 |       0.00000 |      8.24e-05 |      5.57e-05 |       0.75273
     -0.00059 |       0.00000 |      7.11e-05 |      5.71e-05 |       0.75299
     -0.00101 |       0.00000 |      6.49e-05 |      7.58e-05 |       0.75353
     -0.00126 |       0.00000 |      6.04e-05 |      7.03e-05 |       0.75365
     -0.00150 |       0.00000 |      5.76e-05 |      7.47e-05 |       0.75393
     -0.00160 |       0.00000 |      5.47e-05 |      8.08e-05 |       0.75422
     -0.00175 |       0.00000 |      5.21e-05 |       0.00010 |       0.75428
     -0.00187 |       0.00000 |      5.01e-05 |       0.00011 |       0.75434
     -0.00201 |       0.00000 |      4.78e-05 |       0.00011 |       0.75434
     -0.00197 |       0.00000 |      4.62e-05 |       0.00011 |       0.75441
Evaluating losses...
     -0.00226 |       0.00000 |      4.49e-05 |       0.00014 |     

     -0.00178 |       0.00000 |       0.00227 |       0.00013 |       0.75497
     -0.00182 |       0.00000 |       0.00220 |       0.00013 |       0.75500
     -0.00188 |       0.00000 |       0.00212 |       0.00013 |       0.75502
Evaluating losses...
     -0.00206 |       0.00000 |       0.00207 |       0.00014 |       0.75518
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.23          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 2071          |
| TimeElapsed     | 7.65e+03      |
| TimestepsSoFar  | 4485120       |
| ev_tdlam_before | 0.682         |
| loss_ent        | 0.75517994    |
| loss_kl         | 0.00013719164 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0020565349 |
| loss_vf_loss    | 0.002069453   |
-----------------------------------
********** Iteration 1095 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     7.17e-05 |       0.00000 | 

********** Iteration 1100 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     9.41e-05 |       0.00000 |       0.00155 |      1.71e-05 |       0.72363
     -0.00045 |       0.00000 |       0.00135 |      4.89e-05 |       0.72353
     -0.00064 |       0.00000 |       0.00125 |      4.21e-05 |       0.72354
     -0.00065 |       0.00000 |       0.00119 |      5.28e-05 |       0.72320
     -0.00087 |       0.00000 |       0.00114 |      5.50e-05 |       0.72355
     -0.00097 |       0.00000 |       0.00111 |      7.06e-05 |       0.72332
     -0.00112 |       0.00000 |       0.00108 |      6.33e-05 |       0.72373
     -0.00110 |       0.00000 |       0.00106 |      7.18e-05 |       0.72385
     -0.00123 |       0.00000 |       0.00104 |      7.81e-05 |       0.72406
     -0.00129 |       0.00000 |       0.00102 |      8.36e-05 |       0.72413
Evaluating losses...
     -0.00141 |       0.00000 |       0.00101 |      8.86e-05 |     

     -0.00136 |       0.00000 |       0.00015 |      7.88e-05 |       0.74398
     -0.00140 |       0.00000 |       0.00015 |      6.81e-05 |       0.74390
     -0.00152 |       0.00000 |       0.00014 |      7.68e-05 |       0.74376
Evaluating losses...
     -0.00168 |       0.00000 |       0.00014 |      8.02e-05 |       0.74402
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.26          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2086          |
| TimeElapsed     | 7.9e+03       |
| TimestepsSoFar  | 4530176       |
| ev_tdlam_before | -0.501        |
| loss_ent        | 0.7440233     |
| loss_kl         | 8.019444e-05  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0016753578 |
| loss_vf_loss    | 0.0001381844  |
-----------------------------------
********** Iteration 1106 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00012 |       0.00000 | 

********** Iteration 1111 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -1.29e-05 |       0.00000 |       0.00029 |      3.10e-05 |       0.74097
     -0.00053 |       0.00000 |       0.00022 |      5.13e-05 |       0.74101
     -0.00080 |       0.00000 |       0.00018 |      5.30e-05 |       0.74080
     -0.00091 |       0.00000 |       0.00016 |      5.71e-05 |       0.74070
     -0.00104 |       0.00000 |       0.00015 |      6.29e-05 |       0.74071
     -0.00111 |       0.00000 |       0.00014 |      6.45e-05 |       0.74071
     -0.00120 |       0.00000 |       0.00013 |      6.61e-05 |       0.74080
     -0.00129 |       0.00000 |       0.00012 |      7.90e-05 |       0.74055
     -0.00135 |       0.00000 |       0.00011 |      7.83e-05 |       0.74076
     -0.00142 |       0.00000 |       0.00011 |      8.18e-05 |       0.74070
Evaluating losses...
     -0.00160 |       0.00000 |       0.00010 |      8.14e-05 |     

     -0.00151 |       0.00000 |      9.11e-05 |      7.89e-05 |       0.75143
     -0.00153 |       0.00000 |      8.69e-05 |      8.46e-05 |       0.75151
     -0.00161 |       0.00000 |      8.34e-05 |      6.74e-05 |       0.75144
Evaluating losses...
     -0.00183 |       0.00000 |      8.09e-05 |      8.09e-05 |       0.75114
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.14          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2101          |
| TimeElapsed     | 7.96e+03      |
| TimestepsSoFar  | 4575232       |
| ev_tdlam_before | -1.13         |
| loss_ent        | 0.75114363    |
| loss_kl         | 8.093606e-05  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0018289501 |
| loss_vf_loss    | 8.087273e-05  |
-----------------------------------
********** Iteration 1117 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     1.83e-05 |       0.00000 | 

********** Iteration 1122 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00012 |       0.00000 |      8.70e-05 |      1.87e-05 |       0.75878
     -0.00035 |       0.00000 |      7.04e-05 |      3.07e-05 |       0.75884
     -0.00052 |       0.00000 |      6.11e-05 |      3.99e-05 |       0.75887
     -0.00068 |       0.00000 |      5.47e-05 |      4.12e-05 |       0.75912
     -0.00074 |       0.00000 |      4.98e-05 |      5.62e-05 |       0.75912
     -0.00082 |       0.00000 |      4.63e-05 |      5.40e-05 |       0.75933
     -0.00107 |       0.00000 |      4.34e-05 |      4.86e-05 |       0.75939
     -0.00101 |       0.00000 |      4.12e-05 |      6.26e-05 |       0.75946
     -0.00118 |       0.00000 |      3.92e-05 |      5.99e-05 |       0.75974
     -0.00125 |       0.00000 |      3.78e-05 |      6.05e-05 |       0.75984
Evaluating losses...
     -0.00148 |       0.00000 |      3.67e-05 |      5.42e-05 |     

     -0.00103 |       0.00000 |       0.00067 |      4.47e-05 |       0.74381
     -0.00104 |       0.00000 |       0.00067 |      4.09e-05 |       0.74362
     -0.00106 |       0.00000 |       0.00065 |      4.71e-05 |       0.74352
Evaluating losses...
     -0.00115 |       0.00000 |       0.00064 |      4.67e-05 |       0.74376
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.22          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2116          |
| TimeElapsed     | 8e+03         |
| TimestepsSoFar  | 4620288       |
| ev_tdlam_before | 0.719         |
| loss_ent        | 0.74376       |
| loss_kl         | 4.6684825e-05 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0011519745 |
| loss_vf_loss    | 0.0006428097  |
-----------------------------------
********** Iteration 1128 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00021 |       0.00000 | 

********** Iteration 1133 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00013 |       0.00000 |       0.00133 |      1.60e-05 |       0.72368
     -0.00026 |       0.00000 |       0.00091 |      1.61e-05 |       0.72386
     -0.00033 |       0.00000 |       0.00074 |      2.56e-05 |       0.72391
     -0.00049 |       0.00000 |       0.00064 |      2.37e-05 |       0.72369
     -0.00054 |       0.00000 |       0.00056 |      2.15e-05 |       0.72346
     -0.00059 |       0.00000 |       0.00052 |      2.60e-05 |       0.72342
     -0.00062 |       0.00000 |       0.00049 |      2.72e-05 |       0.72330
     -0.00066 |       0.00000 |       0.00046 |      3.21e-05 |       0.72307
     -0.00071 |       0.00000 |       0.00044 |      3.47e-05 |       0.72306
     -0.00070 |       0.00000 |       0.00042 |      3.46e-05 |       0.72297
Evaluating losses...
     -0.00081 |       0.00000 |       0.00041 |      3.14e-05 |     

     -0.00120 |       0.00000 |      6.82e-05 |      4.70e-05 |       0.74731
     -0.00125 |       0.00000 |      6.60e-05 |      5.41e-05 |       0.74734
     -0.00136 |       0.00000 |      6.39e-05 |      5.27e-05 |       0.74734
Evaluating losses...
     -0.00152 |       0.00000 |      6.26e-05 |      5.34e-05 |       0.74727
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.26          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2131          |
| TimeElapsed     | 8.05e+03      |
| TimestepsSoFar  | 4665344       |
| ev_tdlam_before | -0.889        |
| loss_ent        | 0.74727315    |
| loss_kl         | 5.339005e-05  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0015242551 |
| loss_vf_loss    | 6.260486e-05  |
-----------------------------------
********** Iteration 1139 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00011 |       0.00000 | 

********** Iteration 1144 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00032 |       0.00000 |       0.00199 |      1.60e-05 |       0.74504
     -0.00019 |       0.00000 |       0.00188 |      2.83e-05 |       0.74511
     -0.00021 |       0.00000 |       0.00181 |      2.75e-05 |       0.74498
     -0.00041 |       0.00000 |       0.00176 |      3.32e-05 |       0.74514
     -0.00051 |       0.00000 |       0.00173 |      2.81e-05 |       0.74520
     -0.00062 |       0.00000 |       0.00169 |      3.63e-05 |       0.74519
     -0.00071 |       0.00000 |       0.00166 |      3.23e-05 |       0.74532
     -0.00073 |       0.00000 |       0.00164 |      3.63e-05 |       0.74546
     -0.00067 |       0.00000 |       0.00162 |      5.08e-05 |       0.74508
     -0.00078 |       0.00000 |       0.00160 |      3.81e-05 |       0.74537
Evaluating losses...
     -0.00087 |       0.00000 |       0.00158 |      3.40e-05 |     

     -0.00085 |       0.00000 |       0.00012 |      2.70e-05 |       0.73827
     -0.00095 |       0.00000 |       0.00012 |      2.55e-05 |       0.73819
     -0.00098 |       0.00000 |       0.00011 |      2.85e-05 |       0.73806
Evaluating losses...
     -0.00106 |       0.00000 |       0.00011 |      3.26e-05 |       0.73809
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.26          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2146          |
| TimeElapsed     | 8.09e+03      |
| TimestepsSoFar  | 4710400       |
| ev_tdlam_before | -0.5          |
| loss_ent        | 0.7380861     |
| loss_kl         | 3.2591513e-05 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0010613168 |
| loss_vf_loss    | 0.00011051321 |
-----------------------------------
********** Iteration 1150 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -2.74e-05 |       0.00000 | 

********** Iteration 1155 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00016 |       0.00000 |       0.00192 |      5.48e-06 |       0.74029
    -2.97e-05 |       0.00000 |       0.00174 |      1.41e-05 |       0.74025
     3.08e-05 |       0.00000 |       0.00162 |      1.20e-05 |       0.74023
    -7.86e-05 |       0.00000 |       0.00154 |      1.48e-05 |       0.74021
     -0.00019 |       0.00000 |       0.00146 |      1.45e-05 |       0.74033
     -0.00026 |       0.00000 |       0.00140 |      1.86e-05 |       0.74024
     -0.00015 |       0.00000 |       0.00135 |      2.35e-05 |       0.74021
     -0.00031 |       0.00000 |       0.00131 |      1.88e-05 |       0.74029
     -0.00032 |       0.00000 |       0.00127 |      1.99e-05 |       0.74030
     -0.00038 |       0.00000 |       0.00124 |      2.01e-05 |       0.74036
Evaluating losses...
     -0.00047 |       0.00000 |       0.00121 |      1.99e-05 |     

     -0.00032 |       0.00000 |       0.00072 |      1.54e-05 |       0.73967
     -0.00033 |       0.00000 |       0.00068 |      1.88e-05 |       0.73963
     -0.00038 |       0.00000 |       0.00064 |      1.75e-05 |       0.73958
     -0.00042 |       0.00000 |       0.00062 |      1.82e-05 |       0.73959
     -0.00036 |       0.00000 |       0.00060 |      2.06e-05 |       0.73955
Evaluating losses...
     -0.00050 |       0.00000 |       0.00058 |      2.15e-05 |       0.73961
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.22          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 2160          |
| TimeElapsed     | 8.37e+03      |
| TimestepsSoFar  | 4755456       |
| ev_tdlam_before | 0.525         |
| loss_ent        | 0.739607      |
| loss_kl         | 2.1464935e-05 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0005026622 |
| loss_vf_loss    | 0.0005825813  |
-----------------------------------
*******

********** Iteration 1166 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     8.02e-05 |       0.00000 |       0.00377 |      2.06e-05 |       0.74268
     -0.00031 |       0.00000 |       0.00357 |      2.43e-05 |       0.74280
     -0.00039 |       0.00000 |       0.00343 |      2.52e-05 |       0.74285
     -0.00043 |       0.00000 |       0.00334 |      2.24e-05 |       0.74284
     -0.00056 |       0.00000 |       0.00326 |      2.08e-05 |       0.74292
     -0.00050 |       0.00000 |       0.00320 |      3.49e-05 |       0.74285
     -0.00063 |       0.00000 |       0.00314 |      2.18e-05 |       0.74287
     -0.00065 |       0.00000 |       0.00310 |      2.11e-05 |       0.74284
     -0.00054 |       0.00000 |       0.00305 |      3.00e-05 |       0.74284
     -0.00069 |       0.00000 |       0.00300 |      2.70e-05 |       0.74296
Evaluating losses...
     -0.00077 |       0.00000 |       0.00297 |      2.57e-05 |     

     -0.00054 |       0.00000 |       0.00445 |      2.43e-05 |       0.76597
     -0.00056 |       0.00000 |       0.00440 |      2.26e-05 |       0.76604
     -0.00059 |       0.00000 |       0.00435 |      2.45e-05 |       0.76604
Evaluating losses...
     -0.00062 |       0.00000 |       0.00431 |      2.61e-05 |       0.76609
------------------------------------
| EpLenMean       | 3.02e+03       |
| EpRewMean       | 0.2            |
| EpThisIter      | 1              |
| EpisodesSoFar   | 2175           |
| TimeElapsed     | 8.42e+03       |
| TimestepsSoFar  | 4800512        |
| ev_tdlam_before | 0.596          |
| loss_ent        | 0.7660918      |
| loss_kl         | 2.6100137e-05  |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.00061620586 |
| loss_vf_loss    | 0.004313409    |
------------------------------------
********** Iteration 1172 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -2.49e-05 |   

********** Iteration 1177 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -1.19e-05 |       0.00000 |       0.00149 |      4.61e-06 |       0.72611
     -0.00020 |       0.00000 |       0.00140 |      6.91e-06 |       0.72616
     -0.00024 |       0.00000 |       0.00133 |      7.13e-06 |       0.72618
     -0.00029 |       0.00000 |       0.00129 |      7.71e-06 |       0.72620
     -0.00036 |       0.00000 |       0.00125 |      8.10e-06 |       0.72620
     -0.00041 |       0.00000 |       0.00123 |      8.49e-06 |       0.72626
     -0.00040 |       0.00000 |       0.00120 |      9.57e-06 |       0.72622
     -0.00045 |       0.00000 |       0.00119 |      9.58e-06 |       0.72621
     -0.00048 |       0.00000 |       0.00117 |      1.15e-05 |       0.72625
     -0.00052 |       0.00000 |       0.00116 |      1.10e-05 |       0.72620
Evaluating losses...
     -0.00057 |       0.00000 |       0.00115 |      1.01e-05 |     

     -0.00042 |       0.00000 |       0.00030 |      8.00e-06 |       0.72674
     -0.00042 |       0.00000 |       0.00028 |      8.32e-06 |       0.72670
     -0.00046 |       0.00000 |       0.00027 |      9.75e-06 |       0.72668
Evaluating losses...
     -0.00049 |       0.00000 |       0.00027 |      9.19e-06 |       0.72675
------------------------------------
| EpLenMean       | 3.01e+03       |
| EpRewMean       | 0.23           |
| EpThisIter      | 1              |
| EpisodesSoFar   | 2190           |
| TimeElapsed     | 8.47e+03       |
| TimestepsSoFar  | 4845568        |
| ev_tdlam_before | -0.415         |
| loss_ent        | 0.72674567     |
| loss_kl         | 9.186589e-06   |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.00049031584 |
| loss_vf_loss    | 0.00026720297  |
------------------------------------
********** Iteration 1183 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -3.11e-05 |   

********** Iteration 1188 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     3.95e-05 |       0.00000 |       0.00017 |      2.32e-06 |       0.73516
     -0.00012 |       0.00000 |       0.00015 |      3.75e-06 |       0.73528
     -0.00022 |       0.00000 |       0.00014 |      4.10e-06 |       0.73528
     -0.00026 |       0.00000 |       0.00014 |      3.97e-06 |       0.73533
     -0.00029 |       0.00000 |       0.00013 |      4.76e-06 |       0.73525
     -0.00033 |       0.00000 |       0.00013 |      5.45e-06 |       0.73537
     -0.00035 |       0.00000 |       0.00012 |      6.33e-06 |       0.73535
     -0.00038 |       0.00000 |       0.00012 |      6.34e-06 |       0.73529
     -0.00041 |       0.00000 |       0.00011 |      6.33e-06 |       0.73530
     -0.00040 |       0.00000 |       0.00011 |      7.11e-06 |       0.73529
Evaluating losses...
     -0.00049 |       0.00000 |       0.00011 |      6.45e-06 |     

     -0.00013 |       0.00000 |       0.00070 |      3.46e-06 |       0.75075
     -0.00018 |       0.00000 |       0.00067 |      3.63e-06 |       0.75079
     -0.00017 |       0.00000 |       0.00064 |      4.30e-06 |       0.75084
     -0.00018 |       0.00000 |       0.00061 |      3.61e-06 |       0.75080
Evaluating losses...
     -0.00024 |       0.00000 |       0.00060 |      3.70e-06 |       0.75084
------------------------------------
| EpLenMean       | 3.01e+03       |
| EpRewMean       | 0.27           |
| EpThisIter      | 1              |
| EpisodesSoFar   | 2205           |
| TimeElapsed     | 8.52e+03       |
| TimestepsSoFar  | 4890624        |
| ev_tdlam_before | 0.647          |
| loss_ent        | 0.7508442      |
| loss_kl         | 3.7045309e-06  |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.00024482596 |
| loss_vf_loss    | 0.0005967064   |
------------------------------------
********** Iteration 1194 ************
Optimizing...
     pol_surr |   

********** Iteration 1199 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -1.50e-06 |       0.00000 |       0.00198 |      6.11e-07 |       0.73866
     -0.00010 |       0.00000 |       0.00189 |      1.87e-06 |       0.73862
     -0.00012 |       0.00000 |       0.00183 |      1.58e-06 |       0.73863
     -0.00016 |       0.00000 |       0.00180 |      1.46e-06 |       0.73863
     -0.00017 |       0.00000 |       0.00176 |      1.88e-06 |       0.73862
     -0.00018 |       0.00000 |       0.00174 |      2.35e-06 |       0.73862
     -0.00019 |       0.00000 |       0.00172 |      2.11e-06 |       0.73865
     -0.00019 |       0.00000 |       0.00170 |      2.34e-06 |       0.73865
     -0.00019 |       0.00000 |       0.00168 |      2.38e-06 |       0.73864
     -0.00021 |       0.00000 |       0.00167 |      2.20e-06 |       0.73865
Evaluating losses...
     -0.00022 |       0.00000 |       0.00166 |      2.39e-06 |     

     -0.00020 |       0.00000 |       0.00014 |      1.52e-06 |       0.73395
     -0.00021 |       0.00000 |       0.00013 |      1.76e-06 |       0.73395
     -0.00023 |       0.00000 |       0.00013 |      1.66e-06 |       0.73396
     -0.00024 |       0.00000 |       0.00013 |      1.81e-06 |       0.73394
Evaluating losses...
     -0.00027 |       0.00000 |       0.00013 |      2.02e-06 |       0.73398
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.28          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 2220          |
| TimeElapsed     | 8.57e+03      |
| TimestepsSoFar  | 4935680       |
| ev_tdlam_before | -0.0112       |
| loss_ent        | 0.73397964    |
| loss_kl         | 2.021514e-06  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0002725543 |
| loss_vf_loss    | 0.00012835019 |
-----------------------------------
********** Iteration 1205 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 1210 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -4.48e-06 |       0.00000 |       0.00049 |      1.89e-07 |       0.73416
    -3.92e-05 |       0.00000 |       0.00048 |      4.30e-07 |       0.73415
    -5.86e-05 |       0.00000 |       0.00047 |      4.16e-07 |       0.73415
    -6.90e-05 |       0.00000 |       0.00046 |      5.82e-07 |       0.73412
    -8.57e-05 |       0.00000 |       0.00045 |      6.00e-07 |       0.73413
    -9.19e-05 |       0.00000 |       0.00044 |      5.40e-07 |       0.73413
    -9.48e-05 |       0.00000 |       0.00044 |      6.62e-07 |       0.73413
    -9.89e-05 |       0.00000 |       0.00043 |      6.20e-07 |       0.73416
     -0.00011 |       0.00000 |       0.00042 |      6.39e-07 |       0.73414
     -0.00011 |       0.00000 |       0.00042 |      6.48e-07 |       0.73414
Evaluating losses...
     -0.00013 |       0.00000 |       0.00041 |      5.56e-07 |     

    -4.77e-05 |       0.00000 |       0.00012 |      1.39e-07 |       0.73767
    -4.84e-05 |       0.00000 |       0.00012 |      1.35e-07 |       0.73767
    -5.44e-05 |       0.00000 |       0.00012 |      1.67e-07 |       0.73767
    -5.55e-05 |       0.00000 |       0.00012 |      1.55e-07 |       0.73767
Evaluating losses...
    -6.14e-05 |       0.00000 |       0.00012 |      1.55e-07 |       0.73766
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.22          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 2235          |
| TimeElapsed     | 8.62e+03      |
| TimestepsSoFar  | 4980736       |
| ev_tdlam_before | -2.26         |
| loss_ent        | 0.7376554     |
| loss_kl         | 1.5515809e-07 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -6.136531e-05 |
| loss_vf_loss    | 0.00011561572 |
-----------------------------------
********** Iteration 1216 ************
Optimizing...
     pol_surr |    pol_entpen | 