In [None]:
#!/usr/bin/env python3

# Train single CPU PPO1 on slimevolley.
# Should solve it (beat existing AI on average over 1000 trials) in 3 hours on single CPU, within 3M steps.

import os
import gym
import slimevolleygym
from slimevolleygym import SurvivalRewardEnv

from stable_baselines.ppo1 import PPO1
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import logger
from stable_baselines.common.callbacks import EvalCallback

# Run configuration.
NUM_TIMESTEPS = int(2e7)  # total_timesteps handed to model.learn
SEED = 721  # seed applied to both environments below
EVAL_FREQ = 250000  # passed to EvalCallback as eval_freq
EVAL_EPISODES = 1000  # passed to EvalCallback as n_eval_episodes
LOGDIR = "ppo1" # moved to zoo afterwards.

logger.configure(folder=LOGDIR)

# Environment the learner collects experience from.
env = gym.make("SlimeVolley-v0")
env.seed(SEED)

# Dedicated environment for periodic evaluation. EvalCallback resets and steps
# the environment it is given while model.learn() is still running, so handing
# it the training env would clobber the episode the learner is in the middle
# of collecting. A separate instance keeps the two fully independent.
eval_env = gym.make("SlimeVolley-v0")
eval_env.seed(SEED)

try:
    # take mujoco hyperparams (but doubled timesteps_per_actorbatch to cover more steps.)
    model = PPO1(MlpPolicy, env, timesteps_per_actorbatch=4096, clip_param=0.2, entcoeff=0.0, optim_epochs=10,
                 optim_stepsize=3e-4, optim_batchsize=64, gamma=0.99, lam=0.95, schedule='linear', verbose=2)

    # Best model and evaluation logs are both written under LOGDIR.
    eval_callback = EvalCallback(eval_env, best_model_save_path=LOGDIR, log_path=LOGDIR,
                                 eval_freq=EVAL_FREQ, n_eval_episodes=EVAL_EPISODES)

    model.learn(total_timesteps=NUM_TIMESTEPS, callback=eval_callback)

    model.save(os.path.join(LOGDIR, "final_model")) # probably never get to this point.
finally:
    # Release both environments even when the (long) run is interrupted or fails.
    env.close()
    eval_env.close()

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Logging to ppo1




Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where







********** Iteration 0 ************


  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00021 |       0.00000 |       0.08507 |       0.00026 |       2.07916
     -0.00264 |       0.00000 |       0.04922 |       0.00146 |       2.07793
     -0.00487 |       0.00000 |       0.03956 |       0.00504 |       2.07431
     -0.00627 |       0.00000 |       0.03497 |       0.00720 |       2.07212
     -0.00676 |       0.00000 |       0.03209 |       0.00938 |       2.06993
     -0.00688 |       0.00000 |       0.03018 |       0.00947 |       2.06984
     -0.00742 |       0.00000 |       0.02873 |       0.00990 |       2.06940
     -0.00763 |       0.00000 |       0.02753 |       0.00911 |       2.07020
     -0.00782 |       0.00000 |       0.02662 |       0.00946 |       2.06985
     -0.00791 |       0.00000 |       0.02575 |       0.00999 |       2.06932
Evaluating losses...
     -0.00876 |       0.00000 |       0.02516 |       0.00905 |       2.07025
-----------------------------

     -0.00695 |       0.00000 |       0.01499 |       0.00563 |       1.98868
Evaluating losses...
     -0.00798 |       0.00000 |       0.01492 |       0.00453 |       1.98866
---------------------------------
| EpLenMean       | 581         |
| EpRewMean       | -4.9        |
| EpThisIter      | 7           |
| EpisodesSoFar   | 42          |
| TimeElapsed     | 21.8        |
| TimestepsSoFar  | 24576       |
| ev_tdlam_before | 0.832       |
| loss_ent        | 1.9886615   |
| loss_kl         | 0.004532754 |
| loss_pol_entpen | 0.0         |
| loss_pol_surr   | -0.00797977 |
| loss_vf_loss    | 0.014918938 |
---------------------------------
********** Iteration 6 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00067 |       0.00000 |       0.01973 |       0.00081 |       1.98984
     -0.00123 |       0.00000 |       0.01907 |       0.00234 |       1.98264
     -0.00214 |       0.00000 |       0.01826 |       0.00421 |

     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00096 |       0.00000 |       0.02234 |       0.00127 |       1.93936
     -0.00517 |       0.00000 |       0.02116 |       0.00571 |       1.95404
     -0.00602 |       0.00000 |       0.02038 |       0.00827 |       1.95384
     -0.00701 |       0.00000 |       0.01978 |       0.00842 |       1.95559
     -0.00786 |       0.00000 |       0.01937 |       0.00857 |       1.94712
     -0.00822 |       0.00000 |       0.01894 |       0.00965 |       1.95395
     -0.00864 |       0.00000 |       0.01870 |       0.00785 |       1.95207
     -0.00901 |       0.00000 |       0.01842 |       0.00842 |       1.94865
     -0.00931 |       0.00000 |       0.01843 |       0.00935 |       1.95053
     -0.00964 |       0.00000 |       0.01787 |       0.00851 |       1.94423
Evaluating losses...
     -0.01089 |       0.00000 |       0.01756 |       0.00713 |       1.95170
----------------------------------
| EpLenM

     -0.01223 |       0.00000 |       0.01347 |       0.00762 |       1.93673
Evaluating losses...
     -0.01385 |       0.00000 |       0.01336 |       0.00714 |       1.93632
----------------------------------
| EpLenMean       | 625          |
| EpRewMean       | -4.85        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 112          |
| TimeElapsed     | 64.1         |
| TimestepsSoFar  | 69632        |
| ev_tdlam_before | 0.844        |
| loss_ent        | 1.9363244    |
| loss_kl         | 0.0071390816 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013846791 |
| loss_vf_loss    | 0.013360801  |
----------------------------------
********** Iteration 17 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     6.22e-05 |       0.00000 |       0.02062 |       0.00112 |       1.92459
     -0.00311 |       0.00000 |       0.01942 |       0.00230 |       1.92403
     -0.00439 |       0.00000 |       0.01888 | 

Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00076 |       0.00000 |       0.01675 |       0.00164 |       1.87628
     -0.00251 |       0.00000 |       0.01519 |       0.00352 |       1.85910
     -0.00409 |       0.00000 |       0.01471 |       0.00437 |       1.85722
     -0.00469 |       0.00000 |       0.01438 |       0.00488 |       1.85803
     -0.00538 |       0.00000 |       0.01457 |       0.00524 |       1.85801
     -0.00584 |       0.00000 |       0.01415 |       0.00700 |       1.84143
     -0.00660 |       0.00000 |       0.01391 |       0.00505 |       1.85917
     -0.00707 |       0.00000 |       0.01383 |       0.00641 |       1.85493
     -0.00760 |       0.00000 |       0.01381 |       0.00662 |       1.85113
     -0.00807 |       0.00000 |       0.01357 |       0.00675 |       1.85104
Evaluating losses...
     -0.00955 |       0.00000 |       0.01339 |       0.00630 |       1.85190
-----------------------------

     -0.00717 |       0.00000 |       0.01528 |       0.00498 |       1.83426
     -0.00779 |       0.00000 |       0.01527 |       0.00555 |       1.82941
Evaluating losses...
     -0.00977 |       0.00000 |       0.01474 |       0.00474 |       1.83059
----------------------------------
| EpLenMean       | 654          |
| EpRewMean       | -4.85        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 181          |
| TimeElapsed     | 105          |
| TimestepsSoFar  | 114688       |
| ev_tdlam_before | 0.85         |
| loss_ent        | 1.8305947    |
| loss_kl         | 0.004735404  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009767117 |
| loss_vf_loss    | 0.014742204  |
----------------------------------
********** Iteration 28 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00079 |       0.00000 |       0.01612 |       0.00139 |       1.83281
     -0.00272 |       0.00000 |       0.01504 | 

********** Iteration 33 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00036 |       0.00000 |       0.01529 |       0.00165 |       1.75962
     -0.00395 |       0.00000 |       0.01397 |       0.00358 |       1.75494
     -0.00602 |       0.00000 |       0.01348 |       0.00469 |       1.75801
     -0.00778 |       0.00000 |       0.01331 |       0.00539 |       1.75801
     -0.00856 |       0.00000 |       0.01316 |       0.00517 |       1.75941
     -0.00924 |       0.00000 |       0.01293 |       0.00538 |       1.75978
     -0.00983 |       0.00000 |       0.01262 |       0.00592 |       1.75623
     -0.01027 |       0.00000 |       0.01258 |       0.00603 |       1.76419
     -0.01136 |       0.00000 |       0.01229 |       0.00548 |       1.76196
     -0.01189 |       0.00000 |       0.01223 |       0.00642 |       1.75506
Evaluating losses...
     -0.01340 |       0.00000 |       0.01182 |       0.00637 |       

     -0.00829 |       0.00000 |       0.01430 |       0.00650 |       1.70255
     -0.00795 |       0.00000 |       0.01406 |       0.00685 |       1.70532
Evaluating losses...
     -0.00994 |       0.00000 |       0.01384 |       0.00686 |       1.70249
----------------------------------
| EpLenMean       | 632          |
| EpRewMean       | -4.79        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 252          |
| TimeElapsed     | 147          |
| TimestepsSoFar  | 159744       |
| ev_tdlam_before | 0.851        |
| loss_ent        | 1.7024924    |
| loss_kl         | 0.0068627535 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009940835 |
| loss_vf_loss    | 0.01383962   |
----------------------------------
********** Iteration 39 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00010 |       0.00000 |       0.01514 |       0.00120 |       1.67105
     -0.00376 |       0.00000 |       0.01358 | 

********** Iteration 44 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00014 |       0.00000 |       0.01255 |       0.00094 |       1.67169
     -0.00354 |       0.00000 |       0.01201 |       0.00372 |       1.67519
     -0.00541 |       0.00000 |       0.01158 |       0.00401 |       1.66848
     -0.00687 |       0.00000 |       0.01141 |       0.00511 |       1.66749
     -0.00789 |       0.00000 |       0.01135 |       0.00449 |       1.67646
     -0.00936 |       0.00000 |       0.01121 |       0.00531 |       1.66941
     -0.00992 |       0.00000 |       0.01112 |       0.00568 |       1.67250
     -0.01044 |       0.00000 |       0.01112 |       0.00613 |       1.67724
     -0.01032 |       0.00000 |       0.01093 |       0.00582 |       1.66154
     -0.01176 |       0.00000 |       0.01090 |       0.00609 |       1.66894
Evaluating losses...
     -0.01390 |       0.00000 |       0.01075 |       0.00571 |       

     -0.01172 |       0.00000 |       0.01393 |       0.00619 |       1.63886
     -0.01150 |       0.00000 |       0.01384 |       0.00656 |       1.63388
Evaluating losses...
     -0.01367 |       0.00000 |       0.01351 |       0.00672 |       1.64020
-----------------------------------
| EpLenMean       | 611           |
| EpRewMean       | -4.85         |
| EpThisIter      | 7             |
| EpisodesSoFar   | 326           |
| TimeElapsed     | 191           |
| TimestepsSoFar  | 204800        |
| ev_tdlam_before | 0.839         |
| loss_ent        | 1.6401957     |
| loss_kl         | 0.0067244684  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0136743365 |
| loss_vf_loss    | 0.013506421   |
-----------------------------------
********** Iteration 50 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00029 |       0.00000 |       0.01427 |       0.00239 |       1.65237
     -0.00292 |       0.00000 |   

********** Iteration 55 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -2.96e-06 |       0.00000 |       0.01574 |       0.00220 |       1.56920
     -0.00404 |       0.00000 |       0.01409 |       0.00442 |       1.56167
     -0.00565 |       0.00000 |       0.01341 |       0.00614 |       1.56279
     -0.00708 |       0.00000 |       0.01334 |       0.00599 |       1.55794
     -0.00786 |       0.00000 |       0.01277 |       0.00668 |       1.55858
     -0.00864 |       0.00000 |       0.01264 |       0.00640 |       1.56050
     -0.00887 |       0.00000 |       0.01242 |       0.00690 |       1.55229
     -0.01005 |       0.00000 |       0.01238 |       0.00734 |       1.55378
     -0.01061 |       0.00000 |       0.01216 |       0.00611 |       1.56435
     -0.01066 |       0.00000 |       0.01201 |       0.00648 |       1.55460
Evaluating losses...
     -0.01315 |       0.00000 |       0.01177 |       0.00608 |       

     -0.00972 |       0.00000 |       0.01074 |       0.00586 |       1.51626
     -0.01008 |       0.00000 |       0.01063 |       0.00683 |       1.51862
Evaluating losses...
     -0.01200 |       0.00000 |       0.01036 |       0.00558 |       1.51273
----------------------------------
| EpLenMean       | 632          |
| EpRewMean       | -4.85        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 396          |
| TimeElapsed     | 237          |
| TimestepsSoFar  | 249856       |
| ev_tdlam_before | 0.869        |
| loss_ent        | 1.5127274    |
| loss_kl         | 0.0055796253 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.011996113 |
| loss_vf_loss    | 0.010364704  |
----------------------------------
********** Iteration 61 ************
Eval num_timesteps=249856, episode_reward=-4.83 +/- 0.40
Episode length: 629.47 +/- 137.90
New best mean reward!
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00102

********** Iteration 66 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00105 |       0.00000 |       0.01838 |       0.00203 |       1.50973
     -0.00308 |       0.00000 |       0.01591 |       0.00288 |       1.49994
     -0.00465 |       0.00000 |       0.01465 |       0.00435 |       1.50407
     -0.00600 |       0.00000 |       0.01382 |       0.00598 |       1.50487
     -0.00612 |       0.00000 |       0.01341 |       0.00662 |       1.50909
     -0.00711 |       0.00000 |       0.01292 |       0.00553 |       1.50704
     -0.00770 |       0.00000 |       0.01273 |       0.00570 |       1.50604
     -0.00872 |       0.00000 |       0.01225 |       0.00663 |       1.50679
     -0.00873 |       0.00000 |       0.01213 |       0.00627 |       1.50338
     -0.00991 |       0.00000 |       0.01205 |       0.00684 |       1.50342
Evaluating losses...
     -0.01181 |       0.00000 |       0.01163 |       0.00513 |       

     -0.01170 |       0.00000 |       0.01161 |       0.00747 |       1.45862
     -0.01179 |       0.00000 |       0.01148 |       0.00791 |       1.45777
Evaluating losses...
     -0.01370 |       0.00000 |       0.01100 |       0.00766 |       1.45681
----------------------------------
| EpLenMean       | 637          |
| EpRewMean       | -4.87        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 468          |
| TimeElapsed     | 695          |
| TimestepsSoFar  | 294912       |
| ev_tdlam_before | 0.845        |
| loss_ent        | 1.4568148    |
| loss_kl         | 0.0076640584 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013702125 |
| loss_vf_loss    | 0.011004379  |
----------------------------------
********** Iteration 72 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00086 |       0.00000 |       0.01563 |       0.00088 |       1.42550
     -0.00206 |       0.00000 |       0.01427 | 

********** Iteration 77 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -2.09e-05 |       0.00000 |       0.01505 |       0.00227 |       1.35078
     -0.00380 |       0.00000 |       0.01375 |       0.00307 |       1.35444
     -0.00568 |       0.00000 |       0.01314 |       0.00494 |       1.35220
     -0.00716 |       0.00000 |       0.01276 |       0.00531 |       1.34908
     -0.00834 |       0.00000 |       0.01246 |       0.00548 |       1.35259
     -0.00908 |       0.00000 |       0.01220 |       0.00564 |       1.35818
     -0.00907 |       0.00000 |       0.01220 |       0.00497 |       1.36090
     -0.01029 |       0.00000 |       0.01200 |       0.00680 |       1.35809
     -0.01090 |       0.00000 |       0.01180 |       0.00591 |       1.36131
     -0.01151 |       0.00000 |       0.01170 |       0.00654 |       1.35936
Evaluating losses...
     -0.01299 |       0.00000 |       0.01125 |       0.00610 |       

     -0.00888 |       0.00000 |       0.01061 |       0.00592 |       1.40772
     -0.00908 |       0.00000 |       0.01039 |       0.00568 |       1.40719
Evaluating losses...
     -0.01145 |       0.00000 |       0.01014 |       0.00587 |       1.40607
----------------------------------
| EpLenMean       | 650          |
| EpRewMean       | -4.83        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 535          |
| TimeElapsed     | 738          |
| TimestepsSoFar  | 339968       |
| ev_tdlam_before | 0.837        |
| loss_ent        | 1.4060749    |
| loss_kl         | 0.005866893  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.011448141 |
| loss_vf_loss    | 0.01014428   |
----------------------------------
********** Iteration 83 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00092 |       0.00000 |       0.01459 |       0.00175 |       1.46163
     -0.00100 |       0.00000 |       0.01312 | 

********** Iteration 88 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00029 |       0.00000 |       0.01379 |       0.00168 |       1.32219
     -0.00394 |       0.00000 |       0.01271 |       0.00390 |       1.32692
     -0.00558 |       0.00000 |       0.01231 |       0.00556 |       1.32918
     -0.00713 |       0.00000 |       0.01193 |       0.00481 |       1.31923
     -0.00778 |       0.00000 |       0.01185 |       0.00501 |       1.31083
     -0.00885 |       0.00000 |       0.01165 |       0.00502 |       1.31440
     -0.00989 |       0.00000 |       0.01132 |       0.00621 |       1.31419
     -0.01044 |       0.00000 |       0.01122 |       0.00590 |       1.31242
     -0.01139 |       0.00000 |       0.01107 |       0.00597 |       1.31129
     -0.01143 |       0.00000 |       0.01105 |       0.00633 |       1.30710
Evaluating losses...
     -0.01353 |       0.00000 |       0.01060 |       0.00624 |       

     -0.01232 |       0.00000 |       0.01327 |       0.00574 |       1.27454
     -0.01289 |       0.00000 |       0.01305 |       0.00554 |       1.27465
Evaluating losses...
     -0.01462 |       0.00000 |       0.01281 |       0.00626 |       1.27396
----------------------------------
| EpLenMean       | 624          |
| EpRewMean       | -4.8         |
| EpThisIter      | 6            |
| EpisodesSoFar   | 608          |
| TimeElapsed     | 777          |
| TimestepsSoFar  | 385024       |
| ev_tdlam_before | 0.847        |
| loss_ent        | 1.2739644    |
| loss_kl         | 0.0062570586 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01462038  |
| loss_vf_loss    | 0.01280593   |
----------------------------------
********** Iteration 94 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00133 |       0.00000 |       0.01506 |       0.00085 |       1.23886
     -0.00262 |       0.00000 |       0.01373 | 

********** Iteration 99 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00171 |       0.00000 |       0.01756 |       0.00107 |       1.22586
     -0.00219 |       0.00000 |       0.01572 |       0.00264 |       1.23709
     -0.00360 |       0.00000 |       0.01497 |       0.00422 |       1.24395
     -0.00528 |       0.00000 |       0.01449 |       0.00345 |       1.24707
     -0.00670 |       0.00000 |       0.01401 |       0.00426 |       1.24486
     -0.00770 |       0.00000 |       0.01372 |       0.00471 |       1.24403
     -0.00887 |       0.00000 |       0.01335 |       0.00438 |       1.24351
     -0.00961 |       0.00000 |       0.01298 |       0.00496 |       1.24738
     -0.00943 |       0.00000 |       0.01274 |       0.00543 |       1.23919
     -0.01032 |       0.00000 |       0.01262 |       0.00506 |       1.24165
Evaluating losses...
     -0.01277 |       0.00000 |       0.01220 |       0.00511 |       

     -0.00994 |       0.00000 |       0.01145 |       0.00622 |       1.17344
     -0.01147 |       0.00000 |       0.01128 |       0.00609 |       1.17289
Evaluating losses...
     -0.01350 |       0.00000 |       0.01090 |       0.00521 |       1.17515
----------------------------------
| EpLenMean       | 636          |
| EpRewMean       | -4.77        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 679          |
| TimeElapsed     | 820          |
| TimestepsSoFar  | 430080       |
| ev_tdlam_before | 0.879        |
| loss_ent        | 1.1751505    |
| loss_kl         | 0.0052081333 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013496261 |
| loss_vf_loss    | 0.010898597  |
----------------------------------
********** Iteration 105 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00046 |       0.00000 |       0.02145 |       0.00117 |       1.17839
     -0.00328 |       0.00000 |       0.01925 |

********** Iteration 110 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00096 |       0.00000 |       0.01613 |       0.00151 |       1.16193
     -0.00221 |       0.00000 |       0.01387 |       0.00237 |       1.14756
     -0.00437 |       0.00000 |       0.01314 |       0.00388 |       1.15235
     -0.00620 |       0.00000 |       0.01268 |       0.00425 |       1.14729
     -0.00764 |       0.00000 |       0.01229 |       0.00415 |       1.14618
     -0.00841 |       0.00000 |       0.01198 |       0.00455 |       1.14461
     -0.00915 |       0.00000 |       0.01176 |       0.00518 |       1.14160
     -0.01015 |       0.00000 |       0.01163 |       0.00560 |       1.14443
     -0.01091 |       0.00000 |       0.01145 |       0.00548 |       1.14346
     -0.01154 |       0.00000 |       0.01127 |       0.00573 |       1.14332
Evaluating losses...
     -0.01355 |       0.00000 |       0.01079 |       0.00587 |      

     -0.01184 |       0.00000 |       0.01391 |       0.00721 |       1.10460
     -0.01259 |       0.00000 |       0.01372 |       0.00774 |       1.09929
Evaluating losses...
     -0.01414 |       0.00000 |       0.01322 |       0.00711 |       1.10267
----------------------------------
| EpLenMean       | 653          |
| EpRewMean       | -4.76        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 747          |
| TimeElapsed     | 861          |
| TimestepsSoFar  | 475136       |
| ev_tdlam_before | 0.836        |
| loss_ent        | 1.1026685    |
| loss_kl         | 0.00711015   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014141843 |
| loss_vf_loss    | 0.013216187  |
----------------------------------
********** Iteration 116 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00025 |       0.00000 |       0.01690 |       0.00118 |       1.09033
     -0.00351 |       0.00000 |       0.01488 |

********** Iteration 121 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00032 |       0.00000 |       0.01467 |       0.00296 |       1.10997
     -0.00495 |       0.00000 |       0.01271 |       0.00318 |       1.10786
     -0.00664 |       0.00000 |       0.01186 |       0.00444 |       1.10691
     -0.00839 |       0.00000 |       0.01146 |       0.00393 |       1.11126
     -0.00887 |       0.00000 |       0.01089 |       0.00507 |       1.10966
     -0.00965 |       0.00000 |       0.01064 |       0.00510 |       1.10809
     -0.01029 |       0.00000 |       0.01047 |       0.00546 |       1.11277
     -0.01115 |       0.00000 |       0.01017 |       0.00485 |       1.10851
     -0.01169 |       0.00000 |       0.01013 |       0.00591 |       1.11009
     -0.01192 |       0.00000 |       0.01000 |       0.00584 |       1.10950
Evaluating losses...
     -0.01377 |       0.00000 |       0.00971 |       0.00623 |      

     -0.01003 |       0.00000 |       0.01008 |       0.00521 |       1.09130
     -0.01061 |       0.00000 |       0.00998 |       0.00535 |       1.08845
     -0.01054 |       0.00000 |       0.00968 |       0.00637 |       1.09274
Evaluating losses...
     -0.01288 |       0.00000 |       0.00981 |       0.00592 |       1.09195
----------------------------------
| EpLenMean       | 645          |
| EpRewMean       | -4.88        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 818          |
| TimeElapsed     | 1.26e+03     |
| TimestepsSoFar  | 520192       |
| ev_tdlam_before | 0.865        |
| loss_ent        | 1.091954     |
| loss_kl         | 0.0059169778 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012882179 |
| loss_vf_loss    | 0.009805497  |
----------------------------------
********** Iteration 127 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00019 |       0.00000 |       0.01485 |

********** Iteration 132 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00159 |       0.00000 |       0.01470 |       0.00085 |       1.10133
     -0.00221 |       0.00000 |       0.01340 |       0.00248 |       1.10763
     -0.00542 |       0.00000 |       0.01275 |       0.00282 |       1.11270
     -0.00730 |       0.00000 |       0.01223 |       0.00379 |       1.10853
     -0.00849 |       0.00000 |       0.01189 |       0.00382 |       1.11290
     -0.01013 |       0.00000 |       0.01156 |       0.00415 |       1.11166
     -0.01075 |       0.00000 |       0.01135 |       0.00479 |       1.10785
     -0.01197 |       0.00000 |       0.01106 |       0.00464 |       1.10597
     -0.01290 |       0.00000 |       0.01090 |       0.00507 |       1.10619
     -0.01292 |       0.00000 |       0.01075 |       0.00530 |       1.10954
Evaluating losses...
     -0.01496 |       0.00000 |       0.01037 |       0.00492 |      

     -0.00997 |       0.00000 |       0.01272 |       0.00611 |       1.03763
     -0.01076 |       0.00000 |       0.01245 |       0.00682 |       1.03373
Evaluating losses...
     -0.01195 |       0.00000 |       0.01208 |       0.00663 |       1.03487
----------------------------------
| EpLenMean       | 640          |
| EpRewMean       | -4.8         |
| EpThisIter      | 7            |
| EpisodesSoFar   | 889          |
| TimeElapsed     | 1.3e+03      |
| TimestepsSoFar  | 565248       |
| ev_tdlam_before | 0.855        |
| loss_ent        | 1.0348697    |
| loss_kl         | 0.0066283126 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01195485  |
| loss_vf_loss    | 0.012078653  |
----------------------------------
********** Iteration 138 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00107 |       0.00000 |       0.01544 |       0.00083 |       1.03185
     -0.00281 |       0.00000 |       0.01343 |

********** Iteration 143 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00090 |       0.00000 |       0.01812 |       0.00157 |       1.01433
     -0.00287 |       0.00000 |       0.01592 |       0.00282 |       1.00627
     -0.00551 |       0.00000 |       0.01492 |       0.00338 |       1.00386
     -0.00724 |       0.00000 |       0.01442 |       0.00409 |       1.00829
     -0.00806 |       0.00000 |       0.01401 |       0.00402 |       1.00689
     -0.00942 |       0.00000 |       0.01369 |       0.00460 |       1.00516
     -0.00997 |       0.00000 |       0.01338 |       0.00450 |       0.99828
     -0.01095 |       0.00000 |       0.01319 |       0.00457 |       1.00849
     -0.01169 |       0.00000 |       0.01290 |       0.00462 |       1.00359
     -0.01264 |       0.00000 |       0.01287 |       0.00580 |       1.01334
Evaluating losses...
     -0.01320 |       0.00000 |       0.01231 |       0.00537 |      

     -0.00886 |       0.00000 |       0.01163 |       0.00493 |       0.97035
     -0.00930 |       0.00000 |       0.01142 |       0.00548 |       0.96864
     -0.01073 |       0.00000 |       0.01110 |       0.00518 |       0.96469
Evaluating losses...
     -0.01204 |       0.00000 |       0.01081 |       0.00532 |       0.96913
----------------------------------
| EpLenMean       | 648          |
| EpRewMean       | -4.76        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 958          |
| TimeElapsed     | 1.34e+03     |
| TimestepsSoFar  | 610304       |
| ev_tdlam_before | 0.853        |
| loss_ent        | 0.9691292    |
| loss_kl         | 0.0053237034 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012035569 |
| loss_vf_loss    | 0.010808354  |
----------------------------------
********** Iteration 149 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00046 |       0.00000 |       0.01351 |

********** Iteration 154 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00174 |       0.00000 |       0.01689 |       0.00173 |       0.90834
     -0.00609 |       0.00000 |       0.01397 |       0.00367 |       0.90574
     -0.00871 |       0.00000 |       0.01298 |       0.00485 |       0.91158
     -0.01022 |       0.00000 |       0.01232 |       0.00469 |       0.90281
     -0.01133 |       0.00000 |       0.01187 |       0.00630 |       0.90706
     -0.01212 |       0.00000 |       0.01145 |       0.00608 |       0.90079
     -0.01320 |       0.00000 |       0.01142 |       0.00664 |       0.90013
     -0.01381 |       0.00000 |       0.01110 |       0.00647 |       0.89854
     -0.01440 |       0.00000 |       0.01074 |       0.00717 |       0.89971
     -0.01546 |       0.00000 |       0.01081 |       0.00716 |       0.90215
Evaluating losses...
     -0.01699 |       0.00000 |       0.01037 |       0.00686 |      

     -0.01445 |       0.00000 |       0.01256 |       0.00539 |       0.90095
     -0.01568 |       0.00000 |       0.01221 |       0.00620 |       0.90428
     -0.01629 |       0.00000 |       0.01184 |       0.00658 |       0.90119
Evaluating losses...
     -0.01804 |       0.00000 |       0.01169 |       0.00696 |       0.89875
---------------------------------
| EpLenMean       | 631         |
| EpRewMean       | -4.84       |
| EpThisIter      | 6           |
| EpisodesSoFar   | 1030        |
| TimeElapsed     | 1.38e+03    |
| TimestepsSoFar  | 655360      |
| ev_tdlam_before | 0.826       |
| loss_ent        | 0.8987537   |
| loss_kl         | 0.00696145  |
| loss_pol_entpen | 0.0         |
| loss_pol_surr   | -0.01803718 |
| loss_vf_loss    | 0.011693061 |
---------------------------------
********** Iteration 160 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00020 |       0.00000 |       0.01632 |       0.00139

********** Iteration 165 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00051 |       0.00000 |       0.01642 |       0.00175 |       0.86224
     -0.00400 |       0.00000 |       0.01446 |       0.00475 |       0.85099
     -0.00637 |       0.00000 |       0.01370 |       0.00410 |       0.85177
     -0.00855 |       0.00000 |       0.01316 |       0.00468 |       0.85229
     -0.00953 |       0.00000 |       0.01270 |       0.00560 |       0.85095
     -0.01052 |       0.00000 |       0.01241 |       0.00525 |       0.85487
     -0.01183 |       0.00000 |       0.01220 |       0.00588 |       0.85213
     -0.01258 |       0.00000 |       0.01194 |       0.00677 |       0.84835
     -0.01276 |       0.00000 |       0.01162 |       0.00680 |       0.85366
     -0.01369 |       0.00000 |       0.01146 |       0.00665 |       0.85088
Evaluating losses...
     -0.01467 |       0.00000 |       0.01098 |       0.00760 |      

     -0.01189 |       0.00000 |       0.00929 |       0.00601 |       0.82467
     -0.01193 |       0.00000 |       0.00916 |       0.00577 |       0.82356
Evaluating losses...
     -0.01334 |       0.00000 |       0.00902 |       0.00599 |       0.82948
----------------------------------
| EpLenMean       | 628          |
| EpRewMean       | -4.83        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 1101         |
| TimeElapsed     | 1.42e+03     |
| TimestepsSoFar  | 700416       |
| ev_tdlam_before | 0.846        |
| loss_ent        | 0.82947654   |
| loss_kl         | 0.005986789  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013341948 |
| loss_vf_loss    | 0.009023539  |
----------------------------------
********** Iteration 171 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00141 |       0.00000 |       0.01608 |       0.00110 |       0.82624
     -0.00345 |       0.00000 |       0.01415 |

********** Iteration 176 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00125 |       0.00000 |       0.01460 |       0.00153 |       0.82584
     -0.00511 |       0.00000 |       0.01292 |       0.00286 |       0.83185
     -0.00782 |       0.00000 |       0.01190 |       0.00358 |       0.83495
     -0.00928 |       0.00000 |       0.01158 |       0.00395 |       0.83330
     -0.00972 |       0.00000 |       0.01101 |       0.00461 |       0.83619
     -0.01121 |       0.00000 |       0.01077 |       0.00482 |       0.83742
     -0.01196 |       0.00000 |       0.01046 |       0.00520 |       0.83918
     -0.01302 |       0.00000 |       0.01040 |       0.00537 |       0.83775
     -0.01384 |       0.00000 |       0.01008 |       0.00585 |       0.83948
     -0.01469 |       0.00000 |       0.00991 |       0.00528 |       0.83850
Evaluating losses...
     -0.01586 |       0.00000 |       0.00958 |       0.00679 |      

     -0.01162 |       0.00000 |       0.01156 |       0.00528 |       0.82226
     -0.01220 |       0.00000 |       0.01133 |       0.00547 |       0.82531
Evaluating losses...
     -0.01415 |       0.00000 |       0.01088 |       0.00521 |       0.82305
----------------------------------
| EpLenMean       | 637          |
| EpRewMean       | -4.88        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 1173         |
| TimeElapsed     | 1.46e+03     |
| TimestepsSoFar  | 745472       |
| ev_tdlam_before | 0.821        |
| loss_ent        | 0.8230547    |
| loss_kl         | 0.0052142437 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014148919 |
| loss_vf_loss    | 0.010875449  |
----------------------------------
********** Iteration 182 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00019 |       0.00000 |       0.01494 |       0.00192 |       0.82403
     -0.00460 |       0.00000 |       0.01330 |

********** Iteration 187 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00042 |       0.00000 |       0.01326 |       0.00137 |       0.78049
     -0.00345 |       0.00000 |       0.01179 |       0.00320 |       0.77877
     -0.00524 |       0.00000 |       0.01130 |       0.00373 |       0.78202
     -0.00663 |       0.00000 |       0.01110 |       0.00325 |       0.78363
     -0.00736 |       0.00000 |       0.01071 |       0.00426 |       0.78318
     -0.00880 |       0.00000 |       0.01045 |       0.00441 |       0.78498
     -0.00918 |       0.00000 |       0.01022 |       0.00493 |       0.78482
     -0.01031 |       0.00000 |       0.01002 |       0.00527 |       0.78673
     -0.01067 |       0.00000 |       0.01008 |       0.00506 |       0.78722
     -0.01054 |       0.00000 |       0.00997 |       0.00513 |       0.78819
Evaluating losses...
     -0.01301 |       0.00000 |       0.00958 |       0.00477 |      

     -0.01113 |       0.00000 |       0.01114 |       0.00563 |       0.74609
     -0.01222 |       0.00000 |       0.01089 |       0.00525 |       0.74154
Evaluating losses...
     -0.01404 |       0.00000 |       0.01046 |       0.00546 |       0.74130
----------------------------------
| EpLenMean       | 641          |
| EpRewMean       | -4.93        |
| EpThisIter      | 5            |
| EpisodesSoFar   | 1243         |
| TimeElapsed     | 1.85e+03     |
| TimestepsSoFar  | 790528       |
| ev_tdlam_before | 0.825        |
| loss_ent        | 0.741302     |
| loss_kl         | 0.0054559205 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014038412 |
| loss_vf_loss    | 0.010458729  |
----------------------------------
********** Iteration 193 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00066 |       0.00000 |       0.01809 |       0.00068 |       0.71570
     -0.00280 |       0.00000 |       0.01604 |

********** Iteration 198 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00017 |       0.00000 |       0.01151 |       0.00134 |       0.73994
     -0.00387 |       0.00000 |       0.01037 |       0.00314 |       0.73199
     -0.00557 |       0.00000 |       0.00977 |       0.00336 |       0.73101
     -0.00647 |       0.00000 |       0.00940 |       0.00326 |       0.73446
     -0.00744 |       0.00000 |       0.00926 |       0.00397 |       0.73265
     -0.00853 |       0.00000 |       0.00898 |       0.00355 |       0.72825
     -0.00942 |       0.00000 |       0.00888 |       0.00424 |       0.73034
     -0.00996 |       0.00000 |       0.00865 |       0.00391 |       0.72866
     -0.01072 |       0.00000 |       0.00846 |       0.00430 |       0.72915
     -0.01094 |       0.00000 |       0.00843 |       0.00443 |       0.72734
Evaluating losses...
     -0.01301 |       0.00000 |       0.00811 |       0.00447 |      

     -0.01192 |       0.00000 |       0.00915 |       0.00514 |       0.73300
     -0.01206 |       0.00000 |       0.00908 |       0.00608 |       0.73363
Evaluating losses...
     -0.01362 |       0.00000 |       0.00872 |       0.00612 |       0.73770
---------------------------------
| EpLenMean       | 644         |
| EpRewMean       | -4.88       |
| EpThisIter      | 6           |
| EpisodesSoFar   | 1314        |
| TimeElapsed     | 1.89e+03    |
| TimestepsSoFar  | 835584      |
| ev_tdlam_before | 0.859       |
| loss_ent        | 0.7377005   |
| loss_kl         | 0.006122891 |
| loss_pol_entpen | 0.0         |
| loss_pol_surr   | -0.01361734 |
| loss_vf_loss    | 0.008723559 |
---------------------------------
********** Iteration 204 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00162 |       0.00000 |       0.01545 |       0.00141 |       0.73723
     -0.00202 |       0.00000 |       0.01303 |       0.00206

********** Iteration 209 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00072 |       0.00000 |       0.01682 |       0.00131 |       0.73072
     -0.00381 |       0.00000 |       0.01462 |       0.00266 |       0.73349
     -0.00450 |       0.00000 |       0.01401 |       0.00297 |       0.72508
     -0.00645 |       0.00000 |       0.01345 |       0.00432 |       0.73012
     -0.00703 |       0.00000 |       0.01299 |       0.00372 |       0.72131
     -0.00856 |       0.00000 |       0.01262 |       0.00377 |       0.72379
     -0.00857 |       0.00000 |       0.01250 |       0.00419 |       0.72353
     -0.00972 |       0.00000 |       0.01216 |       0.00446 |       0.72454
     -0.01017 |       0.00000 |       0.01198 |       0.00438 |       0.72458
     -0.01113 |       0.00000 |       0.01193 |       0.00473 |       0.71967
Evaluating losses...
     -0.01292 |       0.00000 |       0.01145 |       0.00491 |      

     -0.01147 |       0.00000 |       0.01231 |       0.00516 |       0.71609
     -0.01203 |       0.00000 |       0.01219 |       0.00564 |       0.71470
Evaluating losses...
     -0.01377 |       0.00000 |       0.01168 |       0.00563 |       0.71777
----------------------------------
| EpLenMean       | 643          |
| EpRewMean       | -4.83        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 1385         |
| TimeElapsed     | 1.93e+03     |
| TimestepsSoFar  | 880640       |
| ev_tdlam_before | 0.842        |
| loss_ent        | 0.7177665    |
| loss_kl         | 0.005634329  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013767781 |
| loss_vf_loss    | 0.011683307  |
----------------------------------
********** Iteration 215 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00034 |       0.00000 |       0.01568 |       0.00223 |       0.70986
     -0.00414 |       0.00000 |       0.01336 |

********** Iteration 220 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00020 |       0.00000 |       0.01412 |       0.00104 |       0.70354
     -0.00412 |       0.00000 |       0.01243 |       0.00216 |       0.70048
     -0.00648 |       0.00000 |       0.01176 |       0.00252 |       0.69629
     -0.00800 |       0.00000 |       0.01129 |       0.00286 |       0.69612
     -0.00935 |       0.00000 |       0.01108 |       0.00277 |       0.69498
     -0.01020 |       0.00000 |       0.01067 |       0.00313 |       0.69184
     -0.01097 |       0.00000 |       0.01053 |       0.00364 |       0.69056
     -0.01180 |       0.00000 |       0.01019 |       0.00392 |       0.69052
     -0.01180 |       0.00000 |       0.00994 |       0.00451 |       0.69004
     -0.01280 |       0.00000 |       0.00993 |       0.00459 |       0.68754
Evaluating losses...
     -0.01452 |       0.00000 |       0.00955 |       0.00460 |      

     -0.01026 |       0.00000 |       0.01151 |       0.00494 |       0.65856
     -0.01095 |       0.00000 |       0.01142 |       0.00500 |       0.65765
Evaluating losses...
     -0.01271 |       0.00000 |       0.01114 |       0.00505 |       0.65648
----------------------------------
| EpLenMean       | 630          |
| EpRewMean       | -4.83        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 1457         |
| TimeElapsed     | 1.97e+03     |
| TimestepsSoFar  | 925696       |
| ev_tdlam_before | 0.843        |
| loss_ent        | 0.6564846    |
| loss_kl         | 0.00504923   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012705252 |
| loss_vf_loss    | 0.011136064  |
----------------------------------
********** Iteration 226 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00094 |       0.00000 |       0.01627 |       0.00120 |       0.64066
     -0.00356 |       0.00000 |       0.01320 |

********** Iteration 231 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00193 |       0.00000 |       0.02045 |       0.00133 |       0.67939
     -0.00218 |       0.00000 |       0.01704 |       0.00217 |       0.67888
     -0.00431 |       0.00000 |       0.01598 |       0.00319 |       0.67807
     -0.00611 |       0.00000 |       0.01521 |       0.00441 |       0.67996
     -0.00697 |       0.00000 |       0.01472 |       0.00382 |       0.68153
     -0.00789 |       0.00000 |       0.01427 |       0.00455 |       0.67937
     -0.00856 |       0.00000 |       0.01413 |       0.00419 |       0.68035
     -0.00929 |       0.00000 |       0.01377 |       0.00572 |       0.67992
     -0.01034 |       0.00000 |       0.01337 |       0.00508 |       0.68318
     -0.01029 |       0.00000 |       0.01328 |       0.00512 |       0.68456
Evaluating losses...
     -0.01245 |       0.00000 |       0.01285 |       0.00530 |      

     -0.01093 |       0.00000 |       0.01025 |       0.00425 |       0.68432
     -0.01174 |       0.00000 |       0.01015 |       0.00471 |       0.68296
Evaluating losses...
     -0.01337 |       0.00000 |       0.00981 |       0.00436 |       0.68346
----------------------------------
| EpLenMean       | 637          |
| EpRewMean       | -4.85        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 1528         |
| TimeElapsed     | 2.01e+03     |
| TimestepsSoFar  | 970752       |
| ev_tdlam_before | 0.87         |
| loss_ent        | 0.6834631    |
| loss_kl         | 0.0043632854 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013373848 |
| loss_vf_loss    | 0.0098080225 |
----------------------------------
********** Iteration 237 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00049 |       0.00000 |       0.01371 |       0.00140 |       0.66673
     -0.00483 |       0.00000 |       0.01190 |

********** Iteration 242 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00082 |       0.00000 |       0.01480 |       0.00131 |       0.67179
     -0.00358 |       0.00000 |       0.01335 |       0.00360 |       0.67267
     -0.00621 |       0.00000 |       0.01285 |       0.00352 |       0.66742
     -0.00782 |       0.00000 |       0.01236 |       0.00389 |       0.66725
     -0.00891 |       0.00000 |       0.01197 |       0.00460 |       0.66722
     -0.01029 |       0.00000 |       0.01185 |       0.00543 |       0.66867
     -0.01051 |       0.00000 |       0.01156 |       0.00492 |       0.66475
     -0.01139 |       0.00000 |       0.01136 |       0.00519 |       0.66609
     -0.01204 |       0.00000 |       0.01121 |       0.00523 |       0.66648
     -0.01312 |       0.00000 |       0.01102 |       0.00545 |       0.66467
Evaluating losses...
     -0.01459 |       0.00000 |       0.01070 |       0.00534 |      

     -0.01133 |       0.00000 |       0.00966 |       0.00463 |       0.62812
     -0.01187 |       0.00000 |       0.00954 |       0.00497 |       0.62615
     -0.01293 |       0.00000 |       0.00930 |       0.00542 |       0.62887
Evaluating losses...
     -0.01325 |       0.00000 |       0.00907 |       0.00478 |       0.62846
----------------------------------
| EpLenMean       | 621          |
| EpRewMean       | -4.89        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 1600         |
| TimeElapsed     | 2.4e+03      |
| TimestepsSoFar  | 1015808      |
| ev_tdlam_before | 0.862        |
| loss_ent        | 0.62845665   |
| loss_kl         | 0.004775536  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013252943 |
| loss_vf_loss    | 0.009069     |
----------------------------------
********** Iteration 248 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00188 |       0.00000 |       0.01773 |

********** Iteration 253 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00055 |       0.00000 |       0.01609 |       0.00150 |       0.62228
     -0.00293 |       0.00000 |       0.01401 |       0.00262 |       0.62152
     -0.00575 |       0.00000 |       0.01277 |       0.00307 |       0.61736
     -0.00735 |       0.00000 |       0.01209 |       0.00374 |       0.61694
     -0.00822 |       0.00000 |       0.01157 |       0.00403 |       0.61459
     -0.00961 |       0.00000 |       0.01080 |       0.00493 |       0.61401
     -0.01026 |       0.00000 |       0.01069 |       0.00461 |       0.61224
     -0.01110 |       0.00000 |       0.01041 |       0.00499 |       0.61434
     -0.01156 |       0.00000 |       0.01004 |       0.00549 |       0.61509
     -0.01210 |       0.00000 |       0.00988 |       0.00572 |       0.61471
Evaluating losses...
     -0.01323 |       0.00000 |       0.00978 |       0.00540 |      

     -0.01019 |       0.00000 |       0.00942 |       0.00561 |       0.64952
     -0.01181 |       0.00000 |       0.00925 |       0.00580 |       0.65069
     -0.01230 |       0.00000 |       0.00900 |       0.00610 |       0.64882
Evaluating losses...
     -0.01418 |       0.00000 |       0.00879 |       0.00609 |       0.64911
----------------------------------
| EpLenMean       | 633          |
| EpRewMean       | -4.86        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 1671         |
| TimeElapsed     | 2.44e+03     |
| TimestepsSoFar  | 1060864      |
| ev_tdlam_before | 0.853        |
| loss_ent        | 0.64911115   |
| loss_kl         | 0.0060943025 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014177145 |
| loss_vf_loss    | 0.008789947  |
----------------------------------
********** Iteration 259 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00045 |       0.00000 |       0.01443 |

********** Iteration 264 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00084 |       0.00000 |       0.01274 |       0.00145 |       0.65815
     -0.00405 |       0.00000 |       0.01097 |       0.00248 |       0.65839
     -0.00609 |       0.00000 |       0.01019 |       0.00273 |       0.65789
     -0.00699 |       0.00000 |       0.00968 |       0.00317 |       0.66036
     -0.00842 |       0.00000 |       0.00946 |       0.00354 |       0.65894
     -0.00922 |       0.00000 |       0.00930 |       0.00381 |       0.65981
     -0.01004 |       0.00000 |       0.00916 |       0.00393 |       0.65814
     -0.01078 |       0.00000 |       0.00870 |       0.00447 |       0.66153
     -0.01150 |       0.00000 |       0.00850 |       0.00476 |       0.66156
     -0.01197 |       0.00000 |       0.00847 |       0.00456 |       0.66166
Evaluating losses...
     -0.01338 |       0.00000 |       0.00813 |       0.00559 |      

     -0.01050 |       0.00000 |       0.00929 |       0.00488 |       0.57954
     -0.01074 |       0.00000 |       0.00906 |       0.00530 |       0.57885
Evaluating losses...
     -0.01239 |       0.00000 |       0.00883 |       0.00511 |       0.57907
----------------------------------
| EpLenMean       | 627          |
| EpRewMean       | -4.85        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 1744         |
| TimeElapsed     | 2.48e+03     |
| TimestepsSoFar  | 1105920      |
| ev_tdlam_before | 0.878        |
| loss_ent        | 0.5790703    |
| loss_kl         | 0.00511294   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012387417 |
| loss_vf_loss    | 0.008832816  |
----------------------------------
********** Iteration 270 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00045 |       0.00000 |       0.01306 |       0.00128 |       0.55263
     -0.00295 |       0.00000 |       0.01158 |

********** Iteration 275 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -2.17e-05 |       0.00000 |       0.01205 |       0.00156 |       0.54274
     -0.00520 |       0.00000 |       0.01090 |       0.00318 |       0.53942
     -0.00686 |       0.00000 |       0.01041 |       0.00353 |       0.53975
     -0.00836 |       0.00000 |       0.00994 |       0.00420 |       0.53930
     -0.00990 |       0.00000 |       0.00957 |       0.00442 |       0.53900
     -0.01035 |       0.00000 |       0.00946 |       0.00413 |       0.54244
     -0.01190 |       0.00000 |       0.00919 |       0.00456 |       0.54123
     -0.01250 |       0.00000 |       0.00910 |       0.00540 |       0.53918
     -0.01294 |       0.00000 |       0.00892 |       0.00530 |       0.54136
     -0.01408 |       0.00000 |       0.00878 |       0.00570 |       0.54067
Evaluating losses...
     -0.01596 |       0.00000 |       0.00853 |       0.00587 |      

     -0.01051 |       0.00000 |       0.01047 |       0.00501 |       0.56186
     -0.01133 |       0.00000 |       0.01031 |       0.00574 |       0.56145
Evaluating losses...
     -0.01316 |       0.00000 |       0.00991 |       0.00581 |       0.56267
----------------------------------
| EpLenMean       | 620          |
| EpRewMean       | -4.82        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 1815         |
| TimeElapsed     | 2.51e+03     |
| TimestepsSoFar  | 1150976      |
| ev_tdlam_before | 0.828        |
| loss_ent        | 0.56266624   |
| loss_kl         | 0.005814944  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013161477 |
| loss_vf_loss    | 0.00991259   |
----------------------------------
********** Iteration 281 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00025 |       0.00000 |       0.01968 |       0.00139 |       0.57261
     -0.00425 |       0.00000 |       0.01649 |

********** Iteration 286 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00073 |       0.00000 |       0.01938 |       0.00118 |       0.55579
     -0.00464 |       0.00000 |       0.01740 |       0.00334 |       0.55916
     -0.00731 |       0.00000 |       0.01623 |       0.00287 |       0.55923
     -0.00887 |       0.00000 |       0.01566 |       0.00445 |       0.56142
     -0.00999 |       0.00000 |       0.01523 |       0.00401 |       0.56050
     -0.01153 |       0.00000 |       0.01485 |       0.00482 |       0.56160
     -0.01231 |       0.00000 |       0.01467 |       0.00483 |       0.56042
     -0.01326 |       0.00000 |       0.01427 |       0.00559 |       0.56284
     -0.01386 |       0.00000 |       0.01398 |       0.00620 |       0.56223
     -0.01403 |       0.00000 |       0.01384 |       0.00590 |       0.56240
Evaluating losses...
     -0.01596 |       0.00000 |       0.01331 |       0.00639 |      

     -0.01378 |       0.00000 |       0.01120 |       0.00553 |       0.57899
     -0.01417 |       0.00000 |       0.01104 |       0.00608 |       0.57795
Evaluating losses...
     -0.01603 |       0.00000 |       0.01078 |       0.00700 |       0.57972
----------------------------------
| EpLenMean       | 635          |
| EpRewMean       | -4.81        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 1888         |
| TimeElapsed     | 2.55e+03     |
| TimestepsSoFar  | 1196032      |
| ev_tdlam_before | 0.868        |
| loss_ent        | 0.57972276   |
| loss_kl         | 0.007004333  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016028378 |
| loss_vf_loss    | 0.010775253  |
----------------------------------
********** Iteration 292 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00083 |       0.00000 |       0.01838 |       0.00140 |       0.57114
     -0.00334 |       0.00000 |       0.01571 |

********** Iteration 297 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00109 |       0.00000 |       0.01299 |       0.00085 |       0.56005
     -0.00294 |       0.00000 |       0.01180 |       0.00184 |       0.56042
     -0.00525 |       0.00000 |       0.01082 |       0.00237 |       0.55869
     -0.00596 |       0.00000 |       0.01045 |       0.00302 |       0.55926
     -0.00663 |       0.00000 |       0.00993 |       0.00368 |       0.56067
     -0.00774 |       0.00000 |       0.00980 |       0.00391 |       0.55981
     -0.00876 |       0.00000 |       0.00954 |       0.00423 |       0.56181
     -0.00883 |       0.00000 |       0.00933 |       0.00440 |       0.56149
     -0.00931 |       0.00000 |       0.00918 |       0.00439 |       0.56144
     -0.00958 |       0.00000 |       0.00905 |       0.00490 |       0.56313
Evaluating losses...
     -0.01119 |       0.00000 |       0.00880 |       0.00477 |      

     -0.01261 |       0.00000 |       0.00927 |       0.00542 |       0.57884
     -0.01365 |       0.00000 |       0.00907 |       0.00565 |       0.57788
Evaluating losses...
     -0.01519 |       0.00000 |       0.00873 |       0.00519 |       0.57928
----------------------------------
| EpLenMean       | 630          |
| EpRewMean       | -4.84        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 1959         |
| TimeElapsed     | 2.59e+03     |
| TimestepsSoFar  | 1241088      |
| ev_tdlam_before | 0.871        |
| loss_ent        | 0.5792785    |
| loss_kl         | 0.0051936456 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015186911 |
| loss_vf_loss    | 0.008726706  |
----------------------------------
********** Iteration 303 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00075 |       0.00000 |       0.01536 |       0.00096 |       0.55874
     -0.00315 |       0.00000 |       0.01374 |

********** Iteration 308 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00085 |       0.00000 |       0.01535 |       0.00171 |       0.58540
     -0.00424 |       0.00000 |       0.01321 |       0.00273 |       0.58391
     -0.00624 |       0.00000 |       0.01226 |       0.00347 |       0.58340
     -0.00751 |       0.00000 |       0.01168 |       0.00378 |       0.58196
     -0.00876 |       0.00000 |       0.01128 |       0.00392 |       0.58427
     -0.01055 |       0.00000 |       0.01097 |       0.00445 |       0.58271
     -0.01112 |       0.00000 |       0.01072 |       0.00433 |       0.58474
     -0.01189 |       0.00000 |       0.01049 |       0.00533 |       0.58392
     -0.01277 |       0.00000 |       0.01029 |       0.00498 |       0.58492
     -0.01296 |       0.00000 |       0.01024 |       0.00562 |       0.58419
Evaluating losses...
     -0.01426 |       0.00000 |       0.00995 |       0.00587 |      

     -0.01128 |       0.00000 |       0.00792 |       0.00421 |       0.56933
     -0.01184 |       0.00000 |       0.00782 |       0.00475 |       0.56829
Evaluating losses...
     -0.01304 |       0.00000 |       0.00763 |       0.00445 |       0.56872
---------------------------------
| EpLenMean       | 650         |
| EpRewMean       | -4.87       |
| EpThisIter      | 7           |
| EpisodesSoFar   | 2030        |
| TimeElapsed     | 2.98e+03    |
| TimestepsSoFar  | 1286144     |
| ev_tdlam_before | 0.868       |
| loss_ent        | 0.5687157   |
| loss_kl         | 0.004448764 |
| loss_pol_entpen | 0.0         |
| loss_pol_surr   | -0.01303651 |
| loss_vf_loss    | 0.00762601  |
---------------------------------
********** Iteration 314 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00048 |       0.00000 |       0.01331 |       0.00120 |       0.56764
     -0.00415 |       0.00000 |       0.01205 |       0.00231

********** Iteration 319 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00089 |       0.00000 |       0.01217 |       0.00083 |       0.54714
     -0.00309 |       0.00000 |       0.01053 |       0.00214 |       0.54438
     -0.00563 |       0.00000 |       0.00991 |       0.00265 |       0.54559
     -0.00626 |       0.00000 |       0.00960 |       0.00280 |       0.54381
     -0.00780 |       0.00000 |       0.00928 |       0.00313 |       0.54436
     -0.00891 |       0.00000 |       0.00903 |       0.00329 |       0.54588
     -0.00963 |       0.00000 |       0.00882 |       0.00356 |       0.54663
     -0.00994 |       0.00000 |       0.00870 |       0.00377 |       0.54663
     -0.01109 |       0.00000 |       0.00853 |       0.00403 |       0.54719
     -0.01173 |       0.00000 |       0.00842 |       0.00446 |       0.54709
Evaluating losses...
     -0.01258 |       0.00000 |       0.00808 |       0.00495 |      

     -0.01083 |       0.00000 |       0.00960 |       0.00530 |       0.57307
     -0.01116 |       0.00000 |       0.00943 |       0.00559 |       0.57385
Evaluating losses...
     -0.01253 |       0.00000 |       0.00915 |       0.00528 |       0.57295
----------------------------------
| EpLenMean       | 619          |
| EpRewMean       | -4.87        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 2104         |
| TimeElapsed     | 3.03e+03     |
| TimestepsSoFar  | 1331200      |
| ev_tdlam_before | 0.852        |
| loss_ent        | 0.57294935   |
| loss_kl         | 0.005276022  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012533046 |
| loss_vf_loss    | 0.009153037  |
----------------------------------
********** Iteration 325 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     8.93e-05 |       0.00000 |       0.01369 |       0.00134 |       0.54438
     -0.00349 |       0.00000 |       0.01170 |

********** Iteration 330 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00160 |       0.00000 |       0.01795 |       0.00140 |       0.55837
     -0.00480 |       0.00000 |       0.01539 |       0.00216 |       0.56233
     -0.00738 |       0.00000 |       0.01448 |       0.00255 |       0.56215
     -0.00855 |       0.00000 |       0.01396 |       0.00321 |       0.56287
     -0.00978 |       0.00000 |       0.01328 |       0.00377 |       0.56451
     -0.01058 |       0.00000 |       0.01307 |       0.00389 |       0.56446
     -0.01147 |       0.00000 |       0.01283 |       0.00431 |       0.56636
     -0.01200 |       0.00000 |       0.01248 |       0.00473 |       0.56762
     -0.01307 |       0.00000 |       0.01221 |       0.00480 |       0.56692
     -0.01355 |       0.00000 |       0.01195 |       0.00510 |       0.56798
Evaluating losses...
     -0.01436 |       0.00000 |       0.01180 |       0.00546 |      

     -0.01076 |       0.00000 |       0.00881 |       0.00436 |       0.57630
     -0.01152 |       0.00000 |       0.00862 |       0.00460 |       0.57529
Evaluating losses...
     -0.01305 |       0.00000 |       0.00843 |       0.00478 |       0.57700
----------------------------------
| EpLenMean       | 625          |
| EpRewMean       | -4.79        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 2176         |
| TimeElapsed     | 3.06e+03     |
| TimestepsSoFar  | 1376256      |
| ev_tdlam_before | 0.874        |
| loss_ent        | 0.5769986    |
| loss_kl         | 0.004784602  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013048969 |
| loss_vf_loss    | 0.008430225  |
----------------------------------
********** Iteration 336 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00028 |       0.00000 |       0.01419 |       0.00088 |       0.58357
     -0.00282 |       0.00000 |       0.01239 |

********** Iteration 341 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00014 |       0.00000 |       0.01394 |       0.00189 |       0.59476
     -0.00417 |       0.00000 |       0.01272 |       0.00302 |       0.59853
     -0.00635 |       0.00000 |       0.01191 |       0.00410 |       0.59917
     -0.00735 |       0.00000 |       0.01137 |       0.00326 |       0.60070
     -0.00740 |       0.00000 |       0.01103 |       0.00499 |       0.59982
     -0.00915 |       0.00000 |       0.01074 |       0.00452 |       0.59753
     -0.01021 |       0.00000 |       0.01059 |       0.00474 |       0.59706
     -0.01105 |       0.00000 |       0.01041 |       0.00502 |       0.59863
     -0.01166 |       0.00000 |       0.01023 |       0.00534 |       0.59835
     -0.01153 |       0.00000 |       0.01021 |       0.00568 |       0.59987
Evaluating losses...
     -0.01371 |       0.00000 |       0.00968 |       0.00579 |      

     -0.01449 |       0.00000 |       0.00964 |       0.00656 |       0.57749
     -0.01513 |       0.00000 |       0.00954 |       0.00610 |       0.57763
Evaluating losses...
     -0.01661 |       0.00000 |       0.00928 |       0.00667 |       0.57793
----------------------------------
| EpLenMean       | 619          |
| EpRewMean       | -4.86        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 2249         |
| TimeElapsed     | 3.11e+03     |
| TimestepsSoFar  | 1421312      |
| ev_tdlam_before | 0.821        |
| loss_ent        | 0.577935     |
| loss_kl         | 0.0066698897 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016609374 |
| loss_vf_loss    | 0.009279001  |
----------------------------------
********** Iteration 347 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00180 |       0.00000 |       0.01399 |       0.00124 |       0.59806
     -0.00242 |       0.00000 |       0.01203 |

********** Iteration 352 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00148 |       0.00000 |       0.01444 |       0.00096 |       0.58671
     -0.00242 |       0.00000 |       0.01243 |       0.00154 |       0.58136
     -0.00396 |       0.00000 |       0.01154 |       0.00260 |       0.57558
     -0.00582 |       0.00000 |       0.01098 |       0.00285 |       0.57622
     -0.00665 |       0.00000 |       0.01060 |       0.00289 |       0.57417
     -0.00837 |       0.00000 |       0.01053 |       0.00354 |       0.57225
     -0.00846 |       0.00000 |       0.01016 |       0.00383 |       0.57118
     -0.00942 |       0.00000 |       0.01000 |       0.00379 |       0.56984
     -0.01007 |       0.00000 |       0.00966 |       0.00417 |       0.56866
     -0.01000 |       0.00000 |       0.00973 |       0.00451 |       0.56862
Evaluating losses...
     -0.01108 |       0.00000 |       0.00922 |       0.00477 |      

     -0.01438 |       0.00000 |       0.00975 |       0.00604 |       0.59593
     -0.01517 |       0.00000 |       0.00949 |       0.00637 |       0.59657
Evaluating losses...
     -0.01647 |       0.00000 |       0.00914 |       0.00668 |       0.59737
----------------------------------
| EpLenMean       | 609          |
| EpRewMean       | -4.83        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 2323         |
| TimeElapsed     | 3.15e+03     |
| TimestepsSoFar  | 1466368      |
| ev_tdlam_before | 0.854        |
| loss_ent        | 0.59737396   |
| loss_kl         | 0.0066837915 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01646537  |
| loss_vf_loss    | 0.009137597  |
----------------------------------
********** Iteration 358 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -8.22e-05 |       0.00000 |       0.01299 |       0.00128 |       0.57945
     -0.00423 |       0.00000 |       0.01124 |

********** Iteration 363 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00073 |       0.00000 |       0.01403 |       0.00188 |       0.53041
     -0.00419 |       0.00000 |       0.01170 |       0.00274 |       0.52697
     -0.00599 |       0.00000 |       0.01085 |       0.00267 |       0.52360
     -0.00760 |       0.00000 |       0.01052 |       0.00376 |       0.52134
     -0.00816 |       0.00000 |       0.01005 |       0.00358 |       0.51992
     -0.00979 |       0.00000 |       0.00961 |       0.00436 |       0.51893
     -0.01046 |       0.00000 |       0.00945 |       0.00390 |       0.51868
     -0.01098 |       0.00000 |       0.00932 |       0.00432 |       0.51937
     -0.01171 |       0.00000 |       0.00919 |       0.00460 |       0.51955
     -0.01246 |       0.00000 |       0.00889 |       0.00471 |       0.51923
Evaluating losses...
     -0.01412 |       0.00000 |       0.00865 |       0.00458 |      

     -0.01235 |       0.00000 |       0.01191 |       0.00639 |       0.53765
     -0.01309 |       0.00000 |       0.01168 |       0.00642 |       0.53595
     -0.01359 |       0.00000 |       0.01145 |       0.00660 |       0.53538
Evaluating losses...
     -0.01477 |       0.00000 |       0.01113 |       0.00704 |       0.53616
----------------------------------
| EpLenMean       | 633          |
| EpRewMean       | -4.88        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 2393         |
| TimeElapsed     | 3.7e+03      |
| TimestepsSoFar  | 1511424      |
| ev_tdlam_before | 0.799        |
| loss_ent        | 0.53615636   |
| loss_kl         | 0.0070375656 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014765452 |
| loss_vf_loss    | 0.011131945  |
----------------------------------
********** Iteration 369 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00076 |       0.00000 |       0.01367 |

********** Iteration 374 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00090 |       0.00000 |       0.01543 |       0.00201 |       0.55126
     -0.00605 |       0.00000 |       0.01292 |       0.00247 |       0.55008
     -0.00884 |       0.00000 |       0.01184 |       0.00333 |       0.55086
     -0.01043 |       0.00000 |       0.01126 |       0.00427 |       0.55348
     -0.01173 |       0.00000 |       0.01068 |       0.00423 |       0.55473
     -0.01289 |       0.00000 |       0.01030 |       0.00492 |       0.55470
     -0.01379 |       0.00000 |       0.00994 |       0.00514 |       0.55712
     -0.01448 |       0.00000 |       0.00970 |       0.00529 |       0.55589
     -0.01543 |       0.00000 |       0.00945 |       0.00586 |       0.55722
     -0.01614 |       0.00000 |       0.00922 |       0.00636 |       0.55767
Evaluating losses...
     -0.01759 |       0.00000 |       0.00892 |       0.00614 |      

     -0.01283 |       0.00000 |       0.00982 |       0.00504 |       0.52142
     -0.01301 |       0.00000 |       0.00966 |       0.00584 |       0.52189
Evaluating losses...
     -0.01458 |       0.00000 |       0.00927 |       0.00557 |       0.52153
----------------------------------
| EpLenMean       | 641          |
| EpRewMean       | -4.87        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 2463         |
| TimeElapsed     | 3.77e+03     |
| TimestepsSoFar  | 1556480      |
| ev_tdlam_before | 0.874        |
| loss_ent        | 0.5215323    |
| loss_kl         | 0.0055744727 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014584461 |
| loss_vf_loss    | 0.009271548  |
----------------------------------
********** Iteration 380 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00016 |       0.00000 |       0.01980 |       0.00102 |       0.54651
     -0.00541 |       0.00000 |       0.01666 |

********** Iteration 385 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00026 |       0.00000 |       0.01607 |       0.00102 |       0.52209
     -0.00552 |       0.00000 |       0.01387 |       0.00258 |       0.52097
     -0.00839 |       0.00000 |       0.01272 |       0.00264 |       0.52061
     -0.01019 |       0.00000 |       0.01207 |       0.00343 |       0.51898
     -0.01071 |       0.00000 |       0.01156 |       0.00345 |       0.52008
     -0.01179 |       0.00000 |       0.01132 |       0.00330 |       0.51922
     -0.01278 |       0.00000 |       0.01079 |       0.00379 |       0.51885
     -0.01287 |       0.00000 |       0.01064 |       0.00413 |       0.51931
     -0.01357 |       0.00000 |       0.01040 |       0.00448 |       0.51981
     -0.01456 |       0.00000 |       0.01014 |       0.00456 |       0.51881
Evaluating losses...
     -0.01598 |       0.00000 |       0.00997 |       0.00435 |      

     -0.01386 |       0.00000 |       0.00993 |       0.00501 |       0.52384
     -0.01474 |       0.00000 |       0.00965 |       0.00502 |       0.52345
Evaluating losses...
     -0.01611 |       0.00000 |       0.00932 |       0.00573 |       0.52351
----------------------------------
| EpLenMean       | 637          |
| EpRewMean       | -4.84        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 2535         |
| TimeElapsed     | 3.83e+03     |
| TimestepsSoFar  | 1601536      |
| ev_tdlam_before | 0.824        |
| loss_ent        | 0.5235061    |
| loss_kl         | 0.0057290955 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016111426 |
| loss_vf_loss    | 0.0093151275 |
----------------------------------
********** Iteration 391 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00072 |       0.00000 |       0.01506 |       0.00101 |       0.52938
     -0.00444 |       0.00000 |       0.01241 |

********** Iteration 396 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00110 |       0.00000 |       0.01705 |       0.00076 |       0.50924
     -0.00483 |       0.00000 |       0.01398 |       0.00199 |       0.50858
     -0.00802 |       0.00000 |       0.01284 |       0.00275 |       0.50577
     -0.00942 |       0.00000 |       0.01197 |       0.00351 |       0.50486
     -0.01108 |       0.00000 |       0.01143 |       0.00342 |       0.50462
     -0.01223 |       0.00000 |       0.01098 |       0.00411 |       0.50465
     -0.01288 |       0.00000 |       0.01063 |       0.00457 |       0.50519
     -0.01349 |       0.00000 |       0.01037 |       0.00481 |       0.50435
     -0.01477 |       0.00000 |       0.01015 |       0.00480 |       0.50456
     -0.01508 |       0.00000 |       0.01001 |       0.00500 |       0.50336
Evaluating losses...
     -0.01651 |       0.00000 |       0.00947 |       0.00519 |      

     -0.01273 |       0.00000 |       0.01049 |       0.00467 |       0.51415
     -0.01317 |       0.00000 |       0.01025 |       0.00509 |       0.51526
Evaluating losses...
     -0.01512 |       0.00000 |       0.01008 |       0.00505 |       0.51458
----------------------------------
| EpLenMean       | 646          |
| EpRewMean       | -4.82        |
| EpThisIter      | 8            |
| EpisodesSoFar   | 2605         |
| TimeElapsed     | 3.88e+03     |
| TimestepsSoFar  | 1646592      |
| ev_tdlam_before | 0.847        |
| loss_ent        | 0.5145813    |
| loss_kl         | 0.0050488967 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015117246 |
| loss_vf_loss    | 0.010083488  |
----------------------------------
********** Iteration 402 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00112 |       0.00000 |       0.01438 |       0.00091 |       0.50232
     -0.00567 |       0.00000 |       0.01275 |

********** Iteration 407 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00051 |       0.00000 |       0.01559 |       0.00122 |       0.51783
     -0.00478 |       0.00000 |       0.01233 |       0.00234 |       0.51928
     -0.00809 |       0.00000 |       0.01139 |       0.00267 |       0.51746
     -0.00965 |       0.00000 |       0.01084 |       0.00318 |       0.51702
     -0.01099 |       0.00000 |       0.01041 |       0.00388 |       0.51566
     -0.01230 |       0.00000 |       0.01013 |       0.00397 |       0.51533
     -0.01338 |       0.00000 |       0.00973 |       0.00475 |       0.51564
     -0.01418 |       0.00000 |       0.00952 |       0.00487 |       0.51441
     -0.01446 |       0.00000 |       0.00929 |       0.00512 |       0.51491
     -0.01489 |       0.00000 |       0.00910 |       0.00557 |       0.51565
Evaluating losses...
     -0.01717 |       0.00000 |       0.00882 |       0.00562 |      

     -0.01322 |       0.00000 |       0.00973 |       0.00484 |       0.51043
     -0.01405 |       0.00000 |       0.00946 |       0.00510 |       0.51045
Evaluating losses...
     -0.01558 |       0.00000 |       0.00909 |       0.00552 |       0.51102
----------------------------------
| EpLenMean       | 672          |
| EpRewMean       | -4.81        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 2672         |
| TimeElapsed     | 3.94e+03     |
| TimestepsSoFar  | 1691648      |
| ev_tdlam_before | 0.824        |
| loss_ent        | 0.5110158    |
| loss_kl         | 0.005523871  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015579063 |
| loss_vf_loss    | 0.009094709  |
----------------------------------
********** Iteration 413 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00042 |       0.00000 |       0.01671 |       0.00087 |       0.50950
     -0.00252 |       0.00000 |       0.01378 |

********** Iteration 418 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00119 |       0.00000 |       0.01840 |       0.00144 |       0.52840
     -0.00444 |       0.00000 |       0.01598 |       0.00193 |       0.53090
     -0.00681 |       0.00000 |       0.01481 |       0.00308 |       0.53358
     -0.00897 |       0.00000 |       0.01405 |       0.00356 |       0.53488
     -0.01010 |       0.00000 |       0.01332 |       0.00390 |       0.53534
     -0.01113 |       0.00000 |       0.01288 |       0.00469 |       0.53760
     -0.01203 |       0.00000 |       0.01240 |       0.00512 |       0.53895
     -0.01279 |       0.00000 |       0.01207 |       0.00482 |       0.53761
     -0.01342 |       0.00000 |       0.01179 |       0.00512 |       0.53979
     -0.01337 |       0.00000 |       0.01152 |       0.00561 |       0.53989
Evaluating losses...
     -0.01516 |       0.00000 |       0.01106 |       0.00583 |      

     -0.01340 |       0.00000 |       0.00878 |       0.00528 |       0.56786
     -0.01405 |       0.00000 |       0.00861 |       0.00555 |       0.56969
Evaluating losses...
     -0.01605 |       0.00000 |       0.00834 |       0.00575 |       0.56875
----------------------------------
| EpLenMean       | 627          |
| EpRewMean       | -4.86        |
| EpThisIter      | 5            |
| EpisodesSoFar   | 2745         |
| TimeElapsed     | 3.99e+03     |
| TimestepsSoFar  | 1736704      |
| ev_tdlam_before | 0.859        |
| loss_ent        | 0.5687502    |
| loss_kl         | 0.005750033  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016049093 |
| loss_vf_loss    | 0.008338811  |
----------------------------------
********** Iteration 424 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00088 |       0.00000 |       0.02146 |       0.00151 |       0.55195
     -0.00500 |       0.00000 |       0.01709 |

********** Iteration 429 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00097 |       0.00000 |       0.01429 |       0.00120 |       0.58248
     -0.00397 |       0.00000 |       0.01267 |       0.00258 |       0.58319
     -0.00644 |       0.00000 |       0.01177 |       0.00282 |       0.58366
     -0.00821 |       0.00000 |       0.01123 |       0.00296 |       0.58338
     -0.00935 |       0.00000 |       0.01076 |       0.00354 |       0.58344
     -0.01040 |       0.00000 |       0.01042 |       0.00374 |       0.58326
     -0.01078 |       0.00000 |       0.01026 |       0.00400 |       0.58233
     -0.01141 |       0.00000 |       0.00996 |       0.00406 |       0.58234
     -0.01180 |       0.00000 |       0.00987 |       0.00461 |       0.58281
     -0.01287 |       0.00000 |       0.00963 |       0.00498 |       0.58293
Evaluating losses...
     -0.01410 |       0.00000 |       0.00931 |       0.00491 |      

     -0.01610 |       0.00000 |       0.00900 |       0.00659 |       0.57981
     -0.01664 |       0.00000 |       0.00870 |       0.00696 |       0.58100
Evaluating losses...
     -0.01843 |       0.00000 |       0.00846 |       0.00758 |       0.58186
----------------------------------
| EpLenMean       | 640          |
| EpRewMean       | -4.85        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 2817         |
| TimeElapsed     | 4.51e+03     |
| TimestepsSoFar  | 1781760      |
| ev_tdlam_before | 0.871        |
| loss_ent        | 0.58185714   |
| loss_kl         | 0.0075783306 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018429574 |
| loss_vf_loss    | 0.008460555  |
----------------------------------
********** Iteration 435 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00078 |       0.00000 |       0.01773 |       0.00109 |       0.58569
     -0.00457 |       0.00000 |       0.01462 |

********** Iteration 440 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00013 |       0.00000 |       0.01501 |       0.00094 |       0.55832
     -0.00665 |       0.00000 |       0.01300 |       0.00248 |       0.56124
     -0.00981 |       0.00000 |       0.01200 |       0.00275 |       0.56062
     -0.01215 |       0.00000 |       0.01145 |       0.00347 |       0.56207
     -0.01384 |       0.00000 |       0.01103 |       0.00373 |       0.56319
     -0.01551 |       0.00000 |       0.01070 |       0.00409 |       0.56458
     -0.01625 |       0.00000 |       0.01044 |       0.00506 |       0.56397
     -0.01711 |       0.00000 |       0.01022 |       0.00496 |       0.56550
     -0.01717 |       0.00000 |       0.01003 |       0.00571 |       0.56521
     -0.01842 |       0.00000 |       0.00986 |       0.00604 |       0.56523
Evaluating losses...
     -0.01917 |       0.00000 |       0.00958 |       0.00667 |      

     -0.01598 |       0.00000 |       0.01019 |       0.00520 |       0.56216
     -0.01655 |       0.00000 |       0.01002 |       0.00598 |       0.55994
Evaluating losses...
     -0.01785 |       0.00000 |       0.00969 |       0.00614 |       0.56079
----------------------------------
| EpLenMean       | 640          |
| EpRewMean       | -4.81        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 2889         |
| TimeElapsed     | 4.55e+03     |
| TimestepsSoFar  | 1826816      |
| ev_tdlam_before | 0.853        |
| loss_ent        | 0.56079006   |
| loss_kl         | 0.006136096  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.017848404 |
| loss_vf_loss    | 0.0096919965 |
----------------------------------
********** Iteration 446 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00027 |       0.00000 |       0.01533 |       0.00181 |       0.57860
     -0.00519 |       0.00000 |       0.01301 |

********** Iteration 451 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00041 |       0.00000 |       0.01796 |       0.00110 |       0.57631
     -0.00733 |       0.00000 |       0.01515 |       0.00300 |       0.57649
     -0.00937 |       0.00000 |       0.01401 |       0.00378 |       0.57803
     -0.01149 |       0.00000 |       0.01340 |       0.00405 |       0.58070
     -0.01301 |       0.00000 |       0.01274 |       0.00457 |       0.58183
     -0.01454 |       0.00000 |       0.01238 |       0.00508 |       0.58189
     -0.01525 |       0.00000 |       0.01205 |       0.00544 |       0.58209
     -0.01528 |       0.00000 |       0.01190 |       0.00571 |       0.58257
     -0.01666 |       0.00000 |       0.01149 |       0.00611 |       0.58311
     -0.01745 |       0.00000 |       0.01121 |       0.00673 |       0.58427
Evaluating losses...
     -0.01904 |       0.00000 |       0.01089 |       0.00661 |      

     -0.01574 |       0.00000 |       0.01136 |       0.00534 |       0.55128
     -0.01688 |       0.00000 |       0.01102 |       0.00615 |       0.55275
Evaluating losses...
     -0.01838 |       0.00000 |       0.01086 |       0.00607 |       0.55083
----------------------------------
| EpLenMean       | 620          |
| EpRewMean       | -4.74        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 2961         |
| TimeElapsed     | 4.6e+03      |
| TimestepsSoFar  | 1871872      |
| ev_tdlam_before | 0.878        |
| loss_ent        | 0.5508341    |
| loss_kl         | 0.0060687494 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018383808 |
| loss_vf_loss    | 0.010861706  |
----------------------------------
********** Iteration 457 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00154 |       0.00000 |       0.01304 |       0.00053 |       0.54698
     -0.00448 |       0.00000 |       0.01059 |

********** Iteration 462 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00110 |       0.00000 |       0.01260 |       0.00088 |       0.56811
     -0.00437 |       0.00000 |       0.01092 |       0.00282 |       0.56839
     -0.00732 |       0.00000 |       0.01015 |       0.00352 |       0.56580
     -0.00907 |       0.00000 |       0.00968 |       0.00375 |       0.56402
     -0.01031 |       0.00000 |       0.00932 |       0.00431 |       0.56324
     -0.01144 |       0.00000 |       0.00917 |       0.00476 |       0.56303
     -0.01316 |       0.00000 |       0.00900 |       0.00506 |       0.56111
     -0.01346 |       0.00000 |       0.00892 |       0.00530 |       0.56205
     -0.01515 |       0.00000 |       0.00863 |       0.00564 |       0.56044
     -0.01569 |       0.00000 |       0.00861 |       0.00641 |       0.56035
Evaluating losses...
     -0.01662 |       0.00000 |       0.00825 |       0.00717 |      

     -0.01325 |       0.00000 |       0.00935 |       0.00518 |       0.55181
     -0.01423 |       0.00000 |       0.00917 |       0.00514 |       0.55201
Evaluating losses...
     -0.01630 |       0.00000 |       0.00889 |       0.00533 |       0.55209
----------------------------------
| EpLenMean       | 640          |
| EpRewMean       | -4.84        |
| EpThisIter      | 5            |
| EpisodesSoFar   | 3031         |
| TimeElapsed     | 4.66e+03     |
| TimestepsSoFar  | 1916928      |
| ev_tdlam_before | 0.832        |
| loss_ent        | 0.55208874   |
| loss_kl         | 0.0053341608 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016300641 |
| loss_vf_loss    | 0.00888947   |
----------------------------------
********** Iteration 468 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00010 |       0.00000 |       0.01298 |       0.00196 |       0.54635
     -0.00595 |       0.00000 |       0.01152 |

********** Iteration 473 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00092 |       0.00000 |       0.01354 |       0.00062 |       0.54607
     -0.00464 |       0.00000 |       0.01080 |       0.00218 |       0.54746
     -0.00650 |       0.00000 |       0.01016 |       0.00271 |       0.54524
     -0.00829 |       0.00000 |       0.00964 |       0.00302 |       0.54522
     -0.00988 |       0.00000 |       0.00932 |       0.00362 |       0.54669
     -0.01043 |       0.00000 |       0.00899 |       0.00343 |       0.54595
     -0.01087 |       0.00000 |       0.00876 |       0.00380 |       0.54622
     -0.01225 |       0.00000 |       0.00860 |       0.00437 |       0.54615
     -0.01305 |       0.00000 |       0.00834 |       0.00432 |       0.54656
     -0.01341 |       0.00000 |       0.00823 |       0.00463 |       0.54639
Evaluating losses...
     -0.01450 |       0.00000 |       0.00796 |       0.00473 |      

     -0.01425 |       0.00000 |       0.01073 |       0.00600 |       0.53144
     -0.01486 |       0.00000 |       0.01058 |       0.00617 |       0.53046
Evaluating losses...
     -0.01612 |       0.00000 |       0.01018 |       0.00647 |       0.52953
----------------------------------
| EpLenMean       | 660          |
| EpRewMean       | -4.85        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 3099         |
| TimeElapsed     | 4.71e+03     |
| TimestepsSoFar  | 1961984      |
| ev_tdlam_before | 0.869        |
| loss_ent        | 0.52953005   |
| loss_kl         | 0.006468405  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016119337 |
| loss_vf_loss    | 0.010184389  |
----------------------------------
********** Iteration 479 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00096 |       0.00000 |       0.01693 |       0.00126 |       0.52701
     -0.00537 |       0.00000 |       0.01397 |

********** Iteration 484 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00034 |       0.00000 |       0.01243 |       0.00101 |       0.56526
     -0.00484 |       0.00000 |       0.01112 |       0.00314 |       0.56545
     -0.00634 |       0.00000 |       0.01031 |       0.00318 |       0.56607
     -0.00818 |       0.00000 |       0.00992 |       0.00361 |       0.56647
     -0.00922 |       0.00000 |       0.00942 |       0.00376 |       0.56642
     -0.00996 |       0.00000 |       0.00923 |       0.00385 |       0.56763
     -0.01107 |       0.00000 |       0.00910 |       0.00470 |       0.56813
     -0.01229 |       0.00000 |       0.00877 |       0.00493 |       0.56865
     -0.01265 |       0.00000 |       0.00860 |       0.00499 |       0.56983
     -0.01340 |       0.00000 |       0.00846 |       0.00587 |       0.56920
Evaluating losses...
     -0.01496 |       0.00000 |       0.00827 |       0.00563 |      

     -0.01512 |       0.00000 |       0.01043 |       0.00535 |       0.56168
     -0.01603 |       0.00000 |       0.01025 |       0.00577 |       0.56230
     -0.01706 |       0.00000 |       0.01005 |       0.00615 |       0.56182
Evaluating losses...
     -0.01899 |       0.00000 |       0.00966 |       0.00625 |       0.56150
----------------------------------
| EpLenMean       | 645          |
| EpRewMean       | -4.91        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 3169         |
| TimeElapsed     | 5.25e+03     |
| TimestepsSoFar  | 2007040      |
| ev_tdlam_before | 0.862        |
| loss_ent        | 0.5615       |
| loss_kl         | 0.0062542935 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018988663 |
| loss_vf_loss    | 0.009662436  |
----------------------------------
********** Iteration 490 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00040 |       0.00000 |       0.01828 |

********** Iteration 495 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -3.72e-05 |       0.00000 |       0.01726 |       0.00132 |       0.62477
     -0.00540 |       0.00000 |       0.01471 |       0.00342 |       0.62507
     -0.00746 |       0.00000 |       0.01371 |       0.00423 |       0.62357
     -0.00989 |       0.00000 |       0.01317 |       0.00433 |       0.62483
     -0.01138 |       0.00000 |       0.01269 |       0.00458 |       0.62452
     -0.01220 |       0.00000 |       0.01222 |       0.00473 |       0.62154
     -0.01309 |       0.00000 |       0.01188 |       0.00516 |       0.62410
     -0.01362 |       0.00000 |       0.01165 |       0.00586 |       0.62351
     -0.01443 |       0.00000 |       0.01153 |       0.00624 |       0.62238
     -0.01469 |       0.00000 |       0.01136 |       0.00608 |       0.62259
Evaluating losses...
     -0.01618 |       0.00000 |       0.01098 |       0.00632 |      

     -0.01497 |       0.00000 |       0.00951 |       0.00484 |       0.57320
     -0.01587 |       0.00000 |       0.00946 |       0.00540 |       0.57366
Evaluating losses...
     -0.01757 |       0.00000 |       0.00923 |       0.00594 |       0.57347
----------------------------------
| EpLenMean       | 635          |
| EpRewMean       | -4.81        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 3241         |
| TimeElapsed     | 5.3e+03      |
| TimestepsSoFar  | 2052096      |
| ev_tdlam_before | 0.864        |
| loss_ent        | 0.57346857   |
| loss_kl         | 0.0059363404 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.017570227 |
| loss_vf_loss    | 0.009228542  |
----------------------------------
********** Iteration 501 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00030 |       0.00000 |       0.01277 |       0.00153 |       0.56826
     -0.00472 |       0.00000 |       0.01127 |

********** Iteration 506 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00092 |       0.00000 |       0.02046 |       0.00128 |       0.58978
     -0.00672 |       0.00000 |       0.01641 |       0.00293 |       0.58570
     -0.00875 |       0.00000 |       0.01492 |       0.00389 |       0.58422
     -0.01170 |       0.00000 |       0.01382 |       0.00391 |       0.58229
     -0.01297 |       0.00000 |       0.01311 |       0.00458 |       0.58156
     -0.01394 |       0.00000 |       0.01279 |       0.00481 |       0.58184
     -0.01486 |       0.00000 |       0.01237 |       0.00516 |       0.58104
     -0.01608 |       0.00000 |       0.01203 |       0.00528 |       0.58046
     -0.01694 |       0.00000 |       0.01167 |       0.00580 |       0.58035
     -0.01788 |       0.00000 |       0.01158 |       0.00627 |       0.58018
Evaluating losses...
     -0.01974 |       0.00000 |       0.01113 |       0.00614 |      

     -0.01592 |       0.00000 |       0.00894 |       0.00535 |       0.59168
     -0.01660 |       0.00000 |       0.00876 |       0.00524 |       0.59148
Evaluating losses...
     -0.01816 |       0.00000 |       0.00857 |       0.00584 |       0.58922
----------------------------------
| EpLenMean       | 629          |
| EpRewMean       | -4.79        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 3313         |
| TimeElapsed     | 5.35e+03     |
| TimestepsSoFar  | 2097152      |
| ev_tdlam_before | 0.858        |
| loss_ent        | 0.5892183    |
| loss_kl         | 0.005835017  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018155914 |
| loss_vf_loss    | 0.00857499   |
----------------------------------
********** Iteration 512 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -2.34e-05 |       0.00000 |       0.01657 |       0.00211 |       0.60393
     -0.00787 |       0.00000 |       0.01279 |

********** Iteration 517 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -9.22e-05 |       0.00000 |       0.01400 |       0.00159 |       0.62334
     -0.00625 |       0.00000 |       0.01236 |       0.00293 |       0.62184
     -0.00851 |       0.00000 |       0.01134 |       0.00268 |       0.62430
     -0.01046 |       0.00000 |       0.01081 |       0.00362 |       0.62205
     -0.01157 |       0.00000 |       0.01034 |       0.00349 |       0.62233
     -0.01279 |       0.00000 |       0.01017 |       0.00394 |       0.62289
     -0.01379 |       0.00000 |       0.01001 |       0.00478 |       0.62074
     -0.01440 |       0.00000 |       0.00986 |       0.00445 |       0.62290
     -0.01536 |       0.00000 |       0.00968 |       0.00558 |       0.62047
     -0.01630 |       0.00000 |       0.00949 |       0.00537 |       0.62388
Evaluating losses...
     -0.01758 |       0.00000 |       0.00922 |       0.00577 |      

     -0.01683 |       0.00000 |       0.01031 |       0.00559 |       0.60465
     -0.01751 |       0.00000 |       0.01015 |       0.00589 |       0.60372
Evaluating losses...
     -0.01930 |       0.00000 |       0.00977 |       0.00583 |       0.60396
----------------------------------
| EpLenMean       | 631          |
| EpRewMean       | -4.8         |
| EpThisIter      | 6            |
| EpisodesSoFar   | 3384         |
| TimeElapsed     | 5.4e+03      |
| TimestepsSoFar  | 2142208      |
| ev_tdlam_before | 0.843        |
| loss_ent        | 0.6039585    |
| loss_kl         | 0.0058312537 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01929938  |
| loss_vf_loss    | 0.009769282  |
----------------------------------
********** Iteration 523 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00128 |       0.00000 |       0.01448 |       0.00086 |       0.62047
     -0.00479 |       0.00000 |       0.01249 |

********** Iteration 528 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00141 |       0.00000 |       0.01370 |       0.00118 |       0.60863
     -0.00404 |       0.00000 |       0.01183 |       0.00188 |       0.60564
     -0.00678 |       0.00000 |       0.01117 |       0.00273 |       0.60460
     -0.00795 |       0.00000 |       0.01068 |       0.00350 |       0.60393
     -0.01039 |       0.00000 |       0.01046 |       0.00349 |       0.60353
     -0.01068 |       0.00000 |       0.01010 |       0.00435 |       0.60249
     -0.01110 |       0.00000 |       0.00991 |       0.00475 |       0.60324
     -0.01251 |       0.00000 |       0.00974 |       0.00503 |       0.60398
     -0.01313 |       0.00000 |       0.00946 |       0.00531 |       0.60236
     -0.01388 |       0.00000 |       0.00942 |       0.00552 |       0.60424
Evaluating losses...
     -0.01573 |       0.00000 |       0.00901 |       0.00628 |      

     -0.01910 |       0.00000 |       0.01141 |       0.00676 |       0.65371
     -0.01947 |       0.00000 |       0.01137 |       0.00703 |       0.65327
Evaluating losses...
     -0.02135 |       0.00000 |       0.01102 |       0.00755 |       0.65526
----------------------------------
| EpLenMean       | 613          |
| EpRewMean       | -4.79        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 3460         |
| TimeElapsed     | 5.45e+03     |
| TimestepsSoFar  | 2187264      |
| ev_tdlam_before | 0.827        |
| loss_ent        | 0.6552601    |
| loss_kl         | 0.007549285  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.021351378 |
| loss_vf_loss    | 0.01101817   |
----------------------------------
********** Iteration 534 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00052 |       0.00000 |       0.01826 |       0.00179 |       0.61968
     -0.00777 |       0.00000 |       0.01478 |

********** Iteration 539 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00117 |       0.00000 |       0.01653 |       0.00066 |       0.59040
     -0.00399 |       0.00000 |       0.01353 |       0.00259 |       0.59603
     -0.00683 |       0.00000 |       0.01250 |       0.00286 |       0.59874
     -0.00849 |       0.00000 |       0.01190 |       0.00356 |       0.59662
     -0.00996 |       0.00000 |       0.01151 |       0.00436 |       0.59879
     -0.01135 |       0.00000 |       0.01116 |       0.00475 |       0.59672
     -0.01206 |       0.00000 |       0.01081 |       0.00517 |       0.59770
     -0.01276 |       0.00000 |       0.01070 |       0.00561 |       0.59953
     -0.01421 |       0.00000 |       0.01044 |       0.00612 |       0.59938
     -0.01454 |       0.00000 |       0.01024 |       0.00652 |       0.60132
Evaluating losses...
     -0.01606 |       0.00000 |       0.00994 |       0.00668 |      

     -0.01663 |       0.00000 |       0.01001 |       0.00556 |       0.64819
     -0.01751 |       0.00000 |       0.00988 |       0.00564 |       0.64970
Evaluating losses...
     -0.01876 |       0.00000 |       0.00961 |       0.00629 |       0.65047
----------------------------------
| EpLenMean       | 606          |
| EpRewMean       | -4.83        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 3535         |
| TimeElapsed     | 5.5e+03      |
| TimestepsSoFar  | 2232320      |
| ev_tdlam_before | 0.847        |
| loss_ent        | 0.65047014   |
| loss_kl         | 0.006293349  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018761862 |
| loss_vf_loss    | 0.009606336  |
----------------------------------
********** Iteration 545 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00175 |       0.00000 |       0.01581 |       0.00204 |       0.63158
     -0.00731 |       0.00000 |       0.01300 |

********** Iteration 550 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00095 |       0.00000 |       0.01963 |       0.00172 |       0.65815
     -0.00665 |       0.00000 |       0.01674 |       0.00305 |       0.66020
     -0.00992 |       0.00000 |       0.01511 |       0.00352 |       0.66132
     -0.01183 |       0.00000 |       0.01398 |       0.00384 |       0.65813
     -0.01333 |       0.00000 |       0.01310 |       0.00467 |       0.66250
     -0.01431 |       0.00000 |       0.01256 |       0.00467 |       0.66148
     -0.01522 |       0.00000 |       0.01208 |       0.00527 |       0.66154
     -0.01674 |       0.00000 |       0.01172 |       0.00538 |       0.66166
     -0.01701 |       0.00000 |       0.01147 |       0.00579 |       0.66343
     -0.01799 |       0.00000 |       0.01129 |       0.00664 |       0.66204
Evaluating losses...
     -0.01918 |       0.00000 |       0.01074 |       0.00674 |      

     -0.01371 |       0.00000 |       0.00998 |       0.00574 |       0.63858
     -0.01460 |       0.00000 |       0.00969 |       0.00617 |       0.63878
Evaluating losses...
     -0.01662 |       0.00000 |       0.00930 |       0.00710 |       0.63875
----------------------------------
| EpLenMean       | 605          |
| EpRewMean       | -4.91        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 3607         |
| TimeElapsed     | 6.03e+03     |
| TimestepsSoFar  | 2277376      |
| ev_tdlam_before | 0.837        |
| loss_ent        | 0.63875455   |
| loss_kl         | 0.007097526  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016622398 |
| loss_vf_loss    | 0.009304868  |
----------------------------------
********** Iteration 556 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -5.21e-05 |       0.00000 |       0.01372 |       0.00091 |       0.62585
     -0.00591 |       0.00000 |       0.01168 |

********** Iteration 561 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00129 |       0.00000 |       0.01449 |       0.00077 |       0.62114
     -0.00537 |       0.00000 |       0.01228 |       0.00219 |       0.62225
     -0.00799 |       0.00000 |       0.01149 |       0.00303 |       0.62055
     -0.01111 |       0.00000 |       0.01086 |       0.00378 |       0.61701
     -0.01319 |       0.00000 |       0.01043 |       0.00457 |       0.61957
     -0.01430 |       0.00000 |       0.01000 |       0.00494 |       0.61755
     -0.01546 |       0.00000 |       0.00980 |       0.00525 |       0.62057
     -0.01659 |       0.00000 |       0.00958 |       0.00603 |       0.61827
     -0.01739 |       0.00000 |       0.00935 |       0.00658 |       0.62167
     -0.01798 |       0.00000 |       0.00912 |       0.00705 |       0.61934
Evaluating losses...
     -0.01967 |       0.00000 |       0.00883 |       0.00775 |      

     -0.01694 |       0.00000 |       0.01215 |       0.00544 |       0.60629
     -0.01759 |       0.00000 |       0.01178 |       0.00570 |       0.60559
Evaluating losses...
     -0.01964 |       0.00000 |       0.01138 |       0.00592 |       0.60595
----------------------------------
| EpLenMean       | 626          |
| EpRewMean       | -4.81        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 3679         |
| TimeElapsed     | 6.09e+03     |
| TimestepsSoFar  | 2322432      |
| ev_tdlam_before | 0.806        |
| loss_ent        | 0.60595435   |
| loss_kl         | 0.0059180064 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.019636191 |
| loss_vf_loss    | 0.011382684  |
----------------------------------
********** Iteration 567 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00073 |       0.00000 |       0.02048 |       0.00126 |       0.60284
     -0.00821 |       0.00000 |       0.01677 |

********** Iteration 572 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00142 |       0.00000 |       0.01554 |       0.00123 |       0.59258
     -0.00526 |       0.00000 |       0.01314 |       0.00315 |       0.59500
     -0.00805 |       0.00000 |       0.01225 |       0.00319 |       0.59345
     -0.01031 |       0.00000 |       0.01145 |       0.00344 |       0.59382
     -0.01173 |       0.00000 |       0.01110 |       0.00394 |       0.59317
     -0.01325 |       0.00000 |       0.01078 |       0.00515 |       0.59455
     -0.01391 |       0.00000 |       0.01048 |       0.00471 |       0.59149
     -0.01529 |       0.00000 |       0.01027 |       0.00539 |       0.59324
     -0.01614 |       0.00000 |       0.01002 |       0.00549 |       0.59305
     -0.01682 |       0.00000 |       0.00997 |       0.00579 |       0.59328
Evaluating losses...
     -0.01829 |       0.00000 |       0.00964 |       0.00562 |      

     -0.01663 |       0.00000 |       0.00917 |       0.00628 |       0.57776
     -0.01706 |       0.00000 |       0.00922 |       0.00688 |       0.57789
Evaluating losses...
     -0.01884 |       0.00000 |       0.00878 |       0.00677 |       0.57650
----------------------------------
| EpLenMean       | 600          |
| EpRewMean       | -4.88        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 3754         |
| TimeElapsed     | 6.16e+03     |
| TimestepsSoFar  | 2367488      |
| ev_tdlam_before | 0.879        |
| loss_ent        | 0.57650113   |
| loss_kl         | 0.0067720236 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018838067 |
| loss_vf_loss    | 0.008778193  |
----------------------------------
********** Iteration 578 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00204 |       0.00000 |       0.01346 |       0.00133 |       0.55331
     -0.00574 |       0.00000 |       0.01137 |

********** Iteration 583 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     5.20e-05 |       0.00000 |       0.01363 |       0.00138 |       0.54236
     -0.00678 |       0.00000 |       0.01096 |       0.00284 |       0.54524
     -0.00925 |       0.00000 |       0.00994 |       0.00385 |       0.54635
     -0.01141 |       0.00000 |       0.00964 |       0.00412 |       0.54530
     -0.01194 |       0.00000 |       0.00912 |       0.00480 |       0.54659
     -0.01303 |       0.00000 |       0.00893 |       0.00496 |       0.54590
     -0.01445 |       0.00000 |       0.00874 |       0.00536 |       0.54623
     -0.01529 |       0.00000 |       0.00856 |       0.00594 |       0.54586
     -0.01641 |       0.00000 |       0.00841 |       0.00587 |       0.54738
     -0.01690 |       0.00000 |       0.00832 |       0.00587 |       0.54577
Evaluating losses...
     -0.01838 |       0.00000 |       0.00798 |       0.00655 |      

     -0.01733 |       0.00000 |       0.00997 |       0.00639 |       0.59088
     -0.01828 |       0.00000 |       0.00984 |       0.00657 |       0.59229
Evaluating losses...
     -0.02015 |       0.00000 |       0.00943 |       0.00660 |       0.59202
----------------------------------
| EpLenMean       | 633          |
| EpRewMean       | -4.86        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 3823         |
| TimeElapsed     | 6.25e+03     |
| TimestepsSoFar  | 2412544      |
| ev_tdlam_before | 0.817        |
| loss_ent        | 0.59202355   |
| loss_kl         | 0.006595705  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020149693 |
| loss_vf_loss    | 0.00942742   |
----------------------------------
********** Iteration 589 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00166 |       0.00000 |       0.01342 |       0.00106 |       0.60765
     -0.00688 |       0.00000 |       0.01194 |

********** Iteration 594 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00054 |       0.00000 |       0.01588 |       0.00158 |       0.59039
     -0.00709 |       0.00000 |       0.01356 |       0.00236 |       0.59107
     -0.00969 |       0.00000 |       0.01252 |       0.00335 |       0.59142
     -0.01176 |       0.00000 |       0.01203 |       0.00359 |       0.58973
     -0.01287 |       0.00000 |       0.01151 |       0.00429 |       0.58935
     -0.01397 |       0.00000 |       0.01124 |       0.00457 |       0.58870
     -0.01494 |       0.00000 |       0.01103 |       0.00472 |       0.58790
     -0.01576 |       0.00000 |       0.01062 |       0.00522 |       0.58766
     -0.01626 |       0.00000 |       0.01050 |       0.00579 |       0.58845
     -0.01753 |       0.00000 |       0.01029 |       0.00613 |       0.58745
Evaluating losses...
     -0.01935 |       0.00000 |       0.00993 |       0.00603 |      

     -0.01788 |       0.00000 |       0.00908 |       0.00615 |       0.55811
     -0.01844 |       0.00000 |       0.00897 |       0.00684 |       0.55781
Evaluating losses...
     -0.02064 |       0.00000 |       0.00880 |       0.00686 |       0.55643
----------------------------------
| EpLenMean       | 616          |
| EpRewMean       | -4.79        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 3898         |
| TimeElapsed     | 6.31e+03     |
| TimestepsSoFar  | 2457600      |
| ev_tdlam_before | 0.842        |
| loss_ent        | 0.5564325    |
| loss_kl         | 0.0068581686 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.02063589  |
| loss_vf_loss    | 0.008796295  |
----------------------------------
********** Iteration 600 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00150 |       0.00000 |       0.01734 |       0.00117 |       0.59004
     -0.00796 |       0.00000 |       0.01492 |

********** Iteration 605 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -6.24e-05 |       0.00000 |       0.01355 |       0.00116 |       0.58637
     -0.00696 |       0.00000 |       0.01133 |       0.00250 |       0.58139
     -0.00993 |       0.00000 |       0.01047 |       0.00294 |       0.57938
     -0.01266 |       0.00000 |       0.00997 |       0.00358 |       0.57956
     -0.01357 |       0.00000 |       0.00957 |       0.00439 |       0.57805
     -0.01469 |       0.00000 |       0.00928 |       0.00471 |       0.57889
     -0.01619 |       0.00000 |       0.00925 |       0.00551 |       0.57915
     -0.01708 |       0.00000 |       0.00891 |       0.00575 |       0.58029
     -0.01810 |       0.00000 |       0.00873 |       0.00598 |       0.57966
     -0.01911 |       0.00000 |       0.00856 |       0.00629 |       0.57874
Evaluating losses...
     -0.02069 |       0.00000 |       0.00838 |       0.00692 |      

     -0.01360 |       0.00000 |       0.00836 |       0.00451 |       0.54938
     -0.01433 |       0.00000 |       0.00827 |       0.00503 |       0.54897
     -0.01449 |       0.00000 |       0.00825 |       0.00551 |       0.54710
Evaluating losses...
     -0.01611 |       0.00000 |       0.00806 |       0.00573 |       0.54782
----------------------------------
| EpLenMean       | 635          |
| EpRewMean       | -4.84        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 3968         |
| TimeElapsed     | 6.94e+03     |
| TimestepsSoFar  | 2502656      |
| ev_tdlam_before | 0.881        |
| loss_ent        | 0.54781955   |
| loss_kl         | 0.0057328865 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01611408  |
| loss_vf_loss    | 0.008056794  |
----------------------------------
********** Iteration 611 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00072 |       0.00000 |       0.01469 |

********** Iteration 616 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00097 |       0.00000 |       0.01178 |       0.00098 |       0.60086
     -0.00350 |       0.00000 |       0.01022 |       0.00184 |       0.59414
     -0.00684 |       0.00000 |       0.00942 |       0.00249 |       0.59615
     -0.00891 |       0.00000 |       0.00895 |       0.00304 |       0.59537
     -0.01006 |       0.00000 |       0.00870 |       0.00302 |       0.59696
     -0.01141 |       0.00000 |       0.00847 |       0.00356 |       0.59573
     -0.01232 |       0.00000 |       0.00826 |       0.00391 |       0.59742
     -0.01340 |       0.00000 |       0.00807 |       0.00436 |       0.59656
     -0.01388 |       0.00000 |       0.00787 |       0.00483 |       0.59713
     -0.01480 |       0.00000 |       0.00770 |       0.00497 |       0.59691
Evaluating losses...
     -0.01653 |       0.00000 |       0.00744 |       0.00545 |      

     -0.01647 |       0.00000 |       0.01209 |       0.00689 |       0.54905
     -0.01747 |       0.00000 |       0.01182 |       0.00700 |       0.55193
Evaluating losses...
     -0.01907 |       0.00000 |       0.01129 |       0.00777 |       0.55093
----------------------------------
| EpLenMean       | 625          |
| EpRewMean       | -4.85        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 4040         |
| TimeElapsed     | 7e+03        |
| TimestepsSoFar  | 2547712      |
| ev_tdlam_before | 0.836        |
| loss_ent        | 0.5509341    |
| loss_kl         | 0.0077689667 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.019073293 |
| loss_vf_loss    | 0.0112916725 |
----------------------------------
********** Iteration 622 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00014 |       0.00000 |       0.01568 |       0.00081 |       0.60446
     -0.00812 |       0.00000 |       0.01311 |

********** Iteration 627 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00013 |       0.00000 |       0.01451 |       0.00109 |       0.61845
     -0.00626 |       0.00000 |       0.01205 |       0.00275 |       0.61801
     -0.00935 |       0.00000 |       0.01103 |       0.00342 |       0.61831
     -0.01087 |       0.00000 |       0.01031 |       0.00341 |       0.61611
     -0.01281 |       0.00000 |       0.00988 |       0.00414 |       0.61584
     -0.01407 |       0.00000 |       0.00949 |       0.00451 |       0.61355
     -0.01405 |       0.00000 |       0.00923 |       0.00471 |       0.61528
     -0.01565 |       0.00000 |       0.00910 |       0.00495 |       0.61496
     -0.01606 |       0.00000 |       0.00889 |       0.00568 |       0.61419
     -0.01729 |       0.00000 |       0.00868 |       0.00577 |       0.61403
Evaluating losses...
     -0.01911 |       0.00000 |       0.00848 |       0.00588 |      

     -0.01859 |       0.00000 |       0.01011 |       0.00688 |       0.60413
     -0.01915 |       0.00000 |       0.00993 |       0.00723 |       0.60342
Evaluating losses...
     -0.02116 |       0.00000 |       0.00945 |       0.00739 |       0.60376
----------------------------------
| EpLenMean       | 626          |
| EpRewMean       | -4.82        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 4112         |
| TimeElapsed     | 7.05e+03     |
| TimestepsSoFar  | 2592768      |
| ev_tdlam_before | 0.864        |
| loss_ent        | 0.60375977   |
| loss_kl         | 0.0073860483 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.021163383 |
| loss_vf_loss    | 0.0094515225 |
----------------------------------
********** Iteration 633 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00039 |       0.00000 |       0.01524 |       0.00149 |       0.61092
     -0.00707 |       0.00000 |       0.01256 |

********** Iteration 638 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00076 |       0.00000 |       0.01874 |       0.00115 |       0.62690
     -0.00716 |       0.00000 |       0.01628 |       0.00254 |       0.62427
     -0.01029 |       0.00000 |       0.01496 |       0.00328 |       0.62575
     -0.01290 |       0.00000 |       0.01410 |       0.00402 |       0.62607
     -0.01415 |       0.00000 |       0.01355 |       0.00455 |       0.62559
     -0.01562 |       0.00000 |       0.01306 |       0.00496 |       0.62586
     -0.01666 |       0.00000 |       0.01260 |       0.00570 |       0.62509
     -0.01763 |       0.00000 |       0.01224 |       0.00620 |       0.62611
     -0.01851 |       0.00000 |       0.01193 |       0.00674 |       0.62786
     -0.01915 |       0.00000 |       0.01172 |       0.00733 |       0.62669
Evaluating losses...
     -0.02113 |       0.00000 |       0.01127 |       0.00747 |      

     -0.01773 |       0.00000 |       0.00932 |       0.00608 |       0.59614
     -0.01900 |       0.00000 |       0.00917 |       0.00688 |       0.59559
Evaluating losses...
     -0.02070 |       0.00000 |       0.00876 |       0.00694 |       0.59527
----------------------------------
| EpLenMean       | 646          |
| EpRewMean       | -4.81        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 4182         |
| TimeElapsed     | 7.1e+03      |
| TimestepsSoFar  | 2637824      |
| ev_tdlam_before | 0.863        |
| loss_ent        | 0.59526676   |
| loss_kl         | 0.0069424883 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.02069822  |
| loss_vf_loss    | 0.00875666   |
----------------------------------
********** Iteration 644 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00042 |       0.00000 |       0.01744 |       0.00134 |       0.59232
     -0.00591 |       0.00000 |       0.01414 |

********** Iteration 649 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00053 |       0.00000 |       0.01327 |       0.00146 |       0.58697
     -0.00595 |       0.00000 |       0.01114 |       0.00274 |       0.59004
     -0.00945 |       0.00000 |       0.01038 |       0.00298 |       0.58781
     -0.01133 |       0.00000 |       0.00986 |       0.00391 |       0.59064
     -0.01293 |       0.00000 |       0.00945 |       0.00476 |       0.58985
     -0.01437 |       0.00000 |       0.00910 |       0.00514 |       0.59053
     -0.01539 |       0.00000 |       0.00885 |       0.00563 |       0.59243
     -0.01643 |       0.00000 |       0.00863 |       0.00615 |       0.59256
     -0.01752 |       0.00000 |       0.00834 |       0.00696 |       0.59336
     -0.01830 |       0.00000 |       0.00819 |       0.00758 |       0.59383
Evaluating losses...
     -0.02011 |       0.00000 |       0.00796 |       0.00818 |      

     -0.01467 |       0.00000 |       0.00972 |       0.00534 |       0.67045
     -0.01553 |       0.00000 |       0.00956 |       0.00544 |       0.67222
Evaluating losses...
     -0.01788 |       0.00000 |       0.00928 |       0.00549 |       0.67458
----------------------------------
| EpLenMean       | 636          |
| EpRewMean       | -4.87        |
| EpThisIter      | 8            |
| EpisodesSoFar   | 4253         |
| TimeElapsed     | 7.15e+03     |
| TimestepsSoFar  | 2682880      |
| ev_tdlam_before | 0.883        |
| loss_ent        | 0.67458165   |
| loss_kl         | 0.0054883617 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.017876254 |
| loss_vf_loss    | 0.00928362   |
----------------------------------
********** Iteration 655 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00048 |       0.00000 |       0.01246 |       0.00157 |       0.58576
     -0.00553 |       0.00000 |       0.01048 |

********** Iteration 660 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00050 |       0.00000 |       0.01373 |       0.00141 |       0.64017
     -0.00592 |       0.00000 |       0.01093 |       0.00256 |       0.63878
     -0.00896 |       0.00000 |       0.00998 |       0.00443 |       0.63948
     -0.01028 |       0.00000 |       0.00945 |       0.00462 |       0.63920
     -0.01184 |       0.00000 |       0.00905 |       0.00485 |       0.63949
     -0.01270 |       0.00000 |       0.00883 |       0.00524 |       0.64082
     -0.01484 |       0.00000 |       0.00848 |       0.00517 |       0.63991
     -0.01473 |       0.00000 |       0.00822 |       0.00564 |       0.63911
     -0.01594 |       0.00000 |       0.00800 |       0.00595 |       0.63967
     -0.01620 |       0.00000 |       0.00800 |       0.00680 |       0.64057
Evaluating losses...
     -0.01877 |       0.00000 |       0.00760 |       0.00659 |      

     -0.01822 |       0.00000 |       0.01228 |       0.00549 |       0.65611
     -0.01857 |       0.00000 |       0.01209 |       0.00605 |       0.65484
Evaluating losses...
     -0.02072 |       0.00000 |       0.01168 |       0.00636 |       0.65393
----------------------------------
| EpLenMean       | 627          |
| EpRewMean       | -4.86        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 4323         |
| TimeElapsed     | 7.2e+03      |
| TimestepsSoFar  | 2727936      |
| ev_tdlam_before | 0.806        |
| loss_ent        | 0.6539318    |
| loss_kl         | 0.0063567287 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020715073 |
| loss_vf_loss    | 0.011680291  |
----------------------------------
********** Iteration 666 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00092 |       0.00000 |       0.01718 |       0.00131 |       0.70966
     -0.00641 |       0.00000 |       0.01473 |

********** Iteration 671 ************
Eval num_timesteps=2748416, episode_reward=-4.84 +/- 0.41
Episode length: 624.57 +/- 134.84
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00071 |       0.00000 |       0.01435 |       0.00067 |       0.63561
     -0.00686 |       0.00000 |       0.01231 |       0.00291 |       0.63940
     -0.01010 |       0.00000 |       0.01144 |       0.00419 |       0.63916
     -0.01289 |       0.00000 |       0.01083 |       0.00455 |       0.64143
     -0.01468 |       0.00000 |       0.01031 |       0.00519 |       0.64130
     -0.01598 |       0.00000 |       0.00997 |       0.00633 |       0.64233
     -0.01729 |       0.00000 |       0.00968 |       0.00636 |       0.64397
     -0.01778 |       0.00000 |       0.00944 |       0.00733 |       0.64354
     -0.01889 |       0.00000 |       0.00924 |       0.00753 |       0.64453
     -0.01988 |       0.00000 |       0.00908 |       0.00848 |       0.6437

     -0.01884 |       0.00000 |       0.01019 |       0.00667 |       0.68244
     -0.01864 |       0.00000 |       0.01016 |       0.00630 |       0.68433
     -0.02025 |       0.00000 |       0.00995 |       0.00671 |       0.68475
Evaluating losses...
     -0.02232 |       0.00000 |       0.00962 |       0.00691 |       0.68419
----------------------------------
| EpLenMean       | 648          |
| EpRewMean       | -4.84        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 4392         |
| TimeElapsed     | 7.7e+03      |
| TimestepsSoFar  | 2772992      |
| ev_tdlam_before | 0.87         |
| loss_ent        | 0.68418837   |
| loss_kl         | 0.006909956  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022316342 |
| loss_vf_loss    | 0.009622088  |
----------------------------------
********** Iteration 677 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00016 |       0.00000 |       0.01601 |

********** Iteration 682 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00046 |       0.00000 |       0.01543 |       0.00113 |       0.66427
     -0.00601 |       0.00000 |       0.01281 |       0.00234 |       0.66226
     -0.00843 |       0.00000 |       0.01201 |       0.00275 |       0.66385
     -0.01088 |       0.00000 |       0.01124 |       0.00347 |       0.66278
     -0.01243 |       0.00000 |       0.01079 |       0.00476 |       0.66370
     -0.01386 |       0.00000 |       0.01049 |       0.00478 |       0.66218
     -0.01495 |       0.00000 |       0.01022 |       0.00533 |       0.66279
     -0.01572 |       0.00000 |       0.01002 |       0.00610 |       0.66434
     -0.01650 |       0.00000 |       0.00982 |       0.00678 |       0.66310
     -0.01770 |       0.00000 |       0.00962 |       0.00739 |       0.66550
Evaluating losses...
     -0.01987 |       0.00000 |       0.00929 |       0.00734 |      

     -0.02152 |       0.00000 |       0.00908 |       0.00725 |       0.65582
     -0.02223 |       0.00000 |       0.00891 |       0.00755 |       0.65551
Evaluating losses...
     -0.02442 |       0.00000 |       0.00866 |       0.00746 |       0.65518
----------------------------------
| EpLenMean       | 653          |
| EpRewMean       | -4.8         |
| EpThisIter      | 6            |
| EpisodesSoFar   | 4461         |
| TimeElapsed     | 7.75e+03     |
| TimestepsSoFar  | 2818048      |
| ev_tdlam_before | 0.865        |
| loss_ent        | 0.6551815    |
| loss_kl         | 0.007457596  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.024417784 |
| loss_vf_loss    | 0.008661625  |
----------------------------------
********** Iteration 688 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00024 |       0.00000 |       0.01590 |       0.00157 |       0.67141
     -0.00852 |       0.00000 |       0.01334 |

********** Iteration 693 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00107 |       0.00000 |       0.01543 |       0.00111 |       0.65147
     -0.00534 |       0.00000 |       0.01296 |       0.00224 |       0.65266
     -0.00864 |       0.00000 |       0.01179 |       0.00274 |       0.65173
     -0.01090 |       0.00000 |       0.01097 |       0.00328 |       0.65165
     -0.01271 |       0.00000 |       0.01040 |       0.00399 |       0.65143
     -0.01435 |       0.00000 |       0.00991 |       0.00482 |       0.65156
     -0.01553 |       0.00000 |       0.00961 |       0.00473 |       0.65126
     -0.01669 |       0.00000 |       0.00932 |       0.00576 |       0.65214
     -0.01764 |       0.00000 |       0.00893 |       0.00593 |       0.65338
     -0.01850 |       0.00000 |       0.00876 |       0.00615 |       0.65163
Evaluating losses...
     -0.02070 |       0.00000 |       0.00852 |       0.00666 |      

     -0.01743 |       0.00000 |       0.00936 |       0.00602 |       0.66801
     -0.01810 |       0.00000 |       0.00929 |       0.00650 |       0.66777
Evaluating losses...
     -0.02041 |       0.00000 |       0.00889 |       0.00674 |       0.66778
----------------------------------
| EpLenMean       | 612          |
| EpRewMean       | -4.84        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 4535         |
| TimeElapsed     | 7.8e+03      |
| TimestepsSoFar  | 2863104      |
| ev_tdlam_before | 0.841        |
| loss_ent        | 0.66777503   |
| loss_kl         | 0.00674155   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020412708 |
| loss_vf_loss    | 0.008894638  |
----------------------------------
********** Iteration 699 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00019 |       0.00000 |       0.01495 |       0.00089 |       0.70396
     -0.00715 |       0.00000 |       0.01266 |

********** Iteration 704 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00136 |       0.00000 |       0.01582 |       0.00081 |       0.69097
     -0.00556 |       0.00000 |       0.01231 |       0.00192 |       0.68527
     -0.00946 |       0.00000 |       0.01138 |       0.00275 |       0.68579
     -0.01187 |       0.00000 |       0.01074 |       0.00338 |       0.68258
     -0.01369 |       0.00000 |       0.01024 |       0.00389 |       0.68142
     -0.01462 |       0.00000 |       0.01003 |       0.00419 |       0.68191
     -0.01596 |       0.00000 |       0.00961 |       0.00497 |       0.68149
     -0.01741 |       0.00000 |       0.00955 |       0.00519 |       0.68007
     -0.01835 |       0.00000 |       0.00928 |       0.00543 |       0.68143
     -0.01945 |       0.00000 |       0.00921 |       0.00602 |       0.68040
Evaluating losses...
     -0.02136 |       0.00000 |       0.00885 |       0.00628 |      

     -0.01668 |       0.00000 |       0.00921 |       0.00550 |       0.66091
     -0.01682 |       0.00000 |       0.00907 |       0.00563 |       0.66113
Evaluating losses...
     -0.01867 |       0.00000 |       0.00873 |       0.00610 |       0.66126
----------------------------------
| EpLenMean       | 607          |
| EpRewMean       | -4.92        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 4609         |
| TimeElapsed     | 7.86e+03     |
| TimestepsSoFar  | 2908160      |
| ev_tdlam_before | 0.836        |
| loss_ent        | 0.6612563    |
| loss_kl         | 0.006101724  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018674944 |
| loss_vf_loss    | 0.00873404   |
----------------------------------
********** Iteration 710 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00104 |       0.00000 |       0.01671 |       0.00155 |       0.65439
     -0.00747 |       0.00000 |       0.01325 |

********** Iteration 715 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00016 |       0.00000 |       0.01508 |       0.00101 |       0.64556
     -0.00800 |       0.00000 |       0.01214 |       0.00253 |       0.64434
     -0.01092 |       0.00000 |       0.01085 |       0.00344 |       0.64434
     -0.01340 |       0.00000 |       0.01012 |       0.00417 |       0.64331
     -0.01573 |       0.00000 |       0.00964 |       0.00446 |       0.64121
     -0.01696 |       0.00000 |       0.00934 |       0.00535 |       0.64340
     -0.01869 |       0.00000 |       0.00900 |       0.00577 |       0.64418
     -0.01977 |       0.00000 |       0.00885 |       0.00616 |       0.64125
     -0.02024 |       0.00000 |       0.00858 |       0.00650 |       0.64314
     -0.02158 |       0.00000 |       0.00837 |       0.00724 |       0.64459
Evaluating losses...
     -0.02352 |       0.00000 |       0.00813 |       0.00711 |      

     -0.01697 |       0.00000 |       0.00891 |       0.00559 |       0.64927
     -0.01732 |       0.00000 |       0.00877 |       0.00641 |       0.64961
Evaluating losses...
     -0.01884 |       0.00000 |       0.00843 |       0.00650 |       0.64905
----------------------------------
| EpLenMean       | 613          |
| EpRewMean       | -4.87        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 4682         |
| TimeElapsed     | 7.92e+03     |
| TimestepsSoFar  | 2953216      |
| ev_tdlam_before | 0.86         |
| loss_ent        | 0.64905375   |
| loss_kl         | 0.0065039806 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018840214 |
| loss_vf_loss    | 0.008433875  |
----------------------------------
********** Iteration 721 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00064 |       0.00000 |       0.01543 |       0.00159 |       0.63347
     -0.00830 |       0.00000 |       0.01275 |

********** Iteration 726 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00021 |       0.00000 |       0.01456 |       0.00095 |       0.63432
     -0.00590 |       0.00000 |       0.01271 |       0.00264 |       0.63878
     -0.00880 |       0.00000 |       0.01172 |       0.00270 |       0.63808
     -0.01126 |       0.00000 |       0.01116 |       0.00422 |       0.63552
     -0.01365 |       0.00000 |       0.01072 |       0.00430 |       0.63798
     -0.01432 |       0.00000 |       0.01038 |       0.00479 |       0.63691
     -0.01563 |       0.00000 |       0.01013 |       0.00496 |       0.63557
     -0.01676 |       0.00000 |       0.00988 |       0.00587 |       0.63727
     -0.01734 |       0.00000 |       0.00973 |       0.00611 |       0.63633
     -0.01834 |       0.00000 |       0.00956 |       0.00660 |       0.63580
Evaluating losses...
     -0.02018 |       0.00000 |       0.00931 |       0.00692 |      

     -0.02020 |       0.00000 |       0.00852 |       0.00749 |       0.64482
     -0.02152 |       0.00000 |       0.00840 |       0.00841 |       0.64852
Evaluating losses...
     -0.02370 |       0.00000 |       0.00821 |       0.00776 |       0.64704
----------------------------------
| EpLenMean       | 617          |
| EpRewMean       | -4.92        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 4755         |
| TimeElapsed     | 7.97e+03     |
| TimestepsSoFar  | 2998272      |
| ev_tdlam_before | 0.804        |
| loss_ent        | 0.6470401    |
| loss_kl         | 0.0077610095 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.023703123 |
| loss_vf_loss    | 0.008207841  |
----------------------------------
********** Iteration 732 ************
Eval num_timesteps=2998272, episode_reward=-4.83 +/- 0.43
Episode length: 634.00 +/- 141.52
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00064 |       0.00000 |  

********** Iteration 737 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00019 |       0.00000 |       0.01424 |       0.00115 |       0.62010
     -0.00822 |       0.00000 |       0.01191 |       0.00258 |       0.61410
     -0.01086 |       0.00000 |       0.01108 |       0.00339 |       0.61437
     -0.01362 |       0.00000 |       0.01043 |       0.00440 |       0.61275
     -0.01476 |       0.00000 |       0.00994 |       0.00478 |       0.61310
     -0.01618 |       0.00000 |       0.00959 |       0.00523 |       0.61186
     -0.01703 |       0.00000 |       0.00927 |       0.00575 |       0.61316
     -0.01813 |       0.00000 |       0.00902 |       0.00636 |       0.61251
     -0.01898 |       0.00000 |       0.00879 |       0.00686 |       0.61317
     -0.01926 |       0.00000 |       0.00855 |       0.00712 |       0.61335
Evaluating losses...
     -0.02149 |       0.00000 |       0.00830 |       0.00763 |      

     -0.02015 |       0.00000 |       0.01009 |       0.00618 |       0.65532
     -0.02095 |       0.00000 |       0.00993 |       0.00667 |       0.65683
Evaluating losses...
     -0.02277 |       0.00000 |       0.00962 |       0.00690 |       0.65590
----------------------------------
| EpLenMean       | 632          |
| EpRewMean       | -4.9         |
| EpThisIter      | 7            |
| EpisodesSoFar   | 4827         |
| TimeElapsed     | 8.49e+03     |
| TimestepsSoFar  | 3043328      |
| ev_tdlam_before | 0.834        |
| loss_ent        | 0.6559041    |
| loss_kl         | 0.0069042556 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022773828 |
| loss_vf_loss    | 0.009623087  |
----------------------------------
********** Iteration 743 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00050 |       0.00000 |       0.01263 |       0.00060 |       0.59800
     -0.00607 |       0.00000 |       0.01059 |

********** Iteration 748 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00151 |       0.00000 |       0.01085 |       0.00117 |       0.61273
     -0.00758 |       0.00000 |       0.00990 |       0.00314 |       0.60975
     -0.01017 |       0.00000 |       0.00947 |       0.00308 |       0.61281
     -0.01311 |       0.00000 |       0.00926 |       0.00399 |       0.61064
     -0.01404 |       0.00000 |       0.00897 |       0.00450 |       0.60980
     -0.01543 |       0.00000 |       0.00870 |       0.00507 |       0.61163
     -0.01658 |       0.00000 |       0.00849 |       0.00588 |       0.61002
     -0.01754 |       0.00000 |       0.00834 |       0.00591 |       0.61158
     -0.01821 |       0.00000 |       0.00822 |       0.00640 |       0.61222
     -0.01915 |       0.00000 |       0.00810 |       0.00674 |       0.61172
Evaluating losses...
     -0.02125 |       0.00000 |       0.00795 |       0.00687 |      

     -0.01914 |       0.00000 |       0.00837 |       0.00669 |       0.61718
     -0.01964 |       0.00000 |       0.00835 |       0.00715 |       0.61765
Evaluating losses...
     -0.02215 |       0.00000 |       0.00818 |       0.00722 |       0.61830
----------------------------------
| EpLenMean       | 657          |
| EpRewMean       | -4.88        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 4895         |
| TimeElapsed     | 8.53e+03     |
| TimestepsSoFar  | 3088384      |
| ev_tdlam_before | 0.845        |
| loss_ent        | 0.6183015    |
| loss_kl         | 0.0072191334 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022151347 |
| loss_vf_loss    | 0.008181586  |
----------------------------------
********** Iteration 754 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00163 |       0.00000 |       0.01924 |       0.00111 |       0.61762
     -0.00692 |       0.00000 |       0.01507 |

********** Iteration 759 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00216 |       0.00000 |       0.01228 |       0.00106 |       0.63593
     -0.00577 |       0.00000 |       0.01062 |       0.00242 |       0.64174
     -0.00938 |       0.00000 |       0.01001 |       0.00372 |       0.64247
     -0.01172 |       0.00000 |       0.00966 |       0.00425 |       0.64209
     -0.01264 |       0.00000 |       0.00944 |       0.00485 |       0.64126
     -0.01402 |       0.00000 |       0.00933 |       0.00529 |       0.64065
     -0.01514 |       0.00000 |       0.00917 |       0.00595 |       0.64188
     -0.01622 |       0.00000 |       0.00892 |       0.00639 |       0.64244
     -0.01691 |       0.00000 |       0.00882 |       0.00706 |       0.64417
     -0.01737 |       0.00000 |       0.00869 |       0.00715 |       0.64205
Evaluating losses...
     -0.01885 |       0.00000 |       0.00851 |       0.00820 |      

     -0.01896 |       0.00000 |       0.00894 |       0.00638 |       0.59174
     -0.01967 |       0.00000 |       0.00869 |       0.00661 |       0.59145
Evaluating losses...
     -0.02168 |       0.00000 |       0.00842 |       0.00674 |       0.59048
---------------------------------
| EpLenMean       | 615         |
| EpRewMean       | -4.88       |
| EpThisIter      | 7           |
| EpisodesSoFar   | 4969        |
| TimeElapsed     | 8.58e+03    |
| TimestepsSoFar  | 3133440     |
| ev_tdlam_before | 0.86        |
| loss_ent        | 0.5904832   |
| loss_kl         | 0.006740652 |
| loss_pol_entpen | 0.0         |
| loss_pol_surr   | -0.02168241 |
| loss_vf_loss    | 0.008421457 |
---------------------------------
********** Iteration 765 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00085 |       0.00000 |       0.01370 |       0.00093 |       0.62334
     -0.00660 |       0.00000 |       0.01184 |       0.00208

********** Iteration 770 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00018 |       0.00000 |       0.02086 |       0.00105 |       0.59104
     -0.00625 |       0.00000 |       0.01803 |       0.00253 |       0.58993
     -0.00948 |       0.00000 |       0.01680 |       0.00244 |       0.58768
     -0.01139 |       0.00000 |       0.01618 |       0.00309 |       0.59033
     -0.01259 |       0.00000 |       0.01543 |       0.00328 |       0.58879
     -0.01414 |       0.00000 |       0.01493 |       0.00389 |       0.58818
     -0.01444 |       0.00000 |       0.01451 |       0.00434 |       0.58925
     -0.01574 |       0.00000 |       0.01420 |       0.00475 |       0.58974
     -0.01673 |       0.00000 |       0.01402 |       0.00530 |       0.58908
     -0.01732 |       0.00000 |       0.01378 |       0.00546 |       0.59007
Evaluating losses...
     -0.01920 |       0.00000 |       0.01352 |       0.00555 |      

     -0.01900 |       0.00000 |       0.00874 |       0.00679 |       0.59083
     -0.01994 |       0.00000 |       0.00862 |       0.00682 |       0.59006
Evaluating losses...
     -0.02189 |       0.00000 |       0.00838 |       0.00664 |       0.59031
----------------------------------
| EpLenMean       | 621          |
| EpRewMean       | -4.85        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 5042         |
| TimeElapsed     | 8.63e+03     |
| TimestepsSoFar  | 3178496      |
| ev_tdlam_before | 0.846        |
| loss_ent        | 0.5903078    |
| loss_kl         | 0.0066375425 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.02188675  |
| loss_vf_loss    | 0.008377643  |
----------------------------------
********** Iteration 776 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     6.78e-05 |       0.00000 |       0.01554 |       0.00124 |       0.58326
     -0.00739 |       0.00000 |       0.01213 |

********** Iteration 781 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00081 |       0.00000 |       0.01472 |       0.00108 |       0.58447
     -0.00783 |       0.00000 |       0.01200 |       0.00253 |       0.58316
     -0.01115 |       0.00000 |       0.01106 |       0.00348 |       0.57826
     -0.01310 |       0.00000 |       0.01037 |       0.00482 |       0.57673
     -0.01510 |       0.00000 |       0.00992 |       0.00459 |       0.57632
     -0.01616 |       0.00000 |       0.00959 |       0.00543 |       0.57612
     -0.01760 |       0.00000 |       0.00937 |       0.00609 |       0.57471
     -0.01849 |       0.00000 |       0.00921 |       0.00640 |       0.57434
     -0.01956 |       0.00000 |       0.00902 |       0.00674 |       0.57389
     -0.02053 |       0.00000 |       0.00883 |       0.00716 |       0.57519
Evaluating losses...
     -0.02181 |       0.00000 |       0.00855 |       0.00803 |      

     -0.01756 |       0.00000 |       0.00941 |       0.00559 |       0.56890
     -0.01805 |       0.00000 |       0.00925 |       0.00584 |       0.56963
Evaluating losses...
     -0.02032 |       0.00000 |       0.00902 |       0.00578 |       0.57085
----------------------------------
| EpLenMean       | 638          |
| EpRewMean       | -4.87        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 5112         |
| TimeElapsed     | 8.68e+03     |
| TimestepsSoFar  | 3223552      |
| ev_tdlam_before | 0.861        |
| loss_ent        | 0.5708537    |
| loss_kl         | 0.005778922  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020316523 |
| loss_vf_loss    | 0.00901516   |
----------------------------------
********** Iteration 787 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00160 |       0.00000 |       0.01061 |       0.00071 |       0.59800
     -0.00491 |       0.00000 |       0.00912 |

********** Iteration 792 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00076 |       0.00000 |       0.01474 |       0.00152 |       0.59393
     -0.00673 |       0.00000 |       0.01169 |       0.00234 |       0.58776
     -0.00953 |       0.00000 |       0.01077 |       0.00357 |       0.58616
     -0.01239 |       0.00000 |       0.01019 |       0.00388 |       0.58608
     -0.01361 |       0.00000 |       0.00978 |       0.00409 |       0.58661
     -0.01503 |       0.00000 |       0.00946 |       0.00450 |       0.58600
     -0.01636 |       0.00000 |       0.00920 |       0.00462 |       0.58461
     -0.01726 |       0.00000 |       0.00899 |       0.00502 |       0.58477
     -0.01768 |       0.00000 |       0.00887 |       0.00585 |       0.58566
     -0.01895 |       0.00000 |       0.00864 |       0.00606 |       0.58432
Evaluating losses...
     -0.02042 |       0.00000 |       0.00833 |       0.00643 |      

     -0.01644 |       0.00000 |       0.00882 |       0.00534 |       0.59102
     -0.01786 |       0.00000 |       0.00860 |       0.00574 |       0.59076
     -0.01837 |       0.00000 |       0.00846 |       0.00616 |       0.59106
Evaluating losses...
     -0.02077 |       0.00000 |       0.00807 |       0.00602 |       0.59184
----------------------------------
| EpLenMean       | 643          |
| EpRewMean       | -4.89        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 5181         |
| TimeElapsed     | 9.18e+03     |
| TimestepsSoFar  | 3268608      |
| ev_tdlam_before | 0.861        |
| loss_ent        | 0.5918428    |
| loss_kl         | 0.0060231322 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020768633 |
| loss_vf_loss    | 0.008067041  |
----------------------------------
********** Iteration 798 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00125 |       0.00000 |       0.01436 |

********** Iteration 803 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00141 |       0.00000 |       0.01707 |       0.00079 |       0.60241
     -0.00613 |       0.00000 |       0.01423 |       0.00237 |       0.60225
     -0.00996 |       0.00000 |       0.01313 |       0.00320 |       0.60211
     -0.01251 |       0.00000 |       0.01274 |       0.00391 |       0.60315
     -0.01433 |       0.00000 |       0.01221 |       0.00441 |       0.60389
     -0.01590 |       0.00000 |       0.01172 |       0.00529 |       0.60326
     -0.01662 |       0.00000 |       0.01152 |       0.00520 |       0.60483
     -0.01711 |       0.00000 |       0.01130 |       0.00592 |       0.60481
     -0.01862 |       0.00000 |       0.01114 |       0.00627 |       0.60374
     -0.01968 |       0.00000 |       0.01089 |       0.00662 |       0.60382
Evaluating losses...
     -0.02166 |       0.00000 |       0.01077 |       0.00676 |      

     -0.01723 |       0.00000 |       0.00851 |       0.00717 |       0.58670
     -0.01853 |       0.00000 |       0.00835 |       0.00729 |       0.58681
Evaluating losses...
     -0.02014 |       0.00000 |       0.00809 |       0.00783 |       0.58653
----------------------------------
| EpLenMean       | 614          |
| EpRewMean       | -4.86        |
| EpThisIter      | 5            |
| EpisodesSoFar   | 5255         |
| TimeElapsed     | 9.24e+03     |
| TimestepsSoFar  | 3313664      |
| ev_tdlam_before | 0.817        |
| loss_ent        | 0.5865338    |
| loss_kl         | 0.007825525  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020140218 |
| loss_vf_loss    | 0.008087646  |
----------------------------------
********** Iteration 809 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00125 |       0.00000 |       0.01370 |       0.00091 |       0.59495
     -0.00627 |       0.00000 |       0.01193 |

********** Iteration 814 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00108 |       0.00000 |       0.01215 |       0.00200 |       0.60796
     -0.00548 |       0.00000 |       0.01073 |       0.00231 |       0.60944
     -0.00779 |       0.00000 |       0.01017 |       0.00228 |       0.60702
     -0.01049 |       0.00000 |       0.00969 |       0.00277 |       0.60421
     -0.01209 |       0.00000 |       0.00940 |       0.00300 |       0.60397
     -0.01303 |       0.00000 |       0.00913 |       0.00404 |       0.60503
     -0.01414 |       0.00000 |       0.00898 |       0.00421 |       0.60227
     -0.01563 |       0.00000 |       0.00889 |       0.00442 |       0.60386
     -0.01696 |       0.00000 |       0.00871 |       0.00450 |       0.60209
     -0.01749 |       0.00000 |       0.00866 |       0.00523 |       0.60148
Evaluating losses...
     -0.01969 |       0.00000 |       0.00834 |       0.00518 |      

     -0.01650 |       0.00000 |       0.00845 |       0.00685 |       0.59874
     -0.01740 |       0.00000 |       0.00841 |       0.00735 |       0.59708
Evaluating losses...
     -0.01959 |       0.00000 |       0.00812 |       0.00724 |       0.59950
----------------------------------
| EpLenMean       | 612          |
| EpRewMean       | -4.9         |
| EpThisIter      | 6            |
| EpisodesSoFar   | 5328         |
| TimeElapsed     | 9.28e+03     |
| TimestepsSoFar  | 3358720      |
| ev_tdlam_before | 0.87         |
| loss_ent        | 0.59950066   |
| loss_kl         | 0.007238873  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.019591378 |
| loss_vf_loss    | 0.008115192  |
----------------------------------
********** Iteration 820 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00088 |       0.00000 |       0.01174 |       0.00165 |       0.59146
     -0.00796 |       0.00000 |       0.01030 |

********** Iteration 825 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00024 |       0.00000 |       0.01310 |       0.00081 |       0.62518
     -0.00706 |       0.00000 |       0.01097 |       0.00177 |       0.62533
     -0.01020 |       0.00000 |       0.01011 |       0.00289 |       0.63156
     -0.01255 |       0.00000 |       0.00958 |       0.00351 |       0.62743
     -0.01420 |       0.00000 |       0.00916 |       0.00427 |       0.62922
     -0.01600 |       0.00000 |       0.00884 |       0.00458 |       0.62778
     -0.01697 |       0.00000 |       0.00852 |       0.00540 |       0.62838
     -0.01790 |       0.00000 |       0.00833 |       0.00582 |       0.62853
     -0.01940 |       0.00000 |       0.00806 |       0.00643 |       0.62872
     -0.01976 |       0.00000 |       0.00795 |       0.00689 |       0.62829
Evaluating losses...
     -0.02239 |       0.00000 |       0.00772 |       0.00708 |      

     -0.01954 |       0.00000 |       0.00794 |       0.00549 |       0.61400
     -0.01970 |       0.00000 |       0.00777 |       0.00613 |       0.61539
Evaluating losses...
     -0.02226 |       0.00000 |       0.00749 |       0.00652 |       0.61649
----------------------------------
| EpLenMean       | 642          |
| EpRewMean       | -4.88        |
| EpThisIter      | 5            |
| EpisodesSoFar   | 5397         |
| TimeElapsed     | 9.33e+03     |
| TimestepsSoFar  | 3403776      |
| ev_tdlam_before | 0.844        |
| loss_ent        | 0.61649114   |
| loss_kl         | 0.006521339  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022264631 |
| loss_vf_loss    | 0.0074945698 |
----------------------------------
********** Iteration 831 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00062 |       0.00000 |       0.01375 |       0.00097 |       0.64876
     -0.00672 |       0.00000 |       0.01138 |

********** Iteration 836 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00139 |       0.00000 |       0.01249 |       0.00159 |       0.61254
     -0.00878 |       0.00000 |       0.01029 |       0.00336 |       0.61143
     -0.01149 |       0.00000 |       0.00952 |       0.00399 |       0.61154
     -0.01342 |       0.00000 |       0.00913 |       0.00391 |       0.61324
     -0.01504 |       0.00000 |       0.00878 |       0.00446 |       0.61305
     -0.01631 |       0.00000 |       0.00865 |       0.00518 |       0.61381
     -0.01751 |       0.00000 |       0.00843 |       0.00573 |       0.61453
     -0.01846 |       0.00000 |       0.00831 |       0.00606 |       0.61350
     -0.01940 |       0.00000 |       0.00813 |       0.00626 |       0.61681
     -0.02015 |       0.00000 |       0.00805 |       0.00663 |       0.61569
Evaluating losses...
     -0.02224 |       0.00000 |       0.00797 |       0.00710 |      

     -0.01973 |       0.00000 |       0.00910 |       0.00573 |       0.63493
     -0.02033 |       0.00000 |       0.00892 |       0.00629 |       0.63507
Evaluating losses...
     -0.02234 |       0.00000 |       0.00868 |       0.00711 |       0.63685
----------------------------------
| EpLenMean       | 646          |
| EpRewMean       | -4.85        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 5468         |
| TimeElapsed     | 9.39e+03     |
| TimestepsSoFar  | 3448832      |
| ev_tdlam_before | 0.841        |
| loss_ent        | 0.636852     |
| loss_kl         | 0.007106283  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022336852 |
| loss_vf_loss    | 0.008677127  |
----------------------------------
********** Iteration 842 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     7.01e-05 |       0.00000 |       0.01347 |       0.00111 |       0.61619
     -0.00852 |       0.00000 |       0.01126 |

********** Iteration 847 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00054 |       0.00000 |       0.01564 |       0.00131 |       0.63750
     -0.00625 |       0.00000 |       0.01338 |       0.00247 |       0.63395
     -0.01035 |       0.00000 |       0.01246 |       0.00319 |       0.63383
     -0.01257 |       0.00000 |       0.01160 |       0.00355 |       0.63479
     -0.01439 |       0.00000 |       0.01103 |       0.00366 |       0.63672
     -0.01522 |       0.00000 |       0.01064 |       0.00428 |       0.63437
     -0.01700 |       0.00000 |       0.01031 |       0.00461 |       0.63435
     -0.01752 |       0.00000 |       0.01001 |       0.00484 |       0.63667
     -0.01833 |       0.00000 |       0.00980 |       0.00563 |       0.63726
     -0.01882 |       0.00000 |       0.00956 |       0.00603 |       0.63589
Evaluating losses...
     -0.02137 |       0.00000 |       0.00930 |       0.00605 |      

     -0.02130 |       0.00000 |       0.00807 |       0.00842 |       0.65032
     -0.02221 |       0.00000 |       0.00792 |       0.00866 |       0.64857
Evaluating losses...
     -0.02459 |       0.00000 |       0.00768 |       0.00920 |       0.64965
----------------------------------
| EpLenMean       | 666          |
| EpRewMean       | -4.78        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 5534         |
| TimeElapsed     | 9.44e+03     |
| TimestepsSoFar  | 3493888      |
| ev_tdlam_before | 0.846        |
| loss_ent        | 0.64965004   |
| loss_kl         | 0.009203809  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.024590243 |
| loss_vf_loss    | 0.007682006  |
----------------------------------
********** Iteration 853 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00148 |       0.00000 |       0.01424 |       0.00091 |       0.63286
     -0.00642 |       0.00000 |       0.01169 |

********** Iteration 858 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00130 |       0.00000 |       0.01199 |       0.00106 |       0.65174
     -0.00682 |       0.00000 |       0.01060 |       0.00245 |       0.64921
     -0.00994 |       0.00000 |       0.01001 |       0.00297 |       0.64916
     -0.01253 |       0.00000 |       0.00967 |       0.00352 |       0.64860
     -0.01480 |       0.00000 |       0.00937 |       0.00417 |       0.65019
     -0.01630 |       0.00000 |       0.00910 |       0.00461 |       0.64960
     -0.01709 |       0.00000 |       0.00893 |       0.00521 |       0.64873
     -0.01785 |       0.00000 |       0.00870 |       0.00586 |       0.64947
     -0.01902 |       0.00000 |       0.00850 |       0.00591 |       0.64914
     -0.02027 |       0.00000 |       0.00836 |       0.00630 |       0.65007
Evaluating losses...
     -0.02242 |       0.00000 |       0.00818 |       0.00663 |      

     -0.02157 |       0.00000 |       0.00958 |       0.00598 |       0.64915
     -0.02236 |       0.00000 |       0.00938 |       0.00641 |       0.64959
Evaluating losses...
     -0.02455 |       0.00000 |       0.00906 |       0.00663 |       0.64939
----------------------------------
| EpLenMean       | 662          |
| EpRewMean       | -4.85        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 5602         |
| TimeElapsed     | 9.99e+03     |
| TimestepsSoFar  | 3538944      |
| ev_tdlam_before | 0.831        |
| loss_ent        | 0.6493857    |
| loss_kl         | 0.0066273436 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.02454893  |
| loss_vf_loss    | 0.0090553155 |
----------------------------------
********** Iteration 864 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00104 |       0.00000 |       0.01713 |       0.00147 |       0.64233
     -0.00649 |       0.00000 |       0.01458 |

********** Iteration 869 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     7.93e-05 |       0.00000 |       0.01395 |       0.00129 |       0.62352
     -0.00734 |       0.00000 |       0.01162 |       0.00274 |       0.62287
     -0.01047 |       0.00000 |       0.01109 |       0.00318 |       0.62241
     -0.01255 |       0.00000 |       0.01048 |       0.00378 |       0.62445
     -0.01441 |       0.00000 |       0.01031 |       0.00413 |       0.62237
     -0.01663 |       0.00000 |       0.01006 |       0.00486 |       0.62344
     -0.01742 |       0.00000 |       0.00985 |       0.00508 |       0.62240
     -0.01892 |       0.00000 |       0.00962 |       0.00547 |       0.62301
     -0.01989 |       0.00000 |       0.00958 |       0.00613 |       0.62487
     -0.02006 |       0.00000 |       0.00938 |       0.00656 |       0.62380
Evaluating losses...
     -0.02128 |       0.00000 |       0.00910 |       0.00768 |      

     -0.02009 |       0.00000 |       0.00880 |       0.00568 |       0.62943
     -0.02172 |       0.00000 |       0.00867 |       0.00600 |       0.63093
Evaluating losses...
     -0.02345 |       0.00000 |       0.00835 |       0.00625 |       0.62957
----------------------------------
| EpLenMean       | 634          |
| EpRewMean       | -4.83        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 5673         |
| TimeElapsed     | 1.01e+04     |
| TimestepsSoFar  | 3584000      |
| ev_tdlam_before | 0.833        |
| loss_ent        | 0.6295654    |
| loss_kl         | 0.0062510213 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.023447827 |
| loss_vf_loss    | 0.008348663  |
----------------------------------
********** Iteration 875 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -5.81e-05 |       0.00000 |       0.01407 |       0.00088 |       0.59946
     -0.00769 |       0.00000 |       0.01221 |

********** Iteration 880 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00021 |       0.00000 |       0.01564 |       0.00120 |       0.59569
     -0.00723 |       0.00000 |       0.01298 |       0.00247 |       0.59712
     -0.01061 |       0.00000 |       0.01176 |       0.00303 |       0.59801
     -0.01342 |       0.00000 |       0.01108 |       0.00347 |       0.59824
     -0.01556 |       0.00000 |       0.01054 |       0.00401 |       0.59703
     -0.01643 |       0.00000 |       0.01020 |       0.00431 |       0.59738
     -0.01803 |       0.00000 |       0.00985 |       0.00463 |       0.59620
     -0.01897 |       0.00000 |       0.00963 |       0.00504 |       0.59735
     -0.02036 |       0.00000 |       0.00933 |       0.00544 |       0.59584
     -0.02124 |       0.00000 |       0.00926 |       0.00591 |       0.59648
Evaluating losses...
     -0.02289 |       0.00000 |       0.00888 |       0.00580 |      

     -0.02016 |       0.00000 |       0.01042 |       0.00650 |       0.62685
     -0.02081 |       0.00000 |       0.01025 |       0.00673 |       0.62781
Evaluating losses...
     -0.02315 |       0.00000 |       0.00984 |       0.00678 |       0.62797
----------------------------------
| EpLenMean       | 667          |
| EpRewMean       | -4.76        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 5740         |
| TimeElapsed     | 1.01e+04     |
| TimestepsSoFar  | 3629056      |
| ev_tdlam_before | 0.838        |
| loss_ent        | 0.627975     |
| loss_kl         | 0.0067840363 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.023152132 |
| loss_vf_loss    | 0.009837603  |
----------------------------------
********** Iteration 886 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00161 |       0.00000 |       0.01166 |       0.00122 |       0.60628
     -0.00655 |       0.00000 |       0.00981 |

********** Iteration 891 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00110 |       0.00000 |       0.01372 |       0.00120 |       0.62565
     -0.00648 |       0.00000 |       0.01102 |       0.00248 |       0.62520
     -0.00986 |       0.00000 |       0.00994 |       0.00399 |       0.62871
     -0.01188 |       0.00000 |       0.00923 |       0.00388 |       0.63016
     -0.01432 |       0.00000 |       0.00884 |       0.00422 |       0.63081
     -0.01533 |       0.00000 |       0.00850 |       0.00455 |       0.63048
     -0.01614 |       0.00000 |       0.00832 |       0.00512 |       0.63100
     -0.01796 |       0.00000 |       0.00811 |       0.00527 |       0.63144
     -0.01847 |       0.00000 |       0.00792 |       0.00573 |       0.63248
     -0.01944 |       0.00000 |       0.00778 |       0.00603 |       0.63130
Evaluating losses...
     -0.02109 |       0.00000 |       0.00750 |       0.00653 |      

     -0.02133 |       0.00000 |       0.00752 |       0.00633 |       0.62954
     -0.02260 |       0.00000 |       0.00732 |       0.00696 |       0.62976
Evaluating losses...
     -0.02426 |       0.00000 |       0.00713 |       0.00696 |       0.63153
----------------------------------
| EpLenMean       | 673          |
| EpRewMean       | -4.87        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 5806         |
| TimeElapsed     | 1.02e+04     |
| TimestepsSoFar  | 3674112      |
| ev_tdlam_before | 0.877        |
| loss_ent        | 0.6315334    |
| loss_kl         | 0.0069631967 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.024258446 |
| loss_vf_loss    | 0.0071340953 |
----------------------------------
********** Iteration 897 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00138 |       0.00000 |       0.01919 |       0.00127 |       0.67457
     -0.00715 |       0.00000 |       0.01546 |

********** Iteration 902 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00065 |       0.00000 |       0.01393 |       0.00134 |       0.61991
     -0.00852 |       0.00000 |       0.01115 |       0.00245 |       0.61728
     -0.01287 |       0.00000 |       0.01016 |       0.00319 |       0.61775
     -0.01518 |       0.00000 |       0.00977 |       0.00391 |       0.61860
     -0.01647 |       0.00000 |       0.00932 |       0.00438 |       0.62016
     -0.01835 |       0.00000 |       0.00890 |       0.00473 |       0.61878
     -0.01946 |       0.00000 |       0.00862 |       0.00541 |       0.61992
     -0.02068 |       0.00000 |       0.00846 |       0.00547 |       0.61965
     -0.02122 |       0.00000 |       0.00828 |       0.00611 |       0.61954
     -0.02224 |       0.00000 |       0.00818 |       0.00687 |       0.61769
Evaluating losses...
     -0.02457 |       0.00000 |       0.00782 |       0.00697 |      

     -0.01973 |       0.00000 |       0.00914 |       0.00567 |       0.59344
     -0.02079 |       0.00000 |       0.00907 |       0.00608 |       0.59418
Evaluating losses...
     -0.02298 |       0.00000 |       0.00873 |       0.00628 |       0.59471
----------------------------------
| EpLenMean       | 656          |
| EpRewMean       | -4.84        |
| EpThisIter      | 5            |
| EpisodesSoFar   | 5876         |
| TimeElapsed     | 1.02e+04     |
| TimestepsSoFar  | 3719168      |
| ev_tdlam_before | 0.794        |
| loss_ent        | 0.59471154   |
| loss_kl         | 0.0062771398 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022980338 |
| loss_vf_loss    | 0.0087272925 |
----------------------------------
********** Iteration 908 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00039 |       0.00000 |       0.01395 |       0.00089 |       0.63137
     -0.00724 |       0.00000 |       0.01203 |

********** Iteration 913 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -5.37e-06 |       0.00000 |       0.01319 |       0.00141 |       0.62391
     -0.00584 |       0.00000 |       0.01096 |       0.00247 |       0.62717
     -0.00955 |       0.00000 |       0.01004 |       0.00340 |       0.62647
     -0.01198 |       0.00000 |       0.00960 |       0.00361 |       0.62843
     -0.01323 |       0.00000 |       0.00921 |       0.00457 |       0.62667
     -0.01444 |       0.00000 |       0.00882 |       0.00441 |       0.62784
     -0.01553 |       0.00000 |       0.00860 |       0.00497 |       0.62818
     -0.01697 |       0.00000 |       0.00843 |       0.00540 |       0.63051
     -0.01819 |       0.00000 |       0.00828 |       0.00557 |       0.63174
     -0.01852 |       0.00000 |       0.00825 |       0.00650 |       0.63134
Evaluating losses...
     -0.02097 |       0.00000 |       0.00787 |       0.00644 |      

     -0.02016 |       0.00000 |       0.01077 |       0.00618 |       0.64343
     -0.02150 |       0.00000 |       0.01049 |       0.00644 |       0.64547
     -0.02189 |       0.00000 |       0.01031 |       0.00706 |       0.64500
Evaluating losses...
     -0.02399 |       0.00000 |       0.00995 |       0.00718 |       0.64410
----------------------------------
| EpLenMean       | 644          |
| EpRewMean       | -4.83        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 5946         |
| TimeElapsed     | 1.09e+04     |
| TimestepsSoFar  | 3764224      |
| ev_tdlam_before | 0.835        |
| loss_ent        | 0.64410454   |
| loss_kl         | 0.0071835523 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.023994677 |
| loss_vf_loss    | 0.009945298  |
----------------------------------
********** Iteration 919 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00217 |       0.00000 |       0.01482 |

********** Iteration 924 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00049 |       0.00000 |       0.01402 |       0.00088 |       0.65052
     -0.00893 |       0.00000 |       0.01140 |       0.00208 |       0.64904
     -0.01228 |       0.00000 |       0.01063 |       0.00328 |       0.64837
     -0.01417 |       0.00000 |       0.01007 |       0.00372 |       0.64821
     -0.01630 |       0.00000 |       0.00960 |       0.00416 |       0.64854
     -0.01727 |       0.00000 |       0.00936 |       0.00450 |       0.64713
     -0.01798 |       0.00000 |       0.00908 |       0.00509 |       0.64700
     -0.01890 |       0.00000 |       0.00900 |       0.00537 |       0.64732
     -0.01951 |       0.00000 |       0.00876 |       0.00562 |       0.64793
     -0.02032 |       0.00000 |       0.00855 |       0.00613 |       0.64688
Evaluating losses...
     -0.02199 |       0.00000 |       0.00825 |       0.00618 |      

     -0.02162 |       0.00000 |       0.00882 |       0.00644 |       0.67428
     -0.02227 |       0.00000 |       0.00872 |       0.00675 |       0.67648
Evaluating losses...
     -0.02524 |       0.00000 |       0.00827 |       0.00716 |       0.67640
----------------------------------
| EpLenMean       | 658          |
| EpRewMean       | -4.82        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 6014         |
| TimeElapsed     | 1.09e+04     |
| TimestepsSoFar  | 3809280      |
| ev_tdlam_before | 0.822        |
| loss_ent        | 0.6764044    |
| loss_kl         | 0.0071561257 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.025238238 |
| loss_vf_loss    | 0.008272756  |
----------------------------------
********** Iteration 930 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00032 |       0.00000 |       0.01343 |       0.00107 |       0.67007
     -0.00807 |       0.00000 |       0.01072 |

********** Iteration 935 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00045 |       0.00000 |       0.01936 |       0.00162 |       0.64814
     -0.00936 |       0.00000 |       0.01584 |       0.00327 |       0.64762
     -0.01244 |       0.00000 |       0.01414 |       0.00411 |       0.64953
     -0.01446 |       0.00000 |       0.01304 |       0.00452 |       0.64825
     -0.01630 |       0.00000 |       0.01232 |       0.00554 |       0.64969
     -0.01778 |       0.00000 |       0.01190 |       0.00617 |       0.64906
     -0.01915 |       0.00000 |       0.01134 |       0.00652 |       0.64943
     -0.01972 |       0.00000 |       0.01094 |       0.00671 |       0.64990
     -0.02082 |       0.00000 |       0.01062 |       0.00760 |       0.65171
     -0.02176 |       0.00000 |       0.01035 |       0.00844 |       0.65095
Evaluating losses...
     -0.02427 |       0.00000 |       0.00995 |       0.00843 |      

     -0.02088 |       0.00000 |       0.00952 |       0.00706 |       0.66647
     -0.02166 |       0.00000 |       0.00938 |       0.00722 |       0.66776
Evaluating losses...
     -0.02352 |       0.00000 |       0.00902 |       0.00733 |       0.66593
----------------------------------
| EpLenMean       | 678          |
| EpRewMean       | -4.75        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 6080         |
| TimeElapsed     | 1.1e+04      |
| TimestepsSoFar  | 3854336      |
| ev_tdlam_before | 0.866        |
| loss_ent        | 0.6659265    |
| loss_kl         | 0.0073260684 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.023515418 |
| loss_vf_loss    | 0.009019175  |
----------------------------------
********** Iteration 941 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00075 |       0.00000 |       0.01461 |       0.00096 |       0.64890
     -0.00642 |       0.00000 |       0.01189 |

********** Iteration 946 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00014 |       0.00000 |       0.01220 |       0.00131 |       0.68407
     -0.00966 |       0.00000 |       0.01024 |       0.00350 |       0.68653
     -0.01325 |       0.00000 |       0.00940 |       0.00433 |       0.68450
     -0.01635 |       0.00000 |       0.00893 |       0.00537 |       0.68473
     -0.01860 |       0.00000 |       0.00864 |       0.00573 |       0.68473
     -0.01992 |       0.00000 |       0.00833 |       0.00668 |       0.68270
     -0.02093 |       0.00000 |       0.00819 |       0.00673 |       0.68294
     -0.02213 |       0.00000 |       0.00806 |       0.00773 |       0.68413
     -0.02307 |       0.00000 |       0.00796 |       0.00830 |       0.68504
     -0.02397 |       0.00000 |       0.00775 |       0.00848 |       0.68321
Evaluating losses...
     -0.02609 |       0.00000 |       0.00766 |       0.00882 |      

     -0.01887 |       0.00000 |       0.01025 |       0.00588 |       0.65820
     -0.01979 |       0.00000 |       0.01004 |       0.00643 |       0.65710
Evaluating losses...
     -0.02169 |       0.00000 |       0.00969 |       0.00654 |       0.65640
----------------------------------
| EpLenMean       | 666          |
| EpRewMean       | -4.77        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 6148         |
| TimeElapsed     | 1.1e+04      |
| TimestepsSoFar  | 3899392      |
| ev_tdlam_before | 0.832        |
| loss_ent        | 0.6563974    |
| loss_kl         | 0.006537121  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.021693146 |
| loss_vf_loss    | 0.009689309  |
----------------------------------
********** Iteration 952 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00175 |       0.00000 |       0.01687 |       0.00081 |       0.67332
     -0.00517 |       0.00000 |       0.01402 |

********** Iteration 957 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00025 |       0.00000 |       0.01395 |       0.00084 |       0.67744
     -0.00690 |       0.00000 |       0.01144 |       0.00270 |       0.67494
     -0.01012 |       0.00000 |       0.01046 |       0.00320 |       0.67487
     -0.01294 |       0.00000 |       0.00993 |       0.00382 |       0.67430
     -0.01458 |       0.00000 |       0.00934 |       0.00452 |       0.67252
     -0.01574 |       0.00000 |       0.00904 |       0.00479 |       0.67087
     -0.01733 |       0.00000 |       0.00887 |       0.00580 |       0.67175
     -0.01852 |       0.00000 |       0.00859 |       0.00572 |       0.67061
     -0.01950 |       0.00000 |       0.00831 |       0.00631 |       0.66875
     -0.02079 |       0.00000 |       0.00810 |       0.00664 |       0.67094
Evaluating losses...
     -0.02232 |       0.00000 |       0.00783 |       0.00674 |      

     -0.02105 |       0.00000 |       0.00832 |       0.00589 |       0.67926
     -0.02163 |       0.00000 |       0.00816 |       0.00624 |       0.67905
Evaluating losses...
     -0.02453 |       0.00000 |       0.00786 |       0.00661 |       0.67946
----------------------------------
| EpLenMean       | 639          |
| EpRewMean       | -4.89        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 6219         |
| TimeElapsed     | 1.11e+04     |
| TimestepsSoFar  | 3944448      |
| ev_tdlam_before | 0.845        |
| loss_ent        | 0.6794611    |
| loss_kl         | 0.006610064  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.024525786 |
| loss_vf_loss    | 0.007857809  |
----------------------------------
********** Iteration 963 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00057 |       0.00000 |       0.01868 |       0.00129 |       0.67559
     -0.00746 |       0.00000 |       0.01431 |

********** Iteration 968 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00107 |       0.00000 |       0.01931 |       0.00151 |       0.69986
     -0.00781 |       0.00000 |       0.01474 |       0.00331 |       0.69995
     -0.01222 |       0.00000 |       0.01319 |       0.00403 |       0.69917
     -0.01491 |       0.00000 |       0.01223 |       0.00457 |       0.69898
     -0.01724 |       0.00000 |       0.01172 |       0.00513 |       0.69881
     -0.01905 |       0.00000 |       0.01130 |       0.00595 |       0.69935
     -0.01987 |       0.00000 |       0.01112 |       0.00693 |       0.69789
     -0.02093 |       0.00000 |       0.01082 |       0.00735 |       0.69842
     -0.02219 |       0.00000 |       0.01050 |       0.00756 |       0.69893
     -0.02261 |       0.00000 |       0.01030 |       0.00766 |       0.69772
Evaluating losses...
     -0.02509 |       0.00000 |       0.01012 |       0.00803 |      

     -0.01893 |       0.00000 |       0.00970 |       0.00580 |       0.71355
     -0.01945 |       0.00000 |       0.00950 |       0.00619 |       0.71404
Evaluating losses...
     -0.02200 |       0.00000 |       0.00914 |       0.00618 |       0.71450
----------------------------------
| EpLenMean       | 634          |
| EpRewMean       | -4.8         |
| EpThisIter      | 8            |
| EpisodesSoFar   | 6291         |
| TimeElapsed     | 1.12e+04     |
| TimestepsSoFar  | 3989504      |
| ev_tdlam_before | 0.836        |
| loss_ent        | 0.71449524   |
| loss_kl         | 0.006175951  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022000007 |
| loss_vf_loss    | 0.009141494  |
----------------------------------
********** Iteration 974 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     2.13e-05 |       0.00000 |       0.01580 |       0.00109 |       0.69195
     -0.00847 |       0.00000 |       0.01326 |

********** Iteration 979 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00077 |       0.00000 |       0.01335 |       0.00131 |       0.70016
     -0.00652 |       0.00000 |       0.01139 |       0.00189 |       0.69927
     -0.01097 |       0.00000 |       0.01071 |       0.00288 |       0.69713
     -0.01260 |       0.00000 |       0.01028 |       0.00377 |       0.69696
     -0.01504 |       0.00000 |       0.00994 |       0.00408 |       0.69734
     -0.01590 |       0.00000 |       0.00958 |       0.00478 |       0.69598
     -0.01721 |       0.00000 |       0.00940 |       0.00505 |       0.69551
     -0.01846 |       0.00000 |       0.00923 |       0.00536 |       0.69525
     -0.01864 |       0.00000 |       0.00898 |       0.00569 |       0.69672
     -0.02009 |       0.00000 |       0.00887 |       0.00614 |       0.69490
Evaluating losses...
     -0.02236 |       0.00000 |       0.00862 |       0.00616 |      

     -0.01943 |       0.00000 |       0.00818 |       0.00733 |       0.71457
     -0.02058 |       0.00000 |       0.00809 |       0.00746 |       0.71490
Evaluating losses...
     -0.02293 |       0.00000 |       0.00784 |       0.00749 |       0.71472
----------------------------------
| EpLenMean       | 638          |
| EpRewMean       | -4.88        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 6360         |
| TimeElapsed     | 1.17e+04     |
| TimestepsSoFar  | 4034560      |
| ev_tdlam_before | 0.83         |
| loss_ent        | 0.714719     |
| loss_kl         | 0.0074910168 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022932468 |
| loss_vf_loss    | 0.0078433845 |
----------------------------------
********** Iteration 985 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00034 |       0.00000 |       0.01416 |       0.00099 |       0.67517
     -0.00726 |       0.00000 |       0.01191 |

********** Iteration 990 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00050 |       0.00000 |       0.01329 |       0.00086 |       0.69567
     -0.00817 |       0.00000 |       0.01080 |       0.00193 |       0.69962
     -0.01150 |       0.00000 |       0.00977 |       0.00288 |       0.69970
     -0.01351 |       0.00000 |       0.00920 |       0.00363 |       0.70009
     -0.01582 |       0.00000 |       0.00874 |       0.00417 |       0.70158
     -0.01729 |       0.00000 |       0.00849 |       0.00470 |       0.69887
     -0.01825 |       0.00000 |       0.00820 |       0.00520 |       0.70064
     -0.01900 |       0.00000 |       0.00803 |       0.00565 |       0.70022
     -0.01979 |       0.00000 |       0.00788 |       0.00582 |       0.70096
     -0.02134 |       0.00000 |       0.00767 |       0.00632 |       0.70058
Evaluating losses...
     -0.02344 |       0.00000 |       0.00746 |       0.00660 |      

     -0.02076 |       0.00000 |       0.00877 |       0.00638 |       0.69096
     -0.02132 |       0.00000 |       0.00855 |       0.00678 |       0.69019
Evaluating losses...
     -0.02343 |       0.00000 |       0.00839 |       0.00680 |       0.69027
----------------------------------
| EpLenMean       | 637          |
| EpRewMean       | -4.89        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 6431         |
| TimeElapsed     | 1.18e+04     |
| TimestepsSoFar  | 4079616      |
| ev_tdlam_before | 0.838        |
| loss_ent        | 0.69027436   |
| loss_kl         | 0.0067970646 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.023433873 |
| loss_vf_loss    | 0.008390546  |
----------------------------------
********** Iteration 996 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00024 |       0.00000 |       0.01453 |       0.00149 |       0.69731
     -0.00900 |       0.00000 |       0.01240 |

********** Iteration 1001 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00070 |       0.00000 |       0.01708 |       0.00062 |       0.68541
     -0.00734 |       0.00000 |       0.01364 |       0.00213 |       0.68681
     -0.01173 |       0.00000 |       0.01249 |       0.00304 |       0.68699
     -0.01309 |       0.00000 |       0.01165 |       0.00365 |       0.68785
     -0.01469 |       0.00000 |       0.01115 |       0.00414 |       0.68559
     -0.01648 |       0.00000 |       0.01075 |       0.00469 |       0.68563
     -0.01731 |       0.00000 |       0.01042 |       0.00518 |       0.68515
     -0.01878 |       0.00000 |       0.01016 |       0.00533 |       0.68491
     -0.01927 |       0.00000 |       0.00982 |       0.00605 |       0.68561
     -0.02014 |       0.00000 |       0.00963 |       0.00628 |       0.68485
Evaluating losses...
     -0.02230 |       0.00000 |       0.00926 |       0.00659 |     

     -0.02135 |       0.00000 |       0.00808 |       0.00588 |       0.66391
     -0.02222 |       0.00000 |       0.00796 |       0.00634 |       0.66309
Evaluating losses...
     -0.02464 |       0.00000 |       0.00774 |       0.00657 |       0.66376
----------------------------------
| EpLenMean       | 658          |
| EpRewMean       | -4.82        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 6499         |
| TimeElapsed     | 1.18e+04     |
| TimestepsSoFar  | 4124672      |
| ev_tdlam_before | 0.846        |
| loss_ent        | 0.6637569    |
| loss_kl         | 0.0065668044 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.024639979 |
| loss_vf_loss    | 0.0077356775 |
----------------------------------
********** Iteration 1007 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00102 |       0.00000 |       0.01315 |       0.00135 |       0.68193
     -0.00656 |       0.00000 |       0.01139 

********** Iteration 1012 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00088 |       0.00000 |       0.01756 |       0.00136 |       0.65086
     -0.00887 |       0.00000 |       0.01475 |       0.00258 |       0.64890
     -0.01318 |       0.00000 |       0.01351 |       0.00345 |       0.64894
     -0.01514 |       0.00000 |       0.01264 |       0.00448 |       0.64884
     -0.01689 |       0.00000 |       0.01197 |       0.00480 |       0.64902
     -0.01824 |       0.00000 |       0.01146 |       0.00536 |       0.64975
     -0.01963 |       0.00000 |       0.01099 |       0.00554 |       0.64914
     -0.02025 |       0.00000 |       0.01060 |       0.00579 |       0.64955
     -0.02097 |       0.00000 |       0.01039 |       0.00679 |       0.64857
     -0.02159 |       0.00000 |       0.00992 |       0.00671 |       0.64932
Evaluating losses...
     -0.02351 |       0.00000 |       0.00958 |       0.00699 |     

     -0.01899 |       0.00000 |       0.00698 |       0.00622 |       0.62906
     -0.02016 |       0.00000 |       0.00685 |       0.00669 |       0.62843
Evaluating losses...
     -0.02218 |       0.00000 |       0.00670 |       0.00684 |       0.62868
----------------------------------
| EpLenMean       | 646          |
| EpRewMean       | -4.87        |
| EpThisIter      | 5            |
| EpisodesSoFar   | 6569         |
| TimeElapsed     | 1.19e+04     |
| TimestepsSoFar  | 4169728      |
| ev_tdlam_before | 0.86         |
| loss_ent        | 0.6286822    |
| loss_kl         | 0.0068386053 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.02218052  |
| loss_vf_loss    | 0.0066986233 |
----------------------------------
********** Iteration 1018 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00046 |       0.00000 |       0.01572 |       0.00106 |       0.65509
     -0.00883 |       0.00000 |       0.01312 

********** Iteration 1023 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00162 |       0.00000 |       0.01922 |       0.00111 |       0.68116
     -0.00755 |       0.00000 |       0.01562 |       0.00335 |       0.68423
     -0.01073 |       0.00000 |       0.01417 |       0.00332 |       0.68253
     -0.01290 |       0.00000 |       0.01338 |       0.00423 |       0.68197
     -0.01470 |       0.00000 |       0.01271 |       0.00432 |       0.68069
     -0.01615 |       0.00000 |       0.01233 |       0.00484 |       0.68127
     -0.01731 |       0.00000 |       0.01170 |       0.00584 |       0.68193
     -0.01840 |       0.00000 |       0.01127 |       0.00606 |       0.68183
     -0.01963 |       0.00000 |       0.01096 |       0.00609 |       0.68224
     -0.02017 |       0.00000 |       0.01067 |       0.00686 |       0.68039
Evaluating losses...
     -0.02272 |       0.00000 |       0.01029 |       0.00735 |     

     -0.01945 |       0.00000 |       0.00921 |       0.00621 |       0.62709
     -0.02068 |       0.00000 |       0.00906 |       0.00688 |       0.62744
Evaluating losses...
     -0.02237 |       0.00000 |       0.00880 |       0.00713 |       0.62714
----------------------------------
| EpLenMean       | 634          |
| EpRewMean       | -4.88        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 6641         |
| TimeElapsed     | 1.19e+04     |
| TimestepsSoFar  | 4214784      |
| ev_tdlam_before | 0.861        |
| loss_ent        | 0.62713504   |
| loss_kl         | 0.0071280925 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022365583 |
| loss_vf_loss    | 0.008800413  |
----------------------------------
********** Iteration 1029 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00038 |       0.00000 |       0.01490 |       0.00126 |       0.63314
     -0.00699 |       0.00000 |       0.01177 

********** Iteration 1034 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00029 |       0.00000 |       0.01550 |       0.00159 |       0.64838
     -0.00821 |       0.00000 |       0.01291 |       0.00304 |       0.64369
     -0.01100 |       0.00000 |       0.01179 |       0.00296 |       0.64318
     -0.01405 |       0.00000 |       0.01105 |       0.00329 |       0.64379
     -0.01517 |       0.00000 |       0.01044 |       0.00382 |       0.64300
     -0.01665 |       0.00000 |       0.01004 |       0.00438 |       0.64333
     -0.01803 |       0.00000 |       0.00972 |       0.00481 |       0.64373
     -0.01902 |       0.00000 |       0.00935 |       0.00495 |       0.64342
     -0.01974 |       0.00000 |       0.00917 |       0.00533 |       0.64426
     -0.02072 |       0.00000 |       0.00892 |       0.00544 |       0.64515
Evaluating losses...
     -0.02310 |       0.00000 |       0.00863 |       0.00554 |     

     -0.01811 |       0.00000 |       0.01048 |       0.00635 |       0.64015
     -0.01883 |       0.00000 |       0.01031 |       0.00648 |       0.64050
     -0.01992 |       0.00000 |       0.01013 |       0.00660 |       0.64023
Evaluating losses...
     -0.02198 |       0.00000 |       0.00979 |       0.00680 |       0.63932
----------------------------------
| EpLenMean       | 647          |
| EpRewMean       | -4.87        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 6711         |
| TimeElapsed     | 1.25e+04     |
| TimestepsSoFar  | 4259840      |
| ev_tdlam_before | 0.846        |
| loss_ent        | 0.6393238    |
| loss_kl         | 0.006795184  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.021983076 |
| loss_vf_loss    | 0.009788767  |
----------------------------------
********** Iteration 1040 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00115 |       0.00000 |       0.01092 

********** Iteration 1045 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00012 |       0.00000 |       0.01250 |       0.00092 |       0.63843
     -0.00839 |       0.00000 |       0.01072 |       0.00233 |       0.63505
     -0.01212 |       0.00000 |       0.01000 |       0.00361 |       0.63478
     -0.01451 |       0.00000 |       0.00931 |       0.00375 |       0.63533
     -0.01617 |       0.00000 |       0.00888 |       0.00445 |       0.63582
     -0.01760 |       0.00000 |       0.00854 |       0.00475 |       0.63553
     -0.01915 |       0.00000 |       0.00833 |       0.00492 |       0.63697
     -0.01976 |       0.00000 |       0.00813 |       0.00557 |       0.63556
     -0.02083 |       0.00000 |       0.00790 |       0.00601 |       0.63701
     -0.02168 |       0.00000 |       0.00774 |       0.00656 |       0.63621
Evaluating losses...
     -0.02371 |       0.00000 |       0.00747 |       0.00645 |     

     -0.02077 |       0.00000 |       0.00936 |       0.00624 |       0.61129
     -0.02152 |       0.00000 |       0.00914 |       0.00647 |       0.61188
Evaluating losses...
     -0.02293 |       0.00000 |       0.00880 |       0.00713 |       0.61145
----------------------------------
| EpLenMean       | 661          |
| EpRewMean       | -4.91        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 6779         |
| TimeElapsed     | 1.26e+04     |
| TimestepsSoFar  | 4304896      |
| ev_tdlam_before | 0.844        |
| loss_ent        | 0.6114518    |
| loss_kl         | 0.007128458  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022927267 |
| loss_vf_loss    | 0.008801196  |
----------------------------------
********** Iteration 1051 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00099 |       0.00000 |       0.01393 |       0.00131 |       0.61183
     -0.00796 |       0.00000 |       0.01189 

********** Iteration 1056 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00013 |       0.00000 |       0.01838 |       0.00131 |       0.60285
     -0.00920 |       0.00000 |       0.01483 |       0.00306 |       0.60478
     -0.01311 |       0.00000 |       0.01358 |       0.00359 |       0.60606
     -0.01521 |       0.00000 |       0.01281 |       0.00428 |       0.60529
     -0.01704 |       0.00000 |       0.01235 |       0.00493 |       0.60627
     -0.01841 |       0.00000 |       0.01178 |       0.00568 |       0.60525
     -0.01929 |       0.00000 |       0.01138 |       0.00573 |       0.60469
     -0.02070 |       0.00000 |       0.01123 |       0.00638 |       0.60444
     -0.02176 |       0.00000 |       0.01080 |       0.00663 |       0.60641
     -0.02241 |       0.00000 |       0.01052 |       0.00713 |       0.60546
Evaluating losses...
     -0.02401 |       0.00000 |       0.01016 |       0.00746 |     

     -0.01800 |       0.00000 |       0.00793 |       0.00592 |       0.62617
     -0.01913 |       0.00000 |       0.00775 |       0.00638 |       0.62564
Evaluating losses...
     -0.02128 |       0.00000 |       0.00752 |       0.00672 |       0.62534
----------------------------------
| EpLenMean       | 663          |
| EpRewMean       | -4.86        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 6847         |
| TimeElapsed     | 1.26e+04     |
| TimestepsSoFar  | 4349952      |
| ev_tdlam_before | 0.878        |
| loss_ent        | 0.6253359    |
| loss_kl         | 0.006723295  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.02127716  |
| loss_vf_loss    | 0.0075204936 |
----------------------------------
********** Iteration 1062 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00067 |       0.00000 |       0.01324 |       0.00144 |       0.60153
     -0.00706 |       0.00000 |       0.01093 

********** Iteration 1067 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00122 |       0.00000 |       0.01507 |       0.00098 |       0.61747
     -0.00539 |       0.00000 |       0.01249 |       0.00228 |       0.61983
     -0.00964 |       0.00000 |       0.01139 |       0.00259 |       0.61857
     -0.01222 |       0.00000 |       0.01084 |       0.00329 |       0.61823
     -0.01351 |       0.00000 |       0.01045 |       0.00404 |       0.61928
     -0.01577 |       0.00000 |       0.01010 |       0.00489 |       0.61937
     -0.01768 |       0.00000 |       0.00997 |       0.00537 |       0.61844
     -0.01874 |       0.00000 |       0.00964 |       0.00620 |       0.61749
     -0.01978 |       0.00000 |       0.00941 |       0.00682 |       0.61984
     -0.02101 |       0.00000 |       0.00924 |       0.00706 |       0.61825
Evaluating losses...
     -0.02279 |       0.00000 |       0.00905 |       0.00744 |     

     -0.02018 |       0.00000 |       0.00945 |       0.00611 |       0.58962
     -0.02114 |       0.00000 |       0.00922 |       0.00617 |       0.58981
Evaluating losses...
     -0.02320 |       0.00000 |       0.00897 |       0.00639 |       0.58963
----------------------------------
| EpLenMean       | 659          |
| EpRewMean       | -4.89        |
| EpThisIter      | 5            |
| EpisodesSoFar   | 6914         |
| TimeElapsed     | 1.27e+04     |
| TimestepsSoFar  | 4395008      |
| ev_tdlam_before | 0.837        |
| loss_ent        | 0.58963      |
| loss_kl         | 0.006385566  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.023204386 |
| loss_vf_loss    | 0.008970067  |
----------------------------------
********** Iteration 1073 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00062 |       0.00000 |       0.01684 |       0.00083 |       0.60705
     -0.00752 |       0.00000 |       0.01391 

********** Iteration 1078 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00154 |       0.00000 |       0.01639 |       0.00149 |       0.58809
     -0.00773 |       0.00000 |       0.01325 |       0.00312 |       0.58675
     -0.01032 |       0.00000 |       0.01208 |       0.00306 |       0.58695
     -0.01275 |       0.00000 |       0.01139 |       0.00367 |       0.58685
     -0.01399 |       0.00000 |       0.01094 |       0.00394 |       0.58725
     -0.01539 |       0.00000 |       0.01071 |       0.00460 |       0.58806
     -0.01669 |       0.00000 |       0.01029 |       0.00452 |       0.58700
     -0.01763 |       0.00000 |       0.00996 |       0.00531 |       0.58802
     -0.01860 |       0.00000 |       0.00982 |       0.00560 |       0.58758
     -0.01947 |       0.00000 |       0.00968 |       0.00616 |       0.58911
Evaluating losses...
     -0.02106 |       0.00000 |       0.00948 |       0.00631 |     

     -0.01877 |       0.00000 |       0.00824 |       0.00572 |       0.62975
     -0.01902 |       0.00000 |       0.00804 |       0.00576 |       0.62997
Evaluating losses...
     -0.02182 |       0.00000 |       0.00779 |       0.00602 |       0.63049
----------------------------------
| EpLenMean       | 692          |
| EpRewMean       | -4.81        |
| EpThisIter      | 5            |
| EpisodesSoFar   | 6979         |
| TimeElapsed     | 1.27e+04     |
| TimestepsSoFar  | 4440064      |
| ev_tdlam_before | 0.853        |
| loss_ent        | 0.63048846   |
| loss_kl         | 0.0060156696 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.021821408 |
| loss_vf_loss    | 0.0077858646 |
----------------------------------
********** Iteration 1084 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00024 |       0.00000 |       0.01380 |       0.00097 |       0.62654
     -0.00771 |       0.00000 |       0.01155 

********** Iteration 1089 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00069 |       0.00000 |       0.01202 |       0.00088 |       0.61323
     -0.00735 |       0.00000 |       0.01017 |       0.00212 |       0.61422
     -0.01140 |       0.00000 |       0.00941 |       0.00255 |       0.61364
     -0.01340 |       0.00000 |       0.00887 |       0.00344 |       0.61437
     -0.01523 |       0.00000 |       0.00851 |       0.00413 |       0.61431
     -0.01721 |       0.00000 |       0.00818 |       0.00463 |       0.61527
     -0.01828 |       0.00000 |       0.00792 |       0.00462 |       0.61459
     -0.01981 |       0.00000 |       0.00770 |       0.00543 |       0.61548
     -0.02063 |       0.00000 |       0.00750 |       0.00611 |       0.61534
     -0.02184 |       0.00000 |       0.00738 |       0.00638 |       0.61566
Evaluating losses...
     -0.02406 |       0.00000 |       0.00726 |       0.00665 |     

     -0.01988 |       0.00000 |       0.00746 |       0.00604 |       0.62715
     -0.02083 |       0.00000 |       0.00739 |       0.00665 |       0.62671
Evaluating losses...
     -0.02291 |       0.00000 |       0.00718 |       0.00647 |       0.62717
----------------------------------
| EpLenMean       | 682          |
| EpRewMean       | -4.92        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 7046         |
| TimeElapsed     | 1.28e+04     |
| TimestepsSoFar  | 4485120      |
| ev_tdlam_before | 0.87         |
| loss_ent        | 0.6271731    |
| loss_kl         | 0.0064711324 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022912329 |
| loss_vf_loss    | 0.007178402  |
----------------------------------
********** Iteration 1095 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00030 |       0.00000 |       0.01517 |       0.00103 |       0.64124
     -0.00711 |       0.00000 |       0.01290 

********** Iteration 1100 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00061 |       0.00000 |       0.01325 |       0.00120 |       0.62768
     -0.00737 |       0.00000 |       0.01082 |       0.00254 |       0.62777
     -0.01114 |       0.00000 |       0.00977 |       0.00332 |       0.62780
     -0.01350 |       0.00000 |       0.00913 |       0.00329 |       0.62760
     -0.01503 |       0.00000 |       0.00863 |       0.00422 |       0.62796
     -0.01589 |       0.00000 |       0.00827 |       0.00439 |       0.62940
     -0.01733 |       0.00000 |       0.00791 |       0.00529 |       0.62925
     -0.01880 |       0.00000 |       0.00761 |       0.00539 |       0.62958
     -0.01941 |       0.00000 |       0.00749 |       0.00563 |       0.62830
     -0.02038 |       0.00000 |       0.00727 |       0.00621 |       0.62924
Evaluating losses...
     -0.02256 |       0.00000 |       0.00698 |       0.00649 |     

     -0.01967 |       0.00000 |       0.00993 |       0.00549 |       0.66181
     -0.02021 |       0.00000 |       0.00975 |       0.00567 |       0.66140
Evaluating losses...
     -0.02267 |       0.00000 |       0.00938 |       0.00581 |       0.66233
----------------------------------
| EpLenMean       | 731          |
| EpRewMean       | -4.88        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 7105         |
| TimeElapsed     | 1.34e+04     |
| TimestepsSoFar  | 4530176      |
| ev_tdlam_before | 0.849        |
| loss_ent        | 0.6623333    |
| loss_kl         | 0.005812714  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022672951 |
| loss_vf_loss    | 0.009375084  |
----------------------------------
********** Iteration 1106 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00031 |       0.00000 |       0.01646 |       0.00117 |       0.64301
     -0.00797 |       0.00000 |       0.01397 

********** Iteration 1111 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00079 |       0.00000 |       0.01477 |       0.00130 |       0.63164
     -0.00825 |       0.00000 |       0.01132 |       0.00301 |       0.63307
     -0.01104 |       0.00000 |       0.01033 |       0.00364 |       0.63217
     -0.01267 |       0.00000 |       0.00963 |       0.00416 |       0.63127
     -0.01512 |       0.00000 |       0.00923 |       0.00468 |       0.63079
     -0.01616 |       0.00000 |       0.00898 |       0.00531 |       0.63127
     -0.01685 |       0.00000 |       0.00876 |       0.00585 |       0.63105
     -0.01765 |       0.00000 |       0.00858 |       0.00610 |       0.63157
     -0.01842 |       0.00000 |       0.00832 |       0.00634 |       0.63155
     -0.01930 |       0.00000 |       0.00825 |       0.00701 |       0.63202
Evaluating losses...
     -0.02097 |       0.00000 |       0.00799 |       0.00723 |     

     -0.01812 |       0.00000 |       0.01007 |       0.00563 |       0.63539
     -0.01937 |       0.00000 |       0.00980 |       0.00615 |       0.63531
Evaluating losses...
     -0.02112 |       0.00000 |       0.00957 |       0.00662 |       0.63543
----------------------------------
| EpLenMean       | 726          |
| EpRewMean       | -4.82        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 7169         |
| TimeElapsed     | 1.35e+04     |
| TimestepsSoFar  | 4575232      |
| ev_tdlam_before | 0.847        |
| loss_ent        | 0.635426     |
| loss_kl         | 0.0066167302 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.021122582 |
| loss_vf_loss    | 0.009565472  |
----------------------------------
********** Iteration 1117 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00052 |       0.00000 |       0.01253 |       0.00096 |       0.61211
     -0.00632 |       0.00000 |       0.01051 

********** Iteration 1122 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00085 |       0.00000 |       0.01536 |       0.00084 |       0.62539
     -0.00716 |       0.00000 |       0.01243 |       0.00142 |       0.62704
     -0.01037 |       0.00000 |       0.01108 |       0.00243 |       0.62678
     -0.01285 |       0.00000 |       0.01029 |       0.00317 |       0.62762
     -0.01449 |       0.00000 |       0.00984 |       0.00343 |       0.62691
     -0.01594 |       0.00000 |       0.00927 |       0.00406 |       0.62725
     -0.01706 |       0.00000 |       0.00904 |       0.00455 |       0.62628
     -0.01784 |       0.00000 |       0.00870 |       0.00504 |       0.62631
     -0.01907 |       0.00000 |       0.00848 |       0.00556 |       0.62705
     -0.01993 |       0.00000 |       0.00830 |       0.00576 |       0.62535
Evaluating losses...
     -0.02195 |       0.00000 |       0.00801 |       0.00594 |     

     -0.01857 |       0.00000 |       0.00972 |       0.00578 |       0.59793
     -0.01923 |       0.00000 |       0.00939 |       0.00598 |       0.59813
Evaluating losses...
     -0.02124 |       0.00000 |       0.00906 |       0.00637 |       0.59828
----------------------------------
| EpLenMean       | 761          |
| EpRewMean       | -4.83        |
| EpThisIter      | 4            |
| EpisodesSoFar   | 7225         |
| TimeElapsed     | 1.35e+04     |
| TimestepsSoFar  | 4620288      |
| ev_tdlam_before | 0.796        |
| loss_ent        | 0.59828496   |
| loss_kl         | 0.0063681314 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.021236897 |
| loss_vf_loss    | 0.009063129  |
----------------------------------
********** Iteration 1128 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00091 |       0.00000 |       0.01216 |       0.00091 |       0.61336
     -0.00803 |       0.00000 |       0.00995 

********** Iteration 1133 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     9.10e-06 |       0.00000 |       0.01667 |       0.00130 |       0.63566
     -0.00795 |       0.00000 |       0.01380 |       0.00291 |       0.63489
     -0.01184 |       0.00000 |       0.01267 |       0.00345 |       0.63553
     -0.01356 |       0.00000 |       0.01214 |       0.00438 |       0.63393
     -0.01607 |       0.00000 |       0.01168 |       0.00476 |       0.63431
     -0.01712 |       0.00000 |       0.01124 |       0.00534 |       0.63319
     -0.01817 |       0.00000 |       0.01090 |       0.00561 |       0.63540
     -0.01941 |       0.00000 |       0.01065 |       0.00640 |       0.63414
     -0.02020 |       0.00000 |       0.01042 |       0.00645 |       0.63298
     -0.02105 |       0.00000 |       0.01010 |       0.00712 |       0.63338
Evaluating losses...
     -0.02301 |       0.00000 |       0.00988 |       0.00730 |     

     -0.01874 |       0.00000 |       0.00958 |       0.00488 |       0.60097
     -0.01958 |       0.00000 |       0.00935 |       0.00522 |       0.60090
Evaluating losses...
     -0.02184 |       0.00000 |       0.00897 |       0.00521 |       0.60108
----------------------------------
| EpLenMean       | 770          |
| EpRewMean       | -4.72        |
| EpThisIter      | 5            |
| EpisodesSoFar   | 7285         |
| TimeElapsed     | 1.36e+04     |
| TimestepsSoFar  | 4665344      |
| ev_tdlam_before | 0.837        |
| loss_ent        | 0.6010829    |
| loss_kl         | 0.0052092373 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.021835865 |
| loss_vf_loss    | 0.008972     |
----------------------------------
********** Iteration 1139 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00031 |       0.00000 |       0.01776 |       0.00129 |       0.63214
     -0.00925 |       0.00000 |       0.01478 

********** Iteration 1144 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00084 |       0.00000 |       0.01477 |       0.00102 |       0.63946
     -0.00684 |       0.00000 |       0.01231 |       0.00232 |       0.63825
     -0.00980 |       0.00000 |       0.01136 |       0.00298 |       0.63779
     -0.01172 |       0.00000 |       0.01059 |       0.00318 |       0.63812
     -0.01336 |       0.00000 |       0.01018 |       0.00354 |       0.63814
     -0.01481 |       0.00000 |       0.00977 |       0.00430 |       0.63889
     -0.01679 |       0.00000 |       0.00949 |       0.00468 |       0.63784
     -0.01750 |       0.00000 |       0.00931 |       0.00496 |       0.63938
     -0.01874 |       0.00000 |       0.00905 |       0.00539 |       0.63852
     -0.01939 |       0.00000 |       0.00882 |       0.00586 |       0.63958
Evaluating losses...
     -0.02181 |       0.00000 |       0.00860 |       0.00588 |     

     -0.01885 |       0.00000 |       0.01146 |       0.00640 |       0.58788
     -0.01949 |       0.00000 |       0.01120 |       0.00695 |       0.58662
Evaluating losses...
     -0.02235 |       0.00000 |       0.01079 |       0.00704 |       0.58670
----------------------------------
| EpLenMean       | 750          |
| EpRewMean       | -4.72        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 7343         |
| TimeElapsed     | 1.36e+04     |
| TimestepsSoFar  | 4710400      |
| ev_tdlam_before | 0.789        |
| loss_ent        | 0.5866999    |
| loss_kl         | 0.0070437007 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.02234611  |
| loss_vf_loss    | 0.010794996  |
----------------------------------
********** Iteration 1150 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -2.88e-05 |       0.00000 |       0.01577 |       0.00117 |       0.59729
     -0.00846 |       0.00000 |       0.01287 

********** Iteration 1155 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00064 |       0.00000 |       0.01222 |       0.00157 |       0.60202
     -0.00844 |       0.00000 |       0.01044 |       0.00262 |       0.60332
     -0.01183 |       0.00000 |       0.00972 |       0.00349 |       0.60374
     -0.01439 |       0.00000 |       0.00909 |       0.00422 |       0.60351
     -0.01650 |       0.00000 |       0.00877 |       0.00484 |       0.60457
     -0.01771 |       0.00000 |       0.00837 |       0.00546 |       0.60439
     -0.01867 |       0.00000 |       0.00818 |       0.00595 |       0.60591
     -0.01995 |       0.00000 |       0.00795 |       0.00614 |       0.60486
     -0.02094 |       0.00000 |       0.00773 |       0.00668 |       0.60541
     -0.02147 |       0.00000 |       0.00760 |       0.00698 |       0.60512
Evaluating losses...
     -0.02328 |       0.00000 |       0.00739 |       0.00784 |     

     -0.01842 |       0.00000 |       0.01130 |       0.00573 |       0.65851
     -0.01997 |       0.00000 |       0.01099 |       0.00553 |       0.65862
     -0.02099 |       0.00000 |       0.01078 |       0.00606 |       0.65909
     -0.02192 |       0.00000 |       0.01050 |       0.00643 |       0.65884
Evaluating losses...
     -0.02453 |       0.00000 |       0.01014 |       0.00652 |       0.65865
----------------------------------
| EpLenMean       | 762          |
| EpRewMean       | -4.85        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 7402         |
| TimeElapsed     | 1.43e+04     |
| TimestepsSoFar  | 4755456      |
| ev_tdlam_before | 0.806        |
| loss_ent        | 0.6586454    |
| loss_kl         | 0.006522882  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.024530904 |
| loss_vf_loss    | 0.010136848  |
----------------------------------
********** Iteration 1161 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss 

********** Iteration 1166 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00069 |       0.00000 |       0.01661 |       0.00093 |       0.58346
     -0.00895 |       0.00000 |       0.01332 |       0.00289 |       0.58199
     -0.01278 |       0.00000 |       0.01229 |       0.00355 |       0.58211
     -0.01506 |       0.00000 |       0.01143 |       0.00429 |       0.58199
     -0.01653 |       0.00000 |       0.01097 |       0.00489 |       0.58197
     -0.01788 |       0.00000 |       0.01057 |       0.00542 |       0.58265
     -0.01894 |       0.00000 |       0.01026 |       0.00566 |       0.58211
     -0.01974 |       0.00000 |       0.00992 |       0.00626 |       0.58131
     -0.02059 |       0.00000 |       0.00970 |       0.00667 |       0.58178
     -0.02140 |       0.00000 |       0.00950 |       0.00728 |       0.58290
Evaluating losses...
     -0.02321 |       0.00000 |       0.00910 |       0.00722 |     

     -0.01963 |       0.00000 |       0.00812 |       0.00567 |       0.59703
     -0.02027 |       0.00000 |       0.00799 |       0.00618 |       0.59624
Evaluating losses...
     -0.02243 |       0.00000 |       0.00778 |       0.00640 |       0.59635
----------------------------------
| EpLenMean       | 775          |
| EpRewMean       | -4.84        |
| EpThisIter      | 4            |
| EpisodesSoFar   | 7459         |
| TimeElapsed     | 1.43e+04     |
| TimestepsSoFar  | 4800512      |
| ev_tdlam_before | 0.849        |
| loss_ent        | 0.5963517    |
| loss_kl         | 0.0063977637 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022430716 |
| loss_vf_loss    | 0.0077774753 |
----------------------------------
********** Iteration 1172 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00069 |       0.00000 |       0.01430 |       0.00082 |       0.59422
     -0.00763 |       0.00000 |       0.01197 

********** Iteration 1177 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00084 |       0.00000 |       0.01307 |       0.00095 |       0.60187
     -0.00800 |       0.00000 |       0.01067 |       0.00236 |       0.60279
     -0.01184 |       0.00000 |       0.00995 |       0.00283 |       0.60051
     -0.01373 |       0.00000 |       0.00938 |       0.00343 |       0.60191
     -0.01586 |       0.00000 |       0.00896 |       0.00390 |       0.60124
     -0.01691 |       0.00000 |       0.00872 |       0.00442 |       0.60036
     -0.01858 |       0.00000 |       0.00838 |       0.00490 |       0.60084
     -0.01944 |       0.00000 |       0.00815 |       0.00542 |       0.59974
     -0.02029 |       0.00000 |       0.00800 |       0.00595 |       0.60063
     -0.02068 |       0.00000 |       0.00784 |       0.00659 |       0.60065
Evaluating losses...
     -0.02302 |       0.00000 |       0.00763 |       0.00686 |     

     -0.02121 |       0.00000 |       0.01010 |       0.00662 |       0.64608
     -0.02150 |       0.00000 |       0.00985 |       0.00701 |       0.64625
Evaluating losses...
     -0.02403 |       0.00000 |       0.00953 |       0.00700 |       0.64560
----------------------------------
| EpLenMean       | 790          |
| EpRewMean       | -4.79        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 7517         |
| TimeElapsed     | 1.44e+04     |
| TimestepsSoFar  | 4845568      |
| ev_tdlam_before | 0.821        |
| loss_ent        | 0.6455974    |
| loss_kl         | 0.00699881   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.024034787 |
| loss_vf_loss    | 0.009533738  |
----------------------------------
********** Iteration 1183 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00050 |       0.00000 |       0.01369 |       0.00139 |       0.59962
     -0.00723 |       0.00000 |       0.01175 

********** Iteration 1188 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00025 |       0.00000 |       0.01477 |       0.00105 |       0.62646
     -0.00768 |       0.00000 |       0.01231 |       0.00270 |       0.62590
     -0.01125 |       0.00000 |       0.01131 |       0.00344 |       0.62532
     -0.01357 |       0.00000 |       0.01064 |       0.00423 |       0.62638
     -0.01568 |       0.00000 |       0.01018 |       0.00478 |       0.62651
     -0.01731 |       0.00000 |       0.00980 |       0.00539 |       0.62538
     -0.01835 |       0.00000 |       0.00942 |       0.00594 |       0.62582
     -0.01897 |       0.00000 |       0.00926 |       0.00604 |       0.62521
     -0.02024 |       0.00000 |       0.00905 |       0.00671 |       0.62511
     -0.02103 |       0.00000 |       0.00888 |       0.00708 |       0.62552
Evaluating losses...
     -0.02277 |       0.00000 |       0.00858 |       0.00722 |     

     -0.01915 |       0.00000 |       0.00909 |       0.00608 |       0.61227
     -0.01897 |       0.00000 |       0.00884 |       0.00693 |       0.61215
Evaluating losses...
     -0.02186 |       0.00000 |       0.00861 |       0.00645 |       0.61363
----------------------------------
| EpLenMean       | 797          |
| EpRewMean       | -4.8         |
| EpThisIter      | 4            |
| EpisodesSoFar   | 7572         |
| TimeElapsed     | 1.44e+04     |
| TimestepsSoFar  | 4890624      |
| ev_tdlam_before | 0.799        |
| loss_ent        | 0.6136251    |
| loss_kl         | 0.0064510736 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.021861045 |
| loss_vf_loss    | 0.008610294  |
----------------------------------
********** Iteration 1194 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00080 |       0.00000 |       0.01203 |       0.00061 |       0.59345
     -0.00512 |       0.00000 |       0.01043 

********** Iteration 1199 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00201 |       0.00000 |       0.01242 |       0.00057 |       0.59262
     -0.00546 |       0.00000 |       0.01018 |       0.00170 |       0.59262
     -0.00801 |       0.00000 |       0.00921 |       0.00234 |       0.59186
     -0.01148 |       0.00000 |       0.00866 |       0.00279 |       0.59097
     -0.01279 |       0.00000 |       0.00827 |       0.00339 |       0.59099
     -0.01429 |       0.00000 |       0.00788 |       0.00382 |       0.59048
     -0.01537 |       0.00000 |       0.00770 |       0.00435 |       0.59117
     -0.01680 |       0.00000 |       0.00742 |       0.00492 |       0.59130
     -0.01776 |       0.00000 |       0.00722 |       0.00526 |       0.59029
     -0.01847 |       0.00000 |       0.00706 |       0.00557 |       0.59010
Evaluating losses...
     -0.02117 |       0.00000 |       0.00684 |       0.00581 |     

     -0.01917 |       0.00000 |       0.00778 |       0.00604 |       0.57110
     -0.02015 |       0.00000 |       0.00760 |       0.00659 |       0.57057
Evaluating losses...
     -0.02241 |       0.00000 |       0.00733 |       0.00688 |       0.57136
----------------------------------
| EpLenMean       | 861          |
| EpRewMean       | -4.8         |
| EpThisIter      | 4            |
| EpisodesSoFar   | 7621         |
| TimeElapsed     | 1.45e+04     |
| TimestepsSoFar  | 4935680      |
| ev_tdlam_before | 0.839        |
| loss_ent        | 0.57135546   |
| loss_kl         | 0.0068814377 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022410104 |
| loss_vf_loss    | 0.007332891  |
----------------------------------
********** Iteration 1205 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00063 |       0.00000 |       0.01505 |       0.00104 |       0.59099
     -0.00603 |       0.00000 |       0.01175 

********** Iteration 1210 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00081 |       0.00000 |       0.01425 |       0.00099 |       0.59788
     -0.00661 |       0.00000 |       0.01093 |       0.00219 |       0.59931
     -0.00992 |       0.00000 |       0.00981 |       0.00306 |       0.60212
     -0.01246 |       0.00000 |       0.00926 |       0.00309 |       0.59927
     -0.01454 |       0.00000 |       0.00878 |       0.00408 |       0.60127
     -0.01540 |       0.00000 |       0.00834 |       0.00458 |       0.59965
     -0.01657 |       0.00000 |       0.00808 |       0.00521 |       0.60030
     -0.01820 |       0.00000 |       0.00788 |       0.00562 |       0.60095
     -0.01878 |       0.00000 |       0.00764 |       0.00599 |       0.60036
     -0.01950 |       0.00000 |       0.00742 |       0.00668 |       0.60218
Evaluating losses...
     -0.02183 |       0.00000 |       0.00714 |       0.00685 |     

     -0.01985 |       0.00000 |       0.00780 |       0.00734 |       0.65613
     -0.02096 |       0.00000 |       0.00765 |       0.00746 |       0.65605
Evaluating losses...
     -0.02268 |       0.00000 |       0.00739 |       0.00836 |       0.65772
----------------------------------
| EpLenMean       | 917          |
| EpRewMean       | -4.77        |
| EpThisIter      | 5            |
| EpisodesSoFar   | 7671         |
| TimeElapsed     | 1.45e+04     |
| TimestepsSoFar  | 4980736      |
| ev_tdlam_before | 0.826        |
| loss_ent        | 0.6577186    |
| loss_kl         | 0.008359235  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022681015 |
| loss_vf_loss    | 0.007386633  |
----------------------------------
********** Iteration 1216 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00139 |       0.00000 |       0.01168 |       0.00074 |       0.66463
     -0.00787 |       0.00000 |       0.00996 

********** Iteration 1221 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00094 |       0.00000 |       0.01271 |       0.00194 |       0.62832
     -0.00794 |       0.00000 |       0.01007 |       0.00247 |       0.62662
     -0.01124 |       0.00000 |       0.00921 |       0.00299 |       0.62727
     -0.01393 |       0.00000 |       0.00870 |       0.00369 |       0.62830
     -0.01532 |       0.00000 |       0.00824 |       0.00432 |       0.62928
     -0.01661 |       0.00000 |       0.00801 |       0.00541 |       0.62816
     -0.01793 |       0.00000 |       0.00775 |       0.00557 |       0.62900
     -0.01896 |       0.00000 |       0.00749 |       0.00568 |       0.62853
     -0.01972 |       0.00000 |       0.00733 |       0.00666 |       0.62866
     -0.02086 |       0.00000 |       0.00720 |       0.00648 |       0.63081
Evaluating losses...
     -0.02305 |       0.00000 |       0.00695 |       0.00675 |     

     -0.01995 |       0.00000 |       0.00797 |       0.00577 |       0.62132
     -0.01965 |       0.00000 |       0.00780 |       0.00602 |       0.62162
Evaluating losses...
     -0.02266 |       0.00000 |       0.00762 |       0.00609 |       0.62188
----------------------------------
| EpLenMean       | 885          |
| EpRewMean       | -4.86        |
| EpThisIter      | 4            |
| EpisodesSoFar   | 7722         |
| TimeElapsed     | 1.52e+04     |
| TimestepsSoFar  | 5025792      |
| ev_tdlam_before | 0.741        |
| loss_ent        | 0.621884     |
| loss_kl         | 0.0060879868 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022663157 |
| loss_vf_loss    | 0.007618427  |
----------------------------------
********** Iteration 1227 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00212 |       0.00000 |       0.01188 |       0.00143 |       0.64030
     -0.00538 |       0.00000 |       0.01018 

********** Iteration 1232 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00217 |       0.00000 |       0.01254 |       0.00077 |       0.63632
     -0.00498 |       0.00000 |       0.01048 |       0.00261 |       0.64217
     -0.00816 |       0.00000 |       0.00986 |       0.00302 |       0.64155
     -0.01053 |       0.00000 |       0.00944 |       0.00342 |       0.64179
     -0.01237 |       0.00000 |       0.00915 |       0.00381 |       0.64266
     -0.01401 |       0.00000 |       0.00884 |       0.00458 |       0.64093
     -0.01474 |       0.00000 |       0.00861 |       0.00503 |       0.64363
     -0.01520 |       0.00000 |       0.00844 |       0.00514 |       0.64275
     -0.01659 |       0.00000 |       0.00823 |       0.00553 |       0.64265
     -0.01741 |       0.00000 |       0.00812 |       0.00626 |       0.64316
Evaluating losses...
     -0.01931 |       0.00000 |       0.00791 |       0.00639 |     

     -0.01928 |       0.00000 |       0.00844 |       0.00630 |       0.66806
     -0.02016 |       0.00000 |       0.00827 |       0.00697 |       0.66669
Evaluating losses...
     -0.02239 |       0.00000 |       0.00795 |       0.00705 |       0.66736
----------------------------------
| EpLenMean       | 926          |
| EpRewMean       | -4.91        |
| EpThisIter      | 4            |
| EpisodesSoFar   | 7768         |
| TimeElapsed     | 1.53e+04     |
| TimestepsSoFar  | 5070848      |
| ev_tdlam_before | 0.785        |
| loss_ent        | 0.66735893   |
| loss_kl         | 0.007047932  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022393426 |
| loss_vf_loss    | 0.007953458  |
----------------------------------
********** Iteration 1238 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00093 |       0.00000 |       0.01489 |       0.00106 |       0.62448
     -0.00636 |       0.00000 |       0.01181 

********** Iteration 1243 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00035 |       0.00000 |       0.01271 |       0.00107 |       0.61917
     -0.00790 |       0.00000 |       0.01047 |       0.00247 |       0.61752
     -0.01096 |       0.00000 |       0.00970 |       0.00332 |       0.61753
     -0.01348 |       0.00000 |       0.00911 |       0.00354 |       0.61763
     -0.01510 |       0.00000 |       0.00863 |       0.00391 |       0.61761
     -0.01599 |       0.00000 |       0.00837 |       0.00459 |       0.61746
     -0.01745 |       0.00000 |       0.00803 |       0.00496 |       0.61756
     -0.01844 |       0.00000 |       0.00778 |       0.00496 |       0.61702
     -0.01931 |       0.00000 |       0.00764 |       0.00545 |       0.61706
     -0.02048 |       0.00000 |       0.00743 |       0.00572 |       0.61694
Evaluating losses...
     -0.02179 |       0.00000 |       0.00715 |       0.00591 |     

     -0.02109 |       0.00000 |       0.01114 |       0.00596 |       0.63167
     -0.02240 |       0.00000 |       0.01099 |       0.00644 |       0.63250
Evaluating losses...
     -0.02442 |       0.00000 |       0.01067 |       0.00695 |       0.63383
----------------------------------
| EpLenMean       | 1.03e+03     |
| EpRewMean       | -4.81        |
| EpThisIter      | 3            |
| EpisodesSoFar   | 7807         |
| TimeElapsed     | 1.53e+04     |
| TimestepsSoFar  | 5115904      |
| ev_tdlam_before | 0.744        |
| loss_ent        | 0.6338335    |
| loss_kl         | 0.0069484985 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.024424093 |
| loss_vf_loss    | 0.010665169  |
----------------------------------
********** Iteration 1249 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00043 |       0.00000 |       0.01478 |       0.00144 |       0.62319
     -0.00717 |       0.00000 |       0.01182 

********** Iteration 1254 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00107 |       0.00000 |       0.01337 |       0.00102 |       0.60623
     -0.00711 |       0.00000 |       0.01021 |       0.00230 |       0.60718
     -0.01032 |       0.00000 |       0.00898 |       0.00293 |       0.60482
     -0.01292 |       0.00000 |       0.00809 |       0.00367 |       0.60460
     -0.01515 |       0.00000 |       0.00755 |       0.00364 |       0.60209
     -0.01667 |       0.00000 |       0.00716 |       0.00409 |       0.60217
     -0.01771 |       0.00000 |       0.00686 |       0.00478 |       0.60155
     -0.01870 |       0.00000 |       0.00661 |       0.00505 |       0.60073
     -0.01962 |       0.00000 |       0.00640 |       0.00594 |       0.60032
     -0.02023 |       0.00000 |       0.00616 |       0.00616 |       0.60060
Evaluating losses...
     -0.02208 |       0.00000 |       0.00590 |       0.00648 |     

     -0.02007 |       0.00000 |       0.00618 |       0.00633 |       0.62344
     -0.02082 |       0.00000 |       0.00607 |       0.00685 |       0.62280
Evaluating losses...
     -0.02300 |       0.00000 |       0.00583 |       0.00700 |       0.62333
----------------------------------
| EpLenMean       | 1.1e+03      |
| EpRewMean       | -4.73        |
| EpThisIter      | 3            |
| EpisodesSoFar   | 7848         |
| TimeElapsed     | 1.54e+04     |
| TimestepsSoFar  | 5160960      |
| ev_tdlam_before | 0.842        |
| loss_ent        | 0.62332815   |
| loss_kl         | 0.007001625  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022998922 |
| loss_vf_loss    | 0.005830819  |
----------------------------------
********** Iteration 1260 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00017 |       0.00000 |       0.01129 |       0.00099 |       0.60484
     -0.00819 |       0.00000 |       0.00890 

********** Iteration 1265 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00072 |       0.00000 |       0.01352 |       0.00113 |       0.67741
     -0.00714 |       0.00000 |       0.01078 |       0.00270 |       0.67622
     -0.01127 |       0.00000 |       0.00965 |       0.00319 |       0.67736
     -0.01376 |       0.00000 |       0.00898 |       0.00391 |       0.67759
     -0.01529 |       0.00000 |       0.00844 |       0.00451 |       0.67932
     -0.01671 |       0.00000 |       0.00808 |       0.00550 |       0.68019
     -0.01828 |       0.00000 |       0.00787 |       0.00548 |       0.67952
     -0.01904 |       0.00000 |       0.00754 |       0.00602 |       0.68095
     -0.02020 |       0.00000 |       0.00732 |       0.00648 |       0.68032
     -0.02073 |       0.00000 |       0.00714 |       0.00719 |       0.68100
Evaluating losses...
     -0.02287 |       0.00000 |       0.00686 |       0.00690 |     

     -0.01930 |       0.00000 |       0.00795 |       0.00585 |       0.62480
     -0.01967 |       0.00000 |       0.00780 |       0.00626 |       0.62626
Evaluating losses...
     -0.02227 |       0.00000 |       0.00749 |       0.00670 |       0.62462
----------------------------------
| EpLenMean       | 1.13e+03     |
| EpRewMean       | -4.74        |
| EpThisIter      | 3            |
| EpisodesSoFar   | 7887         |
| TimeElapsed     | 1.54e+04     |
| TimestepsSoFar  | 5206016      |
| ev_tdlam_before | 0.797        |
| loss_ent        | 0.6246243    |
| loss_kl         | 0.006699168  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022270977 |
| loss_vf_loss    | 0.007493421  |
----------------------------------
********** Iteration 1271 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00143 |       0.00000 |       0.01371 |       0.00084 |       0.65872
     -0.00666 |       0.00000 |       0.01087 

********** Iteration 1276 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00039 |       0.00000 |       0.01547 |       0.00154 |       0.61104
     -0.00833 |       0.00000 |       0.01200 |       0.00377 |       0.61664
     -0.01183 |       0.00000 |       0.01068 |       0.00313 |       0.61369
     -0.01409 |       0.00000 |       0.00991 |       0.00369 |       0.61362
     -0.01533 |       0.00000 |       0.00931 |       0.00423 |       0.61440
     -0.01687 |       0.00000 |       0.00896 |       0.00474 |       0.61546
     -0.01837 |       0.00000 |       0.00857 |       0.00508 |       0.61460
     -0.01956 |       0.00000 |       0.00834 |       0.00553 |       0.61590
     -0.02055 |       0.00000 |       0.00802 |       0.00583 |       0.61667
     -0.02120 |       0.00000 |       0.00788 |       0.00628 |       0.61604
Evaluating losses...
     -0.02325 |       0.00000 |       0.00760 |       0.00632 |     

     -0.01821 |       0.00000 |       0.00933 |       0.00547 |       0.64540
     -0.01874 |       0.00000 |       0.00902 |       0.00594 |       0.64428
     -0.01966 |       0.00000 |       0.00878 |       0.00660 |       0.64543
     -0.02082 |       0.00000 |       0.00856 |       0.00665 |       0.64584
Evaluating losses...
     -0.02331 |       0.00000 |       0.00821 |       0.00687 |       0.64572
----------------------------------
| EpLenMean       | 1.2e+03      |
| EpRewMean       | -4.83        |
| EpThisIter      | 3            |
| EpisodesSoFar   | 7923         |
| TimeElapsed     | 1.64e+04     |
| TimestepsSoFar  | 5251072      |
| ev_tdlam_before | 0.773        |
| loss_ent        | 0.6457197    |
| loss_kl         | 0.0068702498 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.023305498 |
| loss_vf_loss    | 0.008211873  |
----------------------------------
********** Iteration 1282 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss 

********** Iteration 1287 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00023 |       0.00000 |       0.00887 |       0.00158 |       0.64045
     -0.00821 |       0.00000 |       0.00782 |       0.00259 |       0.64131
     -0.01090 |       0.00000 |       0.00723 |       0.00365 |       0.64223
     -0.01380 |       0.00000 |       0.00686 |       0.00416 |       0.64103
     -0.01544 |       0.00000 |       0.00660 |       0.00445 |       0.64161
     -0.01747 |       0.00000 |       0.00639 |       0.00520 |       0.64058
     -0.01830 |       0.00000 |       0.00622 |       0.00635 |       0.64175
     -0.01953 |       0.00000 |       0.00602 |       0.00636 |       0.64102
     -0.02063 |       0.00000 |       0.00590 |       0.00659 |       0.64016
     -0.02174 |       0.00000 |       0.00583 |       0.00715 |       0.64132
Evaluating losses...
     -0.02277 |       0.00000 |       0.00558 |       0.00721 |     

     -0.02126 |       0.00000 |       0.00707 |       0.00640 |       0.63847
     -0.02207 |       0.00000 |       0.00687 |       0.00692 |       0.63899
Evaluating losses...
     -0.02424 |       0.00000 |       0.00657 |       0.00734 |       0.63824
---------------------------------
| EpLenMean       | 1.26e+03    |
| EpRewMean       | -4.88       |
| EpThisIter      | 3           |
| EpisodesSoFar   | 7956        |
| TimeElapsed     | 1.64e+04    |
| TimestepsSoFar  | 5296128     |
| ev_tdlam_before | 0.72        |
| loss_ent        | 0.6382354   |
| loss_kl         | 0.007338644 |
| loss_pol_entpen | 0.0         |
| loss_pol_surr   | -0.02423868 |
| loss_vf_loss    | 0.006570819 |
---------------------------------
********** Iteration 1293 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00022 |       0.00000 |       0.01789 |       0.00163 |       0.66966
     -0.00787 |       0.00000 |       0.01512 |       0.0026

********** Iteration 1298 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00080 |       0.00000 |       0.01221 |       0.00111 |       0.69587
     -0.00701 |       0.00000 |       0.00960 |       0.00214 |       0.69690
     -0.01096 |       0.00000 |       0.00878 |       0.00301 |       0.69577
     -0.01347 |       0.00000 |       0.00817 |       0.00363 |       0.69675
     -0.01531 |       0.00000 |       0.00773 |       0.00401 |       0.69525
     -0.01656 |       0.00000 |       0.00736 |       0.00433 |       0.69608
     -0.01768 |       0.00000 |       0.00700 |       0.00502 |       0.69632
     -0.01857 |       0.00000 |       0.00686 |       0.00515 |       0.69714
     -0.01921 |       0.00000 |       0.00661 |       0.00591 |       0.69668
     -0.02028 |       0.00000 |       0.00648 |       0.00623 |       0.69791
Evaluating losses...
     -0.02257 |       0.00000 |       0.00618 |       0.00640 |     

     -0.01814 |       0.00000 |       0.00655 |       0.00583 |       0.62033
     -0.01897 |       0.00000 |       0.00636 |       0.00609 |       0.62240
Evaluating losses...
     -0.02128 |       0.00000 |       0.00617 |       0.00670 |       0.62086
----------------------------------
| EpLenMean       | 1.37e+03     |
| EpRewMean       | -4.84        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 7985         |
| TimeElapsed     | 1.64e+04     |
| TimestepsSoFar  | 5341184      |
| ev_tdlam_before | 0.727        |
| loss_ent        | 0.62085867   |
| loss_kl         | 0.0067025875 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.021279462 |
| loss_vf_loss    | 0.006170621  |
----------------------------------
********** Iteration 1304 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00036 |       0.00000 |       0.01593 |       0.00158 |       0.67102
     -0.00897 |       0.00000 |       0.01285 

********** Iteration 1309 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00014 |       0.00000 |       0.01477 |       0.00113 |       0.68570
     -0.00801 |       0.00000 |       0.01257 |       0.00269 |       0.68571
     -0.01163 |       0.00000 |       0.01162 |       0.00301 |       0.68465
     -0.01422 |       0.00000 |       0.01104 |       0.00394 |       0.68455
     -0.01581 |       0.00000 |       0.01062 |       0.00429 |       0.68600
     -0.01690 |       0.00000 |       0.01031 |       0.00537 |       0.68401
     -0.01809 |       0.00000 |       0.01004 |       0.00551 |       0.68473
     -0.01944 |       0.00000 |       0.00978 |       0.00603 |       0.68501
     -0.02038 |       0.00000 |       0.00966 |       0.00653 |       0.68519
     -0.02136 |       0.00000 |       0.00938 |       0.00653 |       0.68628
Evaluating losses...
     -0.02341 |       0.00000 |       0.00906 |       0.00652 |     

     -0.02155 |       0.00000 |       0.00756 |       0.00670 |       0.66267
     -0.02209 |       0.00000 |       0.00739 |       0.00697 |       0.66301
Evaluating losses...
     -0.02462 |       0.00000 |       0.00713 |       0.00683 |       0.66145
----------------------------------
| EpLenMean       | 1.51e+03     |
| EpRewMean       | -4.84        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 8011         |
| TimeElapsed     | 1.65e+04     |
| TimestepsSoFar  | 5386240      |
| ev_tdlam_before | 0.676        |
| loss_ent        | 0.66144544   |
| loss_kl         | 0.006834713  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.024618356 |
| loss_vf_loss    | 0.0071263225 |
----------------------------------
********** Iteration 1315 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00101 |       0.00000 |       0.01167 |       0.00145 |       0.68473
     -0.00886 |       0.00000 |       0.00917 

********** Iteration 1320 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00045 |       0.00000 |       0.01018 |       0.00113 |       0.70917
     -0.00744 |       0.00000 |       0.00858 |       0.00299 |       0.71147
     -0.00951 |       0.00000 |       0.00790 |       0.00264 |       0.70952
     -0.01187 |       0.00000 |       0.00738 |       0.00357 |       0.71025
     -0.01357 |       0.00000 |       0.00703 |       0.00395 |       0.71094
     -0.01422 |       0.00000 |       0.00676 |       0.00433 |       0.70998
     -0.01599 |       0.00000 |       0.00656 |       0.00495 |       0.71009
     -0.01683 |       0.00000 |       0.00641 |       0.00588 |       0.71068
     -0.01730 |       0.00000 |       0.00619 |       0.00580 |       0.70922
     -0.01821 |       0.00000 |       0.00610 |       0.00642 |       0.71057
Evaluating losses...
     -0.02101 |       0.00000 |       0.00586 |       0.00640 |     

     -0.01886 |       0.00000 |       0.00907 |       0.00634 |       0.68808
     -0.01949 |       0.00000 |       0.00879 |       0.00692 |       0.68802
Evaluating losses...
     -0.02072 |       0.00000 |       0.00850 |       0.00751 |       0.69036
---------------------------------
| EpLenMean       | 1.64e+03    |
| EpRewMean       | -4.73       |
| EpThisIter      | 2           |
| EpisodesSoFar   | 8034        |
| TimeElapsed     | 1.65e+04    |
| TimestepsSoFar  | 5431296     |
| ev_tdlam_before | 0.608       |
| loss_ent        | 0.690356    |
| loss_kl         | 0.007511303 |
| loss_pol_entpen | 0.0         |
| loss_pol_surr   | -0.02071584 |
| loss_vf_loss    | 0.008502844 |
---------------------------------
********** Iteration 1326 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00072 |       0.00000 |       0.01580 |       0.00186 |       0.71013
     -0.00719 |       0.00000 |       0.01347 |       0.0027

********** Iteration 1331 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00011 |       0.00000 |       0.01174 |       0.00182 |       0.69092
     -0.00702 |       0.00000 |       0.00970 |       0.00286 |       0.68785
     -0.01041 |       0.00000 |       0.00870 |       0.00323 |       0.68829
     -0.01202 |       0.00000 |       0.00807 |       0.00391 |       0.69051
     -0.01389 |       0.00000 |       0.00773 |       0.00435 |       0.69265
     -0.01533 |       0.00000 |       0.00745 |       0.00481 |       0.69108
     -0.01607 |       0.00000 |       0.00712 |       0.00506 |       0.69260
     -0.01694 |       0.00000 |       0.00696 |       0.00565 |       0.69083
     -0.01737 |       0.00000 |       0.00674 |       0.00652 |       0.69281
     -0.01835 |       0.00000 |       0.00660 |       0.00683 |       0.69232
Evaluating losses...
     -0.02041 |       0.00000 |       0.00635 |       0.00666 |     

     -0.01866 |       0.00000 |       0.00309 |       0.00708 |       0.67289
     -0.01876 |       0.00000 |       0.00300 |       0.00721 |       0.67472
Evaluating losses...
     -0.02144 |       0.00000 |       0.00285 |       0.00740 |       0.67371
----------------------------------
| EpLenMean       | 1.81e+03     |
| EpRewMean       | -4.67        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 8056         |
| TimeElapsed     | 1.66e+04     |
| TimestepsSoFar  | 5476352      |
| ev_tdlam_before | 0.525        |
| loss_ent        | 0.6737145    |
| loss_kl         | 0.0074011534 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.021439958 |
| loss_vf_loss    | 0.0028549859 |
----------------------------------
********** Iteration 1337 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00084 |       0.00000 |       0.00770 |       0.00142 |       0.71026
     -0.00690 |       0.00000 |       0.00627 

********** Iteration 1342 ************
Eval num_timesteps=5496832, episode_reward=-4.56 +/- 0.78
Episode length: 2037.12 +/- 587.34
New best mean reward!
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00094 |       0.00000 |       0.01332 |       0.00205 |       0.66868
     -0.00582 |       0.00000 |       0.01084 |       0.00289 |       0.66914
     -0.00897 |       0.00000 |       0.00992 |       0.00308 |       0.66892
     -0.01164 |       0.00000 |       0.00941 |       0.00393 |       0.67123
     -0.01332 |       0.00000 |       0.00894 |       0.00418 |       0.67119
     -0.01378 |       0.00000 |       0.00874 |       0.00495 |       0.67277
     -0.01550 |       0.00000 |       0.00848 |       0.00569 |       0.67322
     -0.01663 |       0.00000 |       0.00828 |       0.00608 |       0.67167
     -0.01710 |       0.00000 |       0.00810 |       0.00698 |       0.67320
     -0.01834 |       0.00000 |       0.00799 |     

     -0.01660 |       0.00000 |       0.00848 |       0.00472 |       0.66663
     -0.01791 |       0.00000 |       0.00829 |       0.00558 |       0.66683
     -0.01780 |       0.00000 |       0.00804 |       0.00638 |       0.66860
     -0.01857 |       0.00000 |       0.00789 |       0.00645 |       0.66635
Evaluating losses...
     -0.02048 |       0.00000 |       0.00764 |       0.00729 |       0.66807
----------------------------------
| EpLenMean       | 2e+03        |
| EpRewMean       | -4.63        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 8074         |
| TimeElapsed     | 1.8e+04      |
| TimestepsSoFar  | 5521408      |
| ev_tdlam_before | 0.689        |
| loss_ent        | 0.66807085   |
| loss_kl         | 0.00728831   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020481298 |
| loss_vf_loss    | 0.007642692  |
----------------------------------
********** Iteration 1348 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss 

********** Iteration 1353 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -1.03e-06 |       0.00000 |       0.00775 |       0.00099 |       0.64020
     -0.00802 |       0.00000 |       0.00631 |       0.00247 |       0.64010
     -0.01091 |       0.00000 |       0.00583 |       0.00343 |       0.63969
     -0.01251 |       0.00000 |       0.00540 |       0.00408 |       0.63983
     -0.01487 |       0.00000 |       0.00512 |       0.00431 |       0.63911
     -0.01505 |       0.00000 |       0.00494 |       0.00491 |       0.63905
     -0.01614 |       0.00000 |       0.00481 |       0.00533 |       0.64101
     -0.01650 |       0.00000 |       0.00460 |       0.00586 |       0.63945
     -0.01790 |       0.00000 |       0.00446 |       0.00608 |       0.64006
     -0.01878 |       0.00000 |       0.00435 |       0.00685 |       0.64082
Evaluating losses...
     -0.02021 |       0.00000 |       0.00419 |       0.00673 |     

     -0.01967 |       0.00000 |       0.00608 |       0.00635 |       0.70795
     -0.02024 |       0.00000 |       0.00592 |       0.00672 |       0.70875
Evaluating losses...
     -0.02239 |       0.00000 |       0.00580 |       0.00704 |       0.70855
----------------------------------
| EpLenMean       | 2.17e+03     |
| EpRewMean       | -4.47        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 8091         |
| TimeElapsed     | 1.8e+04      |
| TimestepsSoFar  | 5566464      |
| ev_tdlam_before | 0.61         |
| loss_ent        | 0.7085461    |
| loss_kl         | 0.007038844  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022385698 |
| loss_vf_loss    | 0.0057994667 |
----------------------------------
********** Iteration 1359 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00157 |       0.00000 |       0.01200 |       0.00148 |       0.65678
     -0.00762 |       0.00000 |       0.01021 

********** Iteration 1364 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00150 |       0.00000 |       0.00767 |       0.00133 |       0.74651
     -0.00679 |       0.00000 |       0.00630 |       0.00240 |       0.74516
     -0.00961 |       0.00000 |       0.00568 |       0.00280 |       0.74561
     -0.01275 |       0.00000 |       0.00528 |       0.00355 |       0.74799
     -0.01364 |       0.00000 |       0.00497 |       0.00433 |       0.74736
     -0.01530 |       0.00000 |       0.00477 |       0.00464 |       0.74804
     -0.01636 |       0.00000 |       0.00461 |       0.00516 |       0.74890
     -0.01673 |       0.00000 |       0.00449 |       0.00572 |       0.74829
     -0.01892 |       0.00000 |       0.00439 |       0.00588 |       0.74878
     -0.01965 |       0.00000 |       0.00431 |       0.00646 |       0.75115
Evaluating losses...
     -0.02101 |       0.00000 |       0.00422 |       0.00698 |     

     -0.01922 |       0.00000 |       0.00221 |       0.00677 |       0.76014
     -0.02034 |       0.00000 |       0.00218 |       0.00716 |       0.76021
Evaluating losses...
     -0.02123 |       0.00000 |       0.00209 |       0.00744 |       0.75850
----------------------------------
| EpLenMean       | 2.32e+03     |
| EpRewMean       | -4.26        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 8108         |
| TimeElapsed     | 1.81e+04     |
| TimestepsSoFar  | 5611520      |
| ev_tdlam_before | 0.468        |
| loss_ent        | 0.7584986    |
| loss_kl         | 0.00744124   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.02122775  |
| loss_vf_loss    | 0.0020904173 |
----------------------------------
********** Iteration 1370 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00105 |       0.00000 |       0.00637 |       0.00232 |       0.68495
     -0.00685 |       0.00000 |       0.00522 

********** Iteration 1375 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00092 |       0.00000 |       0.00814 |       0.00070 |       0.71878
     -0.00505 |       0.00000 |       0.00554 |       0.00234 |       0.72054
     -0.00740 |       0.00000 |       0.00488 |       0.00348 |       0.72307
     -0.00951 |       0.00000 |       0.00448 |       0.00347 |       0.72216
     -0.01074 |       0.00000 |       0.00426 |       0.00418 |       0.72312
     -0.01196 |       0.00000 |       0.00401 |       0.00458 |       0.72506
     -0.01334 |       0.00000 |       0.00390 |       0.00492 |       0.72513
     -0.01387 |       0.00000 |       0.00374 |       0.00539 |       0.72502
     -0.01459 |       0.00000 |       0.00368 |       0.00590 |       0.72477
     -0.01559 |       0.00000 |       0.00353 |       0.00659 |       0.72603
Evaluating losses...
     -0.01623 |       0.00000 |       0.00339 |       0.00771 |     

     -0.01639 |       0.00000 |       0.00553 |       0.00874 |       0.74846
     -0.01745 |       0.00000 |       0.00532 |       0.00844 |       0.74758
Evaluating losses...
     -0.01937 |       0.00000 |       0.00521 |       0.00831 |       0.74869
----------------------------------
| EpLenMean       | 2.45e+03     |
| EpRewMean       | -4.02        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 8123         |
| TimeElapsed     | 1.81e+04     |
| TimestepsSoFar  | 5656576      |
| ev_tdlam_before | 0.573        |
| loss_ent        | 0.74868685   |
| loss_kl         | 0.008310787  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.019372279 |
| loss_vf_loss    | 0.005211922  |
----------------------------------
********** Iteration 1381 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00187 |       0.00000 |       0.00835 |       0.00296 |       0.73485
     -0.00708 |       0.00000 |       0.00684 

********** Iteration 1386 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00031 |       0.00000 |       0.01074 |       0.00160 |       0.72174
     -0.00690 |       0.00000 |       0.00797 |       0.00229 |       0.72536
     -0.00916 |       0.00000 |       0.00728 |       0.00266 |       0.72284
     -0.01130 |       0.00000 |       0.00685 |       0.00340 |       0.72444
     -0.01237 |       0.00000 |       0.00649 |       0.00424 |       0.72462
     -0.01345 |       0.00000 |       0.00629 |       0.00468 |       0.72431
     -0.01460 |       0.00000 |       0.00610 |       0.00520 |       0.72455
     -0.01556 |       0.00000 |       0.00586 |       0.00589 |       0.72611
     -0.01607 |       0.00000 |       0.00572 |       0.00641 |       0.72478
     -0.01641 |       0.00000 |       0.00559 |       0.00673 |       0.72153
Evaluating losses...
     -0.01813 |       0.00000 |       0.00544 |       0.00714 |     

     -0.01859 |       0.00000 |       0.00415 |       0.00637 |       0.69162
     -0.01865 |       0.00000 |       0.00395 |       0.00690 |       0.69211
Evaluating losses...
     -0.02142 |       0.00000 |       0.00377 |       0.00711 |       0.69292
----------------------------------
| EpLenMean       | 2.62e+03     |
| EpRewMean       | -3.68        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 8139         |
| TimeElapsed     | 1.82e+04     |
| TimestepsSoFar  | 5701632      |
| ev_tdlam_before | 0.663        |
| loss_ent        | 0.6929241    |
| loss_kl         | 0.0071086828 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.021423642 |
| loss_vf_loss    | 0.0037734732 |
----------------------------------
********** Iteration 1392 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00069 |       0.00000 |       0.00906 |       0.00146 |       0.71145
     -0.00536 |       0.00000 |       0.00723 

********** Iteration 1397 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00228 |       0.00000 |       0.00744 |       0.00234 |       0.72997
     -0.00749 |       0.00000 |       0.00586 |       0.00318 |       0.72580
     -0.01099 |       0.00000 |       0.00520 |       0.00436 |       0.72916
     -0.01386 |       0.00000 |       0.00481 |       0.00598 |       0.72930
     -0.01475 |       0.00000 |       0.00444 |       0.00573 |       0.72986
     -0.01684 |       0.00000 |       0.00420 |       0.00624 |       0.72787
     -0.01802 |       0.00000 |       0.00404 |       0.00687 |       0.72978
     -0.01903 |       0.00000 |       0.00391 |       0.00804 |       0.72830
     -0.01936 |       0.00000 |       0.00370 |       0.00843 |       0.73014
     -0.02019 |       0.00000 |       0.00356 |       0.00879 |       0.72989
Evaluating losses...
     -0.02164 |       0.00000 |       0.00347 |       0.00896 |     

     -0.01815 |       0.00000 |       0.00412 |       0.00659 |       0.75094
     -0.01869 |       0.00000 |       0.00405 |       0.00666 |       0.75299
Evaluating losses...
     -0.02016 |       0.00000 |       0.00386 |       0.00672 |       0.74986
----------------------------------
| EpLenMean       | 2.71e+03     |
| EpRewMean       | -3.33        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 8155         |
| TimeElapsed     | 1.82e+04     |
| TimestepsSoFar  | 5746688      |
| ev_tdlam_before | 0.683        |
| loss_ent        | 0.74986213   |
| loss_kl         | 0.0067235134 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020162154 |
| loss_vf_loss    | 0.0038596569 |
----------------------------------
********** Iteration 1403 ************
Eval num_timesteps=5746688, episode_reward=-2.06 +/- 1.60
Episode length: 2937.33 +/- 230.63
New best mean reward!
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.0

********** Iteration 1408 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00219 |       0.00000 |       0.00566 |       0.00257 |       0.72453
     -0.00469 |       0.00000 |       0.00454 |       0.00292 |       0.71958
     -0.00798 |       0.00000 |       0.00419 |       0.00315 |       0.72247
     -0.01017 |       0.00000 |       0.00404 |       0.00391 |       0.71932
     -0.01201 |       0.00000 |       0.00389 |       0.00452 |       0.72005
     -0.01283 |       0.00000 |       0.00371 |       0.00474 |       0.71994
     -0.01433 |       0.00000 |       0.00361 |       0.00524 |       0.71945
     -0.01476 |       0.00000 |       0.00356 |       0.00524 |       0.71975
     -0.01576 |       0.00000 |       0.00344 |       0.00576 |       0.71970
     -0.01633 |       0.00000 |       0.00336 |       0.00640 |       0.71747
Evaluating losses...
     -0.01851 |       0.00000 |       0.00329 |       0.00654 |     

     -0.02011 |       0.00000 |       0.00476 |       0.00628 |       0.72176
     -0.02043 |       0.00000 |       0.00464 |       0.00643 |       0.72276
Evaluating losses...
     -0.02221 |       0.00000 |       0.00444 |       0.00648 |       0.72261
----------------------------------
| EpLenMean       | 2.82e+03     |
| EpRewMean       | -3.02        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 8169         |
| TimeElapsed     | 2.03e+04     |
| TimestepsSoFar  | 5791744      |
| ev_tdlam_before | 0.579        |
| loss_ent        | 0.72261107   |
| loss_kl         | 0.0064813057 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022214297 |
| loss_vf_loss    | 0.004438943  |
----------------------------------
********** Iteration 1414 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00168 |       0.00000 |       0.00569 |       0.00150 |       0.71677
     -0.00749 |       0.00000 |       0.00444 