In [1]:
import os

import gym
import numpy as np
import stable_baselines
from stable_baselines import A2C, ACER, ACKTR, DQN, DDPG, SAC, PPO1, PPO2, TD3, TRPO
from stable_baselines.common.env_checker import check_env
from stable_baselines.common.policies import MlpPolicy

import webotsgym as wg
from webotsgym.config import WebotConfig
from webotsgym.environment import WebotsEnv, WebotsGrid
from webotsgym.evaluate import Evaluate, EvaluateMats, EvaluatePJ0
from webotsgym.action import DiscreteAction, ContinuousAction
from webotsgym.observation import Observation

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [2]:
class MyObs(Observation):
    """Project-specific observation wrapper.

    Adds no behaviour of its own yet: it initialises the base
    ``Observation`` with the environment and keeps a reference to that
    environment on the instance.
    """

    def __init__(self, env):
        # Base initialisation first, then (re)bind the environment handle.
        super().__init__(env)
        self.env = env

class MyEval(Evaluate):
    """Reward and termination rules based on the distance to the target.

    Reward: the upper bound of ``reward_range`` when the robot is closer
    than ``_GOAL_REWARD_RADIUS`` to the target, otherwise a penalty in
    (-1, 0] that grows with ``tanh`` of the distance scaled by half of
    ``env.max_distance``.

    Termination: every ``config.reset_env_after`` iterations, or as soon as
    the robot is closer than ``_EPISODE_END_RADIUS`` to the target.
    """

    # Distance below which calc_reward pays the maximum reward.
    _GOAL_REWARD_RADIUS = 0.1
    # Distance below which check_done ends the episode.
    # NOTE(review): this radius is larger than _GOAL_REWARD_RADIUS, so an
    # episode can end as "target reached" without the goal reward ever being
    # paid out. Values are kept as they were; confirm whether both
    # thresholds should be the same.
    _EPISODE_END_RADIUS = 0.25

    def __init__(self, env, config: WebotConfig = None):
        """Set up the evaluator.

        Args:
            env: environment queried for ``get_target_distance()``,
                ``max_distance`` and ``iterations``.
            config: simulation configuration; a fresh ``WebotConfig()`` is
                created when omitted.
        """
        # A default of ``WebotConfig()`` in the signature would be built once
        # at class-definition time and shared (and mutated) across instances.
        if config is None:
            config = WebotConfig()
        super().__init__(env, config)
        self.reward_range = (-100, 100)

    def calc_reward(self):
        """Return the reward for the current environment state."""
        # Query the distance once so both branches use the same reading.
        dist = self.env.get_target_distance()
        if dist < self._GOAL_REWARD_RADIUS:
            return self.reward_range[1]
        half_span = self.env.max_distance / 2
        return -1 * np.tanh(dist / half_span)

    def check_done(self):
        """Return True when the episode should end (timeout or target reached)."""
        # NOTE(review): the modulo test is also true when iterations == 0;
        # presumably the counter is already > 0 when this is called - confirm.
        if self.env.iterations % self.config.reset_env_after == 0:
            return True
        return self.env.get_target_distance() < self._EPISODE_END_RADIUS
    

# Simulation settings for this run: start from the defaults and override
# only the options listed below (applied in this order).
config = WebotConfig()
_config_overrides = {
    "fast_simulation": False,
    "reset_env_after": 20000,
    "num_obstacles": 2,
    "world_size": 8,
    "world_scaling": 0.5,
}
for _option, _value in _config_overrides.items():
    setattr(config, _option, _value)

# Disabled experiment, kept for reference:
# action_class = ContinuousAction(direction_type="steering", relative=False)

# Grid environment in training mode, scored by the custom evaluator above.
env = WebotsGrid(
    train=True,
    evaluate_class=MyEval,
    config=config,
)



Accepting on Port:  10201
sending: env


In [3]:
# Smoke test: perform a single environment step with action 2 and display
# the tuple returned by step().
# NOTE(review): what action index 2 means depends on the environment's
# action class - confirm before relying on it.
env.step(2)

(array([0.2651177 , 1.76268482, 2.25      , 3.25      , 0.21443133,
        0.21407032, 1.72854364, 3.5       , 0.        ]),
 None,
 None,
 {})

In [None]:
# Training run: fit a PPO1 agent with an MLP policy on `env` and store it
# under models/<model_name>.
time_steps = 499999
model_name = "Webots2"
model_dir = "models"

# Create the output directory before training: model.save() does not create
# missing parent folders, and failing at save time would throw away the
# whole (long) training run.
os.makedirs(model_dir, exist_ok=True)

model = PPO1("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=time_steps, log_interval=100)
model.save(os.path.join(model_dir, model_name))

# To continue from or evaluate a stored agent later, reload it with:
#   model = PPO1.load(os.path.join(model_dir, model_name))





Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.









Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
********** Iteration 0 ************
sending: reset
Reward ( 250 )	 -0.09901840057042414
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00247 |      -0.02835 |       0.90671 |       0.00124 |       2.83529
     -0.00446 |      -0.02833 |       0.26141 |       0.00557 |       2.83279
     -0.00820 |      -0.02831 |       0.13836 |       0.00745 |       2.83147
     -0.01132 |      -0.02829 |       0.09601 |       0.00545 |       2.82915
Evaluating losses...
     -0.01295 |      -0.02827 |       0.07373 |       0.00439 |       2.82738
----------------------------------
| EpThisIter      | 0            |
| EpisodesSoFar   | 0            |
| TimeElapsed     | 5.83         |
| TimestepsSoFar  | 256       

Evaluating losses...
     -0.02426 |      -0.02805 |       0.03608 |       0.01285 |       2.80459
----------------------------------
| EpThisIter      | 0            |
| EpisodesSoFar   | 0            |
| TimeElapsed     | 8.56         |
| TimestepsSoFar  | 1280         |
| ev_tdlam_before | -0.116       |
| loss_ent        | 2.804593     |
| loss_kl         | 0.012853502  |
| loss_pol_entpen | -0.02804593  |
| loss_pol_surr   | -0.024258677 |
| loss_vf_loss    | 0.03607729   |
----------------------------------
********** Iteration 5 ************
Reward ( 1500 )	 -0.06585694114115476
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00228 |      -0.02804 |       0.05217 |       0.00043 |       2.80419
      0.00110 |      -0.02804 |       0.01043 |       0.00262 |       2.80379
     -0.00201 |      -0.02804 |       0.00942 |       0.00533 |       2.80365
     -0.00165 |      -0.02804 |       0.01435 |       0.00693 |       2.80371
Eva

********** Iteration 13 ************
Reward ( 3500 )	 -0.09436057304970316
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00395 |      -0.02767 |       0.15268 |       0.00128 |       2.76679
     -0.00385 |      -0.02764 |       0.03817 |       0.00598 |       2.76377
     -0.00527 |      -0.02761 |       0.00879 |       0.00883 |       2.76094
     -0.00314 |      -0.02759 |       0.03004 |       0.00797 |       2.75862
Evaluating losses...
     -0.00721 |      -0.02757 |       0.03217 |       0.00564 |       2.75744
-----------------------------------
| EpThisIter      | 0             |
| EpisodesSoFar   | 0             |
| TimeElapsed     | 14.6          |
| TimestepsSoFar  | 3584          |
| ev_tdlam_before | -0.086        |
| loss_ent        | 2.757437      |
| loss_kl         | 0.0056393994  |
| loss_pol_entpen | -0.02757437   |
| loss_pol_surr   | -0.0072132642 |
| loss_vf_loss    | 0.032174297   |
--------------------------

      0.00355 |      -0.02777 |       0.08326 |       0.01303 |       2.77702
     -0.00592 |      -0.02779 |       0.01324 |       0.00671 |       2.77872
     -0.00811 |      -0.02780 |       0.02405 |       0.00562 |       2.78008
Evaluating losses...
     -0.00715 |      -0.02781 |       0.03899 |       0.00824 |       2.78064
----------------------------------
| EpThisIter      | 0            |
| EpisodesSoFar   | 0            |
| TimeElapsed     | 20.1         |
| TimestepsSoFar  | 5632         |
| ev_tdlam_before | -0.00332     |
| loss_ent        | 2.7806375    |
| loss_kl         | 0.008242819  |
| loss_pol_entpen | -0.027806375 |
| loss_pol_surr   | -0.007151533 |
| loss_vf_loss    | 0.038988613  |
----------------------------------
********** Iteration 22 ************
Reward ( 5750 )	 -0.16623307985834015
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00072 |      -0.02781 |       0.16236 |       0.00076 |       2.78105
  

Evaluating losses...
     -0.00410 |      -0.02814 |       0.00282 |       0.02044 |       2.81371
-----------------------------------
| EpThisIter      | 0             |
| EpisodesSoFar   | 0             |
| TimeElapsed     | 25.8          |
| TimestepsSoFar  | 7680          |
| ev_tdlam_before | -0.0113       |
| loss_ent        | 2.8137057     |
| loss_kl         | 0.020441022   |
| loss_pol_entpen | -0.028137056  |
| loss_pol_surr   | -0.0040956177 |
| loss_vf_loss    | 0.002820882   |
-----------------------------------
********** Iteration 30 ************
Reward ( 7750 )	 -0.1744123556964021
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -6.48e-05 |      -0.02814 |       0.03316 |      2.53e-05 |       2.81388
     -0.00423 |      -0.02813 |       0.01266 |       0.00113 |       2.81344
     -0.00773 |      -0.02811 |       0.00193 |       0.00807 |       2.81149
     -0.00338 |      -0.02808 |       0.00356 |       0.01822 |      

********** Iteration 38 ************
Reward ( 9750 )	 -0.17797570749278901
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00345 |      -0.02757 |       0.01628 |       0.00140 |       2.75697
     -0.00286 |      -0.02755 |       0.00837 |       0.00424 |       2.75512
     -0.00621 |      -0.02754 |       0.00392 |       0.00879 |       2.75425
     -0.00734 |      -0.02754 |       0.00491 |       0.00852 |       2.75413
Evaluating losses...
     -0.00920 |      -0.02754 |       0.00581 |       0.00610 |       2.75434
----------------------------------
| EpThisIter      | 0            |
| EpisodesSoFar   | 0            |
| TimeElapsed     | 32           |
| TimestepsSoFar  | 9984         |
| ev_tdlam_before | -0.0364      |
| loss_ent        | 2.7543414    |
| loss_kl         | 0.006099542  |
| loss_pol_entpen | -0.027543413 |
| loss_pol_surr   | -0.009204338 |
| loss_vf_loss    | 0.0058119306 |
----------------------------------
**

    -8.95e-05 |      -0.02781 |       0.01215 |       0.00041 |       2.78112
     -0.00493 |      -0.02781 |       0.00492 |       0.00564 |       2.78101
     -0.00930 |      -0.02780 |       0.00072 |       0.01319 |       2.77997
     -0.01429 |      -0.02779 |       0.00187 |       0.01041 |       2.77861
Evaluating losses...
     -0.01661 |      -0.02778 |       0.00274 |       0.00614 |       2.77761
----------------------------------
| EpThisIter      | 0            |
| EpisodesSoFar   | 0            |
| TimeElapsed     | 37.4         |
| TimestepsSoFar  | 12032        |
| ev_tdlam_before | 0.0149       |
| loss_ent        | 2.7776055    |
| loss_kl         | 0.006139364  |
| loss_pol_entpen | -0.027776055 |
| loss_pol_surr   | -0.016608886 |
| loss_vf_loss    | 0.0027379536 |
----------------------------------
********** Iteration 47 ************
Reward ( 12250 )	 -0.13799273026685355
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
 

     -0.00254 |      -0.02756 |       0.00073 |       0.01072 |       2.75571
     -0.00646 |      -0.02755 |       0.00045 |       0.00861 |       2.75538
Evaluating losses...
     -0.00818 |      -0.02755 |       0.00041 |       0.00432 |       2.75535
-----------------------------------
| EpThisIter      | 0             |
| EpisodesSoFar   | 0             |
| TimeElapsed     | 42.8          |
| TimestepsSoFar  | 14080         |
| ev_tdlam_before | 0.00659       |
| loss_ent        | 2.755351      |
| loss_kl         | 0.0043182136  |
| loss_pol_entpen | -0.02755351   |
| loss_pol_surr   | -0.008176746  |
| loss_vf_loss    | 0.00041101046 |
-----------------------------------
********** Iteration 55 ************
Reward ( 14250 )	 -0.13821191730460144
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00141 |      -0.02755 |       0.00119 |       0.00080 |       2.75546
     -0.00794 |      -0.02755 |       0.00050 |       0.00831 |    

Evaluating losses...
     -0.00243 |      -0.02782 |       0.00230 |       0.00550 |       2.78165
-----------------------------------
| EpThisIter      | 0             |
| EpisodesSoFar   | 0             |
| TimeElapsed     | 48.4          |
| TimestepsSoFar  | 16128         |
| ev_tdlam_before | 0.117         |
| loss_ent        | 2.7816496     |
| loss_kl         | 0.0055024587  |
| loss_pol_entpen | -0.027816495  |
| loss_pol_surr   | -0.0024303123 |
| loss_vf_loss    | 0.002300761   |
-----------------------------------
********** Iteration 63 ************
Reward ( 16250 )	 -0.13115399384188764
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00138 |      -0.02781 |       0.00484 |       0.00050 |       2.78090
     -0.00659 |      -0.02780 |       0.00414 |       0.00465 |       2.77951
     -0.00419 |      -0.02778 |       0.00285 |       0.01323 |       2.77824
     -0.00587 |      -0.02777 |       0.00223 |       0.00442 |    

********** Iteration 71 ************
Reward ( 18250 )	 -0.1311271778047517
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00140 |      -0.02764 |       0.00272 |       0.00183 |       2.76390
     -0.00558 |      -0.02763 |       0.00062 |       0.00979 |       2.76317
      0.00412 |      -0.02762 |       0.00139 |       0.00327 |       2.76192
     -0.00074 |      -0.02760 |       0.00045 |       0.00322 |       2.76044
Evaluating losses...
     -0.00407 |      -0.02760 |       0.00070 |       0.00731 |       2.75970
-----------------------------------
| EpThisIter      | 0             |
| EpisodesSoFar   | 0             |
| TimeElapsed     | 54.7          |
| TimestepsSoFar  | 18432         |
| ev_tdlam_before | 0.0127        |
| loss_ent        | 2.7597032     |
| loss_kl         | 0.007312312   |
| loss_pol_entpen | -0.02759703   |
| loss_pol_surr   | -0.004071938  |
| loss_vf_loss    | 0.00070084154 |
--------------------------

Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00677 |      -0.02791 |       0.13715 |       0.00178 |       2.79080
     -0.01420 |      -0.02792 |       0.07979 |       0.01668 |       2.79217
     -0.01152 |      -0.02795 |       0.00904 |       0.03548 |       2.79465
     -0.01198 |      -0.02798 |       0.02438 |       0.04401 |       2.79789
Evaluating losses...
     -0.01253 |      -0.02800 |       0.03570 |       0.04364 |       2.80012
----------------------------------
| EpLenMean       | 2e+04        |
| EpRewMean       | -2.67e+03    |
| EpThisIter      | 0            |
| EpisodesSoFar   | 1            |
| TimeElapsed     | 65.1         |
| TimestepsSoFar  | 20480        |
| ev_tdlam_before | -0.00177     |
| loss_ent        | 2.8001246    |
| loss_kl         | 0.043638796  |
| loss_pol_entpen | -0.028001245 |
| loss_pol_surr   | -0.012526481 |
| loss_vf_loss    | 0.03569573   |
----------------------------------
*******

********** Iteration 87 ************
Reward ( 22500 )	 -0.12393669853935503
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00112 |      -0.02808 |       0.02612 |       0.00063 |       2.80837
     -0.00750 |      -0.02809 |       0.01227 |       0.00441 |       2.80924
     -0.00610 |      -0.02810 |       0.00759 |       0.01053 |       2.81047
     -0.00652 |      -0.02812 |       0.01041 |       0.01277 |       2.81208
Evaluating losses...
     -0.00782 |      -0.02813 |       0.01086 |       0.00993 |       2.81322
----------------------------------
| EpLenMean       | 2e+04        |
| EpRewMean       | -2.67e+03    |
| EpThisIter      | 0            |
| EpisodesSoFar   | 1            |
| TimeElapsed     | 70.8         |
| TimestepsSoFar  | 22528        |
| ev_tdlam_before | 0.00918      |
| loss_ent        | 2.8132174    |
| loss_kl         | 0.009929502  |
| loss_pol_entpen | -0.028132174 |
| loss_pol_surr   | -0.007821021 |
|

********** Iteration 95 ************
Reward ( 24500 )	 -0.08218696816620981
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00150 |      -0.02786 |       0.09406 |       0.00010 |       2.78578
     -0.00170 |      -0.02782 |       0.02865 |       0.00270 |       2.78187
     -0.00183 |      -0.02779 |       0.01462 |       0.01350 |       2.77877
     -0.00585 |      -0.02777 |       0.02473 |       0.01215 |       2.77673
Evaluating losses...
     -0.00846 |      -0.02776 |       0.00797 |       0.00595 |       2.77599
----------------------------------
| EpLenMean       | 2e+04        |
| EpRewMean       | -2.67e+03    |
| EpThisIter      | 0            |
| EpisodesSoFar   | 1            |
| TimeElapsed     | 76.2         |
| TimestepsSoFar  | 24576        |
| ev_tdlam_before | 0.00356      |
| loss_ent        | 2.7759936    |
| loss_kl         | 0.0059479736 |
| loss_pol_entpen | -0.027759936 |
| loss_pol_surr   | -0.008464919 |
|

********** Iteration 103 ************
Reward ( 26500 )	 -0.07780473283376678
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00079 |      -0.02766 |       0.01352 |       0.00093 |       2.76636
     -0.00264 |      -0.02766 |       0.00820 |       0.00463 |       2.76557
     -0.00284 |      -0.02764 |       0.00514 |       0.00919 |       2.76430
     -0.00441 |      -0.02763 |       0.00495 |       0.00850 |       2.76330
Evaluating losses...
     -0.00716 |      -0.02763 |       0.00463 |       0.00539 |       2.76280
-----------------------------------
| EpLenMean       | 2e+04         |
| EpRewMean       | -2.67e+03     |
| EpThisIter      | 0             |
| EpisodesSoFar   | 1             |
| TimeElapsed     | 81.7          |
| TimestepsSoFar  | 26624         |
| ev_tdlam_before | 0.00387       |
| loss_ent        | 2.7627962     |
| loss_kl         | 0.005394025   |
| loss_pol_entpen | -0.027627962  |
| loss_pol_surr   | -0.0

********** Iteration 111 ************
Reward ( 28500 )	 -0.07856478476805402
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -4.65e-05 |      -0.02789 |       0.00171 |      5.87e-05 |       2.78874
     -0.00251 |      -0.02788 |       0.00101 |       0.00133 |       2.78760
     -0.00983 |      -0.02786 |       0.00076 |       0.00793 |       2.78612
     -0.00915 |      -0.02784 |       0.00054 |       0.01549 |       2.78425
Evaluating losses...
     -0.00892 |      -0.02783 |       0.00072 |       0.01668 |       2.78302
-----------------------------------
| EpLenMean       | 2e+04         |
| EpRewMean       | -2.67e+03     |
| EpThisIter      | 0             |
| EpisodesSoFar   | 1             |
| TimeElapsed     | 87.2          |
| TimestepsSoFar  | 28672         |
| ev_tdlam_before | -0.00601      |
| loss_ent        | 2.7830198     |
| loss_kl         | 0.01667989    |
| loss_pol_entpen | -0.027830197  |
| loss_pol_surr   | -0.0

********** Iteration 119 ************
Reward ( 30500 )	 -0.07826528300879368
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00261 |      -0.02776 |       0.00015 |       0.00050 |       2.77578
     -0.01029 |      -0.02778 |       0.00015 |       0.00717 |       2.77840
     -0.00779 |      -0.02781 |      5.90e-05 |       0.01130 |       2.78052
     -0.00830 |      -0.02782 |      8.56e-05 |       0.00973 |       2.78200
Evaluating losses...
     -0.00887 |      -0.02783 |      5.60e-05 |       0.00621 |       2.78274
----------------------------------
| EpLenMean       | 2e+04        |
| EpRewMean       | -2.67e+03    |
| EpThisIter      | 0            |
| EpisodesSoFar   | 1            |
| TimeElapsed     | 92.8         |
| TimestepsSoFar  | 30720        |
| ev_tdlam_before | -0.0924      |
| loss_ent        | 2.7827437    |
| loss_kl         | 0.0062120706 |
| loss_pol_entpen | -0.027827436 |
| loss_pol_surr   | -0.008874388 |


********** Iteration 127 ************
Reward ( 32750 )	 -0.07845543798097632
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00020 |      -0.02830 |      1.46e-05 |       0.00017 |       2.83047
     -0.00370 |      -0.02833 |      2.61e-05 |       0.00322 |       2.83257
     -0.00248 |      -0.02834 |      6.74e-06 |       0.00980 |       2.83423
     -0.00432 |      -0.02835 |      1.31e-05 |       0.00655 |       2.83548
Evaluating losses...
     -0.00499 |      -0.02836 |      3.17e-06 |       0.00271 |       2.83599
-----------------------------------
| EpLenMean       | 2e+04         |
| EpRewMean       | -2.67e+03     |
| EpThisIter      | 0             |
| EpisodesSoFar   | 1             |
| TimeElapsed     | 98.4          |
| TimestepsSoFar  | 32768         |
| ev_tdlam_before | -0.163        |
| loss_ent        | 2.8359911     |
| loss_kl         | 0.0027095212  |
| loss_pol_entpen | -0.02835991   |
| loss_pol_surr   | -0.0

********** Iteration 135 ************
Reward ( 34750 )	 -0.07807912424580182
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00110 |      -0.02850 |       0.00013 |       0.00014 |       2.85014
     -0.00317 |      -0.02850 |      7.95e-05 |       0.00141 |       2.84962
     -0.00624 |      -0.02849 |      8.28e-05 |       0.00661 |       2.84939
     -0.00522 |      -0.02850 |      7.47e-05 |       0.00989 |       2.84951
Evaluating losses...
     -0.00739 |      -0.02850 |      6.93e-05 |       0.00864 |       2.84994
-----------------------------------
| EpLenMean       | 2e+04         |
| EpRewMean       | -2.67e+03     |
| EpThisIter      | 0             |
| EpisodesSoFar   | 1             |
| TimeElapsed     | 104           |
| TimestepsSoFar  | 34816         |
| ev_tdlam_before | -0.00929      |
| loss_ent        | 2.8499372     |
| loss_kl         | 0.008638818   |
| loss_pol_entpen | -0.02849937   |
| loss_pol_surr   | -0.0

********** Iteration 143 ************
Reward ( 36750 )	 -0.07814834714636204
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00112 |      -0.02850 |      1.57e-05 |      8.35e-05 |       2.84961
     -0.00401 |      -0.02849 |      1.47e-05 |       0.00236 |       2.84912
     -0.00587 |      -0.02847 |      1.29e-05 |       0.00964 |       2.84744
     -0.00746 |      -0.02845 |      1.38e-05 |       0.01692 |       2.84521
Evaluating losses...
     -0.00825 |      -0.02844 |      1.28e-05 |       0.01619 |       2.84391
-----------------------------------
| EpLenMean       | 2e+04         |
| EpRewMean       | -2.67e+03     |
| EpThisIter      | 0             |
| EpisodesSoFar   | 1             |
| TimeElapsed     | 110           |
| TimestepsSoFar  | 36864         |
| ev_tdlam_before | -0.0932       |
| loss_ent        | 2.8439105     |
| loss_kl         | 0.016194059   |
| loss_pol_entpen | -0.028439105  |
| loss_pol_surr   | -0.0

********** Iteration 151 ************
Reward ( 38750 )	 -0.07820889333828732
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00057 |      -0.02803 |      2.60e-05 |      2.70e-05 |       2.80342
     -0.00057 |      -0.02803 |      2.72e-05 |       0.00126 |       2.80324
     -0.00261 |      -0.02804 |      1.39e-05 |       0.00623 |       2.80424
     -0.00361 |      -0.02805 |      1.46e-05 |       0.00739 |       2.80545
Evaluating losses...
     -0.00517 |      -0.02806 |      7.82e-06 |       0.00513 |       2.80622
-----------------------------------
| EpLenMean       | 2e+04         |
| EpRewMean       | -2.67e+03     |
| EpThisIter      | 0             |
| EpisodesSoFar   | 1             |
| TimeElapsed     | 115           |
| TimestepsSoFar  | 38912         |
| ev_tdlam_before | -0.49         |
| loss_ent        | 2.806215      |
| loss_kl         | 0.0051280996  |
| loss_pol_entpen | -0.02806215   |
| loss_pol_surr   | -0.0

********** Iteration 159 ************
Reward ( 40750 )	 -0.07539921302318243
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00136 |      -0.02832 |       0.08439 |      9.16e-05 |       2.83160
      0.00072 |      -0.02832 |       0.07625 |       0.00020 |       2.83162
     -0.00049 |      -0.02831 |       0.07183 |      4.62e-05 |       2.83129
     -0.00110 |      -0.02831 |       0.04920 |       0.00089 |       2.83090
Evaluating losses...
     -0.00335 |      -0.02830 |       0.06184 |       0.00249 |       2.83035
-----------------------------------
| EpLenMean       | 2e+04         |
| EpRewMean       | -2.17e+03     |
| EpThisIter      | 0             |
| EpisodesSoFar   | 2             |
| TimeElapsed     | 126           |
| TimestepsSoFar  | 40960         |
| ev_tdlam_before | -0.404        |
| loss_ent        | 2.830353      |
| loss_kl         | 0.0024884297  |
| loss_pol_entpen | -0.02830353   |
| loss_pol_surr   | -0.0

********** Iteration 167 ************
Reward ( 43000 )	 -0.14571098589895784
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00111 |      -0.02854 |       0.05692 |       0.00043 |       2.85368
     -0.00110 |      -0.02853 |       0.01690 |       0.00190 |       2.85318
     -0.00150 |      -0.02852 |       0.00584 |       0.00499 |       2.85247
     -0.00210 |      -0.02852 |       0.01547 |       0.00693 |       2.85161
Evaluating losses...
     -0.00274 |      -0.02851 |       0.01157 |       0.00638 |       2.85110
-----------------------------------
| EpLenMean       | 2e+04         |
| EpRewMean       | -2.17e+03     |
| EpThisIter      | 0             |
| EpisodesSoFar   | 2             |
| TimeElapsed     | 131           |
| TimestepsSoFar  | 43008         |
| ev_tdlam_before | 0.0364        |
| loss_ent        | 2.8510954     |
| loss_kl         | 0.0063813846  |
| loss_pol_entpen | -0.028510954  |
| loss_pol_surr   | -0.0

********** Iteration 175 ************
Reward ( 45000 )	 -0.15486215476019852
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00040 |      -0.02864 |       0.00585 |       0.00030 |       2.86379
     -0.00151 |      -0.02866 |       0.00182 |       0.00303 |       2.86573
     -0.00362 |      -0.02869 |       0.00038 |       0.00569 |       2.86886
     -0.00428 |      -0.02872 |       0.00122 |       0.00471 |       2.87231
Evaluating losses...
     -0.00519 |      -0.02874 |       0.00148 |       0.00301 |       2.87445
----------------------------------
| EpLenMean       | 2e+04        |
| EpRewMean       | -2.17e+03    |
| EpThisIter      | 0            |
| EpisodesSoFar   | 2            |
| TimeElapsed     | 137          |
| TimestepsSoFar  | 45056        |
| ev_tdlam_before | -0.000374    |
| loss_ent        | 2.874453     |
| loss_kl         | 0.0030143785 |
| loss_pol_entpen | -0.02874453  |
| loss_pol_surr   | -0.005187638 |


********** Iteration 183 ************
Reward ( 47000 )	 -0.1566767066310845
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00012 |      -0.02882 |       0.00357 |       0.00043 |       2.88176
     -0.00465 |      -0.02884 |       0.00125 |       0.00365 |       2.88374
     -0.00315 |      -0.02885 |       0.00052 |       0.00857 |       2.88469
     -0.00378 |      -0.02885 |       0.00103 |       0.00398 |       2.88516
Evaluating losses...
     -0.00568 |      -0.02886 |       0.00113 |       0.00173 |       2.88557
----------------------------------
| EpLenMean       | 2e+04        |
| EpRewMean       | -2.17e+03    |
| EpThisIter      | 0            |
| EpisodesSoFar   | 2            |
| TimeElapsed     | 142          |
| TimestepsSoFar  | 47104        |
| ev_tdlam_before | 0.00102      |
| loss_ent        | 2.8855696    |
| loss_kl         | 0.001733322  |
| loss_pol_entpen | -0.028855694 |
| loss_pol_surr   | -0.005676184 |
|