In [1]:
import webotsgym as wg

from webotsgym.config import WebotConfig, SimSpeedMode
from webotsgym.environment import webotsgym, WebotsGrid
from webotsgym.evaluate import Evaluate, EvaluateMats, EvaluatePJ0
from webotsgym.action import DiscreteAction, ContinuousAction
from webotsgym.observation import Observation

import numpy as np

import gym
import stable_baselines
from stable_baselines import A2C, ACER, ACKTR, DQN, DDPG, SAC, PPO1, PPO2, TD3, TRPO
from stable_baselines.common.env_checker import check_env
from stable_baselines.common.policies import MlpPolicy

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [2]:
def exponential_decay(x, N0=1, lambda_=5):
    return N0*np.exp(-lambda_*x)

def exponential_penalty(x, step_penalty=-1, lambda_=5):
    return step_penalty * (1 - exponential_decay(x, lambda_=lambda_))

class MyEval(Evaluate):
    def __init__(self, env, config: WebotConfig = WebotConfig()):
        super(MyEval, self).__init__(env, config)

    def calc_reward(self):
        if self.env.get_target_distance(normalized=False) < 0.05:
            reward = 10000
        else:
            distance_normalized = self.env.get_target_distance(normalized=True)
            step_base_penalty = -1
            reward = exponential_penalty(x=distance_normalized, step_penalty=step_base_penalty, lambda_=3)
            if self.env.gps_visited_count > 3:
                reward -= 0.2 * (self.env.gps_visited_count - 2)**2
            if self.env.state._action_denied is True:
                reward -= 500
        return reward
        
        
    def check_done(self):
        if self.env.time_steps == 300:
            return True
        if self.env.total_reward < -10000:
            return True
        if self.env.get_target_distance(normalized=False) < 0.05:
            return True
        return False


config = WebotConfig()
config.sim_mode = SimSpeedMode.FAST
config.reset_env_after = 20000
config.num_obstacles = 12
config.world_size = 8
env = WebotsGrid(train=True, 
                 config=config,
                 evaluate_class = MyEval)



Accepting on Port:  10201
sending: start env 2


In [3]:
time_steps = 0
model_name = "PPO_webots_v1_jonas_no_while"

model = PPO1("MlpPolicy", env, verbose=1, tensorboard_log="./PPO_webots_v1_jonas_no_while_tensorboard/")
model.learn(total_timesteps=50000)
model.save("models/keep/{}".format(model_name))

#while time_steps < 500000:
#    model = PPO1("MlpPolicy", env, verbose=1)
#    model.load("models/keep/{}".format(model_name))
#    model.learn(total_timesteps=10000)
#    model.save("models/keep/{}".format(model_name))
#    time_steps += 10000





Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.










Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

********** Iteration 0 ************
sending: start env 2
sending: start env 2
sending: start env 2

Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00141 |      -0.01386 |      3.71e+06 |       0.00066 |       1.38555
     -0.01244 |      -0.01383 |      3.71e+06 |       0.00362 |       1.38252
     -0.01612 |      -0.01377 |      3.71e+06 |       0.00862 |       1.37747
     -0.01950 |      -0.01375 |      3.71e+06 |       0.01110 |       1.37506
Evaluating losses...
     -0.02253 |      -0.01376 |      3.71e+06 |       0.01054 |       1.37565
----------------------------------
| EpLenMean       | 40           |
| EpRewMean       | 9.64e+03     |
| EpThisIter      | 2            |
| EpisodesSoFar 

********** Iteration 4 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00053 |      -0.01375 |      4.30e+06 |      5.00e-05 |       1.37537
     -0.00475 |      -0.01373 |      4.30e+06 |       0.00058 |       1.37316
     -0.01742 |      -0.01370 |      4.30e+06 |       0.00274 |       1.36991
     -0.01956 |      -0.01364 |      4.30e+06 |       0.00815 |       1.36398
Evaluating losses...
     -0.01858 |      -0.01361 |      4.30e+06 |       0.01120 |       1.36122
----------------------------------
| EpLenMean       | 112          |
| EpRewMean       | 6.45e+03     |
| EpThisIter      | 4            |
| EpisodesSoFar   | 10           |
| TimeElapsed     | 164          |
| TimestepsSoFar  | 1280         |
| ev_tdlam_before | 0.000102     |
| loss_ent        | 1.3612224    |
| loss_kl         | 0.011197823  |
| loss_pol_entpen | -0.0136

********** Iteration 12 ************
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00098 |      -0.01363 |      6.95e+04 |       0.00014 |       1.36273
     -0.00702 |      -0.01363 |      6.94e+04 |       0.00145 |       1.36326
     -0.01741 |      -0.01362 |      6.92e+04 |       0.00498 |       1.36198
     -0.02615 |      -0.01358 |      6.89e+04 |       0.01042 |       1.35760
Evaluating losses...
     -0.02820 |      -0.01354 |      6.87e+04 |       0.01406 |       1.35437
----------------------------------
| EpLenMean       | 144          |
| EpRewMean       | 3.82e+03     |
| EpThisIter      | 1            |
| EpisodesSoFar   | 23           |
| TimeElapsed     | 402          |
| TimestepsSoFar  | 3328         |
| ev_tdlam_before | 0.0142       |
| loss_ent        | 1.3543742    |
| loss_kl         | 0.014061857  |
| loss_pol_entpen | -0.013543742 |
| loss_pol_surr   | -0.028204221 |
| loss_vf_loss    |

********** Iteration 20 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00243 |      -0.01347 |      1.93e+07 |       0.00029 |       1.34677
     -0.01473 |      -0.01346 |      1.92e+07 |       0.00394 |       1.34608
     -0.02335 |      -0.01340 |      1.92e+07 |       0.01275 |       1.34011
     -0.02885 |      -0.01337 |      1.92e+07 |       0.01553 |       1.33727
Evaluating losses...
     -0.03424 |      -0.01337 |      1.92e+07 |       0.01392 |       1.33725
----------------------------------
| EpLenMean       | 108          |
| EpRewMean       | 5.42e+03     |
| EpThisIter      | 9            |
| EpisodesSoFar   | 49           |
| TimeElapsed     | 731          |
| TimestepsSoFar  | 5376         |
| ev_tdlam_before | 0.0001

sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00035 |      -0.01283 |      1.31e+07 |      3.75e-05 |       1.28336
     -0.00743 |      -0.01279 |      1.31e+07 |       0.00064 |       1.27923
     -0.01846 |      -0.01272 |      1.31e+07 |       0.00429 |       1.27163
     -0.02327 |      -0.01266 |      1.31e+07 |       0.00870 |       1.26636
Evaluating losses...
     -0.02605 |      -0.01266 |      1.31e+07 |       0.00960 |       1.26640
----------------------------------
| EpLenMean       | 83.2         |
| EpRewMean       | 6.79e+03     |
| EpThisIter      | 4            |
| EpisodesSoFar   | 86           |
| TimeElapsed     | 1.12e+03     |
| TimestepsSoFar  | 7168         |
| ev_tdlam_before | -5.56e-05    |
| loss_ent        | 1.2663956    |
| loss_kl         | 0.009598469  |
| loss_pol_entpen | -0.012663955 |
| loss_pol_surr   | -0.026052859 |
| loss_vf_loss    | 13112116.0   |
---------------------

Evaluating losses...
     -0.02304 |      -0.01254 |      1.86e+07 |       0.01079 |       1.25395
----------------------------------
| EpLenMean       | 72.2         |
| EpRewMean       | 7.21e+03     |
| EpThisIter      | 6            |
| EpisodesSoFar   | 113          |
| TimeElapsed     | 1.43e+03     |
| TimestepsSoFar  | 8960         |
| ev_tdlam_before | -0.00132     |
| loss_ent        | 1.2539482    |
| loss_kl         | 0.010791126  |
| loss_pol_entpen | -0.012539482 |
| loss_pol_surr   | -0.023044271 |
| loss_vf_loss    | 18565850.0   |
----------------------------------
********** Iteration 35 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00075 |      -0.01243 |      2.33e+07 |    

********** Iteration 42 ************
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00029 |      -0.01270 |      5.20e+06 |       0.00012 |       1.27017
     -0.00492 |      -0.01261 |      5.20e+06 |       0.00222 |       1.26121
     -0.01205 |      -0.01247 |      5.19e+06 |       0.00764 |       1.24707
     -0.01392 |      -0.01238 |      5.19e+06 |       0.01172 |       1.23841
Evaluating losses...
     -0.01614 |      -0.01238 |      5.19e+06 |       0.01138 |       1.23767
----------------------------------
| EpLenMean       | 51.2         |
| EpRewMean       | 8.27e+03     |
| EpThisIter      | 2            |
| EpisodesSoFar   | 157          |
| TimeElapsed     | 1.87e+03     |
| TimestepsSoFar  | 11008        |
| ev_tdlam_before | -0.000767    |
| loss_ent        | 1.2376665    |
| loss_kl         | 0.011381617  |
| loss_pol_entpen | -0.012376664 |
| loss_pol_surr   | -0.01614048  

********** Iteration 49 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00020 |      -0.01148 |      2.91e+07 |       0.00021 |       1.14759
     -0.00812 |      -0.01141 |      2.91e+07 |       0.00105 |       1.14149
     -0.01697 |      -0.01129 |      2.91e+07 |       0.00436 |       1.12934
     -0.01855 |      -0.01117 |      2.91e+07 |       0.00888 |       1.11721
Evaluating losses...
     -0.02188 |      -0.01117 |      2.90e+07 |       0.00928 |       1.11691
----------------------------------
| EpLenMean       | 37.9         |
| EpRewMean       | 8.8e+03      |
| EpThisIter      | 10           |
| EpisodesSoFar   | 214          |
| TimeElapsed     | 2.39e+03     |
| TimestepsSoFar  |

----------------------------------
| EpLenMean       | 28.5         |
| EpRewMean       | 8.9e+03      |
| EpThisIter      | 10           |
| EpisodesSoFar   | 273          |
| TimeElapsed     | 3.43e+03     |
| TimestepsSoFar  | 14336        |
| ev_tdlam_before | 0.00841      |
| loss_ent        | 1.1518996    |
| loss_kl         | 0.0071008676 |
| loss_pol_entpen | -0.011518995 |
| loss_pol_surr   | -0.028825423 |
| loss_vf_loss    | 24305300.0   |
----------------------------------
********** Iteration 56 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00083 |      -0.01066 |      3.44e+07 |      9.91e-05 |       1.06551
     -0.00357 |      -0.01065 

sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00023 |      -0.01057 |      2.66e+07 |       0.00012 |       1.05717
     -0.00772 |      -0.01051 |      2.66e+07 |       0.00130 |       1.05051
     -0.01614 |      -0.01041 |      2.66e+07 |       0.00434 |       1.04093
     -0.02330 |      -0.01040 |      2.66e+07 |       0.00610 |       1.04047
Evaluating losses...
     -0.02760 |      -0.01043 |      2.66e+07 |       0.00646 |       1.04333
----------------------------------
| EpLenMean       | 22.3         |
| EpRewMean       | 9.05e+03     |
| EpThisIter      | 13           |
| EpisodesSoFar   | 352          |
| TimeElapsed     | 5.65e+03     |
| TimestepsSoFar  | 16128        |
| ev_tdlam_before | 0.0046       |
| loss_ent        | 1.0433292    |
| loss_kl     

     -0.00721 |      -0.00933 |      4.26e+07 |       0.00079 |       0.93325
     -0.01205 |      -0.00925 |      4.26e+07 |       0.00308 |       0.92477
     -0.01710 |      -0.00920 |      4.26e+07 |       0.00523 |       0.91958
Evaluating losses...
     -0.02228 |      -0.00919 |      4.26e+07 |       0.00527 |       0.91929
----------------------------------
| EpLenMean       | 19.9         |
| EpRewMean       | 9.06e+03     |
| EpThisIter      | 22           |
| EpisodesSoFar   | 431          |
| TimeElapsed     | 6.28e+03     |
| TimestepsSoFar  | 17664        |
| ev_tdlam_before | 0.00903      |
| loss_ent        | 0.91928566   |
| loss_kl         | 0.0052742623 |
| loss_pol_entpen | -0.009192857 |
| loss_pol_surr   | -0.022283513 |
| loss_vf_loss    | 42582980.0   |
----------------------------------
********** Iteration 69 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start

     -0.01929 |      -0.00996 |      2.38e+07 |       0.00539 |       0.99577
Evaluating losses...
     -0.02309 |      -0.00999 |      2.38e+07 |       0.00548 |       0.99855
----------------------------------
| EpLenMean       | 15.5         |
| EpRewMean       | 9.07e+03     |
| EpThisIter      | 11           |
| EpisodesSoFar   | 521          |
| TimeElapsed     | 6.97e+03     |
| TimestepsSoFar  | 19200        |
| ev_tdlam_before | 0.0173       |
| loss_ent        | 0.9985504    |
| loss_kl         | 0.005475106  |
| loss_pol_entpen | -0.009985505 |
| loss_pol_surr   | -0.0230864   |
| loss_vf_loss    | 23801368.0   |
----------------------------------
********** Iteration 75 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00105 |

********** Iteration 81 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00143 |      -0.00991 |      2.81e+07 |       0.00019 |       0.99082
     -0.00493 |      -0.00986 |      2.81e+07 |       0.00120 |       0.98596
     -0.00945 |      -0.00982 |      2.81e+07 |       0.00276 |       0.98222
     -0.01411 |      -0.00982 |      2.81e+07 |       0.00417 |       0.98171
Evaluating losses...
     -0.01694 |      -0.00985 |      2.81e+07 |       0.00426 |       0.98461
----------------------------------
| EpLenMean       | 21.9         |
| EpRewMean       | 7.98e+03     |
| EpThisIter      | 14           |
| EpisodesSoFar   | 600   

********** Iteration 88 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00129 |      -0.01000 |      3.75e+07 |      6.65e-05 |       0.99987
     -0.00981 |      -0.00999 |      3.75e+07 |       0.00179 |       0.99906
     -0.01435 |      -0.01000 |      3.75e+07 |       0.00413 |       1.00002
     -0.02007 |      -0.01005 |      3.75e+07 |       0.00462 |       1.00482
Evaluating losses...
     -0.02231 |      -0.01008 |      3.75e+07 |       0.00494 |       1.00758
---------------------------------
| EpLenMean       | 25.4   

sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00098 |      -0.00913 |      4.75e+07 |       0.00038 |       0.91340
     -0.00725 |      -0.00906 |      4.75e+07 |       0.00269 |       0.90590
     -0.01140 |      -0.00904 |      4.75e+07 |       0.00325 |       0.90414
     -0.01577 |      -0.00905 |      4.75e+07 |       0.00232 |       0.90531
Evaluating losses...
     -0.01737 |      -0.00905 |      4.75e+07 |       0.00273 |       0.90468
----------------------------------
| EpLenMean       | 21.4         |
| EpRewMean       | 9.16e+03     |
| EpThisIter      | 20           |
| EpisodesSoFar   | 741          |
| TimeElapsed     | 8.73e+03     |
| TimestepsSoFar  | 24320        |
| ev_tdlam_before | -5.07e-05    |
| loss_ent        | 0.9046797    |
| loss_kl         | 0.002732778  |
| loss_pol_entpen | -0.009046797 |
| loss_pol_surr   | -0.017372934 |
| loss_vf_loss    | 47520480.0   |
---------------------

Evaluating losses...
     -0.01547 |      -0.00893 |      2.97e+07 |       0.00307 |       0.89349
----------------------------------
| EpLenMean       | 18.9         |
| EpRewMean       | 9.27e+03     |
| EpThisIter      | 13           |
| EpisodesSoFar   | 815          |
| TimeElapsed     | 9.33e+03     |
| TimestepsSoFar  | 25856        |
| ev_tdlam_before | 0.0125       |
| loss_ent        | 0.89348847   |
| loss_kl         | 0.0030726744 |
| loss_pol_entpen | -0.008934885 |
| loss_pol_surr   | -0.01546666  |
| loss_vf_loss    | 29688652.0   |
----------------------------------
********** Iteration 101 ************
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00179 |      -0.01108 |      1.89e+06 |       0.00018 |       1.10843
     -0.00244 |      -0.01112 |      1.89e+06 |       0.00088 |       1.11225
     -0.00418 |      -0.01113 |      1.89e+06 |       0.00139 |       1.11315
     

sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00021 |      -0.00922 |      3.22e+07 |       0.00023 |       0.92220
     -0.00524 |      -0.00921 |      3.22e+07 |       0.00161 |       0.92136
     -0.01025 |      -0.00923 |      3.22e+07 |       0.00244 |       0.92322
     -0.01407 |      -0.00927 |      3.22e+07 |       0.00336 |       0.92700
Evaluating losses...
     -0.01524 |      -0.00929 |      3.22e+07 |       0.00404 |       0.92874
----------------------------------
| EpLenMean       | 25           |
| EpRewMean       | 8.66e+03     |
| EpThisIter      | 17           |
| EpisodesSoFar   | 883          |
| TimeElapsed     | 1.34e+04     |
| TimestepsSoFar  | 27648        |
| ev_tdlam_before | 0.0133       |
| loss_ent        | 0.9287363    |
| loss_kl         | 0.004035109  |
| loss_pol_entpen | -0.009287363 |
| loss_pol_surr   | -0.01524

     -0.00341 |      -0.00944 |      2.56e+07 |       0.00061 |       0.94426
     -0.00909 |      -0.00948 |      2.56e+07 |       0.00250 |       0.94813
     -0.01121 |      -0.00949 |      2.55e+07 |       0.00501 |       0.94947
Evaluating losses...
     -0.01292 |      -0.00945 |      2.55e+07 |       0.00515 |       0.94491
----------------------------------
| EpLenMean       | 19           |
| EpRewMean       | 8.71e+03     |
| EpThisIter      | 15           |
| EpisodesSoFar   | 965          |
| TimeElapsed     | 1.56e+04     |
| TimestepsSoFar  | 29184        |
| ev_tdlam_before | 0.0187       |
| loss_ent        | 0.944913     |
| loss_kl         | 0.0051497417 |
| loss_pol_entpen | -0.00944913  |
| loss_pol_surr   | -0.012920581 |
| loss_vf_loss    | 25545664.0   |
----------------------------------
********** Iteration 114 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: star

********** Iteration 120 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00120 |      -0.00770 |      4.76e+07 |       0.00012 |       0.77006
     -0.00717 |      -0.00766 |      4.76e+07 |       0.00163 |       0.76650
     -0.00895 |      -0.00761 |      4.76e+07 |       0.00347 |       0.76120
     -0.01184 |      -0.00757 |      4.76e+07 |       0.00296 |       0.75665
Evaluating losses...
     -0.01398 |      -0.00754 |      4.76e+07 |       0.00220 |       0.754

      0.00152 |      -0.01098 |      7.10e+06 |       0.00013 |       1.09793
     -0.00031 |      -0.01098 |      7.10e+06 |       0.00058 |       1.09758
     -0.00304 |      -0.01097 |      7.10e+06 |       0.00121 |       1.09671
     -0.00549 |      -0.01096 |      7.10e+06 |       0.00208 |       1.09618
Evaluating losses...
     -0.00604 |      -0.01096 |      7.10e+06 |       0.00243 |       1.09601
----------------------------------
| EpLenMean       | 23.5         |
| EpRewMean       | 8.51e+03     |
| EpThisIter      | 5            |
| EpisodesSoFar   | 1115         |
| TimeElapsed     | 1.83e+04     |
| TimestepsSoFar  | 32512        |
| ev_tdlam_before | 0.0239       |
| loss_ent        | 1.0960083    |
| loss_kl         | 0.002434099  |
| loss_pol_entpen | -0.010960083 |
| loss_pol_surr   | -0.006036766 |
| loss_vf_loss    | 7102597.0    |
----------------------------------
********** Iteration 127 ************
sending: start env 2
sending: start env 2
sending: start env 

********** Iteration 133 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00132 |      -0.00972 |      2.55e+07 |       0.00022 |       0.97176
     -0.00159 |      -0.00972 |      2.55e+07 |       0.00095 |       0.97170
     -0.00417 |      -0.00972 |      2.54e+07 |       0.00072 |       0.97195
     -0.00823 |      -0.00971 |      2.54e+07 |       0.00090 |       0.97141
Evaluating losses...
     -0.01025 |      -0.00971 |      2.54e+07 |       0.00128 |       0.97077
----------------------------------
| EpLenMean       | 23.5         |
| EpRewMean       | 8.14e+03     |
| EpThisIter      | 14           |
| Ep

sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     6.78e-05 |      -0.00943 |      2.12e+07 |      1.33e-05 |       0.94306
     -0.00270 |      -0.00942 |      2.12e+07 |       0.00028 |       0.94226
     -0.00328 |      -0.00941 |      2.12e+07 |       0.00076 |       0.94098
     -0.00432 |      -0.00940 |      2.12e+07 |       0.00051 |       0.94030
Evaluating losses...
     -0.00630 |      -0.00940 |      2.12e+07 |       0.00036 |       0.93970
-----------------------------------
| EpLenMean       | 19.8          |
| EpRewMean       | 8.27e+03      |
| EpThisIter      | 12            |
| EpisodesSoFar   | 1274          |
| TimeElapsed     | 1.96e+04      |
| TimestepsSoFar  | 35840         |
| ev_tdlam_before | 0.0112        |
| loss_ent        | 0.9397007     |
| loss_kl         | 0.0003594769  |
| loss_pol_entpen | -0.009397007  |
| loss_pol_surr   | -0.0063004196 |
| 

sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00032 |      -0.00884 |      4.15e+07 |      6.62e-05 |       0.88359
     -0.00379 |      -0.00885 |      4.15e+07 |       0.00048 |       0.88517
     -0.00761 |      -0.00888 |      4.15e+07 |       0.00124 |       0.88831
     -0.01010 |      -0.00888 |      4.15e+07 |       0.00156 |       0.88800
Evaluating losses...
     -0.01145 |      -0.00887 |      4.15e+07 |       0.00162 |       0.88689
----------------------------------
| EpLenMean       | 15.8         |
| EpRewMean       | 9.37e+03     |
| EpThisIter      | 21           |
| EpisodesSoFar   | 1373         |
| TimeElapsed     | 2.03e+04     |
| TimestepsSoFar  | 37376        |
| ev_tdlam_before | 0.0266       |
| loss_

sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00046 |      -0.00909 |      3.29e+07 |      6.61e-05 |       0.90889
     -0.00096 |      -0.00909 |      3.29e+07 |       0.00031 |       0.90919
     -0.00238 |      -0.00911 |      3.29e+07 |       0.00037 |       0.91067
     -0.00483 |      -0.00912 |      3.29e+07 |       0.00045 |       0.91230
Evaluating losses...
     -0.00608 |      -0.00913 |      3.29e+07 |       0.00056 |       0.91347
-----------------------------------
| EpLenMean       | 16.7          |
| EpRewMean       | 9.16e+03      |
| EpThisIter      | 19            |
| EpisodesSoFar   | 1461          |
| TimeElapsed     | 2.1e+04       |
| TimestepsSoFar  | 38912         |
| ev_tdlam_before | 0.0155        |
| loss_ent        | 0.91347325    |
| loss_kl         | 0.00056421256 |
| loss_pol_entpen | -0.009134733  |
| loss_pol_surr  

********** Iteration 158 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00088 |      -0.00774 |      4.51e+07 |      1.60e-05 |       0.77379
     -0.00015 |      -0.00774 |      4.51e+07 |      5.60e-05 |       0.77382
     -0.00247 |      -0.00775 |      4.51e+07 |       0.00019 |       0.77480
     -0.00423 |      -0.00776 |      4.51e+07 |       0.00054 |       0.77607
Evaluating losses...
     -0.00488 |      -0.00776 |      4.51e+07 |       0.00066 |       0.77616
------------------

********** Iteration 164 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00020 |      -0.00769 |      3.59e+07 |      6.94e-06 |       0.76910
     -0.00251 |      -0.00770 |      3.59e+07 |      9.46e-05 |       0.76993
     -0.00345 |      -0.00771 |      3.59e+07 |       0.00026 |       0.77096
     -0.00520 |      -0.00771 |      3.59e+07 |       0.00034 |       0.77130
Evaluating losses...
     -0.00546 |      -0.00771 |      3.59e+07 |       0.00039 |       0.77137
-----------------------------------
| EpLenMean       | 16.7          |
| EpRewMe

sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -9.61e-05 |      -0.00945 |      2.35e+07 |      1.71e-05 |       0.94510
     -0.00091 |      -0.00946 |      2.35e+07 |       0.00011 |       0.94626
     -0.00179 |      -0.00947 |      2.35e+07 |       0.00016 |       0.94661
     -0.00306 |      -0.00946 |      2.35e+07 |       0.00012 |       0.94604
Evaluating losses...
     -0.00382 |      -0.00945 |      2.35e+07 |      9.80e-05 |       0.94528
----------------------------------
| EpLenMean       | 17.9         |
| EpRewMean       | 8.59e+03     |
| EpThisIter      | 15           |
| EpisodesSoFar   | 1712         |
| TimeElapsed     | 2.3e+04      |
| TimestepsSoFar  | 43776        |
| ev_tdlam_before | 0.0221       |
| loss_ent        | 0.94527864   |
| loss_kl     

sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00040 |      -0.00802 |      4.09e+07 |      3.64e-06 |       0.80183
     -0.00174 |      -0.00801 |      4.09e+07 |      5.44e-05 |       0.80076
     -0.00237 |      -0.00800 |      4.09e+07 |       0.00012 |       0.80005
     -0.00270 |      -0.00800 |      4.09e+07 |       0.00013 |       0.80021
Evaluating losses...
     -0.00340 |      -0.00801 |      4.09e+07 |       0.00011 |       0.80082
-----------------------------------
| EpLenMean       | 15.7          |
| EpRewMean       | 9.19e+03      |
| EpThisIter      | 21            |
| EpisodesSoFar   | 1805          |
| TimeElapsed     | 2.37e+04      |
| TimestepsSoFar  | 45312         |
| ev_tdlam_bef

********** Iteration 182 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00017 |      -0.01149 |      5.42e+06 |      1.74e-06 |       1.14899
    -3.55e-05 |      -0.01149 |      5.42e+06 |      1.04e-05 |       1.14879
     -0.00057 |      -0.01149 |      5.42e+06 |      2.04e-05 |       1.14872
     -0.00071 |      -0.01149 |      5.42e+06 |      3.04e-05 |       1.14871
Evaluating losses...
     -0.00084 |      -0.01149 |      5.42e+06 |      3.11e-05 |       1.14882
------------------------------------
| EpLenMean       | 15.1           |
| EpRewMean       | 9.33e+03       |
| EpThisIter      | 4              |
| EpisodesSoFar   | 1896           |
| TimeElapsed     | 2.44e+04       |
| TimestepsSoFar  | 46848          |
| ev_tdlam_before | 0.00244        |
| loss_ent        | 1.1488211      |
| loss_kl         | 3

     -0.00089 |      -0.01044 |      1.30e+07 |      2.49e-05 |       1.04401
Evaluating losses...
     -0.00115 |      -0.01044 |      1.30e+07 |      1.71e-05 |       1.04380
-----------------------------------
| EpLenMean       | 21.1          |
| EpRewMean       | 8.74e+03      |
| EpThisIter      | 7             |
| EpisodesSoFar   | 1964          |
| TimeElapsed     | 2.5e+04       |
| TimestepsSoFar  | 48384         |
| ev_tdlam_before | 0.0177        |
| loss_ent        | 1.0438018     |
| loss_kl         | 1.7075607e-05 |
| loss_pol_entpen | -0.010438018  |
| loss_pol_surr   | -0.0011461545 |
| loss_vf_loss    | 13033821.0    |
-----------------------------------
********** Iteration 189 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending:

********** Iteration 195 ************
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
sending: start env 2
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     4.64e-07 |      -0.01115 |      4.61e+06 |     -2.28e-09 |       1.11545
    -1.47e-05 |      -0.01115 |      4.61e+06 |     -1.06e-09 |       1.11545
    -1.99e-05 |      -0.01115 |      4.61e+06 |      8.98e-10 |       1.11545
    -2.33e-05 |      -0.01115 |      4.61e+06 |      8.07e-09 |       1.11545
Evaluating losses...
    -2.70e-05 |      -0.01115 |      4.61e+06 |      8.49e-09 |       1.11545
------------------------------------
| EpLenMean       | 21.1           |
| EpRewMean       | 8.7e+03        |
| EpThisIter      | 7              |
| EpisodesSoFar   | 2051           |
| TimeElapsed     | 2.57e+04       |
| TimestepsSoFar  | 50176          |
| ev_tdlam_before | 0.021          |
| loss_ent      

In [4]:
int(config.fast_simulation)

AttributeError: 'WebotConfig' object has no attribute 'fast_simulation'