In [1]:
import sys
sys.path.insert(0,'../../backend')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math

import gym
import stable_baselines
from stable_baselines import A2C, ACER, ACKTR, DQN, DDPG, SAC, PPO1, PPO2, TD3, TRPO
from stable_baselines.common.env_checker import check_env
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.gail import ExpertDataset, generate_expert_traj

import webotsgym as wg

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [2]:
from webotsgym.utils import exponential_decay
from webotsgym.env.reward import step_pen_exp

class MyEval(wg.WbtReward):
    def __init__(self, env, config):
        super(MyEval, self).__init__(env, config)

    def calc_reward(self):
        target_distance = self.env.get_target_distance(False)
        if target_distance < 0.1:
            print("Calc_reward target bonus: ", 5000 + 5000 * (1 - abs(self.env.state.speed)))
            return 5000 + 5000 * (1 - abs(self.env.state.speed))
        else:
            reward = 0
            reward += -2
            
            if len(self.env.distances) > 0 and len(self.env.history) > 1:
                target_diff = self.env.distances[-1] - self.env.get_target_distance(False)
                
                gps_diff_0 = self.env.history[-1].gps_actual[0] - self.env.state.gps_actual[0]
                gps_diff_1 = self.env.history[-1].gps_actual[1] - self.env.state.gps_actual[1]
                
                #print("history: ", self.env.history[-1].gps_actual[0])
                #print("actual:  " , self.env.state.gps_actual[0])
                
                actual_diff = math.sqrt(gps_diff_0**2 + gps_diff_1**2)
                
                if actual_diff > 0:
                    diff_rew = 4 * ( target_diff  / actual_diff)
                else:
                    diff_rew = 0
                
                
                
                if abs(diff_rew) > 4:
                    print("============= Quotient > 1 !? ==========")
                    print("Target_diff: ", target_diff)
                    print("Actual_diff: ", actual_diff)
                    print("diff_rew:    ", diff_rew)
                    
                if diff_rew < 0:
                    diff_rew = diff_rew * 2
                
                #print("Target_diff: ", target_diff)
                #print("Actual_diff: ", actual_diff)
                #print("diff_rew:    ", diff_rew)  
                    
  
                reward += diff_rew
                    
            if self.env.state.action_denied:
                reward += -5
        
            if self.env.state.touching:
                reward += -100
        return reward

    def check_done(self):
        if self.env.total_reward < -5000:
            print("reward boundary, reward: ", self.env.total_reward)
            return True
        if self.env.get_target_distance(False) < 0.1:
            print("target reached, reward: ", self.env.total_reward)
            return True
        if self.env.total_reward > 25000:
            print("Stop hacking!, reward: ", self.env.total_reward)
            return True
        return False

In [3]:
config = wg.WbtConfig()
config.world_size = 3
config.num_obstacles = 0
config.sim_mode = wg.config.SimSpeedMode.FAST
config.sim_step_every_x = 10
config.relative_action = True
config.direction_type = wg.config.DirectionType.STEERING

In [4]:
env = wg.WbtGym(train=True, 
                evaluate_class=MyEval,
                action_class = wg.WbtActContinuous(config=config, bound=0.3, relative = True),
                config=config)



Accepting on Port:  10201


In [5]:
model_name = "3x3_reach_target_v7_fabi"
model = PPO1("MlpPolicy", env, timesteps_per_actorbatch = 5000, tensorboard_log="./{}".format(model_name))





Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.









Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [6]:
while True:
    model.learn(total_timesteps=400000)
    model.save("./model_{}".format(model_name))


Calc_reward target bonus:  9953.726520761847
target reached, reward:  -2676.955300794289
Calc_reward target bonus:  9937.571734189987
target reached, reward:  -2082.2787528221725
Calc_reward target bonus:  9936.06608826667
target reached, reward:  -4301.197645979009
reward boundary, reward:  -5011.78496217159
Calc_reward target bonus:  9967.884996440262
target reached, reward:  -2786.3158705195274
Calc_reward target bonus:  9955.11980727315
target reached, reward:  -502.0220114805631
reward boundary, reward:  -5060.75909023258
reward boundary, reward:  -5081.787975784749
Calc_reward target bonus:  9966.926760971546
target reached, reward:  -2586.0843372386985
reward boundary, reward:  -5085.385354616509
Calc_reward target bonus:  9957.23347645253
target reached, reward:  -46.09213707020795
reward boundary, reward:  -5092.360660905982
Calc_reward target bonus:  9958.506072871387
target reached, reward:  -21.650217047281217
reward boundary, reward:  -5063.640393148449
reward boundary, r

reward boundary, reward:  -5042.063128962801
reward boundary, reward:  -5028.057314338788
reward boundary, reward:  -5033.1948825244
reward boundary, reward:  -5068.149195999297
reward boundary, reward:  -5081.6985632525675
Calc_reward target bonus:  9967.309110797942
target reached, reward:  -797.6626652729066
reward boundary, reward:  -5011.515769642569
reward boundary, reward:  -5005.151930431336
reward boundary, reward:  -5066.437442860197
Calc_reward target bonus:  9966.89832303673
target reached, reward:  -23.982540690070778
reward boundary, reward:  -5038.656100303745
reward boundary, reward:  -5003.951506507328
reward boundary, reward:  -5026.908420455407
reward boundary, reward:  -5035.388537122622
reward boundary, reward:  -5008.318367698914
reward boundary, reward:  -5084.663768599564
reward boundary, reward:  -5013.596501942358
reward boundary, reward:  -5110.403896296842
reward boundary, reward:  -5037.72859683909
reward boundary, reward:  -5011.176263696755
reward boundar

reward boundary, reward:  -5049.521082850137
reward boundary, reward:  -5003.095661683788
reward boundary, reward:  -5034.249043041475
Calc_reward target bonus:  9937.494383193552
target reached, reward:  -2360.6611302976607
reward boundary, reward:  -5070.77984760654
reward boundary, reward:  -5026.209842677041
reward boundary, reward:  -5010.75440818401
reward boundary, reward:  -5078.995091921017
Calc_reward target bonus:  9930.003616027534
target reached, reward:  -351.51725900131635
reward boundary, reward:  -5003.474638167398
Calc_reward target bonus:  9934.423225931823
target reached, reward:  5.658348175192548
reward boundary, reward:  -5021.128803858003
reward boundary, reward:  -5112.630067149406
reward boundary, reward:  -5077.642749479575
reward boundary, reward:  -5023.832424526758
reward boundary, reward:  -5108.466488634694
reward boundary, reward:  -5043.6875494953365
Calc_reward target bonus:  9946.596841327846
target reached, reward:  -1979.3987200624722
reward bounda

reward boundary, reward:  -5067.03695492444
reward boundary, reward:  -5025.0277149855
reward boundary, reward:  -5066.9132642747
reward boundary, reward:  -5055.466332613996
reward boundary, reward:  -5102.410536925746
reward boundary, reward:  -5063.545871670744
reward boundary, reward:  -5011.339839194232
reward boundary, reward:  -5093.001242713122
reward boundary, reward:  -5048.574140879544
reward boundary, reward:  -5006.612299883988
reward boundary, reward:  -5091.173440295323
reward boundary, reward:  -5097.13941066977
reward boundary, reward:  -5010.972974341992
reward boundary, reward:  -5002.563638546232
Calc_reward target bonus:  9964.677125681192
target reached, reward:  -1326.5174111517438
Calc_reward target bonus:  9941.690191626549
target reached, reward:  -2992.1969892706197
reward boundary, reward:  -5015.44263143412
reward boundary, reward:  -5000.81831439608
reward boundary, reward:  -5057.157663581482
reward boundary, reward:  -5038.608710151835
reward boundary, r

reward boundary, reward:  -5044.675102101811
reward boundary, reward:  -5003.408942249641
reward boundary, reward:  -5013.184551993358
reward boundary, reward:  -5015.40983051562
reward boundary, reward:  -5017.169744424888
reward boundary, reward:  -5015.6803240417275
reward boundary, reward:  -5052.470718146366
reward boundary, reward:  -5002.365487935991
reward boundary, reward:  -5033.077094776051
reward boundary, reward:  -5065.638350508969
reward boundary, reward:  -5012.604811101952
reward boundary, reward:  -5013.309270584576
Calc_reward target bonus:  9947.955547831953
target reached, reward:  -771.6067505054159
reward boundary, reward:  -5085.527431797205
reward boundary, reward:  -5081.870857952119
Calc_reward target bonus:  9950.122563168406
target reached, reward:  -474.3383577849624
reward boundary, reward:  -5084.023378141542
reward boundary, reward:  -5091.608717116634
reward boundary, reward:  -5030.489141400084
reward boundary, reward:  -5046.577027433651
reward bound

reward boundary, reward:  -5044.111315888915
reward boundary, reward:  -5050.249046047544
reward boundary, reward:  -5004.07342934692
Calc_reward target bonus:  9946.353775449097
target reached, reward:  -20.9177229762562
reward boundary, reward:  -5040.353434331422
reward boundary, reward:  -5038.027414888604
reward boundary, reward:  -5095.460224580753
Calc_reward target bonus:  9945.807796902955
target reached, reward:  -1348.2056308879664
reward boundary, reward:  -5069.3918972947
reward boundary, reward:  -5020.813182025185
reward boundary, reward:  -5076.9476571426185
reward boundary, reward:  -5081.4217793510425
reward boundary, reward:  -5071.983855310939
reward boundary, reward:  -5092.233420923056
reward boundary, reward:  -5011.2873485480995
reward boundary, reward:  -5004.313461489002
reward boundary, reward:  -5008.278544375327
reward boundary, reward:  -5028.737628950794
reward boundary, reward:  -5047.684835104373
reward boundary, reward:  -5005.665053791949
reward bound

Calc_reward target bonus:  9939.056485891342
target reached, reward:  -4841.598008028248
Calc_reward target bonus:  9976.463369093835
target reached, reward:  14.303097740828186
reward boundary, reward:  -5004.857316884843
reward boundary, reward:  -5049.174778569995
reward boundary, reward:  -5086.800112761672
reward boundary, reward:  -5019.072254252589
reward boundary, reward:  -5042.945723689496
reward boundary, reward:  -5108.845815451193
reward boundary, reward:  -5063.410467738319
reward boundary, reward:  -5095.387825266106
reward boundary, reward:  -5018.898613811679
reward boundary, reward:  -5002.189671426491
reward boundary, reward:  -5056.572119955541
reward boundary, reward:  -5043.297170529572
reward boundary, reward:  -5006.067824100694
reward boundary, reward:  -5003.001131454269
reward boundary, reward:  -5075.039373383647
reward boundary, reward:  -5053.48159973469
reward boundary, reward:  -5065.473591632102
reward boundary, reward:  -5029.370339836563
reward bounda

reward boundary, reward:  -5111.964659022376
reward boundary, reward:  -5002.444877831534
reward boundary, reward:  -5100.18971427374
Calc_reward target bonus:  9930.573548190296
target reached, reward:  -1894.0208333368691
reward boundary, reward:  -5006.228319833779
reward boundary, reward:  -5015.561055685917
Calc_reward target bonus:  9952.070899307728
target reached, reward:  -3775.245384035992
reward boundary, reward:  -5032.1406908087865
reward boundary, reward:  -5000.33683812471
reward boundary, reward:  -5009.309589391419
reward boundary, reward:  -5047.94736529148
reward boundary, reward:  -5100.5130940901345
Calc_reward target bonus:  9973.21845497936
target reached, reward:  -2850.5248270301836
reward boundary, reward:  -5077.001782413044
reward boundary, reward:  -5047.442014043714
reward boundary, reward:  -5084.936899788531
reward boundary, reward:  -5005.604734807112
reward boundary, reward:  -5011.036544122521
reward boundary, reward:  -5024.036115787965
reward bounda

Calc_reward target bonus:  9947.462556883693
target reached, reward:  -53.31431367688487
reward boundary, reward:  -5094.937865175177
reward boundary, reward:  -5081.070604316776
reward boundary, reward:  -5043.364455784909
Calc_reward target bonus:  9960.17839293927
target reached, reward:  -160.99131879800444
reward boundary, reward:  -5000.5958430828405
reward boundary, reward:  -5025.862611302457
reward boundary, reward:  -5041.861737608151
reward boundary, reward:  -5070.656331285023
reward boundary, reward:  -5066.123978850504
reward boundary, reward:  -5110.159008182138
reward boundary, reward:  -5066.462352958396
reward boundary, reward:  -5039.7919794594545
reward boundary, reward:  -5004.358739837213
reward boundary, reward:  -5005.82193765625
reward boundary, reward:  -5001.617150454353
reward boundary, reward:  -5102.103888007775
reward boundary, reward:  -5010.978472658556
reward boundary, reward:  -5051.225260452469
reward boundary, reward:  -5003.932120357629
reward boun

reward boundary, reward:  -5024.274766837852
reward boundary, reward:  -5012.573899209338
reward boundary, reward:  -5015.762245020564
reward boundary, reward:  -5097.117661142421
reward boundary, reward:  -5052.560712879059
reward boundary, reward:  -5049.630199120737
reward boundary, reward:  -5063.8271284975
reward boundary, reward:  -5000.624016485967
reward boundary, reward:  -5107.608698974231
Calc_reward target bonus:  9963.63085694611
target reached, reward:  -4608.416416493089
reward boundary, reward:  -5072.410447661121
Calc_reward target bonus:  9955.76255954802
target reached, reward:  -328.3372791506059
reward boundary, reward:  -5097.103476662879
Calc_reward target bonus:  9964.741012081504
target reached, reward:  -1427.8492446441458
reward boundary, reward:  -5096.39373137093
reward boundary, reward:  -5006.292582368676
reward boundary, reward:  -5092.579194163408
reward boundary, reward:  -5013.518487864143
reward boundary, reward:  -5020.672674401707
reward boundary, 

reward boundary, reward:  -5042.572059685398
Calc_reward target bonus:  9957.760307006538
target reached, reward:  -39.47203910660749
reward boundary, reward:  -5112.015928887402
Calc_reward target bonus:  9930.956466123462
target reached, reward:  -2547.9338278562523
reward boundary, reward:  -5073.8345869133545
reward boundary, reward:  -5032.720299436853
reward boundary, reward:  -5005.754777927353
reward boundary, reward:  -5014.360326388965
reward boundary, reward:  -5021.054748390739
Calc_reward target bonus:  9969.713299069554
target reached, reward:  -239.08119967386475
Calc_reward target bonus:  9950.217832811177
target reached, reward:  -2845.72357765104
reward boundary, reward:  -5037.202741298631
reward boundary, reward:  -5047.8141076278
reward boundary, reward:  -5008.562140136814
reward boundary, reward:  -5053.83631274971
reward boundary, reward:  -5007.572319798849
Calc_reward target bonus:  9938.743491657078
target reached, reward:  -3514.09533824761
reward boundary, 

reward boundary, reward:  -5020.331413424421
Calc_reward target bonus:  9980.19845224917
target reached, reward:  -2941.481958160126
reward boundary, reward:  -5092.7042228838145
Calc_reward target bonus:  9937.801454216242
target reached, reward:  -1267.3799776171793
reward boundary, reward:  -5092.6076252107005
Calc_reward target bonus:  9933.208851143718
target reached, reward:  -164.759199161224
Calc_reward target bonus:  9943.007533438504
target reached, reward:  -3135.04123740739
reward boundary, reward:  -5094.010698899345
reward boundary, reward:  -5072.700372207015
reward boundary, reward:  -5009.420782371464
reward boundary, reward:  -5082.81648215644
reward boundary, reward:  -5063.741025251941
reward boundary, reward:  -5051.553866085173
reward boundary, reward:  -5005.545581155494
reward boundary, reward:  -5109.441089432999
reward boundary, reward:  -5009.725743902914
reward boundary, reward:  -5082.3082733014635
reward boundary, reward:  -5098.626964578865
Calc_reward ta

reward boundary, reward:  -5010.65122755189
reward boundary, reward:  -5075.901397948165
Calc_reward target bonus:  9946.86322286725
target reached, reward:  -1813.2382154461063
Calc_reward target bonus:  9969.452181831002
target reached, reward:  -4426.263816721101
reward boundary, reward:  -5007.155493206959
Calc_reward target bonus:  9993.911019410007
target reached, reward:  -264.4368272428144
reward boundary, reward:  -5033.6521020729515
reward boundary, reward:  -5007.513256836932
reward boundary, reward:  -5056.609429622604
reward boundary, reward:  -5000.400490563286
reward boundary, reward:  -5074.739444227892
reward boundary, reward:  -5076.168451941293
reward boundary, reward:  -5049.696718817513
reward boundary, reward:  -5114.9441261817
reward boundary, reward:  -5094.368854492147
reward boundary, reward:  -5103.345630645184
reward boundary, reward:  -5000.427440866008
reward boundary, reward:  -5071.986507850273
reward boundary, reward:  -5013.049413847439
reward boundary

reward boundary, reward:  -5006.205961831288
reward boundary, reward:  -5011.458159488862
reward boundary, reward:  -5002.294551867789
reward boundary, reward:  -5042.324530133241
reward boundary, reward:  -5108.373180631487
reward boundary, reward:  -5011.285564746235
reward boundary, reward:  -5036.815869097555
reward boundary, reward:  -5067.147270843769
reward boundary, reward:  -5106.760096681541
Calc_reward target bonus:  9939.24805894494
target reached, reward:  -2526.100626437312
reward boundary, reward:  -5004.016210513662
reward boundary, reward:  -5000.987129591923
reward boundary, reward:  -5078.49594559034
reward boundary, reward:  -5088.004593704478
Calc_reward target bonus:  9929.5897455886
target reached, reward:  5.474526504498275
reward boundary, reward:  -5066.582770064436
Calc_reward target bonus:  9982.698363019153
target reached, reward:  -316.9958747552592
reward boundary, reward:  -5078.764799813439
reward boundary, reward:  -5004.818756412629
Calc_reward target

reward boundary, reward:  -5009.41478027289
Calc_reward target bonus:  9965.66201094538
target reached, reward:  -6.9460589740779
reward boundary, reward:  -5076.549647136727
reward boundary, reward:  -5059.61269790361
reward boundary, reward:  -5106.225409502941
reward boundary, reward:  -5053.821119388993
reward boundary, reward:  -5103.553304508353
reward boundary, reward:  -5008.501384589659
reward boundary, reward:  -5002.913582041204
reward boundary, reward:  -5004.615108651821
reward boundary, reward:  -5065.272614125093
Calc_reward target bonus:  9936.282173730433
target reached, reward:  -2544.4386177330257
reward boundary, reward:  -5079.882182822641
reward boundary, reward:  -5096.427525699159
Calc_reward target bonus:  9960.179920308292
target reached, reward:  -4565.060776438857
Calc_reward target bonus:  9954.22839652747
target reached, reward:  -173.77572709520305
reward boundary, reward:  -5060.771534484901
Calc_reward target bonus:  9952.837782911956
target reached, re

Calc_reward target bonus:  9959.117430262268
target reached, reward:  -214.95931013620074
reward boundary, reward:  -5087.618756889837
reward boundary, reward:  -5000.870771145679
reward boundary, reward:  -5033.309643517034
reward boundary, reward:  -5056.971780946372
reward boundary, reward:  -5013.060719754811
Calc_reward target bonus:  9951.485958881676
target reached, reward:  -1383.4769218995177
reward boundary, reward:  -5063.481339738346
reward boundary, reward:  -5001.877019053784
reward boundary, reward:  -5046.003110324133
reward boundary, reward:  -5102.801639836329
Calc_reward target bonus:  9932.930935174227
target reached, reward:  -1045.6542641335138
reward boundary, reward:  -5027.207150409324
Calc_reward target bonus:  9937.074114568532
target reached, reward:  -262.6070314601251
reward boundary, reward:  -5088.291150831093
reward boundary, reward:  -5074.141941035749
reward boundary, reward:  -5063.227974186342
Calc_reward target bonus:  9949.731808155775
target reac

reward boundary, reward:  -5017.866980200347
reward boundary, reward:  -5076.980021480632
Calc_reward target bonus:  9936.818284913898
target reached, reward:  -1137.4463534907482
Calc_reward target bonus:  9965.953892096877
target reached, reward:  -1192.4713915843902
reward boundary, reward:  -5000.754309867018
reward boundary, reward:  -5067.9085136521235
Calc_reward target bonus:  9949.471210129559
target reached, reward:  -47.62763537823558
reward boundary, reward:  -5003.723292334954
reward boundary, reward:  -5059.676731729207
reward boundary, reward:  -5067.098526090106
Calc_reward target bonus:  9960.951597895473
target reached, reward:  -319.3405709804392
Calc_reward target bonus:  9948.652968741953
target reached, reward:  -2051.8211565251445
reward boundary, reward:  -5107.789910735889
reward boundary, reward:  -5033.022856805581
reward boundary, reward:  -5005.936943352566
reward boundary, reward:  -5106.976690716729
reward boundary, reward:  -5002.366969958185
reward boun

reward boundary, reward:  -5062.9698415905905
reward boundary, reward:  -5045.388969963286
Calc_reward target bonus:  9961.461944039911
target reached, reward:  -1379.1540127566632
Calc_reward target bonus:  9945.421013981104
target reached, reward:  -876.0373782149227
reward boundary, reward:  -5000.045692628029
Calc_reward target bonus:  9977.35170647502
target reached, reward:  -266.9962881502819
reward boundary, reward:  -5012.335623494531
reward boundary, reward:  -5099.033347213673
reward boundary, reward:  -5039.434497945199
reward boundary, reward:  -5000.312506662513
reward boundary, reward:  -5043.422187108078
Calc_reward target bonus:  9941.717269830406
target reached, reward:  -4500.424671301364
Calc_reward target bonus:  9943.379661999643
target reached, reward:  -533.1292698764299
reward boundary, reward:  -5004.46332076138
reward boundary, reward:  -5084.414108727462
reward boundary, reward:  -5097.611576000957
Calc_reward target bonus:  9917.691936716437
target reached,

reward boundary, reward:  -5001.565603582603
Calc_reward target bonus:  9939.39162697643
target reached, reward:  -2816.578318729381
reward boundary, reward:  -5005.824814038805
reward boundary, reward:  -5000.042724836192
Calc_reward target bonus:  9973.192906472832
target reached, reward:  -4542.979356331597
reward boundary, reward:  -5013.783788019125
reward boundary, reward:  -5097.109158505118
Calc_reward target bonus:  9946.0021359846
target reached, reward:  -3139.3101294122985
reward boundary, reward:  -5021.011493780071
reward boundary, reward:  -5104.656946587009
reward boundary, reward:  -5063.383928996143
reward boundary, reward:  -5006.948113307542
Calc_reward target bonus:  9939.70732204616
target reached, reward:  -2163.779035614979
reward boundary, reward:  -5046.49136240955
reward boundary, reward:  -5018.2701809460705
Calc_reward target bonus:  9950.526184402406
target reached, reward:  10.167878153747615
Calc_reward target bonus:  9952.756608836353
target reached, re

reward boundary, reward:  -5064.212219424012
Calc_reward target bonus:  9951.111897826195
target reached, reward:  -2125.5688138113114
reward boundary, reward:  -5004.847873095629
reward boundary, reward:  -5023.956670221235
Calc_reward target bonus:  9973.478058818728
target reached, reward:  -94.60238543105139
Calc_reward target bonus:  9938.12563829124
target reached, reward:  3.5708326708359146
Calc_reward target bonus:  9978.331436868757
target reached, reward:  -2791.9915322166585
reward boundary, reward:  -5011.913348803724
reward boundary, reward:  -5025.19332658341
reward boundary, reward:  -5002.721273433853
Calc_reward target bonus:  9922.504867427051
target reached, reward:  -2504.6039794922885
reward boundary, reward:  -5041.637999746084
Calc_reward target bonus:  9935.982064343989
target reached, reward:  -2153.6119437612447
Calc_reward target bonus:  9937.99907155335
target reached, reward:  -3281.364597981121
Calc_reward target bonus:  9944.903454743326
target reached, 

reward boundary, reward:  -5057.518049827966
Calc_reward target bonus:  9971.59328777343
target reached, reward:  -632.3447417294159
Calc_reward target bonus:  9957.165969535708
target reached, reward:  -1132.9073928619096
reward boundary, reward:  -5095.400677257691
reward boundary, reward:  -5039.221981620707
reward boundary, reward:  -5096.292804641102
Calc_reward target bonus:  9971.068350132555
target reached, reward:  -268.53333859402096
reward boundary, reward:  -5106.104882666933
Calc_reward target bonus:  9929.650914855301
target reached, reward:  -3616.09835474525
reward boundary, reward:  -5074.87148569857
reward boundary, reward:  -5001.143311583507
Calc_reward target bonus:  9962.103134021163
target reached, reward:  -722.8773849925439
reward boundary, reward:  -5098.982794076141
reward boundary, reward:  -5095.045740520431
Calc_reward target bonus:  9988.997058244422
target reached, reward:  -106.12908694434881
Calc_reward target bonus:  9940.777434967458
target reached, 

Calc_reward target bonus:  9952.144990675151
target reached, reward:  -679.3540691663236
reward boundary, reward:  -5099.70124725426
Calc_reward target bonus:  9958.590064197779
target reached, reward:  -273.02772282758815
reward boundary, reward:  -5025.956913350244
reward boundary, reward:  -5049.13798308853
Calc_reward target bonus:  9932.814980857074
target reached, reward:  -1023.5907871771518
Calc_reward target bonus:  9966.37055883184
target reached, reward:  -1047.2130235606196
reward boundary, reward:  -5025.3158005997975
reward boundary, reward:  -5012.899105919436
Calc_reward target bonus:  9912.187485024333
target reached, reward:  -254.24987604881878
Calc_reward target bonus:  9940.456221811473
target reached, reward:  -2857.6257983795276
reward boundary, reward:  -5056.507170356561
reward boundary, reward:  -5023.009161692897
reward boundary, reward:  -5071.337315841055
reward boundary, reward:  -5002.317464490917
reward boundary, reward:  -5021.514818514372
Calc_reward t

Calc_reward target bonus:  9933.537826873362
target reached, reward:  -2711.8466546210843
Calc_reward target bonus:  9947.58578017354
target reached, reward:  -342.20483902149994
Calc_reward target bonus:  9951.3277458027
target reached, reward:  -1887.8703319617828
Calc_reward target bonus:  9947.18220550567
target reached, reward:  -1708.1543420548771
Calc_reward target bonus:  9934.553653001785
target reached, reward:  -1567.4022618606928
reward boundary, reward:  -5001.531507774039
reward boundary, reward:  -5070.354717302179
Calc_reward target bonus:  9974.623823072761
target reached, reward:  -4423.752432818756
reward boundary, reward:  -5047.274535461368
Calc_reward target bonus:  9943.169308826327
target reached, reward:  -2386.541064876693
Calc_reward target bonus:  9959.471826441586
target reached, reward:  -184.81091198527852
reward boundary, reward:  -5007.676485812368
Calc_reward target bonus:  9966.959720477462
target reached, reward:  -308.21624840008974
reward boundary,

Calc_reward target bonus:  9940.860401839018
target reached, reward:  -634.3341819385078
reward boundary, reward:  -5004.597749631881
reward boundary, reward:  -5004.56437954648
Calc_reward target bonus:  9942.381265573204
target reached, reward:  -230.39442401490808
reward boundary, reward:  -5067.083735736528
Calc_reward target bonus:  9988.766929600388
target reached, reward:  -2489.3217525190794
Calc_reward target bonus:  9966.179772745818
target reached, reward:  -369.9430732482249
reward boundary, reward:  -5022.688636751842
reward boundary, reward:  -5019.184886785876
Calc_reward target bonus:  9960.50987392664
target reached, reward:  -4354.160578985016
Calc_reward target bonus:  9945.664079859853
target reached, reward:  -439.07102128997235
Calc_reward target bonus:  9923.310000449419
target reached, reward:  -994.4975109051604
reward boundary, reward:  -5050.187466038611
Calc_reward target bonus:  9963.1851259619
target reached, reward:  -841.8490437851491
reward boundary, re

Calc_reward target bonus:  9944.910178892314
target reached, reward:  -85.06231289407572
Calc_reward target bonus:  9966.317124199122
target reached, reward:  -370.73634546197957
reward boundary, reward:  -5029.546376148262
reward boundary, reward:  -5068.023273140908
Calc_reward target bonus:  9961.21224714443
target reached, reward:  -1246.0482805478364
Calc_reward target bonus:  9974.65163236484
target reached, reward:  -2010.7887534586969
Calc_reward target bonus:  9949.064943939447
target reached, reward:  -1426.4902389425567
Calc_reward target bonus:  9976.165816187859
target reached, reward:  -313.22023723887685
Calc_reward target bonus:  9973.789507057518
target reached, reward:  -2920.6683546103795
Calc_reward target bonus:  9942.647307179868
target reached, reward:  -3354.2032348977414
reward boundary, reward:  -5040.0078023055885
reward boundary, reward:  -5073.450558359563
Calc_reward target bonus:  9997.23637884017
target reached, reward:  -1039.5147675382743
Calc_reward t

reward boundary, reward:  -5063.730782023741
reward boundary, reward:  -5000.136464878225
reward boundary, reward:  -5086.76844261315
Calc_reward target bonus:  9941.160171292722
target reached, reward:  -1744.997386665127
reward boundary, reward:  -5074.636491812401
Calc_reward target bonus:  9966.744517441839
target reached, reward:  -1500.062548248505
reward boundary, reward:  -5005.3954079221385
Calc_reward target bonus:  9941.299930214882
target reached, reward:  -87.87210625542794
Calc_reward target bonus:  9963.637120090425
target reached, reward:  -3423.8952454327646
reward boundary, reward:  -5008.7555527549175
Calc_reward target bonus:  9947.357634082437
target reached, reward:  -323.00538932697435
Calc_reward target bonus:  9990.95257022418
target reached, reward:  -558.1357529653029
Calc_reward target bonus:  9954.188629053533
target reached, reward:  -1786.1873883389417
reward boundary, reward:  -5083.624960752329
Calc_reward target bonus:  9934.086920693517
target reached

Calc_reward target bonus:  9929.498406127095
target reached, reward:  -1015.1412366464141
Calc_reward target bonus:  9937.661378644407
target reached, reward:  -1919.7192349673649
Calc_reward target bonus:  9943.093820475042
target reached, reward:  -1471.1953825052035
reward boundary, reward:  -5005.954578070907
Calc_reward target bonus:  9972.707871347666
target reached, reward:  -252.6251443231972
reward boundary, reward:  -5085.3102720807665
Calc_reward target bonus:  9946.775431744754
target reached, reward:  -40.36013882913635
Calc_reward target bonus:  9967.69109973684
target reached, reward:  -2135.3012328726295
reward boundary, reward:  -5045.813998721561
reward boundary, reward:  -5042.989838486722
reward boundary, reward:  -5015.420919580796
reward boundary, reward:  -5039.927415426592
Calc_reward target bonus:  9976.921004708856
target reached, reward:  -1349.3292517649109
Calc_reward target bonus:  9951.554648578167
target reached, reward:  -147.1250214448096
Calc_reward t

reward boundary, reward:  -5081.790865955614
reward boundary, reward:  -5072.926196000027
Calc_reward target bonus:  9954.243092797697
target reached, reward:  -1020.0762031919114
Calc_reward target bonus:  9935.449417680502
target reached, reward:  -246.44055753628658
Calc_reward target bonus:  9949.65575169772
target reached, reward:  -1006.0028497371613
reward boundary, reward:  -5097.369390903202
reward boundary, reward:  -5011.331133912458
reward boundary, reward:  -5056.97028511485
reward boundary, reward:  -5059.314062164479
Calc_reward target bonus:  9950.893870554864
target reached, reward:  -150.37466629935955
reward boundary, reward:  -5053.601561523228
reward boundary, reward:  -5006.871915718444
reward boundary, reward:  -5000.401741077819
Calc_reward target bonus:  9955.732109956443
target reached, reward:  -4738.9859435711605
Calc_reward target bonus:  9949.364080093801
target reached, reward:  -2901.133912198509
reward boundary, reward:  -5106.292422669097
reward bounda

Calc_reward target bonus:  9939.022078178823
target reached, reward:  -1325.7860426647496
reward boundary, reward:  -5048.056717666421
Calc_reward target bonus:  9956.489615142345
target reached, reward:  -1770.4102030340255
Calc_reward target bonus:  9929.378922097385
target reached, reward:  -153.26771894989955
reward boundary, reward:  -5001.19361937585
Calc_reward target bonus:  9941.894034855068
target reached, reward:  -1511.0485958258726
Calc_reward target bonus:  9966.567398514599
target reached, reward:  -606.4438487070703
reward boundary, reward:  -5030.707867099102
Calc_reward target bonus:  9953.566757030785
target reached, reward:  -515.4506301041529
Calc_reward target bonus:  9984.074969543144
target reached, reward:  -885.3724031163982
Calc_reward target bonus:  9988.446291536093
target reached, reward:  -377.10110792147447
Calc_reward target bonus:  9952.849340625107
target reached, reward:  -4502.433032726943
Calc_reward target bonus:  9968.402362428606
target reached,

Calc_reward target bonus:  9927.326757460833
target reached, reward:  -354.56590856879
reward boundary, reward:  -5010.079715230457
reward boundary, reward:  -5004.923034905204
reward boundary, reward:  -5002.899011524575
Calc_reward target bonus:  9981.387990992516
target reached, reward:  -275.51582571213265
Calc_reward target bonus:  9932.926027104259
target reached, reward:  -2840.536753998472
Calc_reward target bonus:  9966.40962548554
target reached, reward:  -3208.6440054708582
Calc_reward target bonus:  9932.607891969383
target reached, reward:  -471.2587522107171
reward boundary, reward:  -5056.481768693459
reward boundary, reward:  -5100.197824438603
reward boundary, reward:  -5057.049291355731
Calc_reward target bonus:  9968.347316607833
target reached, reward:  -3791.820105283191
reward boundary, reward:  -5009.179195610033
Calc_reward target bonus:  9953.718166798353
target reached, reward:  -3227.362040912364
Calc_reward target bonus:  9950.720588676631
target reached, re

Calc_reward target bonus:  9951.619524508715
target reached, reward:  -2649.2262116483685
reward boundary, reward:  -5046.531969676818
reward boundary, reward:  -5009.276042213967
reward boundary, reward:  -5095.570206669813
Calc_reward target bonus:  9954.702383838594
target reached, reward:  -401.65934482549085
Calc_reward target bonus:  9953.217138536274
target reached, reward:  -1570.3857584800696
Calc_reward target bonus:  9940.746095962822
target reached, reward:  -3317.14433319079
reward boundary, reward:  -5002.826279961057
Calc_reward target bonus:  9931.9924460724
target reached, reward:  -409.3515176636139
Calc_reward target bonus:  9951.11140422523
target reached, reward:  -2543.1708779579403
reward boundary, reward:  -5017.937532307665
Calc_reward target bonus:  9947.22904637456
target reached, reward:  -87.75239914582305
reward boundary, reward:  -5064.297398851983
reward boundary, reward:  -5000.143898995554
Calc_reward target bonus:  9951.278609223664
target reached, re

Calc_reward target bonus:  9953.115386888385
target reached, reward:  -84.18555255501576
reward boundary, reward:  -5002.764686108098
reward boundary, reward:  -5052.570974823156
Calc_reward target bonus:  9947.546576149762
target reached, reward:  -1549.678704957757
reward boundary, reward:  -5062.881452230401
Calc_reward target bonus:  9969.96644185856
target reached, reward:  -193.4019971151126
Calc_reward target bonus:  9952.513608150184
target reached, reward:  -756.6594584670426
reward boundary, reward:  -5056.681443972672
reward boundary, reward:  -5067.403166682367
Calc_reward target bonus:  9968.218535650522
target reached, reward:  -631.5678600671569
Calc_reward target bonus:  9949.478711932898
target reached, reward:  -117.13857363713291
reward boundary, reward:  -5006.386974291189
Calc_reward target bonus:  9977.305885404348
target reached, reward:  -4714.798161316828
Calc_reward target bonus:  9975.12548463419
target reached, reward:  -734.6179797524944
Calc_reward target 

Calc_reward target bonus:  9922.074149362743
target reached, reward:  -4280.365314167324
reward boundary, reward:  -5031.807539935245
Calc_reward target bonus:  9985.952217830345
target reached, reward:  -115.23524394779947
Calc_reward target bonus:  9929.706738330424
target reached, reward:  -1276.932244234535
Calc_reward target bonus:  9923.727046698332
target reached, reward:  -590.9737525638244
Calc_reward target bonus:  9959.887810982764
target reached, reward:  -1214.5250062401826
reward boundary, reward:  -5005.009165420566
Calc_reward target bonus:  9946.902366355062
target reached, reward:  -773.4368437234697
Calc_reward target bonus:  9933.258150704205
target reached, reward:  -205.96637154984404
reward boundary, reward:  -5024.124211060706
Calc_reward target bonus:  9956.470746546984
target reached, reward:  -1263.3860019705967
Calc_reward target bonus:  9944.537747651339
target reached, reward:  -727.6985812439689
Calc_reward target bonus:  9940.69105014205
target reached, 

Calc_reward target bonus:  9936.267165467143
target reached, reward:  -1505.9826216150632
reward boundary, reward:  -5018.583857713709
Calc_reward target bonus:  9972.728553693742
target reached, reward:  -4231.017116915252
reward boundary, reward:  -5055.299742655483
reward boundary, reward:  -5060.937312621261
reward boundary, reward:  -5058.519603839924
reward boundary, reward:  -5046.9523986436
Calc_reward target bonus:  9953.231974504888
target reached, reward:  -2605.154831842161
reward boundary, reward:  -5038.869950862206
Calc_reward target bonus:  9937.085206620395
target reached, reward:  -611.957846393558
reward boundary, reward:  -5040.8962262092955
Calc_reward target bonus:  9931.155312806368
target reached, reward:  -421.1078564874358
Calc_reward target bonus:  9924.813080579042
target reached, reward:  -4504.170782199416
reward boundary, reward:  -5104.148209010247
Calc_reward target bonus:  9926.673620939255
target reached, reward:  -1658.1563987932082
Calc_reward targe

Calc_reward target bonus:  9941.083998419344
target reached, reward:  -643.5444269746378
Calc_reward target bonus:  9912.322470918298
target reached, reward:  -196.65336084169866
Calc_reward target bonus:  9965.52742086351
target reached, reward:  -1252.8947426589857
Calc_reward target bonus:  9950.086823664606
target reached, reward:  -97.78462502280968
reward boundary, reward:  -5019.540593001725
Calc_reward target bonus:  9974.479938391596
target reached, reward:  -2222.2472921825515
reward boundary, reward:  -5072.258931112785
Calc_reward target bonus:  9948.569838888943
target reached, reward:  -1547.3359095417732
reward boundary, reward:  -5002.036019197591
reward boundary, reward:  -5010.130010160303
Calc_reward target bonus:  9950.808472931385
target reached, reward:  -165.86694533286578
Calc_reward target bonus:  9965.400316286832
target reached, reward:  -0.9439914461639871
Calc_reward target bonus:  9963.491132948548
target reached, reward:  -824.0249239392799
Calc_reward ta

Calc_reward target bonus:  9944.989206269383
target reached, reward:  -687.7593461028496
Calc_reward target bonus:  9962.438715156168
target reached, reward:  -2152.8162034610764
Calc_reward target bonus:  9937.798380851746
target reached, reward:  -1661.7320221623447
Calc_reward target bonus:  9930.970524437726
target reached, reward:  -446.09894775709336
Calc_reward target bonus:  9941.603662446141
target reached, reward:  -2924.168773942129
Calc_reward target bonus:  9946.423778310418
target reached, reward:  -1442.638179268727
reward boundary, reward:  -5074.272860815484
reward boundary, reward:  -5006.629757381851
Calc_reward target bonus:  9929.054961539805
target reached, reward:  -465.5129216355389
Calc_reward target bonus:  9946.75297755748
target reached, reward:  -84.0085474886941
reward boundary, reward:  -5031.805324848139
reward boundary, reward:  -5061.0815061677395
Calc_reward target bonus:  9999.066431555548
target reached, reward:  -1076.5924800528842
Calc_reward targ

reward boundary, reward:  -5003.303763643701
Calc_reward target bonus:  9963.059814181179
target reached, reward:  -1932.3112560151626
reward boundary, reward:  -5061.808407055619
Calc_reward target bonus:  9951.770422048867
target reached, reward:  -64.94929482447675
Calc_reward target bonus:  9922.47173562646
target reached, reward:  -147.5420905138005
Calc_reward target bonus:  9939.54602163285
target reached, reward:  -215.49115581152668
Calc_reward target bonus:  9966.132787521929
target reached, reward:  -96.91943182432999
reward boundary, reward:  -5107.488806632878
Calc_reward target bonus:  9960.441659204662
target reached, reward:  -600.6281374916019
Calc_reward target bonus:  9932.011752389371
target reached, reward:  -1632.483736027284
Calc_reward target bonus:  9944.267561659217
target reached, reward:  -1019.7278358891642
Calc_reward target bonus:  9932.800796814263
target reached, reward:  -3938.9867804145642
reward boundary, reward:  -5075.46592672008
Calc_reward target

reward boundary, reward:  -5091.769203210448
reward boundary, reward:  -5055.086913671451
Calc_reward target bonus:  9997.357221436687
target reached, reward:  -4452.583977460951
Calc_reward target bonus:  9973.040886688977
target reached, reward:  -4045.6028101439974
reward boundary, reward:  -5013.505744051646
reward boundary, reward:  -5006.650466171791
reward boundary, reward:  -5041.451230985608
Calc_reward target bonus:  9933.960754424334
target reached, reward:  -1224.065439498884
reward boundary, reward:  -5079.358034371741
Calc_reward target bonus:  9999.562450539088
target reached, reward:  -22.09130478667701
Calc_reward target bonus:  9949.800055474043
target reached, reward:  -204.56253145444438
reward boundary, reward:  -5059.150055447202
reward boundary, reward:  -5107.724842087798
reward boundary, reward:  -5031.588033302421
reward boundary, reward:  -5031.835926003322
Calc_reward target bonus:  9935.289053246379
target reached, reward:  -803.6205967497214
reward boundar

Calc_reward target bonus:  9962.357447948307
target reached, reward:  -4673.409996603102
reward boundary, reward:  -5000.394102564773
reward boundary, reward:  -5024.814353057481
Calc_reward target bonus:  9945.303932763636
target reached, reward:  -346.42433052416254
Calc_reward target bonus:  9955.777055583894
target reached, reward:  -418.3060714619153
Calc_reward target bonus:  9949.014750309289
target reached, reward:  -2561.0839715847974
reward boundary, reward:  -5048.4950846371175
Calc_reward target bonus:  9942.977982573211
target reached, reward:  -75.07867018418641
reward boundary, reward:  -5009.18855303657
Calc_reward target bonus:  9969.873724039644
target reached, reward:  -747.282753703918
Calc_reward target bonus:  9972.733007743955
target reached, reward:  -1420.551685937749
Calc_reward target bonus:  9953.861446119845
target reached, reward:  -3803.9485669298383
Calc_reward target bonus:  9931.634892709553
target reached, reward:  -982.075564181299
Calc_reward target

Calc_reward target bonus:  9969.573148991913
target reached, reward:  -203.48150433298272
reward boundary, reward:  -5001.236838398795
Calc_reward target bonus:  9937.17257399112
target reached, reward:  -220.85550792805296
Calc_reward target bonus:  9993.079647538252
target reached, reward:  -814.7659771182363
Calc_reward target bonus:  9950.354467146099
target reached, reward:  -276.0745471317849
reward boundary, reward:  -5089.989661605584
Calc_reward target bonus:  9959.065620787442
target reached, reward:  -1442.788532057056
Calc_reward target bonus:  9922.778462059796
target reached, reward:  -555.4746992483551
Calc_reward target bonus:  9933.276097290218
target reached, reward:  -138.73411093101848
reward boundary, reward:  -5045.512022640767
reward boundary, reward:  -5008.355072315687
Calc_reward target bonus:  9959.19834356755
target reached, reward:  -587.815028004013
Calc_reward target bonus:  9962.901438120753
target reached, reward:  -1174.2086440550177
Calc_reward target

Calc_reward target bonus:  9941.636924631894
target reached, reward:  -919.5042950875494
reward boundary, reward:  -5001.715469852334
reward boundary, reward:  -5008.891268189312
Calc_reward target bonus:  9951.290544122458
target reached, reward:  -2257.826581685944
Calc_reward target bonus:  9950.473988428712
target reached, reward:  -2799.2284197341382
Calc_reward target bonus:  9941.366356797516
target reached, reward:  -543.1585929819973
reward boundary, reward:  -5004.614125750048
reward boundary, reward:  -5009.16408638258
Calc_reward target bonus:  9942.934508435428
target reached, reward:  -1029.5564173401722
Calc_reward target bonus:  9994.230944430456
target reached, reward:  -2957.473712932465
reward boundary, reward:  -5080.39140707293
Calc_reward target bonus:  9940.076465718448
target reached, reward:  -3440.571042323509
Calc_reward target bonus:  9951.569754630327
target reached, reward:  -1513.9243786757147
Calc_reward target bonus:  9964.312014635652
target reached, r

reward boundary, reward:  -5064.355235308795
Calc_reward target bonus:  9968.662827741355
target reached, reward:  7.819065229145164
Calc_reward target bonus:  9951.071310788393
target reached, reward:  -354.39703750836026
Calc_reward target bonus:  9928.952646441758
target reached, reward:  -76.98993475715028
Calc_reward target bonus:  9964.88812379539
target reached, reward:  -1196.0822978604997
reward boundary, reward:  -5019.594775830905
reward boundary, reward:  -5004.419393869613
Calc_reward target bonus:  9948.51347990334
target reached, reward:  -597.5108870627533
reward boundary, reward:  -5001.344283583142
reward boundary, reward:  -5073.117350035571
reward boundary, reward:  -5008.098551921475
Calc_reward target bonus:  9945.73260191828
target reached, reward:  -124.11683252916836
Calc_reward target bonus:  9940.684158354998
target reached, reward:  -327.4981010248018
Calc_reward target bonus:  9974.879836663604
target reached, reward:  -2031.4300450191447
Calc_reward target

Calc_reward target bonus:  9966.22092789039
target reached, reward:  -49.34653657992575
Calc_reward target bonus:  9942.656904459
target reached, reward:  -351.1293049827351
reward boundary, reward:  -5079.9719258281075
reward boundary, reward:  -5019.165207906529
Calc_reward target bonus:  9939.470957033336
target reached, reward:  3.7168971135560036
Calc_reward target bonus:  9931.31943512708
target reached, reward:  -630.8305957103811
reward boundary, reward:  -5066.763861596664
reward boundary, reward:  -5030.134305508185
Calc_reward target bonus:  9956.028792075813
target reached, reward:  -531.6212976691036
Calc_reward target bonus:  9956.20559900999
target reached, reward:  -2790.0506760171784
reward boundary, reward:  -5027.7965590879785
reward boundary, reward:  -5029.162180083992
Calc_reward target bonus:  9979.958403855562
target reached, reward:  -1191.8378788162997
reward boundary, reward:  -5066.207038947247
Calc_reward target bonus:  9928.493215702474
target reached, rew

Calc_reward target bonus:  9956.84671215713
target reached, reward:  -827.7962481459203
reward boundary, reward:  -5002.543719116005
Calc_reward target bonus:  9968.172314111143
target reached, reward:  -703.681032964496
reward boundary, reward:  -5083.067028101141
Calc_reward target bonus:  9955.365904606879
target reached, reward:  -2985.3891171571713
Calc_reward target bonus:  9942.188491113484
target reached, reward:  -96.36203289805918
reward boundary, reward:  -5086.885531858478
Calc_reward target bonus:  9952.48319581151
target reached, reward:  -4192.804285602983
Calc_reward target bonus:  9940.206678584218
target reached, reward:  -518.8193672170722
Calc_reward target bonus:  9950.673910789192
target reached, reward:  -585.7815555366409
Calc_reward target bonus:  9919.195119291544
target reached, reward:  -704.7970250794029
Calc_reward target bonus:  9991.52920965571
target reached, reward:  -281.75109780222374
reward boundary, reward:  -5031.979877081754
Calc_reward target bo

Calc_reward target bonus:  9999.230970497592
target reached, reward:  -3454.4358217743593
reward boundary, reward:  -5004.85332046499
Calc_reward target bonus:  9934.618072584271
target reached, reward:  -1840.91821932078
Calc_reward target bonus:  9955.100733786821
target reached, reward:  -3620.0709452595056
Calc_reward target bonus:  9932.11661465466
target reached, reward:  -361.44659835681125
Calc_reward target bonus:  9974.062314722687
target reached, reward:  -1007.6790745968167
reward boundary, reward:  -5078.8233253247245
Calc_reward target bonus:  9938.31585161388
target reached, reward:  -1232.9389896932116
Calc_reward target bonus:  9934.491184540093
target reached, reward:  -1610.5910950524637
Calc_reward target bonus:  9952.352889813483
target reached, reward:  -439.6150842397404
reward boundary, reward:  -5062.897422380888
Calc_reward target bonus:  9946.490470319986
target reached, reward:  -454.0665726001395
Calc_reward target bonus:  9932.90659505874
target reached, r

reward boundary, reward:  -5031.462362713949
Calc_reward target bonus:  9975.892379879951
target reached, reward:  -4092.0523970486624
reward boundary, reward:  -5011.040594297447
Calc_reward target bonus:  9987.86021838896
target reached, reward:  -642.4463082277351
Calc_reward target bonus:  9942.359537817538
target reached, reward:  -1287.5506164877581
reward boundary, reward:  -5054.440130442323
Calc_reward target bonus:  9937.376859597862
target reached, reward:  -1051.5483189185866
Calc_reward target bonus:  9963.287634309381
target reached, reward:  -441.6910175001151
Calc_reward target bonus:  9957.421896979213
target reached, reward:  -627.9775290075871
reward boundary, reward:  -5041.423940728251
Calc_reward target bonus:  9958.56923982501
target reached, reward:  -11.275507520333562
Calc_reward target bonus:  9939.611866138875
target reached, reward:  12.063047090663517
Calc_reward target bonus:  9976.11650498584
target reached, reward:  -1128.4592620065557
reward boundary, 

Calc_reward target bonus:  9952.781978063285
target reached, reward:  -88.10565466712492
Calc_reward target bonus:  9923.580484464765
target reached, reward:  -199.36980059039448
Calc_reward target bonus:  9963.057390414178
target reached, reward:  -2658.3865581479918
Calc_reward target bonus:  9926.449940539896
target reached, reward:  -152.57662778546208
Calc_reward target bonus:  9932.12919216603
target reached, reward:  -3497.075976560073
Calc_reward target bonus:  9976.168975699693
target reached, reward:  -107.96940084335603
Calc_reward target bonus:  9933.150778524578
target reached, reward:  -252.60309027928844
reward boundary, reward:  -5004.1280259677915
Calc_reward target bonus:  9973.79369335249
target reached, reward:  -41.220934605682444
Calc_reward target bonus:  9933.613394387066
target reached, reward:  -246.31223007420093
Calc_reward target bonus:  9973.170389421284
target reached, reward:  -162.44413562654296
Calc_reward target bonus:  9951.749024912715
target reache

reward boundary, reward:  -5040.9935820967
Calc_reward target bonus:  9955.304088070989
target reached, reward:  -2149.1129437182167
reward boundary, reward:  -5083.413891011284
Calc_reward target bonus:  9947.760724462569
target reached, reward:  -1084.7035496839358
reward boundary, reward:  -5007.220064093132
Calc_reward target bonus:  9953.839764930308
target reached, reward:  -432.61878580539707
Calc_reward target bonus:  9971.619539428502
target reached, reward:  -418.1186462984857
reward boundary, reward:  -5074.804288515831
Calc_reward target bonus:  9943.1770760566
target reached, reward:  -2255.3049947896125
reward boundary, reward:  -5057.174010350838
Calc_reward target bonus:  9949.511238373816
target reached, reward:  -499.23282512703605
Calc_reward target bonus:  9951.498890295625
target reached, reward:  -657.6793401653186
Calc_reward target bonus:  9970.638900995255
target reached, reward:  -635.9491644408961
Calc_reward target bonus:  9932.321887463331
target reached, r

reward boundary, reward:  -5105.75935228465
Calc_reward target bonus:  9948.45234323293
target reached, reward:  -1346.670162813232
Calc_reward target bonus:  9930.557166226208
target reached, reward:  -1487.462992585109
Calc_reward target bonus:  9950.719592161477
target reached, reward:  -125.99792508931871
reward boundary, reward:  -5055.630071723073
Calc_reward target bonus:  9948.739572428167
target reached, reward:  -1099.2635840123976
reward boundary, reward:  -5086.579258008681
reward boundary, reward:  -5058.605486357596
Calc_reward target bonus:  9939.594734460115
target reached, reward:  -1959.4618312499358
reward boundary, reward:  -5078.042829472979
Calc_reward target bonus:  9992.100147064775
target reached, reward:  -246.17006180574074
reward boundary, reward:  -5093.064215799598
Calc_reward target bonus:  9946.413305588067
target reached, reward:  -1847.2726607230709
Calc_reward target bonus:  9990.285818930715
target reached, reward:  -1588.737722752491
Calc_reward tar

Calc_reward target bonus:  9940.541028045118
target reached, reward:  -65.58991805248569
Calc_reward target bonus:  9966.069832444191
target reached, reward:  -197.41066640360214
reward boundary, reward:  -5001.21856127713
reward boundary, reward:  -5005.280560596286
reward boundary, reward:  -5107.1276578541765
reward boundary, reward:  -5001.088031382465
Calc_reward target bonus:  9975.398825481534
target reached, reward:  -587.2326907808688
reward boundary, reward:  -5097.427911816226
reward boundary, reward:  -5004.812691640019
reward boundary, reward:  -5094.930573566293
reward boundary, reward:  -5030.738163791931
Calc_reward target bonus:  9965.10801371187
target reached, reward:  -1679.2412252004738
Calc_reward target bonus:  9952.207333408296
target reached, reward:  -1194.3200695388898
Calc_reward target bonus:  9942.27396324277
target reached, reward:  -1025.501706107553
reward boundary, reward:  -5074.649094956677
reward boundary, reward:  -5038.833407789647
Calc_reward tar

KeyboardInterrupt: 