In [1]:
import numpy as np
import math
import random
import os
import pybullet as p
import pybullet_data
import gym
from gym import error, spaces, utils
from gym.utils import seeding
import matplotlib.pyplot as plt
import time

In [2]:
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Concatenate
from keras.optimizers import Adam

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import  SequentialMemory

In [4]:
class BikeEnv(gym.Env):
    def __init__(self):
        p.connect(p.GUI)
        p.resetDebugVisualizerCamera(cameraDistance=10, cameraYaw=0, cameraPitch=-40, cameraTargetPosition=[0.55,-0.35,0.2])
        self.action_space = spaces.Discrete(5)
        self.observation_space = spaces.Box(np.array([-1000]*10), np.array([1000]*10))
        self.timestep = 1./240.
        
    def step(self, action):
        #print(action,)
        print(p.getJointState(self.pid,1)[1])
        
        if action==0 :
            self.v = 0
        elif action==1 :
            self.v = self.v - 5
            if self.v < 0 : self.v =0 
        elif action==2 : 
            self.v = self.v - 10
            if self.v < 0 : self.v =0 
        elif action==3 : 
            self.v = self.v + 5
            if self.v > 100 : self.v = 100 
        else :
            self.v = self.v + 10
            if self.v > 100 : self.v = 100
                
        p.setJointMotorControl2(self.pid, 2, p.VELOCITY_CONTROL, targetVelocity=self.v, force=20)
        
       # if action[1] < 0.5 :   
       #     p.setJointMotorControl2(self.pid, 2, p.VELOCITY_CONTROL, targetVelocity=0, force=0) 
       # else:  
       #     p.setJointMotorControl2(self.pid, 2, p.VELOCITY_CONTROL, targetVelocity=5, force=5)    
    
        p.stepSimulation()
        #time.sleep(self.timestep)
        
        state = p.getLinkState(self.pid,0)[0]
        if state[2] <= 0.7 or  state[2] >= 2:
            reward = -1000
            done = True
        else :
            #reward = math.sqrt((self.origin[0]-state[0])**2+(self.origin[1]-state[1])**2)
            reward = 1
            done = False
        self.origin = state 
        
        velocity = p.getBaseVelocity(self.pid)
        state_object ,orient = p.getBasePositionAndOrientation(self.pid)
        norm = math.sqrt(orient[0]**2+orient[1]**2+orient[2]**2+orient[3]**2)
        observation =velocity[0]+velocity[1]+ orient
        
        info = {'x':state_object[0],'y':state_object[1],'z':state_object[2]}
        return observation, reward, done, info
            
        
    def reset(self):
        p.resetSimulation()
        self.v = 10 ;
        urdfRootPath = pybullet_data.getDataPath()
        planeUid = p.loadURDF(os.path.join(urdfRootPath,"plane.urdf"), basePosition=[0,0,0])
        self.pid = p.loadURDF(os.path.join(urdfRootPath, "bicycle/bike.urdf"),basePosition=[0,0,1])
        self.origin = p.getLinkState(self.pid,0)[0]
        p.setGravity(0,0,-10)
        #p.setRealTimeSimulation(0)
        p.setJointMotorControl2(self.pid, 1, p.VELOCITY_CONTROL, force=0) 
        
        velocity = p.getBaseVelocity(self.pid)
        state_object ,orient = p.getBasePositionAndOrientation(self.pid)
        norm = math.sqrt(orient[0]**2+orient[1]**2+orient[2]**2+orient[3]**2)
        observation =velocity[0]+velocity[1]+ orient
        
        p.configureDebugVisualizer(p.COV_ENABLE_RENDERING,1)
        return observation
        
    def render(self, mode='human'):
        view_matrix = p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=[0.7,0,0.05],
                                                            distance=.7,
                                                            yaw=90,
                                                            pitch=-70,
                                                            roll=0,
                                                            upAxisIndex=2)
        proj_matrix = p.computeProjectionMatrixFOV(fov=60,
                                                     aspect=float(960) /720,
                                                     nearVal=0.1,
                                                     farVal=100.0)
        (_, _, px, _, _) = p.getCameraImage(width=960,
                                              height=720,
                                              viewMatrix=view_matrix,
                                              projectionMatrix=proj_matrix,
                                              renderer=p.ER_BULLET_HARDWARE_OPENGL)

        rgb_array = np.array(px, dtype=np.uint8)
        rgb_array = np.reshape(rgb_array, (720,960, 4))

        rgb_array = rgb_array[:, :, :3]
        return rgb_array

    def close(self):
        p.disconnect()

In [5]:
env = BikeEnv()
np.random.seed(123)
env.seed(123)



In [6]:
nb_actions = env.action_space.n

In [7]:
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 10)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 16)                176       
_________________________________________________________________
activation_1 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 16)                272       
_________________________________________________________________
activation_2 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 16)                272       
_________________________________________________________________
activation_3 (Activation)    (None, 16)               

In [8]:
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

In [9]:
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

Training for 50000 steps ...

0.0
1.659110602170943e-24
1.7433884733252233e-18
8.716795461275455e-19
0.023846773038626676
8.079566059839005e-07
8.077824666142042e-07
0.02384758064863733
1.1542772098034615e-06
0.38958585829194303
0.4423923655587251
0.47397436898131534




0.5318304241021984
0.08533280141107064
-0.04117294946357475
-0.016103496581955822
-0.000581717795938309
-0.00027694873646659366
0.024775232120152185
0.010356273709783592
0.03427684611303475
0.02046843012162609
0.0432417868542493
0.06827984587754114
0.09331827248238601
0.11834700250655153
0.14336055108151338
0.16835080195133367
0.19332474211968154
0.21828158698056352
0.24322119834999603
0.26814112750205643
0.269052857015182
0.25317196470213
0.25098737674128385
0.24993047305073984
0.23420875573200328
0.21866913460303408
0.22047213184629552
0.24547760370549174
0.24412204335806667
0.24355114973001124
0.24185565499340206
0.24171306808242995
0.26671860189567
0.2916311351830445
0.31654133475223384
0.34143783959035034
0.36633497213949723
0.3912008552865964
0.41607342706806716
0.44094695891661173
0.46582819675378506
0.46403201642084163
0.4607037996275961
0.4463691759244695
0.4317631294119846
0.4168409888881728
0.44181245073387115
0.46668405462513074
0.4915756305423267
0.5164964249802071
0.54145

0.46255301994412956
0.4885655785489202
0.47021472167947825
0.4509332306274623
0.432989546133283
0.41662417995007384
0.42061113012985607
0.44697324486606227
0.4734918496986219
0.5002885944728502
0.48492352173487024
0.46483877205891644
0.44462374284851364
0.42815583757020875
0.41261484716107616
0.3971914470247139
0.4054928775981221
0.4325548887346954
0.4598183846373975
0.4403042870764101
0.4201848225189232
0.442054049094852
0.4696552375028537
0.4975152219017206
0.5258111081507475
0.5546568426256197
0.5841985688458254
0.5823788446361744
0.5595433470020607
0.5140306273675779
0.5117261302166355
0.49743708422287897
0.4832231278376621
0.45353266453735597
0.47778082450541004
0.4830510089946624
0.4799582283264189
0.45291289201402596
0.4576545715842124
0.44970098481572385
0.4539087481917651
0.4468341561545089
0.4505351110149158
0.4452754296308483
0.43233304923506505
0.4125905806780283
0.4367358539512368
0.46085852534696115
0.48497956953016275
0.4871980287080131
0.48656180948644606
0.464284311913

0.01342009680139603
0.03722381686679791
0.026948282060841235
0.050315920019988664
0.07436884012936935
0.0984562303546381
0.09268636761930776
0.0802704098912564
0.06794564752911948
0.06115527279807224
0.08287437247657344
0.07356841600118244
0.08912596317549618
0.08006325239353815
0.06808930001224382
0.05598795966261568
0.04391502840834994
0.03189042040680835
0.05471767717555436
0.07851702058400645
0.07124345104729421
0.05924558292045317
0.04729947373022987
0.03536160560135304
0.023442918757254384
0.01155427717547587
0.00037833964781203155
0.0002733075927733483
0.00021871936792186494
0.00018889131580283804
0.0001770987587337007
0.023737791550931336
0.014263776154150339
0.002549773872992366
0.00029725838310185266
0.0002815580087339201
0.00029005210056351846
0.00029974073561530044
0.0003132552965772837
0.0002975253427823975
0.0003167092014804662
0.0002969491859704137
0.023520713242575087
0.0467849210760689
0.040444553837975336
0.028940415922623225
0.01771605985151002
0.006633209567625071
0

0.42865593262572654
0.4045162652248819
0.3832707528653982
0.36633381675248133
0.3544562260335264
0.3431037004144713
0.33170642204555917
0.3203075250727977
0.30894018833771963
0.2976291082064053
0.2863928952530892
0.2752456951850798
0.264198340895058
0.25325918073795
0.24245681957662057
0.23173785390742974
0.22115861467200895
0.2107060744461103
0.20038267892247766
0.19019036329583014
0.1801306576554622
0.1702047789303928
0.16041370309922287
0.15075822246264284
0.13663702520738738
0.1269515573574528
0.1174164552124823
0.10754839184254399
0.09964907289524043
0.0905053443827604
0.08143409662249157
0.07252576027406872
0.06375087211699211
0.05515348144964889
0.04661961508876717
0.03828721344355209
0.03005615611257003
0.021965409610251247
0.014050072930186038
0.006247991284409058
0.00028696043117223437
0.00029171923197346514
0.0002923396782640972
0.00029190922656434793
0.00029121685396263087
0.00029034695927687644
0.0002892974763846063
0.00028806096123493743
0.0002866301873870288
0.0002849980

0.0002866625066020443
0.0003095409612770511
0.00029190673926900493
0.0003163232021702849
0.00027600230384077157
0.00030614991834954937
0.0002903163235336894
0.0003094599037264824
0.00026252560285522494
0.0002873572695434234
0.0002793992203736681
0.0002648544399742194
0.0002442164420233758
0.00021873351964900778
0.0001896780613994927
0.00015789715182237
0.00012401124737292714
0.00010959916350676823
0.00014523272012790907
0.00021449332069320198
0.0002866656012597347
0.00038936371372452386
0.0005122950351604632
  1649/50000: episode: 5, duration: 24.907s, episode steps: 330, steps per second: 13, episode reward: -671.000, mean reward: -2.033 [-1000.000, 1.000], mean action: 1.691 [0.000, 4.000], mean observation: 0.119 [-2.802, 1.631], loss: 1267.152222, mae: 10.070965, mean_q: 12.867598
0.0
0.023846773038626676
0.04768937413978882
0.07152740919834974
0.09536048441665247
0.11918820633565394
0.14301018186624476
0.1668260183208345
0.19063532344528258
0.21443770545125518
0.23823277304908755


0.2544168332125066
0.28093206304783325
0.3076268792791986
0.33455452862192747
0.32884146563425287
0.3114858372668199
0.3303378616514962
0.3571816821666173
0.3841301720345563
0.3671908356366903
0.39401086468167185
0.4211329004166747
0.4485579369136912
0.4336483400579255
0.4150401497795401
0.4371766277138835
0.46480879910662615
0.49268891196599124
0.5209661947969753
0.5497457617767672
0.5791615243810618
0.6093739386332291
0.5872056776522587
0.5945527338413853
0.5999255284408934
0.5969647715630506
0.5834972928900244
0.5701929476360723
0.5570106111878554
0.5226549046365968
0.5473556026488298
0.5719822632844961
0.5965773882739609
0.6004725417463237
0.5991319444691071
0.6237114164334332
0.6482349788746382
0.6729457959516482
0.6976422154102675
0.7222648012150388
0.8128340930351724
0.8057867630605048
0.7962687767176542
0.7861773900034206
0.7462719772096823
0.7699018851522834
0.7611922923498369
0.7504222312712826
0.7300675869117869
0.7543728944395164
0.7785992756333704
0.8029971613609013
0.8276

2.4935802412348047
2.323912534561929
2.3463936593786303
2.5721017206721966
2.5882532493018164
2.6071479331780356
2.6289274806033367
2.6313929943045147
2.652444052876932
2.6792021864978186
2.7104812708747645
2.5720957191629146
2.587099454440243
2.6034416239225444
2.621319695377464
2.641111515675076
2.662933961015059
2.9259646066659672
2.9445658724366326
2.957895624840799
2.9613411101601836
2.93625673776032
2.715242957933479
2.7254414584564506
2.733510997975429
2.7284233966510585
2.739816423625099
2.7356544808180643
2.7380065011370407
2.720908994609823
2.932319376333162
2.9047576856962123
2.883177029037957
2.8757224377654507
2.8886574551770483
2.9026038465240935
2.719358144110585
2.7271798266523235
2.735467669557443
2.744604873475477
2.7454738622662904
2.7575245468601333
2.767332610817461
3.1154788521297663
3.134543919571351
3.1563722161374304
2.9819360142406426
2.981353055368786
2.9840768785004568
2.9906817332008835
3.002063468556485
3.0194245112919513
3.0412821514997543
3.0631342869923

1.030589269275488
1.0158188249825628
1.0014547738879234
0.9874859263675886
0.9739046064243139
0.9607036354819468
0.947876301531178
0.9354163302583879
0.9233178575687389
0.9115754035271789
0.900183847463828
0.8891384042365256
0.8783829515558942
0.8652171640049526
0.8553872134448351
0.8458715586110884
0.8366659151984462
0.8277685041743341
0.8191774629626708
0.8108910065455092
0.8029265308240828
0.7943412694171831
0.786652683970567
0.7783978679935184
0.7728369521561779
0.7660260239417739
0.7594767343485701
0.7532337304005444
0.7472812877454622
0.7416174529370815
0.7362401884691775
0.7311473514289031
0.726336667143115
0.7218057027381315
0.7175518393722726
0.7135722432087848
0.7098638352113005
0.706423259685373
0.7032468517622
0.7003306038215956
0.6976701308970754
0.6952606354196214
0.7472307113310755
0.7410016176181207
0.73472540263439
0.7279534343672136
0.7184772888735182
0.7134688841844111
0.70873313257521
0.7042567856104937
0.6999290563863934
0.6960062568468224
0.6922390811623412
0.6887

0.0
0.023846773038626676
0.04768937413978882
0.07152740919834974
0.09536048441665247
0.07149466035183581
0.047633219113051344
0.02377641577137812
5.2647300970459776e-05
0.06578387630084731
0.4315644121909745
0.4895317257331996
0.28842943567404467
-0.027432216357350436
-0.0023630894468228032
0.022666180789436226
0.04769471861368558
0.07272239573221152
0.09775059374175568
0.12277863860321386
0.14780891638470656
0.17284357612377432
0.19788538069580966
0.22293784677469308
0.2192827226811779
0.23292270555266614
0.2579080387577027
0.2828736432013432
0.30784840454691187
0.33283550108469173
0.3578420248192761
0.38287660622776254
0.40794970769665595
0.4330739893425424
0.45826474991458893
0.4835404606123489
0.508923415221181
0.5344405267497946
0.5601243093375695
0.586014095812653
0.6121575562878502
0.6386126036812031
0.6654276010243815
0.6781140704918814
0.7029308194714341
0.7289566431917188
0.7553710067578455
0.7822028012691477
0.8095982695693144
0.8376464749373602
0.8665078716554442
0.89637101

1.4174282681387458
1.4495473271151595
1.3770562826897328
1.3981320614637178
1.405487355415616
1.4185126460751218
1.44070578605116
1.4630729262388553
1.485706384851517
1.5086886909546802
1.5321226551431515
1.5561342022056976
1.580878452485316
1.6065476517571762
1.6350455423879209
1.8053769758709806
1.8323623254875494
1.8619955154217003
1.8946210142262558
1.9309801243477536
1.8130110509648159
1.8341200430168212
1.8559474210167772
1.878579086230896
1.8982848354409607
1.923386877216348
1.9501195711411865
1.978891723402595
2.0102016547543524
2.0447845714741915
2.0835635976120574
2.127745587793029
1.9867982816633352
2.010688701732733
2.2203804054015346
2.2324112901421715
2.2350924298304884
2.227066383601149
2.2270411720911616
2.2467397209575086
2.2679725417823526
2.291093380165345
2.316563959004967
2.3424442254320867
2.3715551615851593
2.2419008811963828
2.253931021527047
2.2436792574376314
2.262873441274364
2.2697202307302202
2.4471013918911497
2.4330987190296853
2.4148021747590653
2.386423

-0.00027915679559121007
0.02490108390942282
0.010263047831082136
0.0002760018329899423
-0.0002881917761331146
-0.0002958290246299646
-0.0002905403263901845
-0.00028753829992238787
-0.0002851807260729566
-0.000282918008549985
-0.00028063327551823157
-0.0002783030991163732
-0.00029183529629212576
-0.00028703798849929557
-0.000284126618541079
-0.0002816097427289884
-0.00027914319004198354
-0.00027664697122891415
-0.00029203219329195336
-0.0002784295379200166
-0.00029085814182728496
-0.0002919548968416978
-0.0002897968008931004
-0.00028687083788345657
-0.0002837316298558691
-0.00028049488069349757
-0.0002771837466657064
-0.0002946376399128086
-0.00027575178898706803
-0.00028838237981575887
-0.0002892023177122633
0.024584007640089788
0.010297571007776932
0.00027804856118186436
-0.00027538942666371436
-0.00028843918368457117
-0.00028643112022364714
-0.00028093988929739854
-0.00029655145581454354
-0.0002766975490168743
-0.00028691055570755596
-0.0002863429072318898
-0.0002817699437077364
-0.0

2.0416086159768825
2.022790984426399
2.004705315370349
1.9873338802693135
1.9706598918845817
1.9546674477574313
1.9342118803756607
1.919821673822154
1.9062042420283032
1.8931776863130338
1.880732626050598
1.8688599213092274
1.8575508007662842
1.846796840479413
1.8365899394106069
1.9654840736647226
1.9491530567151376
1.933323987643207
1.897267683337008
1.9039642103621495
1.8890104032701063
1.8758322502849232
1.8634992825381653
1.8517681470338367
1.8406284523573522
1.8300700404912837
1.8200828722512457
1.8106570851707717
1.8017829594758061
1.793450884050704
1.7856513223186805
1.7783747779572052
1.7716117603206227
1.7653527493677534
1.7595600322372726
1.892040570909264
1.8805447270051905
1.8694070499774211
1.8589251577725079
1.8490792889000072
1.8398485877698822
1.831154842670647
1.8231008738406602
1.8155991468725852
1.8057559508835765
1.7956960338625165
1.7902540942673018
1.7852883817768956
1.7806589129392556
1.7764602931260007
1.7726399161868138
1.7691789094810844
1.766057627821852
1.76

7.875654616022415e-05
0.00010627435264016694
0.0001230610394790975
0.00013654116528310188
0.0001470702082558625
0.00015483540959998458
0.00015994912605360519
0.00016249763433596445
0.0001625547765762546
0.0001601904625832297
0.00015547638175260377
0.0001484899738942637
0.00013931729167515906
0.0001280551466326164
0.00011481281183799407
9.971343684877476e-05
8.28952868906638e-05
6.451287103475644e-05
4.4737996854540336e-05
2.3760770514429878e-05
5.7711314909346555e-05
0.00010295075809145111
0.00015691290800207643
0.0002090457355332839
0.0002769533428024813
0.00035548367876305555
  4916/50000: episode: 15, duration: 25.130s, episode steps: 331, steps per second: 13, episode reward: -670.000, mean reward: -2.024 [-1000.000, 1.000], mean action: 1.761 [0.000, 4.000], mean observation: 0.102 [-2.808, 1.639], loss: 1027.173096, mae: 56.044491, mean_q: 29.179216
0.0
0.023846773038626676
0.04768937413978882
0.07152740919834974
0.09536048441665247
0.07149466035183581
0.09532676506866039
0.11915

0.0
1.659110602170943e-24
0.02384677303862668
1.1154897511661543e-06
0.023847888314899576
0.04768999972139071
0.07152705640556475
0.09535866526926252
0.07149088753784516
0.16972366903331523
0.48614985297206037
0.5147480142763508
0.16698042181581674
0.028615157302279648
0.053705154544257616
0.04065434472980885
0.06391633418797567
0.08899390695183661
0.11406581465954854
0.13914621458008103
0.16423567119973698
0.15756251858229187
0.14171453533621356
0.15571498296969616
0.1807954302220321
0.2058713521575934
0.19888990536676815
0.20790410898396103
0.1992620280073161
0.20839505372229272
0.1997351695934845
0.18420256343153757
0.16880354799178288
0.1534287373218913
0.13810492252027393
0.1530660323422254
0.17821046749921934
0.16945765273295385
0.15416918483196068
0.1679129890188483
0.19307253348279327
0.21823704321703952
0.24342246079846025
0.268636981260011
0.2938914359555464
0.31919856732269425
0.3113787261977258
0.3337160576211764
0.3589242181349399
0.3500918133970971
0.33791975618079534
0.3

0.21310744316908353
0.20000441095538862
0.20777249249588936
0.20344124295150864
0.19047754666144925
0.17759382110410804
0.18759159405715808
0.1824738218326338
0.16968890178383791
0.1569533064148969
0.1442258297737286
0.15752290668424926
0.18257266739454733
0.20761591112581207
0.20483564866217874
0.19222129615626574
0.17971675567948092
0.16725116425302325
0.15479209719731996
0.1423776992184595
0.1299976830482536
0.11767397074234424
0.10534780843987884
0.09305917034495692
0.11144760368142025
0.10357547496133931
0.09139309713613562
0.07920848645787253
0.06703700770813417
0.05491237302825695
0.04274386401375076
0.030639631905456648
0.01852762340312339
0.006405918968259197
0.0002942037552810719
0.00010098783966218478
-2.1986030748879655e-05
-0.00010411169093414314
-0.00015781716146455444
-0.00019246991843587525
-0.0002143595177652191
-0.0002276883079577795
-0.00023526333963846472
-0.00023895936995678893
-0.00024002823176461313
-0.00023930553098351336
-0.00023734875188695002
-0.0002345295326

0.01339512202016909
0.0003837216340099956
0.025850743093952015
0.013393479271442242
0.00038440975701833566
0.00014920220870756187
-1.1495940563821771e-05
0.025407221591978266
0.05088971387785242
0.07638395841714996
0.06573717438301818
0.0521725329520386
0.07701172737449591
0.06471997430783112
0.08915948637049678
0.11456498828761746
0.14007390588284163
0.12938275388607984
0.14920487340162294
0.17464440294404004
0.2001614661753527
0.22576087405488574
0.251483426581888
0.27736846727851194
0.30346125307123184
0.3298144543454858
0.3564899848449418
0.3835612541619779
0.4111159624337143
0.43925959582585994
0.46811982974241073
0.4978521091490112
0.528624567183652
0.5362996099539609
0.5329919059339485
0.5592132453238616
0.5876214438960511
0.574786107792847
0.5558652780658753
0.5812900929275305
0.558411484808531
0.5819257752881546
0.6053992820454842
0.6288700507941897
0.6523507692721711
0.6758596028447876
0.6994180520582816
0.7230515555255139
0.8094292045960523
0.8320197411362957
0.8549759261430

0.0017597793308653156
-0.0002859439563169229
-0.0002787867399456228
-0.0002808836634099814
-0.00028004577060213774
-0.0002788997922037572
-0.00027770209358964277
-0.00029633549632330613
-0.00027711191849249487
-0.0002937842571866419
-0.00029422273496441134
-0.00029294681270531915
0.02230912823400545
0.011590686184555925
0.0002936994431574965
-0.00027942152479598544
-0.0002837055887485924
-0.00028215544755338956
-0.0002804724680206261
-0.00027874844852216385
-0.00029681747937737954
-0.0002772331265474307
0.02183350127534718
0.04393648831400375
0.06604349060675525
0.08816686874902971
0.11033064214061956
0.1325593854473931
0.15488111361950724
0.17732784642631927
0.19993632384300877
0.2227488640349033
0.24581438474477602
0.2598834127352779
0.25896501959137563
0.25827435235803303
0.27790135144388445
0.2777318736957715
0.26945266377844845
0.2583496123939901
0.24714086411680694
0.2358713157162052
0.23276995381810348
0.25479253508176897
0.25516643428460556
0.25806201511035876
0.280185233628924

0.7711611453915788
0.5980579670513306
0.622008235048124
0.6545442364866035
0.6959461031422038
0.7485385195481304
0.8171218461283047
0.8862055923007858
0.9331842828389221
0.7714135850875401
0.7927709319221243
0.8173074823794346
0.878332022189287
0.9642577020077179
1.0377400921648094
1.060383192161994
0.9846810088683822
1.0094188380567302
1.1404315654615473
1.2348167052834114
1.2729653196058817
1.2064782162763898
1.2313476118058555
1.2562169092815274
1.2810862738829727
1.2462206876176303
1.3065126736712813
1.376135556890348
1.4242487225983043
1.367914541128815
1.3932294437944661
1.4185475695729228
1.4438690250324855
1.4691935423356712
1.5150935731120287
1.4617015690779236
1.5122402426884543
1.549955993285168
1.5942771014192294
1.6378343972798715
1.6802596436261408
1.6377849710059724
1.6638243879031465
1.6898715200383623
1.8308845584436388
1.872300975905065
1.9114827936974637
1.949832163076458
1.9876341231894117
2.025082912353144
2.062312847688522
2.0157012304683337
2.0420666170910122
2.0

0.15721278949222506
-0.010690783314388469
0.01794777616390551
0.04661859909979628
0.07549044235330293
0.06079526629638879
0.042749971456322575
0.027368418147476238
0.011963677145599516
0.04084269610322715
0.019375931365748507
0.004000123291674516
0.03295284229904723
0.011364160451583734
0.0002927903953982118
0.00028744825505638735
0.0002845899459287157
0.0002838885978971145
0.0002837698810946287
0.029274179506444644
0.04864032092686346
0.03987795182474517
0.024058049734116977
0.008219210187107688
0.00029601018020573334
-0.00029036016640378385
0.024704311745114556
0.01044246235462527
0.03424546853282313
0.020602795764750253
0.04322847215375976
0.0681935243450663
0.05678179820793599
0.0753208283671167
0.06437505820787638
0.08206756809064564
0.07156709918830953
0.05580201783095814
0.07450152854131639
0.06353692257598116
0.08137841342601404
0.07086171379157846
0.08790693505482614
0.07779797897773123
0.062146755559837084
0.046500514565354895
0.030855791559289254
0.015234583987220274
0.00031

0.10912598080742135
0.09358485356312353
0.10836971879896272
0.09954718839849151
0.08406713080213007
0.0685724605166589
0.08617595674064274
0.11111184335238625
0.10264783209951579
0.08721710527879843
0.07178527473093377
0.05636259237491901
0.04096149099157717
0.025570576480305706
0.010178155926606348
0.00027644099351887587
-0.0002848905524249518
-0.0002908279766705423
-0.00029042267566257784
-0.0002904713346972355
-0.0002905499503710217
-0.0002906157711204566
-0.00029066720639288156
-0.00029070505142769965
0.024559342547744868
0.010937392334864215
0.00028840933615488853
-0.00028381359510317177
-0.00029053226509508225
-0.00029003537348176435
-0.00028995189605196184
-0.000289904022908843
-0.0002898503927260382
-0.0002897888388793809
-0.00028971955743688866
-0.00028964282583574246
-0.00028955889509980697
-0.00028946798727770144
-0.0002893702981060511
-0.00028401760615108155
-0.00028866249628042265
-0.00028927965997996194
-0.000289185674534717
-0.0002890351643821658
-0.00028887664075063953


0.32961384704879554
0.35431292092238814
0.37906037603090137
0.4038719141765509
0.40169329041835505
0.3971394066761748
0.39837107959577833
0.4229619851354155
0.44751162256686766
0.4427908165596311
0.452918422257427
0.47751936079368074
0.4720069457509976
0.4806826727727734
0.47487959728171875
0.46664312955212267
0.45394898904291103
0.4438355526755444
0.4686242108706119
0.4934135593353506
0.5183255141797635
0.543402359213178
0.536021187651545
0.5596630704704105
0.5514217107731261
0.5420053588105816
0.530974488647967
0.519378922991859
0.5078612150313992
0.4983387928405546
0.5236017774879925
0.5150778288381865
0.505460832756198
0.5057282098457511
0.4970655932321068
0.4872563788861401
0.5198960208617547
0.5038120447883729
0.48656107317106834
0.48126140431762277
0.4650117488874392
0.4478905275467564
0.4318163427108126
0.42961208966361986
0.4134026178536626
0.42689925040723686
0.4105546961699566
0.42460157255527414
0.4081241321365767
0.39148750490787065
0.3764889701670944
0.3798528557870371
0.

0.0672056628464237
0.08975645810519278
0.1123731385713185
0.11402341708127954
0.10326091432503484
0.0907621615651445
0.07835434420801675
0.09437164785355642
0.11634583771112741
0.1385608617468475
0.16088642102455689
0.18334572814652547
0.20597821881345116
0.20940879991520994
0.22502217043197492
0.24599425166018135
0.24555675456265136
0.23829657048779954
0.22777402035610653
0.22721612008131414
0.24943147668749957
0.2718529980020768
0.29451948265896244
0.31749691345051173
0.34086607224176485
0.36471769893738376
0.3891588451314898
0.414302576237987
0.44029194951451356
0.467285212552808
0.4954689469287944
0.5250605627776591
0.5563139107031924
0.5895254029933081
0.6250405365226799
0.6632603066435115
0.7046465011756532
0.5831227118060827
0.6071502160574853
0.6368159754437915
0.6780388724955033
0.7194103865031856
0.7660158245145569
0.8087132668996391
0.8532374800393613
0.9022469315444458
1.0262344748564234
0.8452476555451147
0.8656211682890499
0.9041022632370925
0.951879219945016
1.0151672836

2.22747836495165
2.19591075411348
2.222060314125812
2.447279989649051
2.4836141035720964
2.5280398220567055
2.5448430453409756
2.535800575024504
2.5215277682880584
2.5139519710162017
2.5061981121453734
2.4978965339611787
2.489084017241838
2.4797300080700024
2.4698012242546317
2.459264255551003
2.564278069994076
2.5611163526848704
2.603235317064326
2.6254863620196565
2.60599685461694
2.5858792759897513
  8804/50000: episode: 27, duration: 16.417s, episode steps: 318, steps per second: 19, episode reward: -683.000, mean reward: -2.148 [-1000.000, 1.000], mean action: 1.956 [0.000, 4.000], mean observation: 0.117 [-3.012, 1.180], loss: 85.334480, mae: 112.256561, mean_q: 12.809359
0.0
1.659110602170943e-24
1.7433884733252233e-18
8.716795461275455e-19
-2.0067587107375535e-23
8.71675143459727e-19
0.02384677303862668
5.005105617600369e-07
5.003911268576115e-07
0.38720627787857553
0.43157575137314763
0.47399981306319244
0.5318588274959727
0.19434256378585757
-0.0005055584591308337
0.028326700

0.000283338279779409
-0.00028272357316294665
0.024729076140332533
0.010572706464308409
0.03556844037926441
0.060578680889720585
0.048243195932940665
0.06951854788525005
0.09452270613693575
0.11950888815493965
0.11053077465275167
0.12554172649113995
0.1505266375795683
0.17549279622034009
0.2004580019564374
0.22542034281442852
0.25038244831432394
0.27534769783802404
0.3003203674823891
0.3253058092978612
0.3214573852501444
0.33408703561297237
0.3296418806532545
0.33003925200738654
0.3254680521822604
0.32632589455080935
0.32151961418665603
0.3228792825998483
0.3178462668227869
0.30411418061164763
0.2891568087195374
0.2742754630344791
0.2594491576514733
0.26282774378943463
0.2571731021426187
0.2424563754157425
0.22787679034150432
0.21334887485685097
0.19886977685752005
0.18443823881085128
0.17005223822223567
0.15571689642230105
0.14139813526107642
0.1271487564180098
0.1429806713933154
0.13359671060375003
0.11943218641908103
0.1052948044208132
0.09118235863980917
0.07707604470782202
0.063021

0.06590884229708556
0.05253342870879322
0.03911866148202092
0.025739876253525158
0.012360353359208444
0.00035713200729717634
0.02530591806977978
0.013792776643839643
0.038592900607527555
0.06348639557651264
0.08838955582962787
0.08066539312900659
0.06732872827558364
0.08852014931146673
0.07842223485769467
0.09867462916037761
0.08897788340451537
0.07583996540512837
0.06272165830723456
0.04957231595539628
0.03645294303330745
0.023340249210978124
0.04784427171383649
0.07260283458878061
0.09737661291369283
0.12216046744968766
0.1166764088328908
0.1334429958985698
0.15814295253415903
0.18286973401425535
0.17798044252433892
0.18962443627983966
0.21431920823260975
0.20921307430785774
0.1965935190847976
0.18402993841833154
0.17146576074315406
0.15891615639317574
0.14639137880181774
0.13389448759201944
0.12142870067804844
0.10899508890128094
0.12676120726588141
0.11838053455535916
0.10624260510445045
0.12451445351562614
0.11614101717263273
0.13378714796443184
0.12574213428568712
0.1426048272055

0.21651429126007415
0.20530341250649942
0.1946577844386367
0.21723979077336175
0.2002290490032348
0.18988809800592063
0.21316878581556892
0.1963457258650533
0.22015666848139376
0.2493149221175333
0.2788662471628439
0.2758115918675093
0.2974237734333386
0.31944574015574834
0.34166702400683874
0.36404015606393836
0.3865666157378038
0.4096509372101656
0.43310215063117813
0.45636757560643565
0.4799545550595291
0.5039412683186739
0.5284136532949164
0.553472182603517
0.5792353666471116
0.6579815381113286
0.6843170472813743
0.7120053239231143
0.7412259629522119
0.7722548174330829
0.805394448278263
0.8410488631017898
0.8796970159515347
0.9219145818202424
0.7979901487553849
0.8335171266995207
0.8654147136949583
0.8994404557695147
0.9327159278140353
0.9686070313820568
1.0077856569127683
1.0509018258917808
1.0987371252343299
1.1492526967980012
1.1993943243795768
0.9924183644630934
1.0245815323533047
1.0680533253135138
1.1242552298934625
1.195805067997094
1.2595972027662303
1.3081533362239348
1.12

1.6545003291707996
1.6671908963635282
1.6811366535169212
1.6953652798344152
1.7789172531898954
1.8072197413555784
1.815901048681313
1.8522087948526018
1.894984428838785
1.920718683733516
1.9724190785573026
1.9716026909921174
2.000447494964798
1.9973836196813424
1.9948065484390496
1.9920135804308576
2.02066656013137
2.0496230520433145
2.05851018088382
2.0118466876161
2.0334504849927573
1.9246734148207263
1.9375893097319683
2.0424714710552982
2.0705498520091057
2.0840474039260264
2.0689540356679346
2.0532851802839605
2.0320112302051103
2.058579805515534
2.0429756447152694
2.07009266672371
2.0976617838786606
 10404/50000: episode: 32, duration: 16.470s, episode steps: 320, steps per second: 19, episode reward: -681.000, mean reward: -2.128 [-1000.000, 1.000], mean action: 1.934 [0.000, 4.000], mean observation: 0.112 [-2.796, 1.338], loss: 17.460064, mae: 131.903427, mean_q: 4.399445
0.0
1.659110602170943e-24
1.7433884733252233e-18
0.023846773038626676
0.047689047867454586
0.0715264309725

0.22841511373795495
0.21310578743734776
0.22322884450838007
0.2132022958709737
0.22345225342269223
0.2487487735501024
0.23941040667639
0.24980551098483633
0.2751257906049314
0.30046927075495056
0.32585874783611646
0.351321472131786
0.37687922878065094
0.40255758282342935
0.3941618216408456
0.41943207420717565
0.4449861897598037
0.4707087679566616
0.45875197039466253
0.48318118045565284
0.5088660927616259
0.49594761322273917
0.4817959891244825
0.46751078886302094
0.46553486543588013
0.45231602555997685
0.43806699698613333
0.43900970939119094
0.46518825859657525
0.4914507062553016
0.5179066716832331
0.5446593238645483
0.5717487024676281
0.599267564902234
0.6273170317083542
0.6560199605855752
0.685526528072034
0.7159976292632265
0.7477241160650183
0.753194452463309
0.6787667720164783
0.6861476936878675
0.6774360951036428
0.7019255261117466
0.7263171706533796
0.7313267595310277
0.7346682052416956
0.7023587211050333
0.7526388872479923
0.7489832529185488
0.74543868930867
0.7409531168123137
0

0.16196862427343253
0.18676718795303124
0.211583461581522
0.23642716723796242
0.2613097401763648
0.2578724450453698
0.2506531794301159
0.23785313447647974
0.24366883191959698
0.26841378144368727
0.2931652622471068
0.3179523348485484
0.31348755991687177
0.30518791259058126
0.2928340171769466
0.280506169427285
0.2835541627039758
0.3083068225798637
0.3330833138548619
0.35791941912141323
0.38284309065992445
0.40788135795912844
0.43306601359348623
0.4584346149445525
0.4840317271063594
0.5099104627539988
0.5361343922541503
0.5627799260925687
0.5899393044113537
0.6177243702891994
0.6462713579063275
0.675746997856803
0.6870795618351493
0.732248963964411
0.7567218827857789
0.73489982403536
0.759477844760122
0.7862881105197231
0.8138908023340939
0.8423446690947115
0.8718281745022068
0.8356308908581495
0.8573988362581413
0.8796282575896763
0.9020923840422801
0.924837286533884
0.9485889829309881
0.9711458837103393
0.9939441188164623
1.0179124165544051
1.0379249443246163
1.0640750682196167
1.089869

0.3251992566452032
0.31801781518186883
0.30780181048877214
0.310037540626346
0.30470289514523
0.3068299378930622
0.2976497037046941
0.28808630717015604
0.2799557044213614
0.2814504527627283
0.2728844426605115
0.26386867705902806
0.2544892820563488
0.2486670393840827
0.26938824176963366
0.29097015917196123
0.3130150427737222
0.3355512644863575
0.3586155635910483
0.38234487343611795
0.406853223909221
0.43228461045359334
0.4588042801657054
0.4866015975627739
0.5158933294723883
0.5469271426125278
0.5799849817966821
0.6153857968977616
0.6534867487421728
0.7136730913485947
0.7446555993615771
0.5967970971592917
0.61407470110426
0.6449946103581388
0.6804397011678152
0.7236135942977089
0.7769408493726679
0.84541671549548
0.9061942579744825
0.952030226987039
0.7927906708527418
0.8134260701148165
0.837869650907757
0.942853535524797
1.0415021928693058
1.1075502536239463
1.0604601678880996
1.0284216245022675
1.0530584613967808
1.0776944432208064
1.138707012703252
1.2502181141590425
1.19170052945167

0.6592467266191774
0.6680302361775621
0.6797319107958949
0.6645474466528324
0.6501454014046266
0.6407421116108395
0.6482552564210262
0.6232416222192637
0.6144066249562816
0.6061619843933852
0.5974383544672974
0.6091108394987793
0.5820194477710905
0.5737537638386774
0.5660781414540771
 12013/50000: episode: 37, duration: 17.033s, episode steps: 329, steps per second: 19, episode reward: -672.000, mean reward: -2.043 [-1000.000, 1.000], mean action: 1.848 [0.000, 4.000], mean observation: 0.106 [-2.790, 1.592], loss: 10.117224, mae: 158.256287, mean_q: -23.313795
0.0
0.023846773038626676
0.04768937413978882
0.023833863149022375
7.3027032859891705e-06
7.301284361031064e-06
7.299832010984313e-06
7.298332895747389e-06
7.296787055386069e-06
0.3228137790853261
0.4316096917954162
0.47400026532390815
0.5318515741375957
0.09321944363169948
-0.04139969660700128
-0.01636783835631413
-0.0005887039658515267
0.0244254923112525
0.009993835923774279
0.0002910301028282939
-0.0002808011672090118
0.024767

0.0002958930086131746
0.02531341165833401
0.05030619935493777
0.0753012201018449
0.10029125116478795
0.12527690329849187
0.11840950414363906
0.1324808714792045
0.12317131380825848
0.13685181031281313
0.12774614574778315
0.1410492681586557
0.16601355639049947
0.19095827846796853
0.18553638435662087
0.169981593749004
0.17927728842527918
0.17203096717488367
0.18128468586265886
0.17409566281038286
0.15875741746264665
0.16943204423795458
0.1617296893642804
0.14649846803086605
0.13131137983010602
0.11617421936629882
0.10104675511103861
0.08594972434670793
0.0708641292322869
0.055826804907894316
0.04077023325205555
0.025764256706755403
0.010749797523081793
0.0002796114711683586
-0.0002821680311028263
-0.00028405569458713083
-0.0002835171784691071
-0.0002833910274432279
-0.0002833064652046461
0.02469456734117621
0.049673344402818384
0.07464433050913577
0.06516878570768518
0.05018800842307808
0.03525866060536955
0.058432205012061464
0.04608607707404823
0.031170432001445147
0.01629042690357692
0

0.6430158460656252
0.6558774999255654
0.6501486335813694
0.64331205420212
0.6312342335023609
0.6021200451995882
0.5959168491467056
0.6002449112332077
0.5941324850665078
0.5990107552748453
0.6236719338820095
0.6160979213190032
0.606975794063522
0.5976280136626672
0.5897615508655334
0.580184421168666
0.5689552942583761
0.5939878923390798
0.6190154013849849
0.6098089176385081
0.62960152100925
0.6547465359206641
0.6445854963058548
0.6329409448353052
0.6342705448551978
0.623796109918772
0.6376753229846454
0.6634140332151901
0.6519882005355284
0.6392109507638634
0.6261312693158961
0.6144984736160501
0.6032794658897094
0.595176464911078
0.6213734579920294
0.6094355130325012
0.6369578196101527
0.6160030986065391
0.5978017557899324
0.5810129828585385
0.5643495948200417
0.5478116712401498
0.5314029903166936
0.5151254079594689
0.49897964042131193
0.48296550589357334
0.46708211492249424
0.4643889929226011
0.49064486451797584
0.5172284873716654
0.5403386000688789
0.5676557072887157
0.59649082672786

0.4166577751566156
0.43782860895319253
0.42579837517644703
0.4133568208113855
0.3997952014341667
0.39260456247707853
0.4169303265917603
0.44152511524580124
0.4665563505794523
0.4921272399705381
0.5183650899782123
0.5454163739913358
0.5734509650573655
0.6026670645973554
0.6332970026854118
0.6656140012726276
0.6716666216040069
0.7007168070792474
0.6997617799043336
0.6088118278935029
0.6177482578191013
0.6257595601419472
0.6322883455852907
0.6273435232533154
0.6171970391802561
0.6071415452411923
0.5694423745034447
0.574676951171643
0.5916581718849324
0.6112380258408401
0.6325757515097241
0.6553502160118809
0.6780171009307131
0.7015702520303888
0.7261598773808984
0.7412858323730929
0.7456252725448501
0.7423074182231506
0.7387435345272989
0.7337371087836814
0.726620824846833
0.7197638304161169
0.7130942726830848
0.7066220560100913
0.700264822358136
0.6916649499074516
0.6874110534576189
0.6824513937593829
0.6694466865069493
0.6682034165434197
0.6644342194909569
0.6595380688029564
0.654866293

1.3925308883871788
1.4155061078311262
1.3920460818495357
1.3686075062223448
1.3953365150392698
1.474734231397553
1.5184686601049626
1.5415733631370065
1.508380028211187
1.5069458851253066
1.5466501258595644
1.559607875041429
1.5553090474469442
1.5503018013202372
1.538481431743153
1.5653593497201579
1.592379199944804
1.5769853802266556
1.5471399656997746
1.554098006692604
1.5303986323434478
1.5500556748225798
1.5638289117658786
1.5823240118891304
 13639/50000: episode: 42, duration: 17.066s, episode steps: 327, steps per second: 19, episode reward: -674.000, mean reward: -2.061 [-1000.000, 1.000], mean action: 1.945 [0.000, 4.000], mean observation: 0.137 [-2.797, 1.503], loss: 8.533749, mae: 173.065918, mean_q: -44.080204
0.0
1.659110602170943e-24
1.7433884733252233e-18
8.716795461275455e-19
0.023846773038626676
0.047688884668286066
0.07152594158449213
0.047664004666863005
0.07150041473444979
0.20637866921218578
0.4569821178659155
0.5148772313129925
0.38403393466588903
0.08170369540163

0.042144614538262806
0.02621334598686013
0.01029359998342486
0.03404556287099931
0.05899568700283561
0.046901016159972955
0.030999600042084345
0.015110475748553731
0.00029791017231817034
-0.0002804940223234331
0.02468368770448502
0.010399207932217639
0.0002760943382190111
-0.0002818251355327437
-0.00027951616554563324
0.024689906317775373
0.010415938806940193
0.00027745034569447505
-0.0002810206976059865
-0.000278877585848169
-0.00027881934340056463
-0.0002787918703581058
-0.00027876590231678975
0.024697392843982438
0.049683029933960746
0.07467264013618995
0.09964429012756376
0.12460699388713443
0.11677761298824751
0.10096608764790152
0.11462368409641606
0.10614536039353197
0.11926929861610362
0.14420391067309213
0.13763338707898484
0.14735979068306287
0.14089681012938754
0.15034981704522452
0.17525849871225194
0.2001528048898399
0.22501198787914736
0.24985609279068058
0.2746791120510198
0.29948123594812753
0.32426318372167473
0.3490262692914824
0.37377250439630605
0.39850471762008277


0.5574003186592476
0.5525492904823563
0.5466845262451617
0.5336746491406872
0.5188068989312667
0.5040598171612839
0.4847076149077891
0.4794395870252844
0.4714565117405724
0.45731072102678055
0.443211213619942
0.4293477903868857
0.4178752513071769
0.4425613005373818
0.46721439946439985
0.46138036979861524
0.4543635568197238
0.44108267797515166
0.4309198378217938
0.45570677591345055
0.4804796159592982
0.5053517388526564
0.4982846397773648
0.4898818141951771
0.4791413219084917
0.46907753430205035
0.49408342726069127
0.519108409015525
0.5442900821422426
0.5357464435340855
0.5258967802803187
0.5155516965291812
0.5066724358500023
0.5320464491615142
0.5574857544150013
0.5477496565405011
0.5704660544196188
0.5602554246634134
0.5489515321111827
0.5480065279999075
0.5377805837468833
0.5528405106358919
0.5421981502821461
0.5580610177711383
0.6327943022224052
0.6581351020950038
0.6387464477425024
0.6623581668609719
0.6881403356691769
0.7145267961517883
0.7415436891105635
0.7693190674745335
0.79801

2.562806083453941
2.5856307718843192
2.6100980452573705
2.6426708510849233
2.664132441070723
2.694774281404999
2.7121572534597838
2.713819483778618
2.7163721577269553
2.7206082511648284
2.755609123572085
2.7578904926888725
2.76233587958611
2.7748398435861157
2.757525604148036
2.7425028849392805
2.7322666398577513
2.713264050630969
2.691859441990195
2.876660770096046
2.6711510202662647
2.6662029842903783
2.617319842899293
2.6170044687661607
2.6115796789351347
2.5726946432920514
2.5681372502749134
2.5596324639466252
2.548591277129104
2.5198738776521683
2.5102484651053225
2.4985222548635337
2.4763927287697456
2.6550009686400204
2.6451802523392804
2.6331023271674168
2.62123763060093
2.6262891418046377
2.6364708181584295
2.6382999079887983
2.642639122186005
2.435407236213167
2.4410971706015925
2.4409417989291464
2.5091861880436275
2.5065165839253423
2.512561375406343
2.7259979397045764
2.710632353805763
2.691531431521272
2.669916159573067
2.6467028106843387
2.6200587169010783
2.591547349715

3.6153515856127156
3.637564518652471
3.6597826286827195
3.6997352596981012
3.6725504889693887
3.646449907119374
3.6195389883824576
3.5929351438913524
3.566680912261337
3.5890137424693584
3.6116494265431442
3.6346340517838485
 15282/50000: episode: 47, duration: 17.049s, episode steps: 329, steps per second: 19, episode reward: -672.000, mean reward: -2.043 [-1000.000, 1.000], mean action: 2.416 [0.000, 4.000], mean observation: 0.220 [-2.746, 1.981], loss: 8.462330, mae: 186.587128, mean_q: -70.765991
0.0
0.023846773038626676
0.04768937413978882
0.07152740919834974
0.047666938395340794
0.023810779905541986
2.2852031718721688e-05
2.284732544919509e-05
2.2842486202529015e-05
0.21561587573622928
0.4316247605685487
0.4739726865114019
0.5161895579576503
-0.0007786437500365562
-0.01625671629188782
0.00879846954565833
0.00028781834213589645
-0.000281833535562025
0.02474504504505327
0.04977888362770101
0.03714476804170907
0.058549331588934946
0.04600226892519031
0.030069399008057995
0.01415559

0.8300240680331709
0.7531450855543634
0.7775771980054065
0.8018652595391247
0.8261256744050326
0.8312368112066053
0.8296745124652839
0.8333549574988778
0.8351237668565892
0.8923295819577741
0.8742548443688981
0.8549201303274306
0.804388527264239
0.8001484149574573
0.7946269035673681
0.7800452387665288
0.7619390285301146
0.7440572811332717
0.7263956303777026
0.6798351586024381
0.6759981471929987
0.670871271296539
0.6405690703344029
0.6648195903825289
0.6891082029954384
0.7136694450868092
0.7082673853352979
0.7189588466888889
0.7428740416603408
0.7355191808102487
0.7436525218735437
0.7683529476868474
0.7929833011574725
0.7841609664584908
0.8000587502397811
0.8249013430709792
0.8498539060448478
0.8751158440306251
0.9006808136047171
0.9266551832026437
0.9531366831976014
0.9802244818598461
1.0081110485464277
1.0369441353271145
1.066957910359339
1.0984131602891076
1.1317174421140477
1.069007744087689
1.0931754654566457
1.1175541500400408
1.1421422366487282
1.1670077383389046
1.28707254833450

1.8513606777838028
1.8379061195553175
1.8179826291853045
1.796392336189635
1.7926504400953978
1.8158350983308613
1.8417387397405929
1.7257086019910277
1.7452391603272543
1.7620961708174199
1.7822210236876825
1.795963162113691
1.9479772432599225
1.9437586783575413
1.9319690367670856
1.9173733996311397
1.9008331996478798
1.8817035193709595
1.8488529716421107
1.8355010198080928
1.8183471161347238
1.7863173392496239
1.7739196750573145
1.7680781331370956
1.7459198355275165
1.7366927222180777
1.726950938378905
1.7114779632323291
1.7073360032131764
1.7330663352474236
1.7206855147297255
1.7440949262030407
1.9168754927737541
1.789596697351766
1.807211492996145
1.825373946703826
1.8441965198912709
1.8638208023383611
1.8856886680521916
1.9095970523398742
1.9333003697205273
1.9566644143329144
1.9822134157327724
2.013253672734947
2.047811529296958
2.086769827157713
2.131227260153148
1.9674186744639943
1.9905711779935982
2.0160294291947256
2.043931146234117
2.2618386805882453
2.2907124168671675
2.32

2.598296529777612
2.568686744561198
2.5421065593128183
2.515545541418036
2.504529346625431
2.464598946917422
2.4259877398112963
2.4292873942596827
2.4245556118388243
2.5713020142826912
2.5679408028078843
2.5620959441567996
2.526798858183993
2.5156435940562747
2.498122304576304
2.4788445314649317
2.4483301337288883
2.414042799571905
2.4113414732743967
2.410353055055211
2.3826700697569874
2.356359095068213
2.3606844258236834
2.359777292474992
2.5140268529614733
2.515002204880327
2.5089904615234895
2.5004282602867893
2.486097085384957
2.4719657951874052
2.460939961519858
2.4442849728688345
2.436333375158469
2.4054233267899336
2.3966327955181113
2.3910645514070183
2.3882342343719265
2.389126002662597
2.544138243831043
2.5522689176776177
2.5630940562810216
2.6017978867793006
2.595999042757153
2.584790450767137
2.59285744031092
2.49600295839735
2.4716848439390304
2.4714553028112705
2.458964798899357
2.48481389192996
2.496030925056045
2.5025932033480567
2.4940530046226566
2.6000918801021196
2

0.17421973371453892
0.13686321751085823
0.15230983255299463
0.17746621385884143
0.2026230289868196
0.2278013111567193
0.2530110920911834
0.27826369328061856
0.3035723397721626
0.2946370860960542
0.31805592995596726
0.30831806327549915
0.29375190761683234
0.27832364788859587
0.2834725447825273
0.30878169328920907
0.3341046264632763
0.3239697565282913
0.3389918396141306
0.3643673342123724
0.35365074823843423
0.36650992417629225
0.39196594676032387
0.4174533079665554
0.4430374203429061
0.4687249174949899
0.4571899502176237
0.44398114342340766
0.4302524033255895
0.42878920479029337
0.41620870489276646
0.4293488494080555
0.41660480572164243
0.4303007593115482
0.41730788344783243
0.4032111760143841
0.40588621942175906
0.43198959637131823
0.4581853071381328
0.484553329988416
0.4709890531493334
0.4970700364691098
0.48261846818817333
0.4671487795777836
0.4527017296375944
0.45627471509410367
0.4830135772356788
0.4682041332826235
0.4524177083126723
0.4644909111962808
0.49153333624834117
0.4762787

0.7923118947281452
0.8165706872541885
0.840979681007844
0.8347075056231624
0.8257865953521901
0.8208974590212648
0.8133800263217098
0.8208662667332628
0.8457852093702363
0.8706644018739926
0.8958873547938979
0.9214276110600743
0.9137615847075393
0.9011310846779528
0.9157583600591738
0.9039362502910824
0.889732390389552
0.8750031180802685
0.8673351885852308
0.8550124884264035
0.8707314799303509
0.8974096595239548
1.0003840749029562
1.0264731681298433
1.0534789005238543
1.0814612319449515
1.110634054664574
1.0677321469198924
1.0899908257098876
1.1123961023008662
1.1155061747547863
1.1155429803307138
1.137910639462982
1.1377377148533379
1.135114261749659
1.1305170348986535
1.085978427729347
1.0827922812618598
1.0664556696378138
1.0900141850528522
1.1133765876964892
1.1369965244350115
1.1609524111815914
1.1853296947438778
1.2102321576533313
1.235786652880482
1.2621490297019475
1.289511771969719
1.3181139059066223
1.3482538980818695
1.3803064679799422
1.4147203418819012
1.4521581070538232
1

2.2280936543076666
2.2540699653070773
2.4893668708501018
2.4975730280008395
2.5335143739797235
2.5584204611732604
2.5924907520677314
2.4303804827981614
2.4408760634620537
2.4623641727162293
2.4723956688712536
2.4898449785374615
2.510497742851451
2.522565576127401
2.5176202029863446
2.5307779957430836
2.722456611508567
2.700014726450459
2.6983439063107486
2.71374707254296
2.5296582101251395
2.53931796484863
2.549509337980372
2.560738866968015
2.5656559841976283
2.5942377384011435
2.6070405807998545
2.6076526875704875
2.585734528277454
2.5706051420335605
2.767572860740482
2.7675203814721945
2.775765259896513
2.768541920746177
2.4926161264715088
2.4981246274896085
2.504536220491447
2.5124704442549533
2.522618799747734
2.564971250959827
2.564856441087778
2.5623258935751565
2.5598435310511976
2.5510998790000308
2.751804208041959
2.7263711112658755
2.697984751285864
2.6940966491720637
2.4612506620740757
2.4468234327737006
2.452572876463642
2.446047290926697
2.4489940488080184
2.4549190692049

3.172212601410691
3.1753389231196705
3.1515934943844637
3.1575282832798415
3.1341399809523716
3.2516422344113276
3.264482157854121
3.2677657438996595
3.236384495245227
3.244503921131902
3.239695599811272
3.251159580995687
3.174569329938807
3.179818997315825
3.189057400263526
3.163891093632676
3.1666602016987553
3.260482019063356
3.282811006155174
3.305140280469378
3.288810034171527
3.312610226869736
3.3356819416625463
3.3190130130795223
3.2336591525790324
3.2092821056997285
3.2309390001846117
3.3449245014587143
3.313867961134251
3.287169010608613
 18231/50000: episode: 56, duration: 16.962s, episode steps: 327, steps per second: 19, episode reward: -674.000, mean reward: -2.061 [-1000.000, 1.000], mean action: 2.254 [0.000, 4.000], mean observation: 0.191 [-2.792, 1.642], loss: 11.761954, mae: 201.130020, mean_q: -109.659416
0.0
0.023846773038626676
1.2691276814608132e-06
1.2688863603250014e-06
0.02384804168279162
0.04769015305908522
0.02383366367158412
0.04767545173634598
0.0238183132

0.4791242203942058
0.5079798233392706
0.48875595715502645
0.4404598800273997
0.4450067848588369
0.46990754167222226
0.49478467001418963
0.5003304331684779
0.4938367151603426
0.49878073487975083
0.49249091122123423
0.4968153282760279
0.49155175581300115
0.5164094898041004
0.5196713164805357
0.5190586339247644
0.5068847845729807
0.4829369296647377
0.507837245616175
0.5101928818832561
0.5476632275698627
0.5717453067585523
0.5959177752292
0.6202069064242711
0.6144189020975444
0.6067013213233604
0.5859464939308573
0.6103595778173246
0.6346765090012213
0.6590588757752802
0.683527553536922
0.7081071818383128
0.7328275665574445
0.7577247805137066
0.7828425269035572
0.8082338436707569
0.833943754104821
0.8596476551680914
0.8694983112997026
0.893605640432991
0.8792701493940457
0.8626806781621856
0.8683207939168974
0.8942108959799376
0.8786237087386144
0.8609192668164214
0.870905490946165
0.8974347504939869
0.9242326586905385
0.9516387091072928
0.9411640821690214
0.967851959466558
0.9958026314239

1.2279269980292908
1.205332760831008
1.1239834878795727
1.1295767883464032
1.117376349653081
1.1210857710896656
1.1218570215289814
1.0800579347589845
1.102564480517575
1.125192945565955
1.1479890796284056
1.1710036745144998
1.1942977138880628
1.2179447352621322
1.2407151455541658
1.2648412088576564
1.2769738178284702
1.299240776069052
1.2914074705730707
1.2799977966086948
1.2815047781524134
1.2723311373629314
1.2592637196014786
1.2450900030080922
1.323052490278899
1.3112394871263198
1.3223755770438241
1.3031202525993457
1.2806498502330141
1.2572837525178504
1.2334310767516456
1.231662445017116
1.2103954936210835
1.186933759648583
1.1632357897618253
1.1395372111721394
1.1429576504073005
1.1209280043560648
1.0978425244839494
1.0748709787196644
1.080996725439858
1.0661982557455834
0.968661384507573
0.9902235083628339
1.011187176882009
1.0334653316552325
1.055895334767726
1.0785337019413663
1.1014426672788409
1.124695288968339
1.1483784541565032
1.1741977464936966
1.1994763957594516
1.2254

2.042241030492068
2.1889066394192063
2.1615960099212868
2.1331514063083215
2.1327916473110258
1.9426303569290622
1.9517801096306697
1.925736781099517
1.9370294628020437
1.9364287241671696
1.9331491074986111
1.9280797496256634
1.922138336967485
1.913846144690546
1.8978659829442412
1.8835544815312917
1.8722827735733396
1.8614520752990928
1.8510650302019351
1.991196167959822
1.973927348716085
1.9571957627686085
1.9411431577187224
1.925750874185577
1.8947202291929859
1.867151426002154
1.884849658519453
1.8575560517728935
1.848404338153459
1.8233332250666783
1.82039480066507
1.8000908770489075
1.7975804576062355
1.7913552322169366
1.784808233512541
1.7772854976625179
1.7647170003608883
1.7601564010161121
1.8931393685279028
1.8799931111356118
1.8655838554385886
1.8521644672650652
1.8434053891217097
1.8355839491826196
1.8233893769895737
1.8048098481939212
1.7985601213898064
1.7970670556222248
1.7880661625582648
1.775590320961885
1.7627695664365397
1.7501668626000508
1.7443393272510792
1.74725

0.0
1.659110602170943e-24
0.02384677303862668
1.1154897511661543e-06
1.115268823866204e-06
0.023847888086380144
0.04768983631244755
0.02383302079309367
0.047674645985534794
0.30891344767400697
0.4716113460479876
0.5294847577826589
0.4379293009935847
0.051990011000719094
0.07707572916902579
0.06538572103961486
0.08513525381528084
0.11015708393667414
0.1351637520192982
0.12679607947918148
0.140495313362826
0.16550117787268462
0.19049215222591404
0.2154844889933491
0.24047711056583856
0.26547340987850504
0.2904776464464729
0.28654044834467235
0.27510290904392587
0.2595519917517715
0.26146920789230804
0.28648722642952673
0.2819637398316512
0.2846748996501858
0.28007346935043076
0.28109177708435035
0.30618166889471404
0.33118789240447116
0.32583653094675336
0.31411210535223766
0.2991653580825158
0.2843054869264852
0.2856845486669124
0.2802232613802005
0.2831187432720574
0.308284107472894
0.302355418925078
0.3085919846555895
0.3337947259588767
0.35891321813359
0.38408988073169753
0.377302941

0.6105147401113403
0.6350966071177597
0.6597601402880523
0.6845304073912356
0.7094377028209787
0.7341947333245397
0.7598282031557896
0.7854857006284732
0.7873667397757547
0.8118024386835171
0.8370839256634998
0.8626501774252566
0.8507015000026772
0.875634453391703
0.9014270976605455
0.9276665055816634
0.9544506870324686
0.9819070440103167
1.0101937178384375
1.0395062511446385
1.0700580926139245
1.075136854724551
0.9679674108907401
0.9916505675423354
0.9996888434351578
0.9931180659024227
1.0970342676063336
1.0960957468069075
1.0881356513540974
1.085559813820785
1.0803353963691034
1.0762259301048207
1.0696641373261744
1.0613967572025427
1.0195009047907453
1.012950361280248
0.999188173747058
1.0231124121337212
1.0164824285031175
1.0063571344771909
0.995245652024497
0.983673208387548
0.9547842612067386
0.9453316472797388
0.9265222748384667
0.9511646003710595
0.9418767311311407
0.9561791427021815
0.9459224394082074
0.9331254314946447
0.9275848937494895
0.952975143183512
0.9414430031125011
0

1.934759447946239
1.9179496556798539
1.9009453818645379
1.8779273670222159
1.8575539483686225
1.8431156674377136
1.8625672722595712
1.8857953289872298
2.0776533176557077
2.099900083320641
2.1252263563356943
2.1536694234429237
2.0264211087939827
2.042987584891434
2.0604221906847195
2.0788776515517156
2.098564953744811
2.119744267609044
2.1427372021114333
2.167942986033253
2.1958590290945237
2.2310558996153818
2.2594366008048965
2.303317653473056
2.149658474963495
2.365879464379259
2.3753935472649315
2.3827323259435484
2.3883801360026005
2.402482535747953
2.420751705612897
2.442428929844645
2.467618076236345
2.499616634203567
2.5329632959837856
2.3553119172743413
2.3684991347155195
2.381111057350864
2.3868406650820897
2.3960692421207317
2.58800342995173
2.5740685129153826
2.5559106062782706
2.5272540848504597
2.509693594840745
2.5213023530573393
2.5375941803005326
2.5339719417922506
2.5167393208696307
2.5232917414441878
2.5097686081238755
2.2588455395480187
2.2718609952069713
2.268921938

2.4607544093995353
2.454446912269165
2.461493533246499
2.457139829117518
2.4558850150046956
2.468191237123511
2.4706468981955356
2.6624355257871457
2.678531302880705
2.6933625380375266
2.7122208549736655
2.714937626851757
2.5968500824334866
2.5719566978727504
2.5947831271997583
2.619421160688475
2.6341435092362295
2.6572569751318103
2.6567437855883713
2.6530158800139017
2.6782578117481997
2.8154955852901624
2.7735963149833123
2.779294151139758
2.798983775894388
2.77401180482354
2.7490696454965375
2.724156962260099
2.7600134016383264
2.756944883757939
2.781353195769511
2.778472111096403
2.7588756076264414
2.7919403194131833
2.8797505194320596
2.908405594471732
2.9240254049382135
2.9482085561157865
2.9725894577831555
2.9741163546054987
2.9724813003312858
2.995424654570792
3.018375847877375
 21178/50000: episode: 65, duration: 17.031s, episode steps: 328, steps per second: 19, episode reward: -673.000, mean reward: -2.052 [-1000.000, 1.000], mean action: 2.308 [0.000, 4.000], mean observa

0.44418768400106984
0.47110103947397675
0.4541740124759543
0.4792990809608448
0.5063647771993541
0.5336955763031498
0.5613882913148044
0.5895330390567671
0.6182220207390597
0.6476638269478979
0.677966531536744
0.709358654049148
0.6504633364340917
0.6752298460079225
0.6999044131669844
0.704830735355279
0.7014072842359638
0.7051623129911944
0.7008495809049686
0.7035506541368478
0.7008144008985978
0.7254781818865782
0.7500846728867554
0.7747298203951698
0.7994219683142494
0.8241859759225648
0.9227017116394363
0.9139825223244126
0.9268906521459367
0.9163235120477583
0.9207594919410037
0.9445730043087643
0.9685901629919834
0.9565212758883043
0.974412677418015
0.9606618349819805
0.9712964946618774
0.9958215273968235
1.0208676956687466
1.0076089730452105
0.9901910817690383
0.9718079023808625
0.9529527877763408
0.9341006120606954
0.9175616395508921
0.9076163599556194
0.9208047787336072
0.9470601831484489
0.9309420470261331
0.955764842764219
0.9385824105027467
0.9196631567603124
0.9303759954247

1.136898131928202
1.1620614670009288
1.1879693720161357
1.2148694230185908
1.2417915236630426
1.2707554933994236
1.3012482652278006
1.3335746440299117
1.2632069941582669
1.2860660415348493
1.3092689834141449
1.4525530694352495
1.4736413175123353
1.495249044595829
1.5173805276029406
1.5401054282061002
1.563592682668599
1.588002371660527
1.5980383055199552
1.6205264668806267
1.6099307963074403
1.6329227101926835
1.6571208722165776
1.6825894917133613
1.7095533874898616
1.7361980385904923
1.7708316635118535
1.6228349766695451
1.6323137286643594
1.6230199470058415
1.644388173008492
1.6497883881341842
1.6531317616475847
1.8173574983887342
1.8366669902145742
1.8567715229778408
1.8778393539296543
1.8706990906265046
1.8915718495701361
1.913695239828149
1.9372708625719024
1.962611583284577
1.9900973871609224
2.025106014198118
1.9210927194587535
1.9366173509350546
1.9450203081809927
1.9433066666569736
1.9486134817197645
1.9485576623270475
1.914772283063769
1.9160611247902228
2.073541955551123
2.0

1.7734609818374414
1.796869699277675
1.822647751353374
1.8137511216045956
1.7970913202924035
1.8140835324461895
1.635655716083998
1.6548427367680008
1.8215712972726694
1.8379532479395737
1.8561310548821737
1.8758695924615045
1.8972799614625506
1.9205949285057464
1.9302824780568193
1.9358158630641213
1.9243217150946024
1.9255488375061731
1.9096804112414973
1.8942186657191316
1.8912567845261292
1.9135585253559522
1.9401596434526287
1.9698723944625443
1.8324214828118108
1.8468322718602574
1.8557339893702791
1.973238847511322
1.9804617089806096
1.9964033664202745
1.9973618846299135
1.9929925394187877
1.9850816585964146
1.973241339712947
1.956797894933555
1.9448968436818537
1.9175855625012352
1.9227300838481327
1.9428602125934544
1.9280970647520803
1.9153932652074699
1.904052941617618
1.8897063926358069
1.8923779709692525
1.8969051828856203
2.0558557291187145
2.0736681252504403
1.8777876437583334
1.8796570904012793
1.8834514354307899
1.8553898121441095
1.8482450605850986
1.858973432007241
1

2.0974814204500243
2.1712992026970572
2.138058026039626
2.1480264213875966
2.149040858870471
2.146301624911548
2.121232862988659
2.0952497063476407
 22818/50000: episode: 70, duration: 16.986s, episode steps: 327, steps per second: 19, episode reward: -674.000, mean reward: -2.061 [-1000.000, 1.000], mean action: 2.156 [0.000, 4.000], mean observation: 0.160 [-2.794, 1.524], loss: 8.382233, mae: 211.384766, mean_q: -152.578171
0.0
1.659110602170943e-24
1.7433884733252233e-18
8.716795461275455e-19
-2.0067587107375535e-23
8.71675143459727e-19
0.02384677303862668
5.005105617600369e-07
0.023847273437140926
0.37493175797895545
0.45702868114874473
0.485839298874075
0.516197003590671
0.06237185469620571
-0.07181076494773875
-0.04676311517185315
-0.021713818022069037
0.0033064675697992464
-0.00028917920759257863
-0.0002823076946954208
0.024734682245609232
0.04976083806101743
0.07479188307513826
0.09982374204086188
0.12483968673386248
0.14985505402020857
0.17484629830958498
0.19981976128504475


0.43131744616446793
0.4366407374387908
0.46345090566000846
0.49038025851962835
0.5175890510852647
0.5451588390093169
0.5731831169175572
0.6017737403056324
0.6310444956989033
0.661216823916768
0.692441553773884
0.7249839664526028
0.6792759866828545
0.7047314538029926
0.7923945328449112
0.8169578133599363
0.8416879538338472
0.8661850400289361
0.8908230790103842
0.9161496703417278
0.941780616048062
0.96779692642722
0.9942979764202158
1.0081223445583813
1.0014633347700819
1.0258782337323622
1.0125593576016396
0.9974478926837833
0.9979875251025595
1.0229873174704953
1.0483435511441914
1.0741965447973563
1.1006699310863737
1.1279112882066546
1.1575181505195262
1.1875142501662426
1.2187389121947398
1.165222848560948
1.1882887160820053
1.1964179190289483
1.207667115787958
1.2305509152927663
1.2534219160084696
1.2764459610349186
1.2996780004026427
1.3009335638936286
1.3163389279686224
1.314512067102686
1.4219338204924674
1.443907830063092
1.4660979265000975
1.4888825182979866
1.4772135836986413

1.4368154266857514
1.4325629188894609
1.3980306289710842
1.4195004071269197
1.4413711125363757
1.46374395525771
1.4867364179831557
1.511225008995771
1.5167149518797354
1.5027442113362837
1.4891579326283246
1.4739792568728791
1.4577709538916555
1.4410312868705202
1.4241922898993193
1.4142942849096405
1.4061825450818906
1.4232548281684543
1.5692944333023278
1.5932089748239315
1.6187872579023246
1.6460861477144586
1.6333298101469165
1.4594158701029345
1.4794816639252544
1.4928188844242853
1.4937708891102535
1.491723578694511
1.4878365156631108
1.4818074042497777
1.4650319342003975
1.4465688623945645
1.4301220325508364
1.3872864044366697
1.3477047704524776
1.3483346837153858
1.3461957978948995
1.3121116013728522
1.3299030211066258
1.3286391982269445
1.3336755596831524
1.3562974605912075
1.3795653689333212
1.5255078027943498
1.5489144503520411
1.540585851320244
1.5606370761578434
1.5449144456878487
1.5257089792046208
1.528821479158183
1.5539757062092983
1.5803251262152977
1.5645021200856744

2.958426429829383
2.7555053217435037
2.755904053835157
2.7614583742881487
2.763011356538867
2.769897746082787
2.7673112176142936
2.8208684924957943
3.044128141762433
3.0312624417106755
3.033258559189679
3.050326447852344
2.8567984106904274
2.8617310149385684
2.8700169798130015
2.8793378999335886
2.8663716082514195
2.8735620324212534
2.871990551927862
2.8708965060492893
2.870062889390781
3.1407257617367677
3.128624274857886
3.072240298383896
3.013572367353683
2.978833195594962
2.9821493359370566
2.944403434802634
2.9134050022709275
2.8865401449591905
2.8918116882031497
2.852031756857077
2.8458298107823743
3.00507214694601
3.011573128732043
2.950470755781726
2.938029592484806
2.928696617546717
2.903324684339158
2.8717776898188196
2.8530827895520585
2.87548236833294
2.8578021355361622
2.8256364909575438
2.811677300497342
2.9857106009668923
2.954882793161323
2.9591711677014745
2.9626143678190773
2.933261923494899
2.9187854825239805
2.9231080686583946
2.9290981943568357
2.910211352997171
2.

0.14294177763211832
0.11906213343386765
0.0951873387050095
0.3178483717093235
0.16715854140370814
0.1925619994840636
0.21809922471337737
0.2054664104043532
0.19020956022366814
0.20475333600128826
0.23029838455012527
0.25594377097192866
0.28163095497040747
0.26847709751756293
0.2540774186964582
0.23855146601032065
0.22315914348786403
0.20781894782713844
0.19253194957949862
0.2078865942154517
0.19415808302786444
0.2095358596604056
0.23530269465191317
0.26116101189280233
0.287088971417975
0.3131396049584184
0.33934881093963065
0.3657583268521269
0.3924171900531599
0.4193696776853391
0.42464260430926193
0.45048465884899425
0.4770317485327162
0.5038180512471103
0.4892549666506117
0.47139831266562465
0.4533671534482912
0.4613921620872642
0.48843760501044997
0.5156431942229275
0.5431862763039107
0.5305765377255184
0.5115863183749675
0.4924465257985991
0.5053146691789129
0.48696020529229134
0.4677131999136881
0.45191107011212717
0.4623968628466121
0.49053722255828386
0.51890124094011
0.5477201

0.44023357930252144
0.465927573269873
0.49178928405359396
0.5178592506793092
0.5441589784429248
0.5708540570081968
0.6482649056414538
0.653597361082012
0.6338339167212838
0.6144197875905673
0.5952462961742845
0.5841823035513289
0.6098639550022181
0.6356878965683466
0.6172147647503056
0.5973189949555253
0.5771200751727615
0.5586143464442158
0.5421284846987771
0.5257143971273439
0.5094330476682699
0.4933132698548289
0.4773089654822242
0.4780790899974246
0.45974786542787743
0.44047876643597994
0.44658741928847623
0.42813617783212576
0.40894213514911554
0.39388931077446826
0.4019938608720448
0.4289183984249487
0.4560349085192715
0.43708995527198163
0.46404033823390844
0.49149850553985464
0.5190577100232635
0.5474790217727769
0.5396899760702362
0.5185215517387467
0.49759906923472935
0.4794082499840115
0.4655884174821983
0.47592396504159235
0.5045047942366374
0.5333933980969814
0.5158848872295586
0.4987239770765078
0.5226082399277542
0.5464260748514004
0.5702249801146808
0.5940095701554376
0

1.248737705520076
1.2709873930340911
1.2936664355697547
1.3168798247579354
1.4585046782494406
1.480060420797726
1.5029980808070944
1.5272641495525259
1.553052050033603
1.5806437919009213
1.6104081519346976
1.642795621330368
1.678441711830895
1.7180487618507128
1.5701457204059668
1.5812141703917955
1.5860525380628896
1.5839004681974487
1.5457566517744978
1.547499378430249
1.5264592397865757
1.5480277623812446
1.5702498045020048
1.5934068538009474
1.6177016092042598
1.643377397869665
1.6707365763261979
1.822215154041776
1.8433326398526864
1.8677878589381283
1.8951855829894682
1.925697149099559
1.8062126609825044
1.822361623389018
1.840890468477631
1.854339856850929
1.8574819446891295
1.8545994295620594
1.8705764196006134
1.8693992295110438
1.8613417549235005
1.847744898781182
1.869167243943181
1.8634359608377153
1.8826570587173985
1.90766271302118
2.0445592364362737
2.0233702697079994
2.0003808968893506
1.9759405498597165
1.9505544302452882
1.8469861976095054
1.8498734797169787
1.8512938

2.9072677618985128
2.7761991726001436
2.7485389647267917
2.7688996252163287
2.753991011928572
2.920268085304356
2.9193928039366117
2.933475770519166
2.9359457302134917
2.973780485973635
2.9724692036223983
2.9951438322698802
3.017816295383488
2.8828499898313495
2.90385038244723
2.9269156907373053
2.9499839554013283
2.973055392880386
3.0103386453660512
3.1819776877550217
3.132164487083086
3.114055485215298
3.0933282047823663
3.0722670678063073
3.0510426182915165
3.074642834140565
3.0987304395999473
3.12322110502003
3.2773951833443506
3.2699479730122345
3.2951594100584067
3.273549839365413
3.296553884065161
3.3199385086648103
3.290040183677769
3.215900218776323
3.234856460351093
3.209919578719661
 25768/50000: episode: 79, duration: 16.949s, episode steps: 326, steps per second: 19, episode reward: -675.000, mean reward: -2.071 [-1000.000, 1.000], mean action: 2.153 [0.000, 4.000], mean observation: 0.182 [-2.732, 1.501], loss: 5.437618, mae: 212.632233, mean_q: -168.874832
0.0
0.02384677

0.14053073986342415
0.124872032011927
0.10927670257882661
0.09368956782698937
0.07811927123257112
0.06260301511686162
0.08314272237030604
0.07118209808563498
0.05569994027779193
0.040226211942566446
0.024760328626088388
0.049091799423241836
0.07415123758411504
0.06255755127391134
0.04714099962544338
0.03173079537253201
0.016341571505908144
0.04137186090622531
0.027783656204590244
0.05187440657541524
0.07693034750006911
0.10197350654109417
0.12702128507838917
0.15207162522565387
0.177128090300488
0.2021949569036194
0.2272773813439542
0.252381600414589
0.2775151640339868
0.30268720782866393
0.32790877555365444
0.3531932037355885
0.34851661959509844
0.34083837283613844
0.3278980805362777
0.3270455733484444
0.31982748938876127
0.3064603890974874
0.2918589247131523
0.29454896386204515
0.28722205868289064
0.29280916926988615
0.2854543764901755
0.2715572130174892
0.27667150604065205
0.30191403008622236
0.2940768759212357
0.3039710235032979
0.2961096819890051
0.3032032307875636
0.3285742540725

0.4233355557099393
0.4513928057104398
0.4797483782602427
0.5085809232788325
0.5380205424994382
0.5240203523847466
0.5290327850821127
0.5314772976672876
0.5555082602436561
0.5587949625394151
0.5584551518811298
0.5231219298073009
0.5257810367061365
0.518480410257375
0.5425500178861585
0.5665741760441808
0.5906013673042544
0.5912269180196267
0.5906162556125724
0.5791698444810693
0.5469235798336222
0.5711530478165497
0.5708480734962343
0.5687454790446472
0.542171349310571
0.5664471871811417
0.5649175752916156
0.5678885497543245
0.5659495168012956
0.5623314944143926
0.5385277147242427
0.5629091530938248
0.5871828276150971
0.5838201987141362
0.5944512066344372
0.618802450825502
0.6432137766292951
0.6677763794917375
0.6925389078501769
0.6869532750325447
0.7097898539742626
0.7029845545493403
0.7141543665082256
0.7390732080637914
0.7641697091755073
0.8077714259160489
0.8297276540421892
0.8541402534424857
0.8790615938444862
0.9045317706257715
0.9306584128234093
0.9575639981896619
0.9562512182605

1.907228105293666
1.934178084807826
1.9636319416335648
1.975879461194084
1.9792853214713986
2.126000147023324
2.1021434411974678
1.9277776676655676
1.9436503532328042
1.960490914547028
1.978463993927478
1.9977731982701334
2.0186639183166823
2.0414328619401845
2.0562755206145473
2.0610608765248375
2.074576051043231
2.0960479442994284
2.1206784373423506
2.1353735346456233
2.1501964723151707
2.1397156078878945
2.1201679817566523
2.1274094908780112
2.1287422196275454
2.0915566431185963
2.087821236978802
2.0850992024176778
2.078989943777979
2.070635982023413
2.061008112874124
2.050582433575128
2.036429000830971
2.0228147542670545
2.0097003728630014
1.9970855426250573
1.9776807014210305
1.9539697487244654
1.9361922385698684
1.933806277274882
2.095299768798385
2.0910393224235886
2.0798100564444493
2.042690843856944
2.0654684771567187
2.0507445666312836
2.0512123226396124
2.0698347940770074
2.0546891269307883
1.8952411680715417
1.9058850472949835
1.9202530540126663
1.9295667236375982
1.9346250

2.7622953496852056
2.7475643286670035
2.786401782184013
2.670982066353444
2.694695919587041
2.67071588271956
2.7603308686725554
2.751901985333465
2.777355780520119
2.7932899222232876
2.805186661789746
2.8285469452168672
2.8075918027027664
2.8342816475352954
2.85947140503436
2.8849836758693534
2.8687343470909092
 27407/50000: episode: 84, duration: 16.830s, episode steps: 326, steps per second: 19, episode reward: -675.000, mean reward: -2.071 [-1000.000, 1.000], mean action: 2.224 [0.000, 4.000], mean observation: 0.163 [-2.787, 1.446], loss: 4.349227, mae: 214.181122, mean_q: -181.836700
0.0
0.023846773038626676
0.04768937413978882
0.07152740919834974
0.09536048441665247
0.11918820633565394
0.14301018186624476
0.11913247244093378
0.09525928794438382
0.0713908819706936
0.19498849453692743
0.45259563968549654
0.16337865754759717
0.18827330024634334
0.21364748133809766
0.20248873337871698
0.18712218417578233
0.17141803684347556
0.15575373099012801
0.1719225586441664
0.19733666793912902
0

0.3572587033318578
0.3445564024814819
0.3291702799214959
0.3200677243069968
0.32092467490543425
0.31528826883198385
0.34020700817960176
0.3403324188356003
0.33690348394024394
0.3618312182050876
0.3613624659868692
0.3503059935001249
0.33557592603959896
0.3278258660016414
0.3273157782543089
0.31436720988255806
0.2999509936743005
0.2856031990135264
0.27132003587668696
0.2570999274502567
0.24294118412028848
0.22884160799062375
0.23228049859170113
0.2572758378893353
0.2822328112153226
0.30718811939556945
0.3059616797315525
0.29496872516023964
0.2930423015625805
0.3180278200241039
0.3429640966228397
0.36790809424592286
0.3928654412067747
0.4178449420180713
0.4428573252481486
0.4396670570155896
0.4549572424205145
0.4798963339362369
0.47550429501271646
0.4824584179901561
0.5075278903227076
0.5325030478339208
0.5575757389912134
0.5827480919824866
0.6080484997417787
0.6335112005267524
0.6591774126641506
0.6575718118804781
0.6824441319342737
0.707859363833865
0.7335537618287136
0.7595089574962463

1.5080000891533285
1.5333485324855551
1.5600350576184359
1.588367513931819
1.6227378002990154
1.656542763894072
1.5658761412062716
1.5868173918356816
1.61008103211646
1.627647976757849
1.6424506475777707
1.644017781401813
1.6398449767206063
1.6217393713278188
1.644255692177202
1.6667721003821891
1.6901234218004046
1.8586737658573216
1.88069906424262
1.9046786626792127
1.9306437187375582
1.9581590504441575
1.9863358299411453
2.021560277517549
1.8950242959722372
1.9146000880149103
1.9354077695605407
1.9479547431734383
1.949233132032531
1.9468323516705053
1.9450640803189152
1.9482054686494532
1.943767205244088
1.9351941118293656
1.9178030779579747
1.9230644775297496
2.0540402831666262
2.031207793396211
2.022492991443432
2.040354496922834
2.0637222914381246
2.0894614071364073
1.9600389482037877
1.9777166199132368
1.985787462577426
1.9819422106936853
1.9966342271109656
2.016516869159913
2.037488514468346
2.0598239457509147
2.0838411899759577
2.109931269279499
2.138579717569441
2.17669132679

1.8346936760851127
1.8549320497930009
1.8576686222893288
1.855621839050187
1.8492652363135262
1.8459827732576823
1.8496610859806983
1.8750989336403268
1.900322583458881
2.0960910914356643
2.0884096154484464
2.06521348710918
2.042362181035997
1.870288227044682
1.8772952822617157
1.9050239536882254
1.8889250244212297
1.857974626371007
1.85607278477262
1.8250671911535497
1.7987550829178658
1.8011150229741468
1.8020376773116775
1.781926187040222
1.7807756536087054
1.79404283007871
1.8127921962093605
1.8339683723710585
2.0237578905739024
2.0250316658578167
2.0391851763599087
2.0584891009712214
2.081937422354885
1.9377831176394973
1.9422577934647265
1.9505890002934225
1.9524688414170719
2.002043571070957
2.012801782678945
2.01772378722189
2.022569682958762
2.051784334272626
2.058213421575894
2.0622330152116013
2.048182335340427
2.0067442483993827
2.1222162795226103
2.1300776306524005
2.12176902943424
2.130972074208725
2.121786881325944
2.046375000123914
2.040672598877467
2.036457454942977
2.

0.0
1.659110602170943e-24
0.02384677303862668
1.1154897511661543e-06
1.115268823866204e-06
1.1150541687722405e-06
1.1148323657730146e-06
1.1146034207508608e-06
0.023847887398229586
0.3749267941900146
0.43157850933064285
0.4739946074495283
0.5162282764790006
0.22538154351191148
0.0002942195018227345
0.0002922312842948516
0.029259611684975793
0.048844240276063086
0.07392954785099272
0.09900106402721838
0.12406220635836698
0.11987834475789813
0.10398350559466954
0.08813205989962794
0.07228639393339377
0.08913609724416102
0.1141817298746297
0.13920769326519533
0.16423177454765195
0.18923361344598327
0.21421329864693195
0.21184582362468068
0.196156532977185
0.19961080731961736
0.19609577198243183
0.18054576101512343
0.18589058137316997
0.1815469287183867
0.166116782361642
0.15069075791409836
0.13534097870015804
0.11999579237883581
0.13216709041751282
0.1247316051192668
0.10949219550017958
0.12291020653761024
0.11495815196743189
0.12784951607214493
0.12018505994095358
0.13258462090072792
0.1

0.525574608687227
0.517907999744074
0.5343264237367996
0.5260116950199084
0.5362917802272367
0.561857487558139
0.5873925244347408
0.6130933277331095
0.603889053250911
0.5928029870360032
0.6027454457103051
0.5924201442543938
0.5806265863158127
0.5809492711363939
0.6071479375401292
0.6853510931505309
0.6663722766439535
0.6459791522869198
0.6252711073583344
0.6065307721177786
0.5889037282125431
0.5714159022802383
0.5540854633948478
0.548021540614145
0.5293280876100389
0.5464558223039871
0.527414476342335
0.5453616249903733
0.5260767888079735
0.5448084120742556
0.5715042298235486
0.5518314243552676
0.5775328246173719
0.6045232360090731
0.5844370227007631
0.6107858034358613
0.5902099916904564
0.6133554130553825
0.592294665608555
0.5705289123989936
0.584377648334989
0.5636750312140002
0.5878096479919801
0.5667668804983765
0.5448478238084712
0.5237550252652521
0.5331600049867083
0.5122669970019914
0.49048568156147515
0.46345084138845605
0.46942873632401755
0.45911486138623947
0.46454266692144

0.6287118975029835
0.6518160682313393
0.6558132922640543
0.6518683359238092
0.6549185261605596
0.656671007715094
0.6204599573286026
0.6436336332067869
0.6667741136570561
0.6609313669966289
0.6681533026770027
0.6682832016276867
0.6675122368704721
0.6743597338563097
0.6750839977485059
0.6984325591627416
0.721866177354578
0.744583893390274
0.7693225818671302
0.7666222554516056
0.7878250286823272
0.8115821019231816
0.8065120818415319
0.8857540261935222
0.9092080000185476
0.9330647936898658
0.9574906162643533
0.9826047585999562
1.0085520453965298
1.0354891318676296
1.0636695665383624
1.0933111101204005
1.1231441450942519
1.15654393910631
1.1928361401510745
1.096579869905402
1.1192636222359558
1.1430674992302456
1.167952750600029
1.194009958455074
1.2123752425214398
1.235869622901605
1.2612382570873728
1.2742815166359989
1.2968441435502016
1.3221479466114199
1.329072973859923
1.3517374622054206
1.348393672094743
1.3339333761113457
1.3184799396483209
1.32335961193123
1.3469402701102349
1.3764

1.620878209538633
1.6474261944191833
1.6765494223860475
1.7082931474276586
1.5723127285508833
1.5862963226304154
1.585211326765551
1.5998896813894843
1.612866137315501
1.6174519778375736
1.6324629916804005
1.6425671976664216
1.7779862897989733
1.772676536683305
1.7833996447434
1.7795386708945706
1.787792248341569
1.8014849575954972
1.817162108346272
1.8278176414593126
1.8159520574693548
1.79897357556089
1.7776474662458286
1.756364667908856
1.6951481884801933
1.6792170306947785
1.6636236841772303
1.6600321507808928
1.6556656353419585
1.6605771643069576
1.6650151767268615
1.6554399507605018
1.7437717212506172
1.7470436710439192
1.7464311601513047
1.7537943851420388
1.7730464712935967
1.8002400719283407
1.813378844126068
1.8332340865132697
1.829295101303913
1.8243868952077984
1.8189573844456886
1.8232756624075195
1.8017611224569088
1.7822492255988314
1.7230889274922043
1.7213029236738433
1.7002209538839999
1.7110347100551135
1.7121470591490529
1.7186175234008632
1.8200033319214484
1.81258

0.3510727406881641
0.37731528817414706
0.40373212878854353
0.3919138202628462
0.3743860398261603
0.39170845398877613
0.4181672353017257
0.4447545016561387
0.4715787515962065
0.4567567835330279
0.4833130243405496
0.4652405004517637
0.4463627984351857
0.42891923625462214
0.4365414718119614
0.4182508649069166
0.39929859599321305
0.40940907521410935
0.43681664195416775
0.46437235654991477
0.44617100528518416
0.42676729145679576
0.407191927681778
0.39251023422022635
0.3787678498607737
0.36511064075357064
0.3793907785314905
0.4073623628306448
0.38861232528785583
0.4164148323420694
0.44464691602485157
0.47323752940542085
0.5022843781471932
0.5319205491353057
0.5195030242008117
0.5443650538911388
0.5692238671941137
0.5747153792082417
0.5807394738720957
0.6053944678749235
0.6089210264906894
0.6112143446097315
0.5774985514709732
0.6022395510430845
0.6268926843883015
0.6515498299107798
0.676215839194114
0.7009044520382056
0.7259401809472825
0.7508161570150701
0.7757241315962478
0.8291354852255517

1.0662146965099837
1.1455067224973163
1.1081608998195178
1.130721456054932
1.1288473585701408
1.1240812925568224
1.093401758341785
1.0891061072563113
1.0823261946651692
1.0740433703816503
1.0352128385089463
1.0480379509253543
1.0704939021912738
1.094163887267302
1.1180872984842132
1.1423149923277351
1.1340777519702419
1.1232649289725793
1.132284321456635
1.1566318202525636
1.1814044359746665
1.2067353296480028
1.197995280898907
1.1836071318551062
1.1668744344718947
1.1493058352845014
1.131171398164174
1.1128737327291782
1.0979654880265552
1.095275966412968
1.0991328572451886
1.1263570752306513
1.1539777943405025
1.2230357693539826
1.2491517313651685
1.2253151366739983
1.0847890226108885
1.0888019976238024
1.0772685426607174
1.0997623291641714
1.1222721960762325
1.1448961417217283
1.167679444291656
1.1906761327344104
1.2139507580473217
1.2375808018813443
1.261659722470433
1.2863008199798442
1.311642167761585
1.3231098526146274
1.3460904614604963
1.371987763771937
1.3682055438392453
1.35

1.729933011549657
1.7591925506211648
1.7910399539259192
1.9758346236940432
2.0105472396176878
1.871482453691007
1.8897862027721655
1.9100771391649332
1.9322436164110326
1.954747646949178
1.9781206768689406
1.9933111799314578
2.0198463790620838
2.0494245835405818
2.0825741999828096
2.094877727693096
2.1320478136425405
1.9820850459335317
2.0000383936452724
2.0099104201609337
2.0138602010771454
2.1372999268522133
2.1379832914824033
2.1482456141398494
2.1676967391315554
2.1897754267726834
2.2144989471783108
2.242178078183815
2.2732880158162647
2.2818362685826545
2.1421507951464864
2.155583781807661
2.167800715383383
2.176707839182667
2.1384454319776665
2.128739394705419
2.1430365256024957
2.161883020990418
2.3775559332085665
2.395769098512929
2.415329197469262
2.4369195638107657
2.4617721772867025
2.479856012907008
2.2646135549799706
2.2728136151896217
2.281327521097384
2.282545249869789
2.2550278092071094
2.2214036077774644
2.2323704271258884
2.232877414307727
2.225095455591562
2.39259951

2.0824589283512487
2.085935147634002
2.089478834208761
2.1256260734707286
2.1306864226044877
2.158093665362536
2.1859085460598338
2.178022015097114
2.1649659146569706
2.217481791147869
2.2090217436761135
2.237651113019714
2.2440230897158853
2.247137589790122
2.278334170007526
2.3017347780402884
2.326042050645768
2.3540769228086114
2.3810282953911406
2.4080972163884984
2.301613247286483
2.298087683844049
2.2741449343675706
2.297635973452836
 31987/50000: episode: 98, duration: 17.138s, episode steps: 325, steps per second: 19, episode reward: -676.000, mean reward: -2.080 [-1000.000, 1.000], mean action: 2.126 [0.000, 4.000], mean observation: 0.143 [-2.765, 1.468], loss: 2.670320, mae: 204.793961, mean_q: -191.693466
0.0
0.023846773038626676
0.04768937413978882
0.023833863149022375
0.04767614042027393
0.023819980076633752
1.240644141011285e-05
1.2403880256331753e-05
1.2401253021810054e-05
0.25850586303035417
0.45059510444888146
0.5085462826014804
0.4996046392158155
0.005711914605946389

0.2839231970004145
0.27899683708643325
0.3039334247612649
0.32881353045704564
0.35367297575735157
0.37851689377578634
0.4033595277755835
0.4281712552093418
0.4529886720856727
0.4778074176479798
0.4764916246701307
0.4736568949233926
0.45870251212611013
0.4403094245635579
0.43850383893564554
0.4374509507705895
0.46240913840036074
0.48723190185323684
0.512091933298478
0.5086179312952457
0.5170515663169805
0.5129614041774552
0.5156932090314776
0.5112396374322337
0.5148698207302442
0.5399606586780493
0.5343783604193151
0.5427661925609667
0.5369149856696968
0.5431432784569403
0.5683772608346562
0.5935216920393341
0.6187986475423713
0.6442205617277523
0.6366602017657538
0.6612912685499637
0.6522569588654605
0.6638912700542857
0.6895260634228344
0.7152235250792067
0.705503080522053
0.6938143372850861
0.7023374364088606
0.7372501773595167
0.7529486254835124
0.7784752223045751
0.8042383225970128
0.830415169929916
0.8571048635386581
0.8844284839750554
0.912533763762593
0.9408689822703818
0.971024

1.017687860631767
1.0138346928863027
1.0079523969509234
0.9895228836781862
1.013564700165172
1.0377678435911948
1.0317368695511149
1.0501388278419945
1.0745240279545367
1.0684894349963856
1.0848230391075384
1.1554602971621182
1.1357485674616725
1.1150138943541148
1.1004263063535347
1.0829115062315764
1.0944731363175213
1.0761500096079664
1.0551569575876623
1.033529605951578
1.0116451639080382
1.007149635454259
1.023843062921909
1.0503867171586057
1.0775983142674794
1.1057698713616053
1.0951262717904453
1.0300972059723543
1.0523295885820438
1.0746145309676316
1.08023509898493
1.0764826670318226
1.0983075393656327
1.1208165603235396
1.1435053175536791
1.1664303352010494
1.190889899046141
1.1915531783662954
1.1871782994890883
1.186735047265068
1.1837078738099103
1.1766202691572711
1.168348318511225
1.2417522853718215
1.224733494408704
1.2057614485833994
1.1871397226394422
1.1600700569607176
1.1387041475372428
1.1715477264450997
1.1937664720538999
1.218096941663807
1.2013805507856652
1.185

0.6154511307380319
0.6375075372440403
0.6596939806742759
0.6823091949131761
0.7054342052623015
0.7054954850903323
0.6980597367876356
0.6910105960950974
0.6815537999786517
0.6576490225104283
0.6700307907708661
0.6914062709230961
0.6840164708720512
0.6760381730918935
0.6669286608093041
0.6607614546559799
0.6657897478236618
0.6901548118102697
0.7151006569329433
0.7408807270246168
0.7379557898392106
0.7262501167699394
0.7340427435583975
0.7240493385362514
0.7135892332948005
0.7040547444213732
0.6974238345532233
0.6908628566857153
0.6843145132059222
0.677823161491902
0.6714299662881489
0.6651306850010907
0.6589742190470187
0.6529684586379642
0.6510739451080725
0.6496572735911663
0.637786279619198
0.63794035177827
0.6542391210733097
0.677621170862675
0.7043904357451035
0.7335559921238276
0.7648714698406762
0.7701949984183198
0.764145102545513
0.7629549330566965
0.7731991735016351
0.780348351909584
0.7834775806603987
0.776195380235581
0.769179260839852
0.7623567764513548
0.734029160433893
0.7

0.9584269065607898
0.9406070164980036
0.9517332050692664
0.9647960069343873
0.9787124951028815
0.9954121620706058
1.062724843755413
1.0491201415990294
1.0720942305170653
1.0442242022609523
1.0586940662285729
1.0749990816766857
1.0932991121879483
1.116063798488799
1.1370152034043202
 33626/50000: episode: 103, duration: 17.386s, episode steps: 329, steps per second: 19, episode reward: -672.000, mean reward: -2.043 [-1000.000, 1.000], mean action: 1.982 [0.000, 4.000], mean observation: 0.118 [-2.786, 1.585], loss: 2.524138, mae: 200.541275, mean_q: -193.847260
0.0
0.023846773038626676
0.04768937413978882
0.07152740919834974
0.047666938395340794
0.07150432590813655
0.09533642859646542
0.11916285343061667
0.1429832077389401
0.16679709925565453
0.1906041361617133
0.402389014127139
0.29252412918317566
0.3159635061973566
0.3415131863556985
0.36711893016294495
0.3928406484035278
0.4187084037935582
0.4447600762070248
0.4375937163346639
0.4630973133700912
0.4480512720600831
0.46701888305574074

0.18235519051335394
0.18825988456007917
0.21322428380236327
0.2381591307794988
0.2630758325518087
0.2879765802170295
0.31286243232485117
0.3377351033491524
0.362609311820837
0.38745329457159455
0.41230512779563694
0.43715949721006053
0.46202351791110663
0.486905935118038
0.5118175037178553
0.5367713695260425
0.5338197306581914
0.5287801304043649
0.5203444507612663
0.5058702366757082
0.49159621760400324
0.4774287504215929
0.4633748421773097
0.44837591954839007
0.47339791060256703
0.4983299081826387
0.5233088640697396
0.5182729157400444
0.5117360988313729
0.5003087767337103
0.48700231685598255
0.47380487225823564
0.4614648374823343
0.48664997665385096
0.5117733964933968
0.5056629999343235
0.4982151079105358
0.48680822647836003
0.47430276570522084
0.46188286734854134
0.4532677027600621
0.47861962918693474
0.503946916230402
0.5293777147921024
0.5549479317416199
0.5475690380258215
0.5384873768164922
0.5470165505195014
0.5726510838392719
0.5995243211465011
0.5820563937678551
0.56432823305934

1.0686345443805632
1.0580445645459897
1.082928736565638
1.1077965906921734
1.098570008195567
1.0864721365705006
1.0732477914879832
1.0593025612024565
1.0450490379504234
1.1091490556051438
1.0897023176152503
1.0706364422415824
1.0580747770780032
1.037811578590587
1.0165927852268914
0.9969444305282903
0.9870496546796759
0.9674843002666118
0.9591227161001912
0.9413668134298454
0.9213307783850676
0.9237153012598607
0.9503664196446349
0.9774694332855368
1.0054426459621786
1.034490284938272
1.0648592775996002
1.0122965998124498
1.03408257732166
1.0562032222634388
1.0773899305614718
1.1003623410361818
1.1236305939127655
1.1308375151258627
1.1606657714217923
1.1518408270219527
1.1586226009832403
1.1561286767412706
1.1513250116438387
1.1324761881372678
1.1558911170444741
1.151280573734403
1.2547941976625159
1.2774230902654027
1.3007013365682416
1.3256964798399993
1.3517839080609844
1.3783121226079562
1.4061129733211235
1.406704351157291
1.3850860125368951
1.3635581117272393
1.3815688964553303
1

1.530137984089466
1.563395197179531
1.41051133575674
1.4235573616752277
1.4231193679459824
1.430351533318104
1.4336136404012936
1.4344139625177759
1.404586799987752
1.3949629941312731
1.4090915205615355
1.4288767009988061
1.4513164659849462
1.475847880249078
1.502440425840589
1.5312642017667284
1.5625956932446408
1.732991334809184
1.771926325275813
1.811609110755597
1.6339310567795449
1.6545214059260467
1.6636795131554472
1.6871100585822512
1.7546446252915942
1.7792692334998876
1.7886477094575315
1.8134367248611134
1.8199136749546652
1.818290862476503
1.8086236346050033
1.8228078627405537
1.8136899755581233
1.6664408409437663
1.6793384853375097
1.6883361150760718
1.6952086494988432
1.6707072263149891
1.8097872889836373
1.781421587313861
1.7813695923415673
1.7793652338450185
1.7560357731778427
1.7373437670672436
1.7326752452419178
1.7339564537578374
1.733813373914083
1.745576794584949
1.7609752994032075
1.7801686518044444
1.803867986758226
1.809448540814992
1.8376219977710202
1.83271409

0.0
0.023846773038626676
0.04768937413978882
0.07152740919834974
0.09536048441665247
0.11918820633565394
0.14301018186624476
0.1668260183208345
0.19063532344528258
0.21443770545125518
0.23823277304908755
-0.10383417881675142
0.25727299832691397
0.3235062599286207
0.3491441272542883
0.37584160105007663
0.3671578472679402
0.3934306840793443
0.42016368617387584
0.4471365773694335
0.4744191478066187
0.4645602972286204
0.44435514858843567
0.42418554778181056
0.40829913527853895
0.4178240927796469
0.3984413140033362
0.420109253657577
0.4005376420329007
0.3805131910293812
0.36580751984370186
0.35126009984474377
0.36530198245568934
0.3459047017686773
0.3685636955257746
0.3490999896766501
0.37215746586156956
0.35260279257734667
0.33358601161894985
0.35010496501883315
0.3782415242381984
0.4065829071370388
0.43530501118927184
0.46451816234343507
0.4577533166123299
0.48615627899229774
0.4755872350467089
0.500429827521778
0.5252308309968892
0.5499708919144444
0.5543447942804274
0.5489955919885168
0

0.8242113624488264
0.8504798215287248
0.8772316992485653
0.9045829680068266
0.903730790660877
0.8851127545711618
0.9061673686021215
0.9334007116798774
0.9161873210268157
0.9430152614102177
0.9710033917877899
0.9532766493398354
0.8985615200718808
0.9224305257299928
0.929866442887076
0.9349349666764333
0.8960561329810652
0.9200062585273467
0.9248466975319712
0.9940436884858079
1.0167192874970745
1.0394890826647398
1.0623850199661988
1.0572919062909538
1.0495748013284756
1.0401810556369337
1.0299907051469066
1.0125974788910141
0.9751412698837183
0.9729643296497552
0.9964660861993871
0.9891538791672022
0.9791252274008175
0.9663944719756413
0.9575911091717956
0.9446382109160198
0.9316957515561559
0.9083573131397611
0.8980683166152496
0.8851689476655754
0.8708245444549746
0.8603332675130266
0.8470402037753941
0.8427875761672028
0.8683756800973665
0.8940128426078452
0.9200907266193146
0.9094914069143923
0.9350077252332698
0.9614914191860188
0.9886262065394437
1.0165325310524356
1.045452626239

0.7240152342661119
0.7088757110443297
0.6925002512146187
0.6758782729069767
0.6613972740542692
0.6481317885715651
0.6405754481463607
0.6252695258194952
0.6090309233389459
0.6067858199678797
0.6334338646396229
0.660421790730215
0.688039181163278
0.7164407627961544
0.7458116305785037
0.7763715189441636
0.8083648817860724
0.7355821612600807
0.7565453323309231
0.7783222510588641
0.8003888675399238
0.8073609965729264
0.8124173951403703
0.8162958461418097
0.7745289656511625
0.786793089710587
0.8087066303126735
0.8127410508686034
0.8152185288023721
0.8156738675778369
0.8396533551010958
0.8481712385381108
0.844293726382747
0.8413574288563466
0.8367446359164765
0.835999794317696
0.8319222387454548
0.8311165858200201
0.8255194635343434
0.8264611463649421
0.8202170761070842
0.8101557686538675
0.7879970311442986
0.7815608947022645
0.7728249117569228
0.7534192904309969
0.7464483972415822
0.737465803299712
0.7280890849379567
0.7182688645012872
0.708466026076889
0.6986638244485808
0.6888990194143444


1.15040193273026
1.157155863716726
1.1729496903365475
1.2855081364984946
1.3147951578543995
1.3574683139687969
1.4132738288697324
1.449309776862056
1.4886617968483298
1.3667167630569954
1.3909430738035464
1.415170461941893
1.3917055842875878
1.368264930910258
1.3991716262729696
1.516445159957366
1.531143994964693
1.5406183773266413
1.5727860470367945
1.6034228169490108
1.6339050334046852
1.6643897977123934
1.6949406808755814
1.692521187386092
1.5552826233966592
1.5757954259953981
1.5991687844915967
1.6477248328544702
1.6583037528465128
1.658947722438791
1.7015765530082902
1.7312250422550715
1.7358080445343562
1.721146049042857
1.7074174566031084
1.6929080132995509
1.6775702478814034
1.6613858075983197
1.6443237325834743
1.6263526296132107
1.6530093774718098
1.6344659995127453
 36568/50000: episode: 112, duration: 16.980s, episode steps: 326, steps per second: 19, episode reward: -675.000, mean reward: -2.071 [-1000.000, 1.000], mean action: 2.000 [0.000, 4.000], mean observation: 0.143

0.295872894511207
0.2844269971399266
0.26929842825881545
0.25427196862898443
0.23930057816558656
0.24293563032271842
0.2380334530002698
0.24199134380903212
0.26709221349824974
0.2920975258481507
0.3171350991217211
0.3421871090580715
0.36726133012105644
0.39236728818582506
0.4175165104549133
0.4122887975785333
0.40561897589780577
0.39252457020765136
0.3782708888102887
0.37274775419071426
0.3979178792558766
0.42305061964497387
0.4482319345560469
0.47348005422083783
0.4664587856223513
0.45797944756369935
0.4566695713468084
0.44891637387391226
0.43886862546106786
0.4254239358340537
0.41213136959056773
0.39892626585110014
0.3858112402748647
0.3837347321043291
0.4091935087070275
0.43465451010728534
0.46020425900423123
0.45173896721393136
0.44195881210808147
0.4457131618702126
0.43660649206866525
0.42595925156920944
0.4136026729906147
0.4130155446552992
0.40371967369469103
0.41613677959601825
0.4066537279507088
0.4196257563448828
0.4098903554085107
0.3988239302421293
0.40138800242332917
0.391

0.9217588304085833
0.9161306933637156
0.9224802394739742
0.9471045240879616
0.9401636465201232
0.9540790087068647
0.9462105695067531
0.9572358025294427
0.9824339710904164
1.0896811068369217
1.1140113401766605
1.0967893181973785
1.0767629218285906
1.0556952733482377
1.052698569951245
1.0336749667101728
1.0124069903617563
0.9905500980236897
0.9684791837274983
0.9648456158100093
0.9446962817258118
0.9228956463426736
0.9217362284712476
0.9482886134661279
0.9757596099180968
1.004167708256163
0.9910198422264157
0.931945290014547
0.9549207207644481
0.9778461783709833
1.0008140421552874
1.0052906995282778
1.0070918226729835
0.9781955220363658
1.0012361865309443
1.0025182578393808
1.0042359083095285
1.0273395636565652
1.0264461283218564
1.0322625501996865
1.0555749614364893
1.079141774506075
1.1030449845427657
1.127371178887255
1.1529219356912817
1.242795701135284
1.264784909346362
1.2882819429748902
1.3125778476385863
1.3378272463860672
1.3642326701585143
1.3642540205115596
1.387295320521631
1

1.8234383286011007
1.8069315007185882
1.8165464520000405
1.842302992981364
1.8698102954161917
1.8996724193879864
1.7753747893431566
1.7942211861110675
1.8139171101213136
1.9986359855956786
2.0157617676141313
2.035078786660623
2.056515190587452
2.0802687767947656
2.106676415044372
2.1161690063726017
2.1189800160412045
2.1421552056411235
2.173786979314752
2.197480365732024
2.04144149887988
2.055718415319926
2.0678326844516848
2.071513338292084
2.044097684002027
2.0404640793030824
2.2249342456191252
2.2205555202621365
2.2226980854903355
2.213255978214596
2.21052705729512
2.2012132511559943
2.2081878624022573
2.1958389921046657
2.2072392427113274
2.232090249546134
2.084552848393499
2.0972168955354475
2.1082902539247015
2.1175343152716115
2.178672563383845
2.184786408661348
2.178071115341824
2.3402272264291257
2.3446719107372327
2.3404513425391387
2.3255351526153807
2.3161280636087174
2.324884131070544
2.3198768013388436
2.3022377326722774
2.2830410018156773
2.262480254375041
2.133923162758

1.1728510883059036
1.197550496587215
1.222252055300409
1.2469558514582468
1.2716619573172407
1.2963704302834729
1.3210813127989782
1.345794632217716
1.370510400679346
1.3041258555288444
1.3295234859565244
1.354926885024795
1.3803360863241805
1.405750992128143
1.4709778141017056
1.5258802267522151
1.5600031120663334
1.5941577363054173
1.628236951479847
 38207/50000: episode: 117, duration: 17.321s, episode steps: 327, steps per second: 19, episode reward: -674.000, mean reward: -2.061 [-1000.000, 1.000], mean action: 2.003 [0.000, 4.000], mean observation: 0.117 [-2.811, 1.497], loss: 1.951583, mae: 199.147995, mean_q: -209.492981
0.0
0.023846773038626676
1.2691276814608132e-06
0.023848041932755925
0.047690316516652745
0.07152769936629619
0.09535979715406973
0.11918621684628243
0.09531301983979634
0.07144460118330116
0.38517992416340807
0.5293105051094887
0.15204432029586956
0.12117517281491627
0.14630697653590288
0.13590999911101784
0.12005184183651346
0.10427254965690472
0.12243546540

0.5899374408129245
0.6161239681370875
0.6423711573944119
0.6689056410792192
0.695798108169581
0.7231406763855825
0.7510407183913985
0.7796292124447829
0.8090668239185385
0.8155897709011132
0.8425766901187259
0.8300917943733045
0.8117710463068433
0.8372607452969525
0.7440224489215013
0.8047444491630729
0.7958193043513637
0.8191594418408964
0.8425234609448569
0.8659085004121074
0.8647117376607348
0.8642588140532785
0.8876899964322787
0.8843039895462159
0.8788999669998899
0.8724643775384412
0.8563272213772269
0.8126516171274508
0.8256364625755374
0.8495011134038774
0.8734557716134876
0.8976499964638016
0.9223134369076181
0.9467804024566276
0.9714940712749212
0.9965228613077524
1.0214033857263585
1.0467199493962989
1.073303375723493
1.1006052041531453
1.128812844138594
1.158183373231666
1.1681156620030844
1.1615900738479006
1.143129417847212
1.1243684342459779
1.1047905887141225
1.084761546324789
1.0896128955487618
1.072066490550391
1.0878995193525727
0.9716776597434149
1.0747975484872632


0.4455100382449549
0.469328016738487
0.4702055640441333
0.46831131455768443
0.46859234477272227
0.46203851032446636
0.4402160837591138
0.46420921584028835
0.4880178510180845
0.511853197696113
0.5357331128504186
0.5596770182719101
0.5837078920485232
0.6078530549273279
0.6321450405924027
0.6566226463215367
0.681332223186403
0.7063292725847377
0.7148208931322226
0.7372143960192812
0.7613519106475228
0.7859511920092046
0.7776559790621942
0.800390151381577
0.8250003489555574
0.8501152227217338
0.8397885556606554
0.8278019269506323
0.839029855451228
0.8646780533045638
0.853001133533343
0.8396718103888029
0.8478710360803984
0.8359007334863244
0.8219636295305761
0.8076221592809465
0.8635773843240372
0.888910893323794
0.9153095863144997
0.9431552034431823
0.972275608782162
1.0020688938272477
0.9118531076909091
0.9174641262444043
0.9039125609927733
0.9081966198304724
0.91032596418553
0.8627486003515425
0.8655419907798102
0.8666117421001455
0.8642675267985637
0.850128162216806
0.8361955159434304


0.9231036265770285
0.9164887626543404
0.8783747041421833
0.8669870658204053
0.8840425292428543
0.8882289607113726
0.8899720850755315
0.8618655705897791
0.8806878438141325
0.9018450489576683
0.9241902784286778
0.9288777644482742
0.9266128489154157
0.9243371579387185
0.921951341060768
0.9159641441654521
0.8994137358512422
0.898031733194988
0.8951470550508013
0.9113924767890921
0.9046298775331654
0.8673541671640115
0.9288831009791412
0.9419335127443204
0.9637797952801328
0.9873678365253791
1.0133949308400314
1.0175357631405397
1.0399733874591135
1.038040930639194
1.0237413802150774
1.0369371375670848
1.0594446562227116
1.0863118714447073
1.1161776587999004
1.148659389823208
1.0418320973699238
1.0544595485187855
1.0741954537194902
1.0827221371285911
1.0989094527699783
1.1219561973706693
1.137397708245392
1.1441027789013705
1.1460429868810882
1.149478608151623
1.1613054821546422
1.1632122076108296
1.1771524248451735
1.1958134870448787
1.1999966729347338
1.1946383573263308
1.186464967385256


0.0
0.023846773038626676
1.2691276814608132e-06
0.023848041932755925
0.047690316516652745
0.07152769936629619
0.09535979715406973
0.11918621684628243
0.09531301983979634
0.11913814707270828
0.3996578905502599
0.5080743099175569
0.10076464773061461
0.12485427442228933
0.15011555677153784
0.1753975113581324
0.200713478293732
0.22607634104791882
0.2515009539002252
0.27700453484695325
0.26726173395272873
0.255657740333372
0.24012355546902595
0.2495893218708167
0.27501000468169934
0.3004725779923317
0.3259925425936357
0.31432036772674926
0.3012740568389367
0.30716763779210177
0.29505152228781506
0.28035398298161807
0.2882907327933334
0.2760825829075374
0.2613652906643816
0.2710662316489314
0.29671753703341464
0.32248030614438644
0.30996240705426364
0.296185882217096
0.28156039839353303
0.2907626129929036
0.3165648516597229
0.30365983264840585
0.3210935906810415
0.3469848396530685
0.33371030282982495
0.3520822494949853
0.37809899710010114
0.3644142682379869
0.3498929970841277
0.3359164558266

0.4963061634526335
0.5222356145058055
0.5483329668274658
0.5746769009557461
0.6013275679043389
0.6014398854015903
0.6271440789879423
0.6535942994740191
0.6414698959008482
0.6277710227562693
0.6931744760009279
0.7191087948570181
0.6980635188071458
0.722586253898956
0.7489385103553479
0.7756803221921843
0.8030198547143722
0.8310491812554401
0.8229602580592388
0.849742891366323
0.826749780335966
0.8021513726113504
0.7524539363303744
0.7761454634334781
0.7998235549707378
0.8025328831635729
0.7970237715339806
0.7983804585943164
0.7916986746071606
0.7919014355553889
0.7870980569872371
0.8109130749763399
0.8345851658230751
0.8578384915120798
0.8818641242278682
0.9060060644625538
0.9303036261115828
0.9554436840882349
0.9804558211752564
0.9816990771570038
0.9725273801147348
0.9808046470297795
1.0054718248955814
1.0302712243945789
1.0554163626138529
1.0810755178014162
1.1073469188102605
1.1343755193916538
1.1623402486612793
1.1705703101205296
1.1955128018933259
1.27001434213726
1.245691778658791

1.0730603967728516
1.1923410312211786
1.2169405709632213
1.2428237671896347
1.2774459223702173
1.3062195208905005
1.312390749684011
1.2957796119895415
1.2751232553934537
1.2942698916607773
1.321109275562956
1.3492350276506453
1.2703437800008157
1.290091777278404
1.3101391205896395
1.3157066812366451
1.3186184449601661
1.3399587342402828
1.3616635561079349
1.3838432170046786
1.384136345940065
1.4016675245830534
1.4235992405641016
1.4200572740713187
1.4351468136005523
1.4581572882527465
1.4820276438122466
1.5073641621230085
1.6679781932576716
1.6925471058059727
1.7194644574043405
1.7488847212535459
1.7812481251511387
1.6681051679838517
1.6800312303825322
1.685832984139241
1.6905595034669068
1.6907081950912881
1.687591162066235
1.6513246908673467
1.6487679153736325
1.6405861091228742
1.6116297959057646
1.6109345827409485
1.6314512575206763
1.6538164395567294
1.6776796164561538
1.703183307809858
1.7340632441306498
1.912526038250794
1.9418933478539149
1.9749699769236677
1.8455349540095218
1

2.216835123481512
2.2065906530076362
2.1886898987978434
2.1726820126404123
2.161849692865354
2.1486035589708137
2.133419812755448
2.1166304918728964
2.106001563304627
2.091664203208732
2.0834239013828
2.0761955659340994
2.067665911528547
2.0549327538490743
2.0386326472748872
2.0378916381668915
2.1598331679237806
2.1553328286509807
2.1485767620810967
2.144707045058674
2.1421080165857087
2.1425236401188568
2.1436092734538548
2.136260201694839
2.127177213555014
2.131455155646906
2.1595059823168787
2.174164858827238
2.188893649393198
2.2062152507962445
2.1012424403756564
2.113477287012723
2.129221263953142
2.2967357974371945
2.321425578470058
2.3229733534311405
2.3481281582249816
2.331848402407571
2.3148530620343046
2.301356029771728
2.309072235905686
2.273903311899169
2.291829227696672
2.2749015329984954
2.2504800877812268
2.230063952153549
2.213032382264785
2.2390135193712033
2.2760887514685177
2.2638057526219297
2.2490178061039083
2.2366728948910994
2.2718706925690753
2.298837292595919


0.26460191308639236
0.28961420460276865
0.3146099152776276
0.3396241980848133
0.36466342987621286
0.3897377078871092
0.41485917859763
0.4400424417915276
0.43286881987107995
0.4555314952237248
0.4471505776199179
0.43607744896662665
0.4273021232764713
0.41882266000562596
0.4255459484294749
0.4508544205011064
0.4760834017524575
0.46669652947422446
0.456092787692653
0.44217083556035314
0.4277812411295519
0.4227328320376964
0.4130467916979451
0.4226215913505521
0.4481931201268322
0.43783874490019103
0.4264930351733345
0.4126305760538747
0.3989558607873458
0.38536713463902267
0.38577719597345445
0.4114701582124532
0.40089092497888307
0.38917631512495476
0.37593036482581066
0.3628370338664611
0.34981902299013895
0.3541189507010501
0.3799360251329736
0.4058706367859082
0.4319010887876973
0.4581015413555693
0.48451708673041743
0.4789667448105137
0.46573673302916213
0.45233504725109314
0.43993709117226903
0.42766900542814856
0.41552792919404197
0.40347741811024856
0.40847784723233777
0.396224734

0.692481966016304
0.7205816571120809
0.7020149413231289
0.6797747945550741
0.6572791700229204
0.6341138534721534
0.6432561788808039
0.5667023703032039
0.5721639651106295
0.5566176099693563
0.5614818941181753
0.5618252279764073
0.5261727510907043
0.5504860780611821
0.5746990204955231
0.5988997726406037
0.6230925074207928
0.6472887943556065
0.6715025595201248
0.6957505334541275
0.7200527824915932
0.7200109806253935
0.7361159718297331
0.7601665419695921
0.7842992060377557
0.8088705999882092
0.8334982986343261
0.8300279285680691
0.8497688370257602
0.8741173530932361
0.8986678193220041
0.9234897887935734
0.9486582062437883
1.0567527164849233
1.081047700663445
1.1061849075806645
1.1321548315314194
1.1591114572551264
1.1872597508988818
1.2168402397400686
1.2482279740877338
1.2818091792232202
1.2041455527276543
1.2268093593104068
1.2499644207656493
1.267299368329619
1.2686094122733
1.2692522530889245
1.2676144581573323
1.2634827855487756
1.2358093487209216
1.258567097044017
1.2815479253070443


1.5179773278198985
1.5412296124903944
1.5660537327687132
1.5926376010206464
1.6200926277442118
1.6488323614713132
1.6831145223569033
1.7209861346614228
1.769276112618758
1.7901246892725018
1.654831421409442
1.6758270811611244
1.6957810085769598
1.7045432069709632
1.7067594468895528
1.7052144476036906
1.675989514276176
1.6683929342042978
1.7988811245640006
1.7880679198211427
1.772719600592162
1.7562535794498988
1.7392902261954468
1.7224349327734696
1.705585745486668
1.6892549502662189
1.6704065593866473
1.6566004454369636
1.639307143425155
1.623874298838803
1.6083299826221282
1.5934308629840352
1.58713385722636
1.5982892173765577
1.590332081638654
1.600419859395049
1.5895265090755768
1.6023683459072395
1.6268125160750497
1.6551407901584283
1.6136394345484473
1.6290595200720823
1.644658762228082
1.6611648961361225
1.676273096431295
1.6887362643436574
1.7060891540981564
1.7098184163515084
1.7199483326667964
1.717391759354502
1.7094439047172592
1.6979471686084642
1.6745788324550828
1.66310

1.644032981721261
1.6752181346803068
1.5492286690229653
1.6092979976975026
1.633011103081901
1.6578397796510895
1.8082678564204104
1.8141164940418026
1.8362794355569676
1.8574221363042538
1.8845941651910352
1.8817092578663848
1.9091288546949197
1.8257640438650726
1.846415764641857
1.822694341724565
1.798997994806636
1.7753263517690887
1.7516788397908762
1.7792743380200815
1.8153353868838922
1.8429233271073824
1.8706871985815396
1.8587483504668474
1.8441063158828415
1.9445937972725496
1.9398954271696665
1.933382008078674
1.9147813681096622
1.9401680162759132
1.9657116450697691
1.9913965384536039
 42791/50000: episode: 131, duration: 17.257s, episode steps: 328, steps per second: 19, episode reward: -673.000, mean reward: -2.052 [-1000.000, 1.000], mean action: 2.030 [0.000, 4.000], mean observation: 0.136 [-2.775, 1.505], loss: 1.774363, mae: 194.306824, mean_q: -214.077194
0.0
0.023846773038626676
1.2691276814608132e-06
1.2688863603250014e-06
1.2686503213368118e-06
1.2684061449870433e-

0.30771684615906164
0.332826426159256
0.32572530714322867
0.3145684891416506
0.29947093365024424
0.2844795954509767
0.287346648922276
0.31252764375813485
0.30494435743863213
0.29199231491463945
0.2949130992053042
0.287213168376249
0.29309924535227905
0.3184044068969583
0.34364627410275056
0.36895444977633746
0.3943312448998245
0.4197970361783505
0.44537602298963525
0.4710970082639461
0.4969944250085933
0.5231096026898124
0.5264339088192393
0.5135718942863432
0.5265879343309467
0.5147124564042996
0.5281426600369945
0.5541038102215219
0.5800955062861745
0.6063266816596373
0.59361782466751
0.6194827680821255
0.6056971233912644
0.6272237123006518
0.6537646569812454
0.6805533652019271
0.7077284330880019
0.7353887477371165
0.7636537745673839
0.792668633000885
0.7975270344748356
0.8244693369127416
0.8530739066785817
0.839478660962022
0.7464252774263977
0.8076304813116003
0.8079575939731959
0.8009036384846333
0.74059158442253
0.7592392948016742
0.7596948634568934
0.7584367661548677
0.716730371

0.11240962505160071
0.09768188912252666
0.11153801356000718
0.1359018047572424
0.16024454074863836
0.18456012707884076
0.2088475750724132
0.2331078800346614
0.257342601378356
0.2815587112398107
0.3058861631426063
0.3300699659453741
0.354242704786024
0.37841212624702775
0.4025863065972932
0.4267749195504605
0.4509896446646132
0.47524455834902457
0.49955657728863184
0.5239459861619975
0.5484370755038499
0.5730589186454178
0.5978463236564744
0.622841006079839
0.6480930415465983
0.673662675017418
0.699622586628511
0.7139158950531689
0.7042575620070326
0.723596937502593
0.7482299050177265
0.739280495751487
0.7556596726781946
0.7807121716744004
0.770759321483898
0.7590652762695391
0.762000292842038
0.7516692743231457
0.7664204120673611
0.7923325724855765
0.8185775497475672
0.8453909499774742
0.9442505228813552
0.9710489141285881
0.9991114171854666
1.028518069504215
1.0595321443213277
0.9999294442766818
1.0211791460210544
1.0431245705352405
1.0703183149582844
1.0930938122282192
1.116413402986

2.9348451145814125
2.951473727770894
2.7390082699022495
2.7658713681463234
2.8234470109392604
2.829258440839216
2.8298467531881273
2.8342082152146477
2.8716483431349484
3.14503849194867
3.14024707758782
3.1273219456820067
3.0976234405179324
3.0678316800798355
2.7922381475507994
2.7883580981536764
2.7889241192288665
2.798150603136188
2.805776413727099
2.8172495894025884
2.8333505083517445
3.1343864095354292
3.159889271509157
3.181694633365983
2.9662547676163133
2.984216716411957
2.9820175584269344
2.9987234733815664
3.0014886471830375
3.007227859281971
3.0092476647430013
3.070492746901398
3.0484798554605117
3.298006874081358
3.2778477740772725
2.9855797171278935
2.975596199740361
2.964575910512022
2.957318592627798
2.943117691226521
2.940018726310081
2.9619242822216223
2.977060705641254
2.9284921892212212
2.93523454083345
3.1993019159799685
3.214190674560591
3.233520116392309
2.9839642150621932
2.9743271536573324
2.983344923452468
2.9630778122910915
2.984998755829351
2.9935512266458453


1.6328677698891343
1.644727146766143
1.6197723577024017
1.6106264460976207
1.6035077775633966
1.6168929095371392
1.5930349287660974
1.5863764127795477
 44432/50000: episode: 136, duration: 17.033s, episode steps: 328, steps per second: 19, episode reward: -673.000, mean reward: -2.052 [-1000.000, 1.000], mean action: 2.125 [0.000, 4.000], mean observation: 0.147 [-2.809, 1.577], loss: 1.620367, mae: 194.703598, mean_q: -216.961670
0.0
0.023846773038626676
1.2691276814608132e-06
1.2688863603250014e-06
0.02384804168279162
0.04769015305908522
0.02383366367158412
0.04767545173634598
0.07151185928238275
0.2576103199096089
0.5154289361083877
0.5730859615263959
0.24397619574403834
0.14811480053918535
0.17319264148176863
0.19826366927072625
0.19087076214970453
0.2005773114420887
0.19154759904629678
0.17577999712008302
0.18708242429564412
0.21218934927569902
0.20425623822381764
0.21320331457491046
0.2044233615701499
0.1888703646741419
0.17345290843143008
0.18539570450526938
0.21054604321181583


0.5826155025539596
0.5826912499871927
0.5548098838079645
0.5553560580765062
0.5555972063935904
0.5554927760159526
0.5541465983953978
0.5797145843684067
0.5612747700354019
0.5429967810235108
0.516121060870802
0.5092414126819891
0.5091838100058492
0.5018363760964276
0.5027552062331276
0.5271876656385146
0.551658837836666
0.5761903501039641
0.5674207025688591
0.5810196589757581
0.5716313411322531
0.5605718635259553
0.5445920313636136
0.5280390323722618
0.511592824456675
0.4953297770255433
0.520245215226938
0.5102448234895366
0.5207896559685966
0.5101554348010205
0.5179410658297289
0.54308468046879
0.5318756859263005
0.5194293536982096
0.5141964916832971
0.5027603601194969
0.5131138360908373
0.5013045340370135
0.5124535058736202
0.5379731835120278
0.5254289619445246
0.5116182621634909
0.4973764310268218
0.4916214519834101
0.5173975740916936
0.5432420436388576
0.5692953247631131
0.5956127078542419
0.6223053730662278
0.6494219550005765
0.6770738899506723
0.7053839101428935
0.7345020488859731

1.0297685046808707
0.9506977649003694
0.9727252742416472
0.9766985148414481
0.9781293327532748
0.9349630689332007
0.9567942946645489
0.9782894067864867
1.0008066540488498
1.0014533819583396
1.009232446199879
1.0075995304090242
1.0025508260892653
0.9893275955704717
0.9523942255702529
0.9651357068809219
0.9882992033098528
1.01155762886695
1.0066046663730266
1.0210865302293777
1.0445966522849721
1.037721819236843
1.0599210302051323
1.0839398652081484
1.107329634888914
1.131833346504079
1.1578641324685044
1.1563321533468738
1.18074281411167
1.1690176947504196
1.1912416650085056
1.3178879524689286
1.296446636159443
1.3172681680652292
1.3436213669677475
1.3713558131841963
1.2938008087747024
1.2999609308421818
1.3026060626956828
1.2530812376792475
1.2566732262776963
1.247527242555564
1.2492815501906114
1.246964535445118
1.2429355820129782
1.2054145058408055
1.2034977589761693
1.1738742577073253
1.1957355538232397
1.2180184077428051
1.2410898664436074
1.266549913716075
1.2914572921169283
1.299

0.9771665572090346
1.006338815544171
1.0377873132360036
1.0677504388563992
1.0823191190238077
1.1201812316697886
1.127444941054685
1.0298498649480599
1.0490195058226932
1.0566779622430933
1.0720213773935099
1.1654077638463147
1.1743563206253989
1.1894408128821155
1.1973858082089373
1.1989171817672093
1.1956963085116346
1.1904828998757289
1.182933062649616
1.173322500891621
1.1682444975619586
1.1620137714621042
1.1558104008067924
1.1473581696250987
1.1427461980155302
1.1373473283876654
1.132484184971854
1.1279610242574203
1.123765340116154
1.1197957411847943
1.1153876400058986
1.114320416041671
1.1206347105326375
1.119441978297373
1.1140140495576922
1.1078377500967052
1.103646811379866
1.1015924947340006
1.0997596723334673
1.0979826192717348
1.0963818099447218
1.109475397110006
1.1504439941383018
1.1628499066015754
1.1749558276077547
1.1965417809715126
1.2040989128350275
1.2098530375151846
1.195823813850753
1.2071810067587634
1.1969205618421148
1.2088439202802483
1.1953949316513846
1.20

0.047666938395340794
0.07150432590813655
0.047642882622027954
0.07147962315539638
0.09531075358167498
0.11913588133668741
0.39975759617295076
0.5173006468858267
0.22394772279182135
0.24678716239373055
0.2365946128061605
0.2206706989803059
0.20492467934281386
0.18922794953375488
0.17357587408256928
0.15795765376391624
0.17314059380542107
0.16127404928966632
0.1762632506732143
0.20160032835234426
0.22695618083721042
0.2523535162191602
0.2778094326801535
0.30334244643152564
0.32897407048315613
0.31929197970490963
0.30697589849831775
0.2924292714905269
0.27734792165239003
0.28511784983081395
0.27327756234328865
0.2840008221003173
0.3096473598141617
0.33532862563235294
0.36108548766624676
0.38696626890328667
0.4130053397692547
0.4392430498855774
0.46572723772121544
0.49251505356714914
0.51967522216956
0.5472681576338936
0.575449364479714
0.6042986488702207
0.6339801629708381
0.6646820550078956
0.696613635357866
0.7301311323202873
0.6810958066660798
0.7064442284868651
0.7322735911833603
0.75

0.7407601624255773
0.7282605257139517
0.715924248054026
0.7037486554305207
0.6917310110840329
0.7273843497088208
0.7086853225281788
0.6902013340398481
0.6719289709909181
0.6580791366423991
0.6396752366616753
0.6198774882578151
0.6009221161867319
0.5837211757209304
0.5753497087040879
0.6014029059589477
0.6276080674345271
0.6541601357153446
0.6811514323932485
0.7086877312768145
0.7368966842480098
0.7659065094770809
0.7959672932380084
0.7978549783515902
0.8252095358432567
0.7961552519812874
0.8198189528085535
0.8236174191953775
0.815741955995377
0.839470193262177
0.8631519624382941
0.8646697169506014
0.8641095143157644
0.8343320403659168
0.8581644107087146
0.8819653856393648
0.9058494656557983
0.9298464619056239
0.9539947915629731
0.9783406450190094
1.0031522920536728
1.0289238587439442
1.0334429291645955
1.0569785122229163
1.04938055821577
1.038927509403282
1.0274777778777124
1.0155382236001311
0.9995740399159118
1.0883348921636447
1.0718475572838353
1.0522644053393089
1.0319652444722607

0.6522256133098399
0.6761636397941932
0.6969789461871758
0.7218288508529201
0.718322537776525
0.7120940778861631
0.7254163648921004
0.7199478479969249
0.7284538136205522
0.772458695000346
0.7572836789882372
0.7417725143781561
0.7203309945441585
0.7402197521209808
0.7641978568398159
0.7496232306139892
0.7335785906044965
0.7172334370814468
0.7018270269940956
0.6872618383430672
0.6710817344274733
0.6656837324726218
0.6509288273511951
0.6347399835268631
0.6323944709788055
0.6582079767111417
0.6840696219902913
0.7101143241172758
0.7377977415662761
0.7664332034305159
0.7962346159331319
0.8274531372069338
0.8293084619271639
0.7820121638584312
0.7930220737143651
0.7987132657442119
0.8002433568162877
0.7502684911775556
0.7573763746982778
0.7260075521789812
0.7473159735235754
0.7686117060937813
0.7901859067402981
0.8120975479031928
0.8344252935191481
0.8417934414772459
0.8563258637455573
0.8768910298391509
0.8989622025099866
0.9219966183563651
0.9459294401889099
0.9708391986269266
0.982514638434

1.2148978993356996
1.252348544608112
1.270275899133708
1.287996814239038
1.3055521668666552
1.2017481826643766
1.1977428804131287
1.197877679286231
1.2135026965589852
1.2225141026195536
1.2290731473637462
1.3098514207845477
1.2999613263712153
1.3027752348323993
1.2933295105128557
1.2944357657511756
1.2908654453421502
1.2864573411982059
1.2793006831820906
1.2689722469580482
1.2632173443895938
1.252547884076329
1.2449565114551275
1.2456221010649584
1.2548890659906577
1.2696527517365768
1.2855397907103339
1.2956939512339345
1.293327163474828
1.3053379254695066
1.3051977103631178
1.2959161349681034
1.306855791876349
1.3412647042826107
1.3599308934193677
1.3567000050841012
1.2723590124422643
1.2697481956674368
1.2742328339469169
1.3387267707799444
1.3342982878073293
1.337085299759054
1.3438849596813742
1.356991347188699
1.3583789355659168
1.3795153777614997
1.3791626926220337
1.385154590471489
1.3952815201855369
1.380602811350252
1.4049418025468314
1.4133528404946507
1.4298489084694064
1.45

0.5176098854130355
-0.010984190121266502
0.014145101960101665
0.03917225370809007
0.06419736285185652
0.0527487540529392
0.07274403923831639
0.06087099134935318
0.04498235726696715
0.029099439848186814
0.05166632604103751
0.03865761647078883
0.060217612051754445
0.0852293784961282
0.110240649548935
0.1352412914618439
0.16022770126097846
0.185207744346346
0.21017673875330206
0.2351363225056357
0.2600875241480965
0.28504537206686864
0.309972970068772
0.3349118164074488
0.3598534016190039
0.3848030412376149
0.3824200490018963
0.37677947713422566
0.36131572957495445
0.3460181645446873
0.33080855024905065
0.31568216769935364
0.30063577194150526
0.2856659783526239
0.27076930488318607
0.27102659981905053
0.2677240737999157
0.2529294213269187
0.255246529361084
0.28032892171693335
0.30532395268354223
0.33034627151785056
0.32676092103163007
0.3163169231148518
0.30192340755331737
0.28764791346190005
0.2879008849210089
0.2841403646262456
0.28581015329885573
0.31095613796570987
0.33601194817541546


0.9588320763472022
0.982527532315752
1.0069327216186639
0.9995827931629367
0.9890397646988749
0.9773289539888291
0.9649988947620187
0.9515906630663256
0.9311239456545032
0.9391312791003578
0.9296848350911363
0.9175025989518908
0.9140721939772383
0.9039405767041008
0.9181805427245355
0.9070554566916929
0.9229255748125731
0.9490585858585923
1.0559539783545033
1.0813881444386526
1.10766655520873
1.1348270709344257
1.163118891922088
1.192752203970713
1.143919534955232
1.1660755526507423
1.1884189446030702
1.2108908571386694
1.2343529112272642
1.2574539043610058
1.2805887953382504
1.3026868807555918
1.3093122932785062
1.3311286368029727
1.3540528921891648
1.3482888971345017
1.3381195668753096
1.3330643774429431
1.3571272993686883
1.3812837569101146
1.4061288698874816
1.4318436939058445
1.4316452417418648
1.4167676316373077
1.40096028864486
1.4069104063294278
1.5472538938661757
1.5715754014976584
1.5495895939964661
1.5747218622555994
1.4140405409306096
1.4196630179756324
1.4076838988934424
1

0.812124684079962
0.8311109791579386
0.8565277743636903
0.8457256742703237
0.8333860594703731
0.8404355775497431
0.829487689117297
0.9132063346111857
0.8938894447085697
0.8726191153083814
0.8510505648568453
0.8306780131742705
0.8151999874735117
0.8123252477040265
0.8256218182106773
0.8521496071603628
0.8796864768498107
0.860515714037977
0.8393939376725535
0.8180651899460465
0.8281503310779532
0.7874686762625699
0.7966503531275562
0.8026322177816299
0.7955428232899483
0.7832353093324308
0.7710545919250368
0.7589221868964106
0.7468635406756994
0.6976866532670528
0.7051016934874824
0.673564833124018
0.6811819730710593
0.68643474811931
0.677893095267498
0.6681168651575496
0.6582832881097906
0.6484300428612222
0.6385944136579412
0.6288064913308755
0.619090560650958
0.5811961434118131
0.5883343662091558
0.5876902797397715
0.5511526175226956
0.5652653570062957
0.5849647831539517
0.6059077015879623
0.6275561512003679
0.6357289852041658
0.6478805606287745
0.6514185228425113
0.6536781874080182
0

1.5003682837272108
1.518879581861877
1.5353907064960934
1.54920550691483
1.5482905322045335
1.5608431648900445
1.5619267284671823
1.5528848772501824
1.5638881731906535
1.5797525443498106
1.5784031432040446
1.4878163860751288
1.5411778518048735
1.5610287594581214
1.578048972217063
1.552812953663304
1.5534460831987313
1.5585928007794922
1.5683502731374181
1.5772558969369768
1.5893951929756134
1.6362211949673382
1.637058153318059
1.6328647306284823
1.6281143982571467
1.6227731223063835
1.650185839867659
1.6778109272485762
1.6350330516175506
1.6131692978116854
1.5955893766436604
1.5775277027654158
1.559782890661824
1.5524541966260532
1.6210624413011736
1.603288603157038
1.596650814837345
1.6103245622743447
1.5888750012480872
1.5903212693007107
1.589910473974552
1.582457188772989
1.5982708759078958
1.571671734641595
 49029/50000: episode: 150, duration: 17.099s, episode steps: 328, steps per second: 19, episode reward: -673.000, mean reward: -2.052 [-1000.000, 1.000], mean action: 2.140 [0.

0.2908495292363472
0.2754823828988484
0.27413157685007944
0.2991489741773075
0.32407435731281575
0.3490138127027227
0.3463469905366343
0.33582503736606234
0.3208410974214047
0.31642133238857917
0.34143655299831277
0.3663981122746765
0.3913707374833
0.4163600886125536
0.4413753348742659
0.46642765480417475
0.49153061414445703
0.5167006385194974
0.541957588944798
0.5673254653131854
0.5928332709777906
0.618516081405624
0.6444163727439184
0.6705856831811488
0.6970867027841683
0.7239714835645428
0.7514142815434309
0.7794455325288748
0.8082252583320253
0.8379448993722215
0.8688227055984314
0.8794319087251732
0.9072997479692001
0.9126016221728371
0.9395953280419156
0.9688487778620005
0.9276835386715137
0.9516876992466956
1.0237032237973156
1.0223560484379013
0.9836032767323459
1.0057747769224008
1.0041164391095412
1.0013527804847466
1.0246813835608717
1.0480443836788667
1.0715084742257617
1.0951190649491416
1.1189234896463858
1.1429789395097247
1.1673547491287701
1.1929623143370693
1.21889046

0.867175041753314
0.8626434406316199
0.8556291941781999
0.8428346854289861
0.8675710965091238
0.8921739444733294
0.8857331628927821
0.8767286503777796
0.8762956310081722
0.9014527430656505
0.9266403327927152
0.9521970351812181
1.061895815033111
1.0446939396246062
1.0248880979200756
1.0040982312987707
0.9826650729400006
0.9610537908080848
0.9515573903276139
0.965477281078031
0.9912776674406426
1.0175236499168
1.044555202366402
1.0725092816943715
1.1016068312566627
1.1321112767395611
1.0778005034612523
1.1002341099344528
1.1229941707044884
1.1459355742754511
1.1690741677769225
1.1911700636718845
1.2144220386771898
1.2385827353037573
1.26322402834004
1.277342408283887
1.2722516144896794
1.2646332489676158
1.2551801214623977
1.2337538929183007
1.2256953096560887
1.2157172316749418
1.2022378950041377
1.2267902701461393
1.3074599524799184
1.3281582635884568
1.3122461675921362
1.2910425825365164
1.290929161051395
1.3163758047438003
1.3424309199466167
1.3249361650830969
1.3505064290843285
1.37

<keras.callbacks.callbacks.History at 0x168c0908>