In [1]:
import gym
import math
import numpy as np
import random
import matplotlib.pyplot as plt
import pybullet as p
import pybullet_data
from gym import error, spaces, utils
from gym.utils import seeding
import time
import os

In [2]:
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Dropout, concatenate
from keras.layers import Concatenate,Conv2D,BatchNormalization,MaxPooling2D,MaxPool2D
from keras.optimizers import Adam
from keras import initializers

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
def get_encoder(img_shape = (256,256,3)):
    """Build the convolutional image encoder.

    The network is five (Conv2D -> 2x2 MaxPool2D) stages followed by a
    Flatten and a 20-unit ReLU Dense layer, so every input image is mapped
    to a 20-value feature vector.

    Args:
        img_shape: shape of a single input image, (height, width, channels).

    Returns:
        An uncompiled Keras ``Model`` mapping an image batch to (batch, 20).
    """
    # (number of filters, kernel size) for each conv stage, in order.
    conv_stages = [
        (16, (7,7)),
        (32, (3,3)),
        (64, (3,3)),
        (64, (3,3)),
        (128, (3,3)),
    ]

    input_img = Input(shape=img_shape)

    features = input_img
    for n_filters, kernel_size in conv_stages:
        features = Conv2D(n_filters, kernel_size, activation='relu', padding='same')(features)
        features = MaxPool2D((2,2), padding='same')(features)

    flat = Flatten()(features)
    encoded = Dense(20 , activation='relu')(flat)
    return Model(input_img , encoded)

In [4]:
class BikeEnv(gym.Env):
    """PyBullet bicycle environment with six discrete actions.

    Each observation is a Python list of 30 values:
      * base orientation quaternion (4 values),
      * base linear velocity (3 values),
      * base angular velocity (3 values),
      * a 20-value encoding of a 256x256 RGB camera frame, produced by the
        encoder built by ``get_encoder`` with weights loaded from
        ``encoder_weights.h5``.

    Actions: 0/1 raise/lower the speed command by 1, 3/4 lower/raise the
    steering command by 1, 2 and 5 leave both commands unchanged.
    """

    def encode(self, image):
        """Encode one camera frame into a list of 20 features.

        Args:
            image: array holding 256*256*3 values (one RGB frame).

        Returns:
            list of the 20 encoder outputs for that frame.
        """
        image = image.reshape((1,256,256,3))
        result = self.Encoder.predict(image)
        result = result.reshape((20,))
        return list(result)

    def __init__(self):
        """Connect to a PyBullet GUI server and load the image encoder."""
        p.connect(p.GUI)
        p.setRealTimeSimulation(1)
        p.resetDebugVisualizerCamera(cameraDistance=10, cameraYaw=0, cameraPitch=-40, cameraTargetPosition=[0.55,-0.35,0.2])
        self.action_space = spaces.Discrete(6)
        self.observation_space = spaces.Box(np.array([-1000]*30), np.array([1000]*30))

        self.timestep = 1./240.
        self.stp = 0  # number of step() calls since the last reset()
        self.Encoder = get_encoder()
        self.Encoder.load_weights('encoder_weights.h5')

    def _build_observation(self):
        """Assemble the 30-value observation list from the current sim state."""
        velocity = p.getBaseVelocity(self.pid)
        img = self.getImage()
        return list(self.getObservation()) + list(velocity[0]) + list(velocity[1]) + self.encode(img)

    def step(self, action):
        """Apply ``action``, wait 0.5 s of wall-clock time, and observe.

        The episode ends with reward -10 when the height of link 0 leaves
        the open interval (0.5, 2), or when |speed| > 3 or |steer| > 4.
        Otherwise the reward is the x displacement of link 0 since the
        previous step (or since reset for the first step).

        Returns:
            (observation, reward, done, info) where ``info`` is a
            placeholder dict with empty 'x', 'y', 'z' entries.
        """
        self.stp += 1

        # Actions 2 and 5 are explicit "keep current command" actions.
        if action == 0:
            self.speed = self.speed + 1
        elif action == 1:
            self.speed = self.speed - 1
        elif action == 3:
            self.steer = self.steer - 1
        elif action == 4:
            self.steer = self.steer + 1

        self.applyAction([self.speed,self.steer])

        # The simulation is advanced by the real-time simulation flag set in
        # __init__, so sleeping is what lets the command take effect.
        time.sleep(.5)

        state = p.getLinkState(self.pid,0)[0]
        if state[2] <= 0.5 or state[2] >= 2 or abs(self.speed)>3 or abs(self.steer)>4:
            reward = -10
            done = True
        else:
            reward = state[0] - self.origin[0]
            done = False
        # Rewards are per-step displacements, so move the reference point.
        self.origin = state

        observation = self._build_observation()

        info = {'x':'','y':'','z':''}
        return observation, reward, done, info

    def applyAction(self, motorCommands):
        """Send speed and steering commands to the bicycle joints.

        Args:
            motorCommands: ``[speed, steer]``; each is scaled by
                ``speedMultiplier`` / ``steeringMultiplier`` before use.
        """
        targetVelocity = motorCommands[0] * self.speedMultiplier
        steeringAngle = motorCommands[1] * self.steeringMultiplier

        for motor in self.motorizedwheels:
            p.setJointMotorControl2(self.pid,
                                    motor,
                                    p.VELOCITY_CONTROL,
                                    targetVelocity=targetVelocity,
                                    force=self.maxForce)
        for steer in self.steeringLinks:
            p.setJointMotorControl2(self.pid,
                                    steer,
                                    p.POSITION_CONTROL,
                                    targetPosition=steeringAngle)

    def reset(self):
        """Rebuild the world and return the initial observation.

        Loads the ground plane, between 10 and 29 spheres at random integer
        positions (x in [5, 35), y in [-5, 5), z = 0.5) and the bicycle at
        (0, 0, 1), then zeroes the speed and steering commands.
        """
        self.stp = 0

        p.resetSimulation()

        urdfRootPath = pybullet_data.getDataPath()
        planeUid = p.loadURDF(os.path.join(urdfRootPath,"plane.urdf"), basePosition=[0,0,0])

        for i in range(np.random.randint(10,30)):
            p.loadURDF(os.path.join(urdfRootPath, "sphere2.urdf"),basePosition=[
                np.random.randint(5,35),
                np.random.randint(-5,5),
                0.5
            ])

        self.pid = p.loadURDF(os.path.join(urdfRootPath, "bicycle/bike.urdf"),basePosition=[0,0,1])

        self.origin = p.getLinkState(self.pid,0)[0]
        p.setGravity(0,0,-10)
        # Zero-force velocity control on every joint before any command is sent.
        for wheel in range(p.getNumJoints(self.pid)):
            p.setJointMotorControl2(self.pid,
                                    wheel,
                                    p.VELOCITY_CONTROL,
                                    targetVelocity=0,
                                    force=0)

        # Joint indices driven by applyAction.
        # NOTE(review): presumably joint 0 is the steering joint and joints
        # 1, 2 are the wheels of bicycle/bike.urdf -- confirm against the URDF.
        self.steeringLinks = [0]
        self.maxForce = 20
        self.nMotors = 2
        self.motorizedwheels = [1, 2]
        self.speedMultiplier = 10.
        self.steeringMultiplier = 0.5

        self.speed = 0
        self.steer = 0

        observation = self._build_observation()

        p.configureDebugVisualizer(p.COV_ENABLE_RENDERING,1)

        return observation

    def getImage(self):
        """Return the current camera frame as float32 scaled to [0, 1]."""
        # Render through this instance rather than a notebook-global `env`,
        # so the environment works under any variable name.
        img = self.render()
        img = np.asarray(img, dtype='float32')
        img /= 255.0
        return img

    def getObservationDimension(self):
        """Return the length of the list produced by getObservation()."""
        return len(self.getObservation())

    def getObservation(self):
        """Return the base orientation quaternion as a list."""
        observation = []
        pos, orn = p.getBasePositionAndOrientation(self.pid)

        observation.extend(list(orn))
        return observation

    def render(self, mode='rgb_array'):
        """Capture a 256x256 RGB frame from a camera tied to the bike base.

        The camera target sits 11.3 units along +x from the base position,
        with distance 10, yaw -90, pitch 0 and roll 0. ``mode`` is accepted
        for gym compatibility but not consulted.

        Returns:
            uint8 array of shape (256, 256, 3).
        """
        pos, orn = p.getBasePositionAndOrientation(self.pid)

        view_matrix = p.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=[pos[0]+11.3,pos[1],pos[2]],
                                                            distance=10,
                                                            yaw=-90 ,
                                                            pitch=0,
                                                            roll=0,
                                                            upAxisIndex=2)
        proj_matrix = p.computeProjectionMatrixFOV(fov=60,
                                                     aspect=float(960) /720,
                                                     nearVal=0.1,
                                                     farVal=100.0)
        (_, _, px, _, _) = p.getCameraImage(width=256,
                                              height=256,
                                              viewMatrix=view_matrix,
                                              projectionMatrix=proj_matrix,
                                              renderer=p.ER_BULLET_HARDWARE_OPENGL)

        rgb_array = np.array(px, dtype=np.uint8)
        rgb_array = np.reshape(rgb_array, (256,256, 4))

        # Drop the alpha channel.
        rgb_array = rgb_array[:, :, :3]
        return rgb_array

    def close(self):
        """Disconnect from the PyBullet server."""
        p.disconnect()

In [5]:
class iDQNAgent:
    """DQN agent with an online Q-network, a target Q-network and replay memory.

    Exploration is epsilon-greedy. Epsilon starts at 1.0 and is multiplied by
    ``epsilon_decay`` on every ``replay`` call until it falls to
    ``epsilon_min``; the decay factor is chosen so that ``episodes`` decay
    steps take epsilon from 1.0 to ``epsilon_min``.
    """

    def __init__(self, state_space , action_space, episodes=500 , memory_size = 50000):
        """Create both Q-networks and initialise exploration parameters.

        Args:
            state_space: gym space; ``shape[0]`` is the network input width.
            action_space: gym discrete space; ``n`` is the number of actions.
            episodes: number of epsilon decay steps from 1.0 to the minimum.
            memory_size: maximum number of transitions kept for replay.
        """
        self.action_space = action_space
        self.memory = []
        self.memory_size = memory_size
        self.gamma = 0.9
        self.epsilon = 1.0
        self.epsilon_min = 0.1
        self.epsilon_decay = self.epsilon_min / self.epsilon
        self.epsilon_decay = self.epsilon_decay ** (1. / float(episodes))

        n_inputs = state_space.shape[0]
        n_outputs = action_space.n

        self.q_model = self.build_model(n_inputs , n_outputs)
        self.q_model.compile(loss='mse' , optimizer=Adam())

        # The target network is only used for prediction, so it is not compiled.
        self.target_q_model = self.build_model(n_inputs , n_outputs)

        self.update_weights()
        self.replay_counter = 0

    def build_model(self, n_inputs , n_outputs):
        """Build the Q-network: Dense(512, 256, 128, 64, 32) + linear head.

        Each hidden Dense layer uses ReLU and is followed by
        BatchNormalization. The model summary is printed as a side effect.
        """
        inputs = Input(shape = (n_inputs,) , name='state')
        x = inputs
        for units in (512, 256, 128, 64, 32):
            x = Dense(units , activation='relu')(x)
            x = BatchNormalization()(x)
        x = Dense(n_outputs , activation='linear' , name = 'action')(x)
        model = Model(inputs , x)
        model.summary()
        return model

    def act(self , state):
        """Pick an action epsilon-greedily.

        Args:
            state: array of shape (1, n_inputs).

        Returns:
            A random action with probability epsilon, otherwise the argmax
            of the online network's Q-values for ``state``.
        """
        if np.random.rand() < self.epsilon:
            return self.action_space.sample()
        q_values = self.q_model.predict(state)
        return np.argmax(q_values[0])

    def remember(self, state, action, reward , next_state , done):
        """Append a transition, evicting the oldest once the buffer is full."""
        item = (state , action , reward , next_state ,done)
        # `>=` keeps the buffer at no more than memory_size entries; `>`
        # would let it grow to memory_size + 1.
        if self.memory and len(self.memory) >= self.memory_size :
            self.memory.pop(0)
        self.memory.append(item)

    def get_target_q_value(self, next_state, reward , double):
        """Return the bootstrap target ``reward + gamma * Q(next_state)``.

        Args:
            next_state: array of shape (1, n_inputs).
            reward: reward observed for the transition.
            double: if true, the online network selects the action and the
                target network evaluates it; otherwise the maximum target
                network Q-value is used.
        """
        target_q = self.target_q_model.predict(next_state)[0]
        if double :
            # predict() needs the batched (1, n_inputs) state; index the
            # returned batch rather than the input.
            action = np.argmax(self.q_model.predict(next_state)[0])
            q_value = target_q[action]
        else :
            q_value = np.amax(target_q)

        q_value *= self.gamma
        q_value += reward
        return q_value

    def replay(self , batch_size, double=False):
        """Train the online network on one random minibatch from memory.

        Also decays epsilon once, and copies the online weights into the
        target network on every 10th call (including the first).

        Args:
            batch_size: number of transitions to sample; must not exceed
                ``len(self.memory)``.
            double: forwarded to ``get_target_q_value``.

        Returns:
            The training loss reported by Keras for this minibatch.
        """
        sars_batch = random.sample(self.memory , batch_size)

        state_batch , q_value_batch = [] , []
        for state, action, reward, next_state, done in sars_batch:
            q_values = self.q_model.predict(state)

            # Terminal transitions have no bootstrap term, so skip the
            # target-network prediction for them.
            if done:
                target = reward
            else:
                target = self.get_target_q_value(next_state, reward, double)
            # Only the taken action's Q-value gets a new target; the other
            # outputs keep the current prediction.
            q_values[0][action] = target

            state_batch.append(state[0])
            q_value_batch.append(q_values[0])

        hist = self.q_model.fit(np.array(state_batch) ,
                                np.array(q_value_batch),
                                batch_size = batch_size,
                                epochs = 1,
                                verbose = 0)

        self.update_epsilon()

        if self.replay_counter % 10 == 0:
            self.update_weights()

        self.replay_counter += 1

        return hist.history['loss'][0]

    def update_epsilon(self):
        """Apply one multiplicative decay step while epsilon is above the minimum."""
        if self.epsilon > self.epsilon_min :
            self.epsilon *= self.epsilon_decay

    def update_weights(self):
        """Copy the online network's weights into the target network."""
        self.target_q_model.set_weights(self.q_model.get_weights())
            

In [6]:
env = BikeEnv()
# Seed every RNG the notebook draws from: the stdlib `random` module is used
# by the agent's replay sampling, and NumPy by epsilon-greedy action
# selection and the random obstacle placement in BikeEnv.reset().
random.seed(123)
np.random.seed(123)
# NOTE(review): BikeEnv does not override seed(), so the effect of this call
# depends on the installed gym version's base class -- confirm.
env.seed(123)













In [7]:
# Width of the flat observation vector fed to the Q-network.
observation_shape = env.observation_space.shape
state_size = observation_shape[0]

In [8]:
# Training hyper-parameters: number of episodes to run, and the number of
# transitions sampled from replay memory for each training update.
episode_count, batch_size = 50000, 128

In [9]:
# Build the agent; `episodes` sets how many decay steps take epsilon from
# its start value down to its minimum.
agent = iDQNAgent(
    state_space=env.observation_space,
    action_space=env.action_space,
    episodes=episode_count,
)

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
state (InputLayer)           (None, 30)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               15872     
_________________________________________________________________
batch_normalization_1 (Batch (None, 512)               2048      
_________________________________________________________________
dense_3 (Dense)              (None, 256)               131328    
_________________________________________________________________
batch_normalization_2 (Batch (None, 256)               1024      
_________________________________________________________________
dense_4 (Dense)              (None, 128)               32896     
_________________________________________________________________
batch_normalization_3 (Batch (None, 128)               512 

In [None]:
# Per-episode history: total reward for every episode, and the training loss
# for every episode in which a replay update ran.
all_rewards, all_losses = [], []
loss = 0  # printed as 0 until the first replay update has happened

for episode in range(episode_count):
    state = np.reshape(env.reset(), [1, state_size])
    total_reward = 0
    done = False

    # Roll out one episode, storing every transition in replay memory.
    while True:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        total_reward += reward
        state = next_state
        if done:
            break

    # One training update per episode, once enough transitions are stored.
    if len(agent.memory) >= batch_size:
        loss = agent.replay(batch_size)
        all_losses.append(loss)

    print("episode {} reward {} loss {}".format(episode , total_reward, loss))
    all_rewards.append(total_reward)

episode 0 reward -10.30323402395058 loss 0
episode 1 reward -11.840119405030624 loss 0
episode 2 reward -7.655167218569871 loss 0
episode 3 reward -12.50385067072992 loss 0
episode 4 reward -11.319316360598311 loss 0
episode 5 reward -9.99684323921561 loss 0
episode 6 reward -10.035842091719267 loss 0
episode 7 reward -7.699375274084423 loss 0
episode 8 reward -10.562675041817942 loss 0
episode 9 reward -8.496367776285535 loss 0
episode 10 reward -8.605646015222966 loss 0
episode 11 reward -7.6754032127606315 loss 0
episode 12 reward -10.548073121489377 loss 0
episode 13 reward -10.018088212480055 loss 0
episode 14 reward -10.017806860225953 loss 0
episode 15 reward -10.017418054558247 loss 0
episode 16 reward -9.4568277930788 loss 0
episode 17 reward -9.585907364671487 loss 0
episode 18 reward -9.998284300338044 loss 0
episode 19 reward -7.828151783678889 loss 0
episode 20 reward -10.003427581113861 loss 0
episode 21 reward -10.01824879760843 loss 0
episode 22 reward -12.4878261445533

episode 145 reward -9.996711087049007 loss 47.655067443847656
episode 146 reward -12.47286224398206 loss 92.32038879394531
episode 147 reward -12.466104518417168 loss 67.85853576660156
episode 148 reward -10.557238367615689 loss 54.59809875488281
episode 149 reward -10.018017129302997 loss 108.4826431274414
episode 150 reward -8.609040837186662 loss 48.36619567871094
episode 151 reward -10.017724833360916 loss 53.61788558959961
episode 152 reward -12.450238317922487 loss 40.92888641357422
episode 153 reward -10.017685759518297 loss 60.19175338745117
episode 154 reward -7.775563944438012 loss 138.34375
episode 155 reward -12.416541486574735 loss 38.668724060058594
episode 156 reward -10.01750304485883 loss 82.64502716064453
episode 157 reward -9.99812233295779 loss 137.45591735839844
episode 158 reward -12.420612581193275 loss 288.5625915527344
episode 159 reward -10.017836229796487 loss 556.1323852539062
episode 160 reward -8.54479370191102 loss 754.5984497070312
episode 161 reward -10

episode 279 reward -7.828151783678889 loss 508.1314697265625
episode 280 reward -10.000762606986521 loss 514.761474609375
episode 281 reward -7.632359789746733 loss 433.88067626953125
episode 282 reward -9.575499956983132 loss 338.85113525390625
episode 283 reward -12.236538816983174 loss 427.4979248046875
episode 284 reward -11.56922224781317 loss 394.152099609375
episode 285 reward -11.889610292238466 loss 330.9837951660156
episode 286 reward -9.996937044083467 loss 302.33111572265625
episode 287 reward -8.550069647869721 loss 307.741943359375
episode 288 reward -10.609393923564317 loss 378.78240966796875
episode 289 reward -10.002233991147273 loss 336.4985656738281
episode 290 reward -8.558441985436971 loss 305.2793884277344
episode 291 reward -10.561108765432888 loss 368.3663330078125
episode 292 reward -9.526865861175438 loss 366.526611328125
episode 293 reward -10.00373311695673 loss 343.28802490234375
episode 294 reward -12.491045358339631 loss 407.8625183105469
episode 295 rewa

episode 413 reward -10.01719887207053 loss 98.34335327148438
episode 414 reward -9.996891719068252 loss 77.35073852539062
episode 415 reward -10.005358013500585 loss 77.67665100097656
episode 416 reward -10.003379807436744 loss 75.2937240600586
episode 417 reward -7.855180484548067 loss 58.27138137817383
episode 418 reward -9.996523183151302 loss 84.77457427978516
episode 419 reward -9.484123207411223 loss 72.51036071777344
episode 420 reward -12.472862243982059 loss 79.5722885131836
episode 421 reward -10.019283828529511 loss 70.30868530273438
episode 422 reward -10.017509182803277 loss 82.66868591308594
episode 423 reward -10.003732992906071 loss 73.25274658203125
episode 424 reward -9.996839389721032 loss 89.59901428222656
episode 425 reward -12.52091841098669 loss 67.57423400878906
episode 426 reward -10.00217280770866 loss 82.60067749023438
episode 427 reward -10.186495994956749 loss 64.46764373779297
episode 428 reward -10.001737681795223 loss 74.02867126464844
episode 429 reward

episode 546 reward -12.204972558964226 loss 32.68912887573242
episode 547 reward -11.6289443701182 loss 34.85394287109375
episode 548 reward -7.8166213099272115 loss 43.26388931274414
episode 549 reward -10.014289748074535 loss 52.4791145324707
episode 550 reward -10.017991574676131 loss 61.544315338134766
episode 551 reward -9.996667619198725 loss 71.07498168945312
episode 552 reward -10.003050756192524 loss 71.96633911132812
episode 553 reward -10.32967513398162 loss 80.90663146972656
episode 554 reward -7.679291014386525 loss 79.2827377319336
episode 555 reward -12.679371264315025 loss 97.3432388305664
episode 556 reward -11.889058062985894 loss 101.73828125
episode 557 reward -12.238253201165005 loss 109.67786407470703
episode 558 reward -9.47352319835705 loss 111.12977600097656
episode 559 reward -9.468294194821004 loss 118.81314086914062
episode 560 reward -9.471878525262445 loss 121.94671630859375
episode 561 reward -15.6868796482573 loss 109.41383361816406
episode 562 reward -1

episode 679 reward -10.003024919832725 loss 54.602516174316406
episode 680 reward -10.003050756192524 loss 28.38418960571289
episode 681 reward -10.003427581113861 loss 39.683494567871094
episode 682 reward -7.70275787431967 loss 39.14796447753906
episode 683 reward -10.017879726713558 loss 36.70203399658203
episode 684 reward -10.000762284822887 loss 26.227285385131836
episode 685 reward -10.018088212480055 loss 14.737853050231934
episode 686 reward -9.996889296289938 loss 23.616565704345703
episode 687 reward -7.703952779775026 loss 33.83530807495117
episode 688 reward -10.017199324037001 loss 31.738094329833984
episode 689 reward -10.003050756192524 loss 27.443588256835938
episode 690 reward -10.557419403850046 loss 25.871599197387695
episode 691 reward -7.682821057008671 loss 23.40261459350586
episode 692 reward -10.192177960483933 loss 24.781652450561523
episode 693 reward -7.71440113727251 loss 23.610206604003906
episode 694 reward -9.998940210462017 loss 27.96230697631836
episod

episode 811 reward -9.996976375302777 loss 10.381134033203125
episode 812 reward -10.0167011764502 loss 11.03011703491211
episode 813 reward -12.5435186903534 loss 11.505121231079102
episode 814 reward -10.017418054558247 loss 10.836287498474121
episode 815 reward -7.742778567072591 loss 10.671098709106445
episode 816 reward -9.996737350885539 loss 9.2864990234375
episode 817 reward -10.003466309286406 loss 9.094320297241211
episode 818 reward -10.000697820844891 loss 11.498872756958008
episode 819 reward -10.560066115291079 loss 9.16136360168457
episode 820 reward -10.017655778732536 loss 10.451818466186523
episode 821 reward -7.658197715710623 loss 9.324862480163574
episode 822 reward -10.001825486508103 loss 10.670462608337402
episode 823 reward -12.461612692441745 loss 8.445296287536621
episode 824 reward -7.7948080846509695 loss 10.278884887695312
episode 825 reward -10.00091078599106 loss 10.537960052490234
episode 826 reward -10.00346338752433 loss 9.068867683410645
episode 827 

episode 945 reward -10.176955685535846 loss 7.041341781616211
episode 946 reward -9.99893933072335 loss 7.682675361633301
episode 947 reward -12.605740336083588 loss 7.3249382972717285
episode 948 reward -15.444857077771577 loss 7.488296985626221
episode 949 reward -9.998648959576713 loss 7.17609977722168
episode 950 reward -12.481415078705059 loss 7.501835346221924
episode 951 reward -10.000762606986521 loss 6.948847770690918
episode 952 reward -12.464379356808779 loss 7.337959289550781
episode 953 reward -10.003733068526815 loss 6.539804458618164
episode 954 reward -10.00373311695673 loss 7.10009765625
episode 955 reward -12.212079172241324 loss 6.3715925216674805
episode 956 reward -7.688132851726396 loss 8.281408309936523
episode 957 reward -7.679389901422729 loss 8.241410255432129
episode 958 reward -7.703952779775026 loss 6.493931293487549
episode 959 reward -10.560066115291079 loss 6.900527000427246
episode 960 reward -7.688300160377084 loss 6.5141425132751465
episode 961 reward

episode 1077 reward -12.569652230075537 loss 6.2376708984375
episode 1078 reward -9.998471690648598 loss 6.096652984619141
episode 1079 reward -12.515150390859251 loss 7.046510219573975
episode 1080 reward -7.679389901422729 loss 6.353161334991455
episode 1081 reward -9.565373602154928 loss 7.246237754821777
episode 1082 reward -9.57682447063461 loss 8.328290939331055
episode 1083 reward -10.003724035705577 loss 8.20888614654541
episode 1084 reward -9.471878525273091 loss 8.551197052001953
episode 1085 reward -10.003733068526815 loss 8.439477920532227
episode 1086 reward -10.017684272265164 loss 8.402576446533203
episode 1087 reward -7.731519100479114 loss 8.289595603942871
episode 1088 reward -10.559679272310053 loss 7.047205924987793
episode 1089 reward -7.8390948458924194 loss 8.364191055297852
episode 1090 reward -12.564444369717926 loss 7.7216997146606445
episode 1091 reward -7.567420143086405 loss 9.10140609741211
episode 1092 reward -12.481415078705059 loss 7.033345699310303
epi

episode 1208 reward -11.865533417798051 loss 22.653839111328125
episode 1209 reward -8.682156489567848 loss 17.524368286132812
episode 1210 reward -10.172189406800555 loss 16.98223114013672
episode 1211 reward -10.000948238163975 loss 14.940339088439941
episode 1212 reward -10.002873858708961 loss 15.183366775512695
episode 1213 reward -12.473425126901539 loss 15.363188743591309
episode 1214 reward -10.018851865705301 loss 19.318700790405273
episode 1215 reward -9.474864830160259 loss 13.433472633361816
episode 1216 reward -8.496636728535016 loss 14.711654663085938
episode 1217 reward -12.514794658796966 loss 15.567328453063965
episode 1218 reward -10.014452630047645 loss 12.489394187927246
episode 1219 reward -10.105280556319645 loss 14.839675903320312
episode 1220 reward -11.589688942421128 loss 18.87997817993164
episode 1221 reward -10.003733068526815 loss 11.470364570617676
episode 1222 reward -9.998372575457541 loss 20.573699951171875
episode 1223 reward -10.000840964224766 loss 1

episode 1339 reward -7.712408113558842 loss 8.80767822265625
episode 1340 reward -10.552713550367258 loss 7.066901206970215
episode 1341 reward -8.572206725980172 loss 11.29961109161377
episode 1342 reward -9.448946981219162 loss 9.717480659484863
episode 1343 reward -10.017168890784937 loss 8.460020065307617
episode 1344 reward -7.655167218569871 loss 6.514145851135254
episode 1345 reward -9.998931360880682 loss 9.372001647949219
episode 1346 reward -9.569471755475117 loss 8.11522102355957
episode 1347 reward -10.018262936155452 loss 18.820568084716797
episode 1348 reward -8.4748308122787 loss 9.342878341674805
episode 1349 reward -12.52337140899963 loss 14.718324661254883
episode 1350 reward -10.017685722946197 loss 10.07911205291748
episode 1351 reward -9.99893933072335 loss 15.689554214477539
episode 1352 reward -8.584670610126922 loss 10.669393539428711
episode 1353 reward -10.017720106194629 loss 7.579382419586182
episode 1354 reward -7.679389901422729 loss 9.138399124145508
epis

episode 1470 reward -10.56027445359424 loss 65.833984375
episode 1471 reward -10.197743306793528 loss 165.88021850585938
episode 1472 reward -10.001226598923948 loss 133.55816650390625
episode 1473 reward -10.31806164302865 loss 82.64280700683594
episode 1474 reward -7.833931880213518 loss 51.908348083496094
episode 1475 reward -10.5792660553026 loss 286.54351806640625
episode 1476 reward -10.003050288806445 loss 85.10480499267578
episode 1477 reward -9.998886336528422 loss 216.6558837890625
episode 1478 reward -10.118925916302286 loss 577.5774536132812
episode 1479 reward -11.129170391472023 loss 191.345703125
episode 1480 reward -10.000762606986521 loss 82.27547454833984
episode 1481 reward -10.546905217320187 loss 121.46556854248047
episode 1482 reward -10.00373311695673 loss 93.76036834716797
episode 1483 reward -10.002998570049876 loss 319.86236572265625
episode 1484 reward -9.452211387352943 loss 61.92612075805664
episode 1485 reward -12.554068182700178 loss 98.90325927734375
epi

episode 1604 reward -12.691071129654588 loss 31605.9375
episode 1605 reward -10.018088212480055 loss 27946.3515625
episode 1606 reward -7.646318235722166 loss 23549.00390625
episode 1607 reward -9.451011284897431 loss 20875.37109375
episode 1608 reward -10.003612840657976 loss 16430.5546875
episode 1609 reward -12.515150390859251 loss 14064.3974609375
episode 1610 reward -10.571197538543984 loss 14504.6396484375
episode 1611 reward -10.002906352909188 loss 8027.138671875
episode 1612 reward -10.018088212480055 loss 7578.0791015625
episode 1613 reward -9.574816761163701 loss 6585.37255859375
episode 1614 reward -10.017111787873482 loss 6087.681640625
episode 1615 reward -9.458158311795648 loss 6557.1181640625
episode 1616 reward -12.49027635097005 loss 5245.693359375
episode 1617 reward -10.566592289509149 loss 4402.29248046875
episode 1618 reward -9.998819857037397 loss 4245.36083984375
episode 1619 reward -9.465074588436112 loss 2778.92529296875
episode 1620 reward -10.017880400404572

episode 1738 reward -10.560066115291079 loss 190.73333740234375
episode 1739 reward -10.003733068526815 loss 190.85499572753906
episode 1740 reward -12.481415078705059 loss 185.61883544921875
episode 1741 reward -7.791904707988117 loss 188.08059692382812
episode 1742 reward -9.998931360880682 loss 229.00799560546875
episode 1743 reward -10.620369778348453 loss 245.0418701171875
episode 1744 reward -9.998285622442292 loss 230.19845581054688
episode 1745 reward -10.018088212480055 loss 179.4368896484375
episode 1746 reward -12.471048813099651 loss 273.816650390625
episode 1747 reward -12.481415078705059 loss 241.55857849121094
episode 1748 reward -12.462380189858168 loss 253.67147827148438
episode 1749 reward -10.00296449975606 loss 210.28060913085938
episode 1750 reward -10.630344479358289 loss 230.28993225097656
episode 1751 reward -12.454772602289818 loss 282.56146240234375
episode 1752 reward -10.560066115291079 loss 254.74432373046875
episode 1753 reward -15.53997280258471 loss 248.

episode 1869 reward -9.997357499141668 loss 71.06649780273438
episode 1870 reward -10.380395061816191 loss 79.0438003540039
episode 1871 reward -7.754832394981372 loss 67.30860900878906
episode 1872 reward -9.57924431694488 loss 71.7882080078125
episode 1873 reward -10.01928817768507 loss 54.26184844970703
episode 1874 reward -7.632983724879017 loss 65.98220825195312
episode 1875 reward -7.6654585326033855 loss 77.85273742675781
episode 1876 reward -9.458475809501346 loss 52.76321792602539
episode 1877 reward -10.009115714570223 loss 48.350582122802734
episode 1878 reward -7.6747597743988205 loss 50.26490020751953
episode 1879 reward -9.998941083888894 loss 36.93550109863281
episode 1880 reward -10.00373311695673 loss 59.60814666748047
episode 1881 reward -10.003689031285662 loss 48.501380920410156
episode 1882 reward -7.632359789746733 loss 45.43663787841797
episode 1883 reward -11.591862701378579 loss 44.41299819946289
episode 1884 reward -9.483413368052709 loss 45.300262451171875
ep

episode 2000 reward -9.998806608763259 loss 32.23755645751953
episode 2001 reward -7.646318235722166 loss 37.78681182861328
episode 2002 reward -7.769203293569701 loss 27.7686824798584
episode 2003 reward -10.033348186158266 loss 30.713497161865234
episode 2004 reward -7.708809036611889 loss 29.142242431640625
episode 2005 reward -7.854253237704647 loss 30.933944702148438
episode 2006 reward -8.523393356614733 loss 35.48431396484375
episode 2007 reward -12.468809772616629 loss 35.31975555419922
episode 2008 reward -10.018120196943528 loss 29.756914138793945
episode 2009 reward -12.467935139187734 loss 36.42741775512695
episode 2010 reward -12.511987121817922 loss 32.85357666015625
episode 2011 reward -10.00095072485043 loss 30.856903076171875
episode 2012 reward -7.637991979151779 loss 36.09576416015625
episode 2013 reward -10.003466309286406 loss 41.69966125488281
episode 2014 reward -9.99806889494472 loss 27.469249725341797
episode 2015 reward -8.183333705866815 loss 25.4050865173339

episode 2131 reward -12.229306170750553 loss 1132.065185546875
episode 2132 reward -9.458120592288681 loss 1153.98486328125
episode 2133 reward -8.583957446089524 loss 939.6396484375
episode 2134 reward -10.560066115291079 loss 1209.32470703125
episode 2135 reward -10.546905154117216 loss 1069.9185791015625
episode 2136 reward -9.998894884786768 loss 856.0768432617188
episode 2137 reward -11.587999085960215 loss 1082.3330078125
episode 2138 reward -11.590741587867466 loss 971.6807861328125
episode 2139 reward -10.036078366328836 loss 1013.0819091796875
episode 2140 reward -10.561193747331911 loss 1013.3648681640625
episode 2141 reward -10.002906352909188 loss 1128.6287841796875
episode 2142 reward -9.998285622442292 loss 974.3237915039062
episode 2143 reward -8.515813756238847 loss 1204.2464599609375
episode 2144 reward -9.998844962948965 loss 1452.09521484375
episode 2145 reward -10.003732992906071 loss 1127.6719970703125
episode 2146 reward -7.710287658083847 loss 1104.5157470703125


episode 2265 reward -10.00373311695673 loss 1366.5848388671875
episode 2266 reward -10.003733068526815 loss 1666.15869140625
episode 2267 reward -10.539382738444335 loss 2521.16357421875
episode 2268 reward -10.003466309286406 loss 1600.3516845703125
episode 2269 reward -8.496367776285535 loss 1780.3419189453125
episode 2270 reward -12.226153575544021 loss 2114.5712890625
episode 2271 reward -10.00373311695673 loss 1547.826171875
episode 2272 reward -10.018088212480055 loss 1013.1270751953125
episode 2273 reward -12.485327179825177 loss 1476.320556640625
episode 2274 reward -10.389483642872442 loss 1566.5081787109375
episode 2275 reward -9.458464469815352 loss 1366.99169921875
episode 2276 reward -10.003027502294376 loss 1138.522216796875
episode 2277 reward -10.017168890784937 loss 1117.642578125
episode 2278 reward -7.787032667171532 loss 1215.048583984375
episode 2279 reward -12.515150390859251 loss 955.0374145507812
episode 2280 reward -10.002947749478302 loss 788.499267578125
epis

episode 2404 reward -10.004089792717178 loss 15965.87890625
episode 2405 reward -10.200114304416303 loss 9465.8642578125
episode 2406 reward -12.21984485916419 loss 22461.8125
episode 2407 reward -9.998225602268045 loss 6760.9697265625
episode 2408 reward -9.996215554386879 loss 7337.1513671875
episode 2409 reward -6.130577520440163 loss 26697.009765625
episode 2410 reward -5.51788795991137 loss 15047.669921875
episode 2411 reward -7.638968452727713 loss 8320.591796875
episode 2412 reward -10.143564934416142 loss 7991.54931640625
episode 2413 reward -12.576651612879527 loss 17545.4921875
episode 2414 reward -10.003027670912966 loss 13944.9150390625
episode 2415 reward -9.998931953225885 loss 2939.81103515625
episode 2416 reward -10.124863441969378 loss 13063.984375
episode 2417 reward -10.001115814267845 loss 5707.4150390625
episode 2418 reward -10.002957919651617 loss 13167.291015625
episode 2419 reward -10.0036163528436 loss 6553.20263671875
episode 2420 reward -9.997148127081914 los

episode 2540 reward -9.470941412113413 loss 6589.595703125
episode 2541 reward -10.016647196832892 loss 3696.34619140625
episode 2542 reward -10.113595201291725 loss 3028.20556640625
episode 2543 reward -7.689142969274613 loss 991.7669067382812
episode 2544 reward -12.54712613306537 loss 4465.248046875
episode 2545 reward -10.001097192418966 loss 1811.638427734375
episode 2546 reward -12.487492065054083 loss 2266.8525390625
episode 2547 reward -9.998907872904406 loss 6041.1181640625
episode 2548 reward -10.14317055024496 loss 1713.9481201171875
episode 2549 reward -7.659777402218104 loss 2304.29638671875
episode 2550 reward -11.58179234883946 loss 4129.6318359375
episode 2551 reward -10.000762606986521 loss 2141.811279296875
episode 2552 reward -10.018807413642513 loss 4357.9169921875
episode 2553 reward -10.017615556932252 loss 1705.4830322265625
episode 2554 reward -10.00294118492069 loss 2448.562255859375
episode 2555 reward -12.251154302386453 loss 1286.6199951171875
episode 2556 r

episode 2673 reward -10.000748706975237 loss 1328.909423828125
episode 2674 reward -7.70702749714051 loss 1062.557861328125
episode 2675 reward -7.679389901422729 loss 1262.0230712890625
episode 2676 reward -10.017655778732536 loss 1033.731201171875
episode 2677 reward -9.448946981215121 loss 1676.7625732421875
episode 2678 reward -8.59347206960348 loss 1039.810791015625
episode 2679 reward -7.670774526636766 loss 1157.486083984375
episode 2680 reward -7.784139723824624 loss 874.61279296875
episode 2681 reward -11.817815386765512 loss 563.7962646484375
episode 2682 reward -12.2073596928475 loss 579.2807006835938
episode 2683 reward -10.003733138315805 loss 1065.381103515625
episode 2684 reward -10.5792660553026 loss 1294.7557373046875
episode 2685 reward -7.632359789746733 loss 1772.4964599609375
episode 2686 reward -9.998471690648598 loss 957.8892822265625
episode 2687 reward -11.58862738090007 loss 725.5464477539062
episode 2688 reward -7.699753902883221 loss 1180.93359375
episode 26

episode 2806 reward -9.477279952972392 loss 2431.582275390625
episode 2807 reward -9.474214343248073 loss 2044.41064453125
episode 2808 reward -9.516870076346418 loss 1896.4945068359375
episode 2809 reward -12.481415078705059 loss 1904.978515625
episode 2810 reward -10.002886666308692 loss 1486.999267578125
episode 2811 reward -9.456949732641643 loss 1437.09912109375
episode 2812 reward -10.003027502294376 loss 1832.421630859375
episode 2813 reward -9.998912299737754 loss 2112.9013671875
episode 2814 reward -12.197601228008315 loss 2016.3768310546875
episode 2815 reward -7.7498377972701835 loss 1758.515869140625
episode 2816 reward -10.016908740889141 loss 1884.3719482421875
episode 2817 reward -10.626354732088455 loss 1575.707763671875
episode 2818 reward -12.245835078511888 loss 1661.67138671875
episode 2819 reward -10.063587738358951 loss 1859.091796875
episode 2820 reward -9.925279525581328 loss 1450.5809326171875
episode 2821 reward -7.740976420476997 loss 1648.182373046875
episod

episode 2941 reward -10.017198872070507 loss 66887.2265625
episode 2942 reward -12.635470560293548 loss 14792.244140625
episode 2943 reward -7.678924967934449 loss 4121.73388671875
episode 2944 reward -10.002906352909188 loss 1529376.5
episode 2945 reward -10.00373311695673 loss 57276.8359375
episode 2946 reward -10.575727276181118 loss 334599.78125
episode 2947 reward -10.00373311695673 loss 79831.375
episode 2948 reward -12.484357940921353 loss 35284744.0
episode 2949 reward -10.396178346784202 loss 14754.640625
episode 2950 reward -9.9984139483522 loss 49477.46875
episode 2951 reward -9.573025335791254 loss 8518.81640625
episode 2952 reward -10.0037470209508 loss 226551.609375
episode 2953 reward -10.000859270504156 loss 8675.501953125
episode 2954 reward -9.998267494413485 loss 2210.298828125
episode 2955 reward -9.998683121802266 loss 12473.71875
episode 2956 reward -10.002944423743706 loss 7674.171875
episode 2957 reward -12.449329013908683 loss 15884419.0
episode 2958 reward -10

episode 3085 reward -10.020139021991147 loss 18506.3671875
episode 3086 reward -10.232288522259893 loss 20518.84765625
episode 3087 reward -10.003466309286406 loss 13344.1640625
episode 3088 reward -12.19991829376129 loss 10976.3076171875
episode 3089 reward -7.630328281983656 loss 21293.7265625
episode 3090 reward -10.002950345879691 loss 32263.599609375
episode 3091 reward -7.7339674751396785 loss 19354.572265625
episode 3092 reward -10.004268191654425 loss 19339.037109375
episode 3093 reward -17.212008864480218 loss 24069.76171875
episode 3094 reward -7.646318235722166 loss 22696.97265625
episode 3095 reward -10.560066115291079 loss 34570.34765625
episode 3096 reward -10.571197538543984 loss 20561.1171875
episode 3097 reward -10.00373309613759 loss 13657.64453125
episode 3098 reward -10.003733068526815 loss 22988.98828125
episode 3099 reward -10.017007947408453 loss 14437.259765625
episode 3100 reward -10.002871243023804 loss 12374.5322265625
episode 3101 reward -9.997891436436907 l

episode 3223 reward -10.000761934607228 loss 4579.283203125
episode 3224 reward -7.630328281983656 loss 8394.03125
episode 3225 reward -11.57529523556396 loss 4282.0009765625
episode 3226 reward -12.502881859263308 loss 4267.7197265625
episode 3227 reward -10.001751250656211 loss 3989.0546875
episode 3228 reward -10.007091735698877 loss 3202.5634765625
episode 3229 reward -12.481415078705059 loss 4694.80712890625
episode 3230 reward -10.389427322335054 loss 4651.587890625
episode 3231 reward -10.625801673459984 loss 5342.0859375
episode 3232 reward -10.009470416139878 loss 5538.56103515625
episode 3233 reward -7.689142969274613 loss 4420.7451171875
episode 3234 reward -12.469974551520362 loss 4283.64501953125
episode 3235 reward -9.465074588436112 loss 3453.52978515625
episode 3236 reward -9.934317508870476 loss 3101.0341796875
episode 3237 reward -10.003471311996329 loss 6964.3408203125
episode 3238 reward -7.703952779775026 loss 4003.59912109375
episode 3239 reward -10.00076260698652

episode 3360 reward -7.6330850149076745 loss 6561.9404296875
episode 3361 reward -10.000697820844891 loss 48062.10546875
episode 3362 reward -10.018088212480055 loss 5045.55908203125
episode 3363 reward -8.523393356614733 loss 2743.286865234375
episode 3364 reward -10.001824388164591 loss 2468.517333984375
episode 3365 reward -9.576004559091917 loss 3896.43701171875
episode 3366 reward -9.576004559091917 loss 48127.65625
episode 3367 reward -9.576004559091917 loss 1576.48046875
episode 3368 reward -12.481141923721 loss 1997.8720703125
episode 3369 reward -7.793718936997671 loss 2220.5224609375
episode 3370 reward -10.000762606986521 loss 3360.63525390625
episode 3371 reward -6.095440687641258 loss 3095.975830078125
episode 3372 reward -7.662842013552996 loss 131070.6640625
episode 3373 reward -9.998285622442292 loss 4056.8505859375
episode 3374 reward -7.613375973789319 loss 3463.5859375
episode 3375 reward -9.442255589959501 loss 3100.9189453125
episode 3376 reward -10.560994229210682

episode 3494 reward -9.465074588436112 loss 120202.65625
episode 3495 reward -10.017418054558247 loss 2146.19921875
episode 3496 reward -9.51935541794687 loss 1862.8564453125
episode 3497 reward -10.003689465716741 loss 1415.2554931640625
episode 3498 reward -9.561551708723526 loss 6811.359375
episode 3499 reward -10.032144569628528 loss 1923.5946044921875
episode 3500 reward -10.021146150784476 loss 1839.6878662109375
episode 3501 reward -12.484317227697595 loss 1926.7939453125
episode 3502 reward -10.454724801237479 loss 1189.643798828125
episode 3503 reward -10.00373311695673 loss 1306.2503662109375
episode 3504 reward -9.447445833412322 loss 3565.5576171875
episode 3505 reward -12.44729318733738 loss 1714.9951171875
episode 3506 reward -10.5454506818073 loss 1299.3447265625
episode 3507 reward -15.51962956787772 loss 1533.47900390625
episode 3508 reward -10.560736439259822 loss 1709.04541015625
episode 3509 reward -12.462380189858168 loss 1569.439208984375
episode 3510 reward -9.47

episode 3628 reward -10.560306970383177 loss 4280.119140625
episode 3629 reward -12.18159447899533 loss 1041745.1875
episode 3630 reward -10.013686076234192 loss 21803.625
episode 3631 reward -12.450238317922487 loss 2455.926025390625
episode 3632 reward -10.003027502294376 loss 2742.99951171875
episode 3633 reward -10.003708535884593 loss 2667.62060546875
episode 3634 reward -9.998880783861043 loss 2709.7958984375
episode 3635 reward -10.566734194582923 loss 18510.951171875
episode 3636 reward -10.013171473770736 loss 3034.9931640625
episode 3637 reward -10.003632745373512 loss 3184.64697265625
episode 3638 reward -11.593847675844017 loss 2465.68505859375
episode 3639 reward -12.463134637235353 loss 8022.8876953125
episode 3640 reward -7.769203293569701 loss 3668.252197265625
episode 3641 reward -10.03358427283643 loss 2761.82421875
episode 3642 reward -10.01222711933595 loss 1366437.875
episode 3643 reward -7.6654585326033855 loss 2714.425048828125
episode 3644 reward -12.25542086372

episode 3764 reward -10.557440029162604 loss 10704.291015625
episode 3765 reward -10.003733131004504 loss 131750.359375
episode 3766 reward -11.600377809022662 loss 216618.859375
episode 3767 reward -10.000859270504156 loss 4087147.75
episode 3768 reward -7.793718936997671 loss 263036.875
episode 3769 reward -10.560066115291079 loss 1612.7568359375
episode 3770 reward -12.479541554873924 loss 1907429.75
episode 3771 reward -11.57938256475802 loss 1272135.25
episode 3772 reward -7.688064168285939 loss 7534.8515625
episode 3773 reward -8.654723134065279 loss 6478.267578125
episode 3774 reward -10.569125981824095 loss 8934.8193359375
episode 3775 reward -15.569780315978738 loss 25154.056640625
episode 3776 reward -12.610272024842418 loss 96397.90625
episode 3777 reward -10.000810890495398 loss 76172.6328125
episode 3778 reward -10.002906352909188 loss 2630.58837890625
episode 3779 reward -8.623586096247086 loss 513664.90625
episode 3780 reward -10.00373311695673 loss 7045.8359375
episode 

episode 3902 reward -11.577360882996356 loss 2596.192138671875
episode 3903 reward -10.003732992906071 loss 10679.0576171875
episode 3904 reward -11.582259284319667 loss 2343.26611328125
episode 3905 reward -11.591862701378579 loss 4050.03857421875
episode 3906 reward -12.535313210793186 loss 4854.3564453125
episode 3907 reward -10.033392517062481 loss 3634.41162109375
episode 3908 reward -12.450238317922487 loss 3224.100830078125
episode 3909 reward -7.646318235722166 loss 3960.34521484375
episode 3910 reward -15.456989934525506 loss 3813.96630859375
episode 3911 reward -8.523393356614733 loss 94366.640625
episode 3912 reward -10.568046922346161 loss 6236.1044921875
episode 3913 reward -10.003733068526815 loss 2780.80029296875
episode 3914 reward -9.998471690648682 loss 5423.11767578125
episode 3915 reward -15.429262821497122 loss 3164.373046875
episode 3916 reward -10.39617748877227 loss 3298.550537109375
episode 3917 reward -10.55212580421905 loss 2055.976806640625
episode 3918 rewa

episode 4036 reward -10.018815214562673 loss 1646.4300537109375
episode 4037 reward -12.48467691508843 loss 1420.9443359375
episode 4038 reward -10.568046922346161 loss 1637.8646240234375
episode 4039 reward -10.003673312351909 loss 1394.179931640625
episode 4040 reward -9.997735507070582 loss 2171.1787109375
episode 4041 reward -6.1621758978799175 loss 1525.92919921875
episode 4042 reward -11.606819133707907 loss 1400.19287109375
episode 4043 reward -9.571811658468695 loss 564032.6875
episode 4044 reward -7.679394001401052 loss 1436.740234375
episode 4045 reward -10.003733138315805 loss 1318.688232421875
episode 4046 reward -10.553173577808785 loss 1326.01123046875
episode 4047 reward -10.002947749478302 loss 10888.5390625
episode 4048 reward -10.576702626758468 loss 1181.03564453125
episode 4049 reward -10.003709193951396 loss 1249.29248046875
episode 4050 reward -8.4748308122787 loss 1590.7113037109375
episode 4051 reward -10.017072981062029 loss 2242.886962890625
episode 4052 rewar

episode 4170 reward -9.447445833417289 loss 670.6000366210938
episode 4171 reward -9.998924263496262 loss 1242.888916015625
episode 4172 reward -10.00369220249538 loss 1158.569091796875
episode 4173 reward -9.465074588436112 loss 5178.57373046875
episode 4174 reward -9.998924263496239 loss 1383.4298095703125
episode 4175 reward -12.506084708966938 loss 1128.343017578125
episode 4176 reward -7.686808950350349 loss 1017.8665161132812
episode 4177 reward -7.671409134265235 loss 1338.55859375
episode 4178 reward -9.57643868558637 loss 1008.801513671875
episode 4179 reward -10.017218460576718 loss 1089.0107421875
episode 4180 reward -15.444857077771577 loss 867.3919677734375
episode 4181 reward -6.615593048205848 loss 1129.095458984375
episode 4182 reward -9.99887547088436 loss 886.0462646484375
episode 4183 reward -9.998931360880682 loss 873.8633422851562
episode 4184 reward -10.017611540332819 loss 7466.47265625
episode 4185 reward -10.003733068526815 loss 1029.9852294921875
episode 4186 

episode 4305 reward -5.940009407635438 loss 6422.5107421875
episode 4306 reward -8.607410260561858 loss 1977.020751953125
episode 4307 reward -7.769203293569701 loss 1820.1356201171875
episode 4308 reward -12.485323580757347 loss 24175.42578125
episode 4309 reward -9.973598730419047 loss 2302.744873046875
episode 4310 reward -10.01742284555165 loss 2345.8134765625
episode 4311 reward -10.176050079486895 loss 158983.046875
episode 4312 reward -10.018851865705301 loss 114206.15625
episode 4313 reward -10.402342307394983 loss 27674.919921875
episode 4314 reward -7.721604005521401 loss 11969055.0
episode 4315 reward -9.99718720747167 loss 69485.3828125
episode 4316 reward -10.00294118492069 loss 17999.357421875
episode 4317 reward -10.003698065436359 loss 29269.796875
episode 4318 reward -10.380073557863604 loss 249493.984375
episode 4319 reward -10.000810890495398 loss 274967.625
episode 4320 reward -9.998334163158013 loss 1611.633056640625
episode 4321 reward -10.003733068526815 loss 178

episode 4444 reward -9.460391373100647 loss 1113.1795654296875
episode 4445 reward -10.003733474054211 loss 56251.70703125
episode 4446 reward -10.571353487633296 loss 957.0789794921875
episode 4447 reward -10.003733068526815 loss 4581.103515625
episode 4448 reward -10.003733068526815 loss 1370.824951171875
episode 4449 reward -10.017655778732536 loss 1122.9171142578125
episode 4450 reward -7.646318235722166 loss 6283.0703125
episode 4451 reward -9.998931360880764 loss 922.8805541992188
episode 4452 reward -9.998285622442292 loss 1753.033935546875
episode 4453 reward -10.383855916952278 loss 1031.68798828125
episode 4454 reward -10.017655778732536 loss 934.3782958984375
episode 4455 reward -10.018120887490324 loss 6600.5947265625
episode 4456 reward -12.503584617077703 loss 19856.771484375
episode 4457 reward -10.018088212480055 loss 1023.0210571289062
episode 4458 reward -11.599277311692623 loss 1002.8289794921875
episode 4459 reward -10.003733068526815 loss 911.4080810546875
episode 

episode 4578 reward -10.003013174192786 loss 866.6632080078125
episode 4579 reward -8.496367776285535 loss 22157.341796875
episode 4580 reward -10.017655778732536 loss 811.1737060546875
episode 4581 reward -7.874606209052068 loss 2622.712646484375
episode 4582 reward -10.000762606986521 loss 753.1927490234375
episode 4583 reward -9.44908554176256 loss 940.2618408203125
episode 4584 reward -7.89831314130134 loss 758.3348388671875
episode 4585 reward -10.003468651321935 loss 571959.5
episode 4586 reward -11.67498732904023 loss 787.8885498046875
episode 4587 reward -5.479958500003195 loss 9529.3095703125
episode 4588 reward -10.017168890784937 loss 1083.388671875
episode 4589 reward -11.658764027337767 loss 837.3798828125
episode 4590 reward -10.019047355703766 loss 985.00537109375
episode 4591 reward -10.568046921567674 loss 1105.525634765625
episode 4592 reward -10.018029210800567 loss 1049.02880859375
episode 4593 reward -10.000930912076525 loss 731.7323608398438
episode 4594 reward -9

episode 4713 reward -9.426336036252634 loss 258338.359375
episode 4714 reward -10.003732992906071 loss 1113.626708984375
episode 4715 reward -10.007514374406574 loss 974.2630615234375
episode 4716 reward -9.530008493913298 loss 2305.368408203125
episode 4717 reward -9.480728745964138 loss 1533848.375
episode 4718 reward -10.664473733704966 loss 1369.79736328125
episode 4719 reward -10.00342778739909 loss 696.6417236328125
episode 4720 reward -9.517757136447875 loss 1123.470458984375
episode 4721 reward -9.996976375302777 loss 1067.320068359375
episode 4722 reward -9.537177846690447 loss 942.169677734375
episode 4723 reward -10.003706968500014 loss 901.4287109375
episode 4724 reward -9.837235622988773 loss 1023.088134765625
episode 4725 reward -9.995822681712927 loss 770.0381469726562
episode 4726 reward -7.712408113558842 loss 636.0845947265625
episode 4727 reward -9.458062666066557 loss 866.36181640625
episode 4728 reward -9.465074588436112 loss 693.76416015625
episode 4729 reward -8.

episode 4849 reward -8.577286721224711 loss 1231.07861328125
episode 4850 reward -10.00373311695673 loss 2286.5341796875
episode 4851 reward -10.000859270504156 loss 5748790.5
episode 4852 reward -10.002873898216215 loss 916.1635131835938
episode 4853 reward -10.003607845370722 loss 910.837158203125
episode 4854 reward -12.255419327475282 loss 892.2718505859375
episode 4855 reward -10.383857558924431 loss 942.4820556640625
episode 4856 reward -10.003733068526815 loss 900.1044311523438
episode 4857 reward -8.018651808349968 loss 970.6470947265625
episode 4858 reward -10.003059713377223 loss 890.2174072265625
episode 4859 reward -9.998285622442292 loss 808.9150390625
episode 4860 reward -10.000762606986521 loss 824.4801025390625
episode 4861 reward -12.515150390859251 loss 3893986.0
episode 4862 reward -10.547811820621792 loss 967.6723022460938
episode 4863 reward -10.016954932596695 loss 53456.953125
episode 4864 reward -9.458464469815352 loss 720.464111328125
episode 4865 reward -10.00

episode 4984 reward -8.562131725043262 loss 2386.6796875
episode 4985 reward -7.594539154157376 loss 4105.2509765625
episode 4986 reward -12.413817349130484 loss 2540.080078125
episode 4987 reward -10.01712606895642 loss 6035.35009765625
episode 4988 reward -10.01807023123355 loss 4318.41015625
episode 4989 reward -10.018018228051407 loss 1994.78759765625
episode 4990 reward -10.018088212480055 loss 2376.434326171875
episode 4991 reward -10.002873898216215 loss 3996404.5
episode 4992 reward -10.003733138315805 loss 1345.3648681640625
episode 4993 reward -7.633866912091859 loss 1315.1646728515625
episode 4994 reward -10.00373071472085 loss 1089970.625
episode 4995 reward -9.432073092219293 loss 958.9236450195312
episode 4996 reward -10.00373311695673 loss 996.0653076171875
episode 4997 reward -10.01719887207053 loss 966.9396362304688
episode 4998 reward -10.003733068526815 loss 8896213.0
episode 4999 reward -10.003641900567233 loss 810.1197509765625
episode 5000 reward -10.0037331169567

episode 5123 reward -10.384861664600054 loss 727207.25
episode 5124 reward -9.998908994617834 loss 10903.6416015625
episode 5125 reward -12.173218986887406 loss 8277.5927734375
episode 5126 reward -12.199911066378773 loss 54975.765625
episode 5127 reward -15.8529652284984 loss 661265.0
episode 5128 reward -10.561108762328812 loss 86652.296875
episode 5129 reward -11.56021007953084 loss 3509.30126953125
episode 5130 reward -10.547118731259332 loss 4695.37060546875
episode 5131 reward -11.658764027337767 loss 1537.783447265625
episode 5132 reward -7.613375973789319 loss 1297499.875
episode 5133 reward -10.012106614566244 loss 187627.546875
episode 5134 reward -12.199911066378773 loss 159100.5
episode 5135 reward -9.467830681443589 loss 2430.315673828125
episode 5136 reward -8.676101579680049 loss 441886.8125
episode 5137 reward -7.632359789746733 loss 1712.413818359375
episode 5138 reward -7.671728920054056 loss 31910.576171875
episode 5139 reward -7.6654585326033855 loss 2639.8002929687

episode 5260 reward -10.018088212480055 loss 1329.3603515625
episode 5261 reward -7.687426812187638 loss 1582.0623779296875
episode 5262 reward -7.656561204636236 loss 1240.591796875
episode 5263 reward -10.568046922346161 loss 23560.55078125
episode 5264 reward -10.003970836155752 loss 1220.164794921875
episode 5265 reward -7.689142940684045 loss 1276.9737548828125
episode 5266 reward -9.568449775584105 loss 1344.475830078125
episode 5267 reward -7.75860688686508 loss 1199.7935791015625
episode 5268 reward -12.5603365425743 loss 1100.829345703125
episode 5269 reward -10.003652638686447 loss 70507.5625
episode 5270 reward -9.576004559091917 loss 1129.4613037109375
episode 5271 reward -7.750583187663603 loss 89519.5
episode 5272 reward -9.458464469815352 loss 737.5418090820312
episode 5273 reward -7.671360919978881 loss 45808.22265625
episode 5274 reward -10.002971236357304 loss 428945.5625
episode 5275 reward -10.01770471585078 loss 1050.025146484375
episode 5276 reward -10.16444732893

episode 5397 reward -10.003708887932275 loss 652869.5
episode 5398 reward -9.439224900574237 loss 136283.59375
episode 5399 reward -10.0026816615245 loss 710578.3125
episode 5400 reward -10.01719887207053 loss 1707.738525390625
episode 5401 reward -9.456810525845997 loss 179910.5625
episode 5402 reward -9.461675878041294 loss 1498.95068359375
episode 5403 reward -7.884393253503442 loss 10848410.0
episode 5404 reward -7.746497031376297 loss 15807.197265625
episode 5405 reward -9.467856754618019 loss 60723.8046875
episode 5406 reward -10.00346338752433 loss 132467.578125
episode 5407 reward -10.018120887490324 loss 1946.9085693359375
episode 5408 reward -7.6654585326033855 loss 1790.92919921875
episode 5409 reward -7.632163222795278 loss 1855.853515625
episode 5410 reward -10.018355266634394 loss 1558.8929443359375
episode 5411 reward -10.00373309613759 loss 1738.435546875
episode 5412 reward -10.003733068526815 loss 1716.74951171875
episode 5413 reward -10.018088212480055 loss 1443.1641

episode 5534 reward -10.565402657788745 loss 905.9036865234375
episode 5535 reward -9.565373602154928 loss 657.4624633789062
episode 5536 reward -12.607966841761876 loss 734.3518676757812
episode 5537 reward -12.481415078705059 loss 598.3402709960938
episode 5538 reward -8.515813756238847 loss 1641449.625
episode 5539 reward -9.465074588436796 loss 1636.313232421875
episode 5540 reward -9.452493174320008 loss 707.1053466796875
episode 5541 reward -10.003027502294376 loss 940.8278198242188
episode 5542 reward -10.00373311695673 loss 996.1039428710938
episode 5543 reward -7.680003618249616 loss 758.53759765625
episode 5544 reward -10.003732992906071 loss 102887.1171875
episode 5545 reward -9.995797637949078 loss 657.16796875
episode 5546 reward -11.589688942421128 loss 930.6725463867188
episode 5547 reward -9.476162775531241 loss 708.3923950195312
episode 5548 reward -12.44729318733738 loss 827.1830444335938
episode 5549 reward -11.591862701378579 loss 42840.05078125
episode 5550 reward 

episode 5672 reward -12.474466471501401 loss 1411.95166015625
episode 5673 reward -7.736964033218054 loss 3616.55810546875
episode 5674 reward -10.00373311695673 loss 49164.88671875
episode 5675 reward -7.679389901422729 loss 49272.3203125
episode 5676 reward -12.429540038476441 loss 1691491.125
episode 5677 reward -10.017007263853465 loss 603389.75
episode 5678 reward -10.002897790906742 loss 1940.2059326171875
episode 5679 reward -12.607966841761876 loss 443155.84375
episode 5680 reward -12.546468273246088 loss 1560.21533203125
episode 5681 reward -9.431338874704457 loss 3682760.75
episode 5682 reward -7.632983724879017 loss 1835.3458251953125
episode 5683 reward -10.017484156033074 loss 43296.3125
episode 5684 reward -9.505292450980914 loss 1359.5523681640625
episode 5685 reward -10.017602603320494 loss 4776193.5
episode 5686 reward -12.450238317922487 loss 1921.1876220703125
episode 5687 reward -10.002951382306794 loss 1860.810546875
episode 5688 reward -9.448946981219162 loss 3499

episode 5809 reward -10.003466309286406 loss 1821.0802001953125
episode 5810 reward -10.559590731112882 loss 1338.710205078125
episode 5811 reward -10.018120887490324 loss 948743.25
episode 5812 reward -10.019026269652416 loss 1093.9849853515625
episode 5813 reward -7.600282544660031 loss 709609.1875
episode 5814 reward -10.017168890784937 loss 1444.2926025390625
episode 5815 reward -10.766124018435153 loss 1364.87255859375
episode 5816 reward -10.389427322335054 loss 1833.0489501953125
episode 5817 reward -12.476841483296855 loss 1296.21142578125
episode 5818 reward -12.437442131981204 loss 1746.5823974609375
episode 5819 reward -10.568046922346161 loss 1836.0755615234375
episode 5820 reward -9.996362168299799 loss 1165.20947265625
episode 5821 reward -10.396157068852244 loss 10318.474609375
episode 5822 reward -10.013686076234192 loss 5496.0791015625
episode 5823 reward -11.637937226503043 loss 2009.0645751953125
episode 5824 reward -12.462380189858168 loss 1219.334228515625
episode 

episode 5944 reward -10.002949498690125 loss 2083.647216796875
episode 5945 reward -10.018120887490324 loss 55278808.0
episode 5946 reward -8.584064257633239 loss 2417.99658203125
episode 5947 reward -10.018088212480055 loss 436382.15625
episode 5948 reward -10.553157614111276 loss 2382.4873046875
episode 5949 reward -9.535580612745449 loss 2253.30615234375
episode 5950 reward -7.633866732736051 loss 259270.421875
episode 5951 reward -9.447445833417289 loss 2712.99072265625
episode 5952 reward -9.45556770340564 loss 2116.50537109375
episode 5953 reward -7.55896824898535 loss 2287.289794921875
episode 5954 reward -10.198813891827573 loss 2477.2109375
episode 5955 reward -10.001170563637249 loss 227160.21875
episode 5956 reward -9.451724980044876 loss 3884.263427734375
episode 5957 reward -10.546443508001822 loss 2707.70654296875
episode 5958 reward -10.148245445280546 loss 2615.673095703125
episode 5959 reward -10.000859270504156 loss 2900.975341796875
episode 5960 reward -10.0029726812

episode 6083 reward -10.018120887490324 loss 1083619.625
episode 6084 reward -11.628944183226446 loss 238487.625
episode 6085 reward -10.003733068526815 loss 22991328.0
episode 6086 reward -12.226153575544021 loss 11906891.0
episode 6087 reward -12.198804953081673 loss 3459.4609375
episode 6088 reward -12.223589008005238 loss 5070.83935546875
episode 6089 reward -9.998924221944964 loss 3253.99267578125
episode 6090 reward -10.00373311695673 loss 1088156.125
episode 6091 reward -9.575499534036458 loss 3165.97265625
episode 6092 reward -12.49596185214537 loss 73932104.0
episode 6093 reward -10.568936110999818 loss 359974.5625
episode 6094 reward -10.380073658673021 loss 2892.7587890625
episode 6095 reward -10.003733068526815 loss 2369.51025390625
episode 6096 reward -10.00122664121522 loss 3294.072998046875
episode 6097 reward -12.535738547902712 loss 2403.26025390625
episode 6098 reward -15.51962956787772 loss 6559.5869140625
episode 6099 reward -9.998900137189061 loss 547640.4375
episo

episode 6221 reward -9.996352248270373 loss 2511.93408203125
episode 6222 reward -12.46923596019893 loss 2488.93603515625
episode 6223 reward -7.860194682294465 loss 2562.82763671875
episode 6224 reward -12.446568341667941 loss 2791.494384765625
episode 6225 reward -11.63238184425122 loss 1979241.875
episode 6226 reward -10.003733068526815 loss 2342.518310546875
episode 6227 reward -7.566768136986948 loss 1965.490478515625
episode 6228 reward -7.857317801280871 loss 2729.43798828125
episode 6229 reward -12.481415078705059 loss 1946.699951171875
episode 6230 reward -10.016756072366777 loss 19310.08203125
episode 6231 reward -7.646318235722166 loss 1988.1357421875
episode 6232 reward -7.658764011156599 loss 2558.700439453125
episode 6233 reward -10.590563705908622 loss 3016.787841796875
episode 6234 reward -8.476978702325145 loss 1796.5458984375
episode 6235 reward -12.150228749856748 loss 2420.80615234375
episode 6236 reward -12.446525006730564 loss 2254.96875
episode 6237 reward -9.996

episode 6356 reward -12.502881859263308 loss 2836.66650390625
episode 6357 reward -10.01719887207053 loss 1533.674560546875
episode 6358 reward -9.998859605387551 loss 1312.213623046875
episode 6359 reward -10.010134825301169 loss 471365.03125
episode 6360 reward -12.413878951453714 loss 4113.32080078125
episode 6361 reward -12.173218986887406 loss 1858.0679931640625
episode 6362 reward -12.502881859263308 loss 1395.524169921875
episode 6363 reward -10.016753785768127 loss 1617.653076171875
episode 6364 reward -12.508966146949994 loss 1277.599853515625
episode 6365 reward -10.386127891438877 loss 1585.176025390625
episode 6366 reward -10.03460244399255 loss 1420.9052734375
episode 6367 reward -10.003733068526815 loss 1516.1650390625
episode 6368 reward -12.447537069311743 loss 1193.4219970703125
episode 6369 reward -12.413878951453714 loss 1600.5626220703125
episode 6370 reward -12.474430814339133 loss 14849741.0
episode 6371 reward -10.173056646100449 loss 8625359.0
episode 6372 rewar

episode 6493 reward -10.55016653893806 loss 7090.89990234375
episode 6494 reward -10.326807391093157 loss 7996.8046875
episode 6495 reward -10.55212580421905 loss 10578.9384765625
episode 6496 reward -10.003732992906071 loss 8218.2705078125
episode 6497 reward -12.482676283142379 loss 5090104.5
episode 6498 reward -10.006721337267109 loss 678517.0
episode 6499 reward -10.002972681268927 loss 6779.48974609375
episode 6500 reward -9.576027048398474 loss 7347.88134765625
episode 6501 reward -12.199911066378773 loss 9945.70703125
episode 6502 reward -12.447280890204048 loss 14007.8232421875
episode 6503 reward -12.481415078705059 loss 6821.2216796875
episode 6504 reward -12.22590301384244 loss 6261.53466796875
episode 6505 reward -10.553518802004005 loss 7495.2138671875
episode 6506 reward -10.017888193590657 loss 1332408.875
episode 6507 reward -10.00095072485043 loss 6463.11181640625
episode 6508 reward -10.002954047417969 loss 8455.68359375
episode 6509 reward -9.439394167370526 loss 72

episode 6631 reward -12.489712640973664 loss 7731.5830078125
episode 6632 reward -9.465221068161876 loss 8846.8125
episode 6633 reward -12.45958855009884 loss 6630.81689453125
episode 6634 reward -10.00373311695673 loss 5210.3525390625
episode 6635 reward -9.471878525273091 loss 10704.228515625
episode 6636 reward -10.544316788206421 loss 5640.28466796875
episode 6637 reward -12.225195323116615 loss 8515.37109375
episode 6638 reward -10.597370699082049 loss 6976.51513671875
episode 6639 reward -8.600437458901776 loss 27378030.0
episode 6640 reward -12.468809772616629 loss 684213952.0
episode 6641 reward -8.523393356614733 loss 6436.76806640625
episode 6642 reward -7.854768193129309 loss 542758.75
episode 6643 reward -12.515506405446768 loss 7637.203125
episode 6644 reward -10.01772001967346 loss 5518.0361328125
episode 6645 reward -12.481415078705059 loss 7173.734375
episode 6646 reward -9.568437758179469 loss 6246.6962890625
episode 6647 reward -12.255419327475282 loss 8608.421875
epi

episode 6771 reward -12.780006368549 loss 3805.7734375
episode 6772 reward -12.515150390859251 loss 426141.8125
episode 6773 reward -10.018851865705301 loss 3719.248779296875
episode 6774 reward -15.49917246036227 loss 4334.94677734375
episode 6775 reward -10.559371125875389 loss 16494.263671875
episode 6776 reward -10.004105361611604 loss 1952538.375
episode 6777 reward -10.571165098242373 loss 3678.86669921875
episode 6778 reward -10.003709413892835 loss 3174.30322265625
episode 6779 reward -12.44729318733738 loss 2594.103271484375
episode 6780 reward -12.515100158876589 loss 3054931.5
episode 6781 reward -10.002895728034224 loss 4104.3828125
episode 6782 reward -12.491435173274652 loss 2018.37353515625
episode 6783 reward -10.557440029162604 loss 3348.59521484375
episode 6784 reward -10.39331286249081 loss 2445.705078125
episode 6785 reward -10.009225388129988 loss 2546.70361328125
episode 6786 reward -10.386150600614737 loss 2233.987060546875
episode 6787 reward -12.167630990522227

episode 6909 reward -8.581780575250551 loss 8623.375
episode 6910 reward -12.717279959539972 loss 9002.5380859375
episode 6911 reward -9.998648959576713 loss 2803156224.0
episode 6912 reward -8.583957446089524 loss 10945.861328125
episode 6913 reward -10.00373309613759 loss 10558208.0
episode 6914 reward -12.5435186903534 loss 11047.203125
episode 6915 reward -10.003732992906071 loss 14058.1328125
episode 6916 reward -11.127728452392464 loss 52638.33984375
episode 6917 reward -11.5958828999538 loss 13475.572265625
episode 6918 reward -10.018120887490337 loss 11279.9794921875
episode 6919 reward -9.99893933072335 loss 402571808.0
episode 6920 reward -10.016756072366787 loss 1091464448.0
episode 6921 reward -11.904189853838806 loss 11939.4404296875
episode 6922 reward -12.44729318733738 loss 11973.33203125
episode 6923 reward -12.44462438634887 loss 14139.109375
episode 6924 reward -12.501342161126214 loss 10539.61328125
episode 6925 reward -7.7327916392389255 loss 1339362.625
episode 69

episode 7050 reward -15.525571609106713 loss 8629.9541015625
episode 7051 reward -10.55357717705641 loss 11066502.0
episode 7052 reward -10.017283773554965 loss 5979.3701171875
episode 7053 reward -9.998900852291662 loss 6426.703125
episode 7054 reward -12.413817349130484 loss 5889.11669921875
episode 7055 reward -12.461716534978478 loss 9812.6884765625
episode 7056 reward -10.003658217746587 loss 5002.123046875
episode 7057 reward -7.739310264969738 loss 8907.1376953125
episode 7058 reward -10.549850814906582 loss 9237.5888671875
episode 7059 reward -10.553173579754748 loss 1402512.25
episode 7060 reward -11.914910719581993 loss 40836800.0
episode 7061 reward -11.637937136420671 loss 2061283.25
episode 7062 reward -12.515150390859251 loss 5183.41796875
episode 7063 reward -10.569937491356884 loss 6736.65234375
episode 7064 reward -12.471048813099651 loss 7425.98193359375
episode 7065 reward -10.571197405393802 loss 305353.4375
episode 7066 reward -10.551018742135392 loss 3870711.25
ep

episode 7189 reward -10.00373311695673 loss 9986.90625
episode 7190 reward -12.56732039383474 loss 5427.19873046875
episode 7191 reward -10.538768066508196 loss 6242.57568359375
episode 7192 reward -8.607414490895556 loss 5603.75537109375
episode 7193 reward -10.310106635020949 loss 5590.6103515625
episode 7194 reward -10.635820153340038 loss 6601.4931640625
episode 7195 reward -8.580982583162621 loss 7113.8056640625
episode 7196 reward -9.465223269313068 loss 6623.0078125
episode 7197 reward -9.998509396272878 loss 8220.376953125
episode 7198 reward -8.496367776285535 loss 4373.40771484375
episode 7199 reward -10.55212580421905 loss 7365.02978515625
episode 7200 reward -10.544316788194537 loss 6437.27685546875
episode 7201 reward -10.00373311695673 loss 8849.3623046875
episode 7202 reward -7.646318235722166 loss 246160544.0
episode 7203 reward -7.679389901422729 loss 7151.669921875
episode 7204 reward -15.455478137623421 loss 3991.361572265625
episode 7205 reward -7.753634297576413 lo

episode 7329 reward -9.458464469815352 loss 415562.1875
episode 7330 reward -12.47836848489099 loss 694925.6875
episode 7331 reward -10.017539881435454 loss 4376804.0
episode 7332 reward -10.017283773554965 loss 305269.3125
episode 7333 reward -10.00373311695673 loss 4702130.0
episode 7334 reward -10.509627837885189 loss 396167296.0
episode 7335 reward -8.577382716624198 loss 184787824.0
episode 7336 reward -9.998895261969913 loss 206368.453125
episode 7337 reward -12.482676283142379 loss 131699.046875
episode 7338 reward -9.451724980044876 loss 12818.755859375
episode 7339 reward -9.464818674493223 loss 182447.203125
episode 7340 reward -12.436603964519758 loss 172401344.0
episode 7341 reward -9.463367668130203 loss 2308376.0
episode 7342 reward -12.44729318733738 loss 2201565.5
episode 7343 reward -9.448946981219162 loss 24132210.0
episode 7344 reward -12.480670746914598 loss 439723.5
episode 7345 reward -10.00295171526544 loss 165204112.0
episode 7346 reward -12.44729318733738 loss 

episode 7473 reward -7.8452730266889965 loss 457145.25
episode 7474 reward -7.877311826164217 loss 17720.107421875
episode 7475 reward -10.01906280446461 loss 19168.263671875
episode 7476 reward -7.632788086778694 loss 10302.9765625
episode 7477 reward -7.661470064543044 loss 13617.521484375
episode 7478 reward -10.018284097249486 loss 13138.609375
episode 7479 reward -7.6891813719649615 loss 732146.0
episode 7480 reward -12.481415078705059 loss 8512.0166015625
episode 7481 reward -7.712408113558842 loss 59140000.0
episode 7482 reward -11.600377809022662 loss 11589957.0
episode 7483 reward -12.546419471486598 loss 15587.6845703125
episode 7484 reward -8.583956994361941 loss 14892.4716796875
episode 7485 reward -9.474864830160259 loss 2999974.5
episode 7486 reward -10.018851865705301 loss 75430.109375
episode 7487 reward -9.998924263496239 loss 85668.6328125
episode 7488 reward -9.510540862467762 loss 10819226.0
episode 7489 reward -7.851885737360664 loss 11241.73828125
episode 7490 rew

episode 7615 reward -10.00373311695673 loss 28461.681640625
episode 7616 reward -11.6289443701182 loss 19448.96484375
episode 7617 reward -9.458464469815352 loss 50542.55859375
episode 7618 reward -7.628952979685142 loss 34896.34765625
episode 7619 reward -10.003027502294376 loss 23442.94140625
episode 7620 reward -10.003733068526815 loss 17535.662109375
episode 7621 reward -10.017418054558247 loss 19685.375
episode 7622 reward -10.808167781834538 loss 15391.6357421875
episode 7623 reward -9.998892058856137 loss 20995.712890625
episode 7624 reward -10.541417664257711 loss 23428.56640625
episode 7625 reward -9.582808029109783 loss 168121.765625
episode 7626 reward -10.003632413459309 loss 42975.94140625
episode 7627 reward -9.474213018443992 loss 22804.69140625
episode 7628 reward -11.591862701378579 loss 17868.650390625
episode 7629 reward -10.000673864220616 loss 16204.9365234375
episode 7630 reward -10.381870056960809 loss 13589.76171875
episode 7631 reward -9.472618588897072 loss 29

episode 7755 reward -10.003733138315805 loss 1778861.5
episode 7756 reward -8.046606476802241 loss 1183180160.0
episode 7757 reward -10.553173579754748 loss 13248.53125
episode 7758 reward -7.670774526636766 loss 14183.4833984375
episode 7759 reward -7.634967738360461 loss 657600256.0
episode 7760 reward -10.001118744593661 loss 19958.32421875
episode 7761 reward -7.71440113727251 loss 36872868.0
episode 7762 reward -8.515813756238847 loss 15234.205078125
episode 7763 reward -9.998892373534028 loss 15658.26953125
episode 7764 reward -9.99893933072335 loss 29417874.0
episode 7765 reward -7.6654585326033855 loss 793179.125
episode 7766 reward -9.462033383326526 loss 11456.646484375
episode 7767 reward -9.998471690648598 loss 13302.0849609375
episode 7768 reward -10.01772001967346 loss 14193.43359375
episode 7769 reward -7.7037966263409645 loss 28833.857421875
episode 7770 reward -10.44765010442185 loss 12071.30859375
episode 7771 reward -7.646318235722166 loss 545821312.0
episode 7772 re

episode 7895 reward -10.012553341529346 loss 17224.443359375
episode 7896 reward -10.546905217320187 loss 3247056384.0
episode 7897 reward -9.474864830160259 loss 9442.3232421875
episode 7898 reward -7.613375973789319 loss 2932217.25
episode 7899 reward -12.560035911340094 loss 9072.927734375
episode 7900 reward -10.000946626569258 loss 8891.40625
episode 7901 reward -9.996406602760096 loss 10072.1240234375
episode 7902 reward -11.64796289306921 loss 24718.82421875
episode 7903 reward -11.036250922765353 loss 8400.67578125
episode 7904 reward -9.471878525273091 loss 9604.96875
episode 7905 reward -10.003733068526815 loss 11904.83984375
episode 7906 reward -7.854253237704647 loss 9688.58203125
episode 7907 reward -7.604776956171034 loss 8661.3662109375
episode 7908 reward -10.557440029162604 loss 21799.228515625
episode 7909 reward -9.524750861124247 loss 8311.884765625
episode 7910 reward -10.002996908512035 loss 7041.8701171875
episode 7911 reward -10.553658086183491 loss 9064.3935546

episode 8035 reward -9.474080615656565 loss 4956.1142578125
episode 8036 reward -8.392268872149806 loss 7028.3291015625
episode 8037 reward -10.003695814677446 loss 5567.4619140625
episode 8038 reward -10.003028209900164 loss 5624.30810546875
episode 8039 reward -10.560066115310468 loss 429998720.0
episode 8040 reward -7.703952779775026 loss 8849.2841796875
episode 8041 reward -7.645593203375835 loss 9666.193359375
episode 8042 reward -8.559410471856355 loss 8170.79345703125
episode 8043 reward -7.613375973789319 loss 4916.4658203125
episode 8044 reward -10.017475240471411 loss 9290.052734375
episode 8045 reward -11.904189853838806 loss 18569.154296875
episode 8046 reward -12.5435186903534 loss 11761.5576171875
episode 8047 reward -7.784139723824624 loss 8827.3583984375
episode 8048 reward -10.53798831204835 loss 3541.33837890625
episode 8049 reward -10.003654581619811 loss 131062944.0
episode 8050 reward -7.858356508140162 loss 218842.171875
episode 8051 reward -12.508304068902818 los

episode 8174 reward -10.386929404593428 loss 18345.23828125
episode 8175 reward -7.7037966263409645 loss 24041.03515625
episode 8176 reward -8.362809137343357 loss 1254659.625
episode 8177 reward -12.499675279734538 loss 16865.55078125
episode 8178 reward -10.00373311695673 loss 25502.126953125
episode 8179 reward -10.555034530202231 loss 17169.17578125
episode 8180 reward -7.662976223794436 loss 455245.75
episode 8181 reward -8.58395842094044 loss 16821.619140625
episode 8182 reward -9.530258270927229 loss 18417.03125
episode 8183 reward -10.017655778732582 loss 136118.0625
episode 8184 reward -8.6005595493931 loss 13847.6259765625
episode 8185 reward -8.582710782413699 loss 8458.419921875
episode 8186 reward -10.003050898999392 loss 15580.2265625
episode 8187 reward -9.99879702161873 loss 26470.1015625
episode 8188 reward -11.55816704905414 loss 18659.283203125
episode 8189 reward -7.671728920054056 loss 84459.1640625
episode 8190 reward -7.712408113558842 loss 1543857.125
episode 81

episode 8315 reward -11.591862701378579 loss 39641.234375
episode 8316 reward -12.481415078705059 loss 194203.65625
episode 8317 reward -7.699019157513609 loss 35670.89453125
episode 8318 reward -10.003674928245063 loss 889439.6875
episode 8319 reward -11.599727337404705 loss 448768960.0
episode 8320 reward -9.998048614443451 loss 15136522.0
episode 8321 reward -10.54501851756382 loss 727691840.0
episode 8322 reward -10.386154358577365 loss 646075.5
episode 8323 reward -10.311295197878762 loss 72690.7421875
episode 8324 reward -12.19425767471006 loss 77043360.0
episode 8325 reward -12.463134637235353 loss 49008.36328125
episode 8326 reward -12.479541554873924 loss 845229.9375
episode 8327 reward -12.446711725508088 loss 93323.7734375
episode 8328 reward -10.018468678602348 loss 275739.375
episode 8329 reward -12.479541554873924 loss 45045.79296875
episode 8330 reward -8.528153176283135 loss 1733626112.0
episode 8331 reward -12.504487517742728 loss 77402560.0
episode 8332 reward -12.504

episode 8458 reward -8.555927435596086 loss 25424.189453125
episode 8459 reward -8.61950401675974 loss 27289.380859375
episode 8460 reward -7.654778490422883 loss 752149760.0
episode 8461 reward -10.003733068526815 loss 26930.36328125
episode 8462 reward -9.439394167372502 loss 31072.6796875
episode 8463 reward -8.69545694644319 loss 26676.4765625
episode 8464 reward -7.646318235722166 loss 25359.30859375
episode 8465 reward -10.01772001967346 loss 64048.5234375
episode 8466 reward -9.45556770340564 loss 642190272.0
episode 8467 reward -11.481946862017642 loss 28787.95703125
episode 8468 reward -10.389427322335054 loss 162416.171875
episode 8469 reward -6.136041707725967 loss 27827.572265625
episode 8470 reward -10.003732992906071 loss 4679032.5
episode 8471 reward -7.613375973789319 loss 22753.87890625
episode 8472 reward -9.456986366516423 loss 27995.78125
episode 8473 reward -7.681137990157209 loss 14256.53125
episode 8474 reward -10.568046922346161 loss 26819.6875
episode 8475 rewa

episode 8599 reward -10.003733068526815 loss 10748.728515625
episode 8600 reward -7.662842013552996 loss 11178.783203125
episode 8601 reward -10.003611851125154 loss 10595.669921875
episode 8602 reward -9.998285622442292 loss 12082.998046875
episode 8603 reward -7.679389901422729 loss 306199.21875
episode 8604 reward -9.448946981219162 loss 7560.361328125
episode 8605 reward -7.646318235722166 loss 107790.7109375
episode 8606 reward -7.646318235722166 loss 10604.306640625
episode 8607 reward -7.671728920054056 loss 12826.4521484375
episode 8608 reward -9.468280295846872 loss 9859.48828125
episode 8609 reward -7.704046569313561 loss 9030.16796875
episode 8610 reward -10.00095072485043 loss 21422.34765625
episode 8611 reward -7.670774553515628 loss 145023.453125
episode 8612 reward -15.508799225594174 loss 37218.46875
episode 8613 reward -7.646318235722166 loss 6087.5634765625
episode 8614 reward -9.46203346795414 loss 14135.796875
episode 8615 reward -7.779464716525329 loss 55430.300781

episode 8738 reward -7.600282544660031 loss 21967.94140625
episode 8739 reward -7.670917128537562 loss 9078.970703125
episode 8740 reward -10.003027502294376 loss 6893.47314453125
episode 8741 reward -10.543864847314955 loss 1847527.5
episode 8742 reward -11.573300680765481 loss 219539.9375
episode 8743 reward -12.454868590387154 loss 14079.697265625
episode 8744 reward -7.658091465803984 loss 72127.859375
episode 8745 reward -9.998931360880764 loss 11656.375
episode 8746 reward -10.37249293551506 loss 8422.916015625
episode 8747 reward -10.001113054688256 loss 904379.0
episode 8748 reward -12.481415078705059 loss 6864.671875
episode 8749 reward -10.00373311695673 loss 11618.673828125
episode 8750 reward -8.555958317343798 loss 5538.8359375
episode 8751 reward -10.546905217320187 loss 213893.78125
episode 8752 reward -11.613797838095193 loss 27920818.0
episode 8753 reward -8.585871111219884 loss 11331631.0
episode 8754 reward -11.573300680765481 loss 31794.611328125
episode 8755 reward

episode 8879 reward -12.413878951453714 loss 25654.6328125
episode 8880 reward -10.016756072366787 loss 32706.318359375
episode 8881 reward -10.016689035905133 loss 26413.916015625
episode 8882 reward -12.481141923721 loss 57374.3046875
episode 8883 reward -9.448946981219162 loss 45448.09765625
episode 8884 reward -10.569125981824095 loss 21004.78125
episode 8885 reward -10.003732992906071 loss 25685.748046875
episode 8886 reward -10.002880419663983 loss 33367.46484375
episode 8887 reward -12.515100158876589 loss 1291237248.0
episode 8888 reward -12.199911066378773 loss 20791.31640625
episode 8889 reward -8.56291154505862 loss 11929421824.0
episode 8890 reward -12.479435159255587 loss 112444.8828125
episode 8891 reward -10.00373311695673 loss 12443.2890625
episode 8892 reward -9.467830681443589 loss 57991.8984375
episode 8893 reward -8.623586096247086 loss 15383.470703125
episode 8894 reward -9.451905346251273 loss 23359.96875
episode 8895 reward -10.003745924601867 loss 15568.38964843

episode 9020 reward -12.462380189858168 loss 10935.6513671875
episode 9021 reward -12.226153575544021 loss 13459.412109375
episode 9022 reward -7.671728920054056 loss 14543.7978515625
episode 9023 reward -10.003647568349352 loss 9074.42578125
episode 9024 reward -12.497046205934831 loss 786960960.0
episode 9025 reward -10.01776982535959 loss 160240896.0
episode 9026 reward -10.01719887207053 loss 19596.205078125
episode 9027 reward -10.003733068526815 loss 150065.0
episode 9028 reward -8.56291154505862 loss 23882.732421875
episode 9029 reward -7.854253237704647 loss 311852384.0
episode 9030 reward -10.018355266634394 loss 32173.259765625
episode 9031 reward -10.000859270504156 loss 1655883.75
episode 9032 reward -10.003731094463838 loss 10846.3037109375
episode 9033 reward -9.458464469815352 loss 18019.75390625
episode 9034 reward -7.863968429078124 loss 15538.056640625
episode 9035 reward -10.017655778732536 loss 9182.4189453125
episode 9036 reward -10.002906352909188 loss 96868.21875

episode 9161 reward -9.564386301736128 loss 8592506.0
episode 9162 reward -9.585907364671487 loss 214152.84375
episode 9163 reward -8.515813756238847 loss 5528073.0
episode 9164 reward -7.593959708968049 loss 13418.068359375
episode 9165 reward -10.003732992906071 loss 16484.29296875
episode 9166 reward -10.002894619294176 loss 20243.345703125
episode 9167 reward -7.712408113558842 loss 3798770.25
episode 9168 reward -12.464379356808779 loss 129346.125
episode 9169 reward -10.018088212480055 loss 28142.322265625
episode 9170 reward -10.018412593405738 loss 20874.849609375
episode 9171 reward -7.678476093287406 loss 13863.2275390625
episode 9172 reward -10.553256460067683 loss 1201917.375
episode 9173 reward -8.59180030307684 loss 21914.27734375
episode 9174 reward -9.576004559091917 loss 19647.05078125
episode 9175 reward -7.670917595897672 loss 16325.287109375
episode 9176 reward -7.7294239332895085 loss 26147.13671875
episode 9177 reward -7.712408113558842 loss 14845.8974609375
episo

episode 9302 reward -12.484357940921353 loss 63507488.0
episode 9303 reward -10.002896861074882 loss 23497.17578125
episode 9304 reward -12.463538208600834 loss 18951.537109375
episode 9305 reward -10.549565647120396 loss 24386.3125
episode 9306 reward -12.494300445669786 loss 35711.640625
episode 9307 reward -10.395932406857682 loss 25789.197265625
episode 9308 reward -10.19799278545991 loss 17871.0625
episode 9309 reward -12.502881859263308 loss 25398.291015625
episode 9310 reward -10.018792389500481 loss 19353.46484375
episode 9311 reward -11.637937226503043 loss 13579.271484375
episode 9312 reward -12.44855836701762 loss 149006480.0
episode 9313 reward -10.00295404741797 loss 11695.138671875
episode 9314 reward -10.391463059414427 loss 20264.744140625
episode 9315 reward -12.20287852003116 loss 18026.91015625
episode 9316 reward -7.731519100479114 loss 64531604.0
episode 9317 reward -7.646318235722166 loss 26965.390625
episode 9318 reward -7.670917595897672 loss 796565.3125
episode

episode 9442 reward -10.003427359665809 loss 33999.73046875
episode 9443 reward -10.032737884419577 loss 10318401536.0
episode 9444 reward -11.583271385012882 loss 82446936.0
episode 9445 reward -10.012245653560498 loss 16501.982421875
episode 9446 reward -10.391538482999664 loss 41188.171875
episode 9447 reward -9.465168471320299 loss 33297.3515625
episode 9448 reward -11.6289443701182 loss 20567458.0
episode 9449 reward -10.809605669216104 loss 20176.828125
episode 9450 reward -10.544283952148223 loss 35651.95703125
episode 9451 reward -9.996362168299799 loss 33680.0703125
episode 9452 reward -12.502881859263308 loss 30843.921875
episode 9453 reward -10.000859270504156 loss 14544.6201171875
episode 9454 reward -7.632359789746733 loss 23126.49609375
episode 9455 reward -8.523519622661968 loss 502269600.0
episode 9456 reward -7.638968452727713 loss 25430.2109375
episode 9457 reward -12.484357940921353 loss 14389053.0
episode 9458 reward -10.546905154117216 loss 83835152.0
episode 9459 

episode 9585 reward -8.50031418941892 loss 20475.91796875
episode 9586 reward -12.481141923721 loss 25045.20703125
episode 9587 reward -10.003050606777203 loss 38057.24609375
episode 9588 reward -8.562948896878854 loss 20463.3828125
episode 9589 reward -11.597309331042087 loss 17701.984375
episode 9590 reward -8.600437458901776 loss 8500605.0
episode 9591 reward -10.562820730521917 loss 22978.310546875
episode 9592 reward -10.383183015662821 loss 19574.61328125
episode 9593 reward -7.8200573569908585 loss 30605.72265625
episode 9594 reward -11.580266743616946 loss 15605.08203125
episode 9595 reward -10.569125981824095 loss 13464.275390625
episode 9596 reward -12.44729318733738 loss 6216048.5
episode 9597 reward -9.997269195381394 loss 23621.6796875
episode 9598 reward -10.003733138315805 loss 27046.40625
episode 9599 reward -10.007418520618792 loss 15393.3251953125
episode 9600 reward -12.489712640973664 loss 16558479.0
episode 9601 reward -10.003732992906071 loss 12794.8818359375
epis

episode 9726 reward -10.003684653212899 loss 24509.4453125
episode 9727 reward -8.56291154505862 loss 8378.533203125
episode 9728 reward -8.789420264391437 loss 19906.986328125
episode 9729 reward -12.484357940921353 loss 23335.021484375
episode 9730 reward -10.568046921567674 loss 18282.57421875
episode 9731 reward -10.566998376382902 loss 19584.24609375
episode 9732 reward -8.562952550605845 loss 14924.7099609375
episode 9733 reward -9.99893933072335 loss 25974.888671875
episode 9734 reward -8.494468069296513 loss 28315.9765625
episode 9735 reward -9.998951187214477 loss 16610.763671875
episode 9736 reward -7.712123302143361 loss 305335.25
episode 9737 reward -12.212889848182991 loss 21766.912109375
episode 9738 reward -8.56291154505862 loss 13181.2373046875
episode 9739 reward -12.212200722726863 loss 17446.572265625
episode 9740 reward -10.000762606986521 loss 19643.52734375
episode 9741 reward -12.414001512163615 loss 9599.890625
episode 9742 reward -12.481141923721 loss 7959.5234

episode 9868 reward -8.51250877614598 loss 1904470.0
episode 9869 reward -10.00373311695673 loss 76041.7890625
episode 9870 reward -12.517916694879087 loss 59139.1484375
episode 9871 reward -12.51167718816421 loss 58940.484375
episode 9872 reward -15.690274923769664 loss 64881.96875
episode 9873 reward -12.508966146949994 loss 689252.625
episode 9874 reward -10.618812625671875 loss 63996.6328125
episode 9875 reward -12.315937023133218 loss 66271.0234375
episode 9876 reward -7.701804672573691 loss 58630.1015625
episode 9877 reward -10.00172411457583 loss 645064.0
episode 9878 reward -9.4651851989233 loss 43275.84765625
episode 9879 reward -10.544734896930272 loss 27852.255859375
episode 9880 reward -12.414001512163615 loss 64762.8828125
episode 9881 reward -10.012553341529346 loss 62177.72265625
episode 9882 reward -10.123060677232491 loss 116826112.0
episode 9883 reward -9.456986949999887 loss 159260.21875
episode 9884 reward -8.56291154505862 loss 44825.875
episode 9885 reward -9.4620

episode 10010 reward -10.003056857554165 loss 1496684.0
episode 10011 reward -12.481141923721 loss 26374.111328125
episode 10012 reward -10.013686076234192 loss 1440802.75
episode 10013 reward -9.471878525273091 loss 93861.15625
episode 10014 reward -10.39617748877227 loss 70454.1875
episode 10015 reward -10.380073557863604 loss 32384.251953125
episode 10016 reward -12.489712640973664 loss 12852037.0
episode 10017 reward -12.413911937961476 loss 36883.29296875
episode 10018 reward -10.003732992906071 loss 54927.66796875
episode 10019 reward -10.003050606777203 loss 33467.6953125
episode 10020 reward -15.498993613599506 loss 62483.0703125
episode 10021 reward -12.51294145100108 loss 15569976.0
episode 10022 reward -7.646273584463653 loss 416257.75
episode 10023 reward -12.444970335447707 loss 26370.970703125
episode 10024 reward -10.01719887207053 loss 52368.3046875
episode 10025 reward -10.017661899906335 loss 45011.578125
episode 10026 reward -12.476082136824306 loss 906057.375
episod

episode 10149 reward -9.997426568079078 loss 36889024.0
episode 10150 reward -12.44729318733738 loss 47446.53515625
episode 10151 reward -10.383183015662821 loss 51884.4375
episode 10152 reward -11.6289443701182 loss 499037.15625
episode 10153 reward -10.00373311695673 loss 52923.8828125
episode 10154 reward -9.998915708227583 loss 52213.04296875
episode 10155 reward -10.003597779412853 loss 54975.84375
episode 10156 reward -10.560066115291079 loss 33407.4375
episode 10157 reward -10.5454506818073 loss 691525120.0
episode 10158 reward -10.034661471959613 loss 46727.8515625
episode 10159 reward -10.380073557863604 loss 60752.65234375
episode 10160 reward -7.632359789746733 loss 6505224.0
episode 10161 reward -12.173218986887406 loss 57448.5859375
episode 10162 reward -10.00127022168306 loss 46296.06640625
episode 10163 reward -11.567208631064993 loss 59284.078125
episode 10164 reward -10.003621171801898 loss 53085.4609375
episode 10165 reward -10.00373317845134 loss 60760.01953125
episo

episode 10290 reward -10.556724534457636 loss 7271925.5
episode 10291 reward -12.414001512163615 loss 21624.154296875
episode 10292 reward -8.591856330608952 loss 34235584.0
episode 10293 reward -11.567208620845697 loss 5924678.5
episode 10294 reward -12.516751214459852 loss 75316.125
episode 10295 reward -12.473240524348306 loss 58083.14453125
episode 10296 reward -9.458464469815352 loss 32460.33203125
episode 10297 reward -12.168515340145756 loss 8417565.0
episode 10298 reward -12.453920462798838 loss 76153.0234375
episode 10299 reward -12.56041881601465 loss 24372.71875
episode 10300 reward -7.887396497807056 loss 27823906.0
episode 10301 reward -12.471076754791262 loss 30687.95703125
episode 10302 reward -10.560893107307708 loss 58704.59375
episode 10303 reward -12.413817616819282 loss 37696.88671875
episode 10304 reward -12.44855836701762 loss 33903452.0
episode 10305 reward -10.00314324992219 loss 35138.52734375
episode 10306 reward -10.018851865705301 loss 197607744.0
episode 10

episode 10432 reward -12.439901897353671 loss 218905.0625
episode 10433 reward -9.998931360880682 loss 1379891.375
episode 10434 reward -10.003733138315805 loss 44110.9921875
episode 10435 reward -7.670774526636766 loss 221489.890625
episode 10436 reward -15.455478137623421 loss 59649.109375
episode 10437 reward -10.01726131131164 loss 2272808.5
episode 10438 reward -10.549403230569379 loss 39601.015625
episode 10439 reward -10.557440029162604 loss 50616.50390625
episode 10440 reward -12.481415078705059 loss 35302.11328125
episode 10441 reward -10.569125981824095 loss 44517.05859375
episode 10442 reward -12.225195323116615 loss 286561.4375
episode 10443 reward -10.561108765432888 loss 36857.3515625
episode 10444 reward -11.845511629463072 loss 43873.78515625
episode 10445 reward -8.600437458901776 loss 12556718.0
episode 10446 reward -12.255419327475282 loss 70870.2890625
episode 10447 reward -9.458464469815352 loss 38399.4609375
episode 10448 reward -10.003636611221683 loss 1880985.5


episode 10572 reward -12.199911066378773 loss 32862.0625
episode 10573 reward -10.00373311695673 loss 43520.8828125
episode 10574 reward -9.48347464553742 loss 1380132.5
episode 10575 reward -12.468809772616629 loss 13763857.0
episode 10576 reward -12.515506405446768 loss 49163.5703125
episode 10577 reward -12.515150390859251 loss 13098.734375
episode 10578 reward -11.569302227203895 loss 37362.921875
episode 10579 reward -10.544011416122698 loss 1406712.75
episode 10580 reward -12.226153575544021 loss 4888200.5
episode 10581 reward -10.308615599356072 loss 2212598.25
episode 10582 reward -12.44855836701762 loss 19549.2734375
episode 10583 reward -12.511442632639834 loss 39783.23828125
episode 10584 reward -10.571197538543984 loss 16702.294921875
episode 10585 reward -10.39617748877227 loss 42322.671875
episode 10586 reward -12.502877693411179 loss 25955.38671875
episode 10587 reward -7.793718936997671 loss 5752249.0
episode 10588 reward -12.484357940921353 loss 14380812.0
episode 1058

episode 10713 reward -10.033392517062481 loss 1643300.0
episode 10714 reward -8.482902874980015 loss 32435.794921875
episode 10715 reward -9.99884554441434 loss 40397716.0
episode 10716 reward -10.000762606986521 loss 29885.8046875
episode 10717 reward -12.199911066378773 loss 28277.49609375
episode 10718 reward -10.56257604461313 loss 27724.87109375
episode 10719 reward -7.751146318548719 loss 182071824.0
episode 10720 reward -12.56661089330848 loss 37624.5546875
episode 10721 reward -11.59529057867114 loss 34585.83203125
episode 10722 reward -12.50215576218362 loss 21924.455078125
episode 10723 reward -12.16851140769173 loss 26401.4453125
episode 10724 reward -12.44756530790839 loss 34523.3359375
episode 10725 reward -10.552125803300877 loss 29563.2734375
episode 10726 reward -10.003733068526815 loss 26850.384765625
episode 10727 reward -15.450779065113291 loss 10410227.0
episode 10728 reward -12.60653093686009 loss 13023.322265625
episode 10729 reward -8.523393356069258 loss 867383.

episode 10852 reward -10.55212580421905 loss 33160.59375
episode 10853 reward -10.00373311695673 loss 22546.734375
episode 10854 reward -12.48140251121621 loss 32090.244140625
episode 10855 reward -12.481141923721 loss 10356.6416015625
episode 10856 reward -12.477102409936585 loss 20417.56640625
episode 10857 reward -12.481141923721 loss 29670.509765625
episode 10858 reward -12.481141923721 loss 23813.66015625
episode 10859 reward -12.502881859263308 loss 21564.390625
episode 10860 reward -12.508177537611985 loss 63171.51171875
episode 10861 reward -7.679389901422729 loss 23415.1171875
episode 10862 reward -10.001838296596116 loss 8607.251953125
episode 10863 reward -10.54501851756382 loss 258631.8125
episode 10864 reward -11.58497808869385 loss 2266290.5
episode 10865 reward -11.600377809022662 loss 4868957.0
episode 10866 reward -12.481415078705059 loss 4104863.5
episode 10867 reward -12.485191539787948 loss 25135.716796875
episode 10868 reward -12.481141923721 loss 82753928.0
episod

episode 10993 reward -10.553173579754748 loss 1553957.25
episode 10994 reward -10.54767870806899 loss 34833.0078125
episode 10995 reward -9.465074588436112 loss 4944446.0
episode 10996 reward -10.003732992906071 loss 1342432.5
episode 10997 reward -12.515100158876589 loss 5467839.0
episode 10998 reward -9.448946981219162 loss 115779.9375
episode 10999 reward -10.391538482999664 loss 38749.390625
episode 11000 reward -12.468809772616629 loss 36055.40234375
episode 11001 reward -7.811872725429394 loss 5506182.5
episode 11002 reward -12.44729318733738 loss 14327.88671875
episode 11003 reward -12.255419327475282 loss 21748862.0
episode 11004 reward -9.99625958153258 loss 28259.30859375
episode 11005 reward -12.510726515692408 loss 7922238.5
episode 11006 reward -12.51188408164511 loss 29413016.0
episode 11007 reward -15.501752894685687 loss 977788.125
episode 11008 reward -8.585756528455267 loss 19605.73046875
episode 11009 reward -10.375806238398987 loss 43656.96484375
episode 11010 rewar

episode 11133 reward -12.226153575544021 loss 757793.3125
episode 11134 reward -12.467319316754764 loss 35172.60546875
episode 11135 reward -10.001198451020153 loss 18138868.0
episode 11136 reward -12.476082136824306 loss 13566.2890625
episode 11137 reward -7.828723233481179 loss 50437.0390625
episode 11138 reward -12.502881859263308 loss 11001.71484375
episode 11139 reward -12.478540064470192 loss 13603.5703125
episode 11140 reward -10.557825016326381 loss 410599.5
episode 11141 reward -10.000717409002485 loss 32878.80078125
episode 11142 reward -11.587520024814205 loss 1977247.25
episode 11143 reward -9.998285622442292 loss 19407.4140625
episode 11144 reward -12.509489544405575 loss 11733.001953125
episode 11145 reward -12.44729318733738 loss 78835.78125
episode 11146 reward -10.386176576166607 loss 40862.90234375
episode 11147 reward -12.484357940921353 loss 196520.640625
episode 11148 reward -12.50441438371052 loss 56131.5390625
episode 11149 reward -12.44729318733738 loss 71791302

episode 11271 reward -12.517326747854696 loss 7529.560546875
episode 11272 reward -12.481141923721 loss 16994.02734375
episode 11273 reward -12.479349559500605 loss 9876.2685546875
episode 11274 reward -12.481141923721 loss 8759855.0
episode 11275 reward -10.017475240471411 loss 33434.2109375
episode 11276 reward -10.553603319611902 loss 2326008.0
episode 11277 reward -11.55121107418706 loss 14309.2763671875
episode 11278 reward -10.017418054558247 loss 2484298.5
episode 11279 reward -8.562908357425549 loss 7865.119140625
episode 11280 reward -12.502881859263308 loss 2503268.5
episode 11281 reward -12.409569390393788 loss 178545.9375
episode 11282 reward -12.450238317922487 loss 13264.828125
episode 11283 reward -10.035844633270099 loss 17357.91015625
episode 11284 reward -15.454399802816932 loss 20554.517578125
episode 11285 reward -15.364639530263869 loss 17627.123046875
episode 11286 reward -12.514684494480772 loss 12117.529296875
episode 11287 reward -11.56922224781317 loss 31493.8

episode 11410 reward -12.561167779905158 loss 21572.578125
episode 11411 reward -8.515813756238847 loss 18631.7890625
episode 11412 reward -12.503692427150067 loss 38855020.0
episode 11413 reward -12.476810570338293 loss 295874.34375
episode 11414 reward -10.380065615734777 loss 31153.0234375
episode 11415 reward -10.002918838270283 loss 37733.578125
episode 11416 reward -10.002873898216215 loss 4068259.0
episode 11417 reward -12.482676283142379 loss 1374199424.0
episode 11418 reward -15.538434014192909 loss 39413.890625
episode 11419 reward -11.563237265008404 loss 39177.16796875
episode 11420 reward -10.544316788194537 loss 37576.73046875
episode 11421 reward -7.828723233481179 loss 13219.41796875
episode 11422 reward -12.481415078705059 loss 52784.28125
episode 11423 reward -10.631645846272423 loss 37564908.0
episode 11424 reward -7.793718936997671 loss 38674.578125
episode 11425 reward -10.018067953666256 loss 37970.546875
episode 11426 reward -7.7481250680271625 loss 1258452.125
e

episode 11550 reward -8.56291154505862 loss 25763.515625
episode 11551 reward -7.646318235722166 loss 17098.111328125
episode 11552 reward -10.384477318660592 loss 13345.6640625
episode 11553 reward -12.413817616819282 loss 240029.21875
episode 11554 reward -7.793718936997671 loss 1278849.25
episode 11555 reward -7.828723233481179 loss 21671.052734375
episode 11556 reward -10.560066115310468 loss 5580847.0
episode 11557 reward -10.562572129830164 loss 10064.53125
episode 11558 reward -10.003733068526815 loss 18115.279296875
episode 11559 reward -12.483925637836053 loss 156199.578125
episode 11560 reward -10.380393986583192 loss 16496533.0
episode 11561 reward -12.380859390300795 loss 24287.177734375
episode 11562 reward -10.00368766344166 loss 137843.765625
episode 11563 reward -10.002973445432573 loss 9449.509765625
episode 11564 reward -8.580660902380771 loss 246954.15625
episode 11565 reward -10.000859270504156 loss 28973.767578125
episode 11566 reward -12.226153575544021 loss 20156

episode 11689 reward -12.502901133245185 loss 13304.875
episode 11690 reward -10.559263251160525 loss 25752724.0
episode 11691 reward -7.721604005521401 loss 15510.5205078125
episode 11692 reward -12.502881859263308 loss 31860.2578125
episode 11693 reward -12.481141923721 loss 24374656.0
episode 11694 reward -10.380073557863604 loss 17834.7109375
episode 11695 reward -15.564454089029084 loss 9808.5283203125
episode 11696 reward -12.437442131981204 loss 18006.626953125
episode 11697 reward -12.446711725508088 loss 19936.3359375
episode 11698 reward -10.55212580421905 loss 9909.5205078125
episode 11699 reward -12.502881859263308 loss 1047382.3125
episode 11700 reward -12.484357940921353 loss 12098.0595703125
episode 11701 reward -10.00373311695673 loss 17200660.0
episode 11702 reward -12.445477402314841 loss 10900.083984375
episode 11703 reward -10.581997816103232 loss 774274.0625
episode 11704 reward -7.646318235722166 loss 13008.466796875
episode 11705 reward -15.515410806516424 loss 1

episode 11829 reward -10.00373311695673 loss 60060.01953125
episode 11830 reward -12.447537069311743 loss 62203.0234375
episode 11831 reward -10.018256137175875 loss 43793.46484375
episode 11832 reward -11.637937226503043 loss 55380.60546875
episode 11833 reward -11.55121107418706 loss 20377.3984375
episode 11834 reward -12.463415956440723 loss 49320.1875
episode 11835 reward -12.543606256351307 loss 45979.59765625
episode 11836 reward -12.468610037414932 loss 43105.04296875
episode 11837 reward -12.47552616665291 loss 10754.8369140625
episode 11838 reward -10.1087205312692 loss 25004108.0
episode 11839 reward -11.86591433217112 loss 11933.443359375
episode 11840 reward -12.226153575544021 loss 10684.8212890625
episode 11841 reward -12.44729318733738 loss 27890.328125
episode 11842 reward -10.003733068526815 loss 10839.255859375
episode 11843 reward -8.577382716624198 loss 24708.078125
episode 11844 reward -12.448661913191971 loss 11591.671875
episode 11845 reward -12.481141923721 loss

episode 11970 reward -10.017703350890313 loss 3215356.0
episode 11971 reward -6.166630277763489 loss 5155673.5
episode 11972 reward -12.481415078705059 loss 65085.02734375
episode 11973 reward -15.511300791422647 loss 78366.71875
episode 11974 reward -12.45204304071345 loss 23693.83984375
episode 11975 reward -10.569125981824095 loss 1132250.625
episode 11976 reward -11.56638493025934 loss 37369640.0
episode 11977 reward -7.632359789746733 loss 889520256.0
episode 11978 reward -12.484357940921353 loss 765278.4375
episode 11979 reward -8.476791048083003 loss 81733.2109375
episode 11980 reward -10.537913387377142 loss 1865931.375
episode 11981 reward -12.46999736648563 loss 25056.640625
episode 11982 reward -12.563264304451433 loss 1026446.875
episode 11983 reward -11.637937226503043 loss 5443530.0
episode 11984 reward -10.000670745376343 loss 84502.765625
episode 11985 reward -12.186171974265394 loss 84597.40625
episode 11986 reward -10.555034530202231 loss 19623.193359375
episode 11987

episode 12110 reward -10.003617293053098 loss 43608800.0
episode 12111 reward -15.596201006202175 loss 23170.951171875
episode 12112 reward -10.399813315589915 loss 1202834.75
episode 12113 reward -10.544316788194537 loss 24435392.0
episode 12114 reward -8.56291154505862 loss 23688.712890625
episode 12115 reward -12.48618252621977 loss 2793503.0
episode 12116 reward -10.017642278122208 loss 16195.208984375
episode 12117 reward -12.511442632639834 loss 2934260.5
episode 12118 reward -10.017651387626186 loss 89167080.0
episode 12119 reward -12.484357940921353 loss 67572488.0
episode 12120 reward -12.481207655020144 loss 21002.67578125
episode 12121 reward -12.32543190554858 loss 11713190.0
episode 12122 reward -12.480927330948541 loss 152960.578125
episode 12123 reward -9.47171621108792 loss 1015725.6875
episode 12124 reward -12.481141923721 loss 80730560.0
episode 12125 reward -15.508799225594174 loss 3395235.0
episode 12126 reward -12.45204304071345 loss 21935.44921875
episode 12127 re

episode 12251 reward -9.458464469815352 loss 18861.658203125
episode 12252 reward -11.848251389546386 loss 47361.4921875
episode 12253 reward -12.484357940921353 loss 6197862.0
episode 12254 reward -10.397718494632109 loss 20734.607421875
episode 12255 reward -10.01719887207053 loss 62859.71875
episode 12256 reward -10.544316788194537 loss 678325.6875
episode 12257 reward -11.600406422176349 loss 1201748.75
episode 12258 reward -12.199911066378773 loss 40150.61328125
episode 12259 reward -12.407054259343298 loss 25438.33984375
episode 12260 reward -10.665871330544036 loss 229297.328125
episode 12261 reward -8.667832963037249 loss 31919.90625
episode 12262 reward -12.454319384311217 loss 564970.75
episode 12263 reward -12.116316057219816 loss 15634.365234375
episode 12264 reward -15.081491212965156 loss 36603.8671875
episode 12265 reward -15.463213808734416 loss 24746806.0
episode 12266 reward -12.212139548116234 loss 29845.1796875
episode 12267 reward -12.472929943907886 loss 26505.517

episode 12391 reward -12.482676283142379 loss 37614.56640625
episode 12392 reward -12.502881859263308 loss 9743.76171875
episode 12393 reward -10.554181250984328 loss 21525.55859375
episode 12394 reward -10.003664005588213 loss 10555.81640625
episode 12395 reward -10.538007360424828 loss 13139.5498046875
episode 12396 reward -9.996658130903478 loss 13956.0
episode 12397 reward -12.567465270879644 loss 27014.005859375
episode 12398 reward -12.473425126901539 loss 18036.6015625
episode 12399 reward -12.44729318733738 loss 96108.671875
episode 12400 reward -10.00373311695673 loss 11058425.0
episode 12401 reward -9.532852756822901 loss 39178.5625
episode 12402 reward -8.491730953787162 loss 37849888.0
episode 12403 reward -12.476082136824306 loss 339911.6875
episode 12404 reward -10.002884414001066 loss 60610.69140625
episode 12405 reward -8.56291154505862 loss 138824.34375
episode 12406 reward -12.46999736648563 loss 114511.078125
episode 12407 reward -12.512064017763404 loss 11829.863281

episode 12531 reward -12.482676283142379 loss 36790.23046875
episode 12532 reward -12.484357940921353 loss 71078088.0
episode 12533 reward -8.486843040347868 loss 15186.2099609375
episode 12534 reward -9.458062356578518 loss 1265009.25
episode 12535 reward -12.479541554873924 loss 41278.76171875
episode 12536 reward -12.44729318733738 loss 490003.34375
episode 12537 reward -12.221492603003876 loss 4498928.0
episode 12538 reward -9.465071167433251 loss 49694.078125
episode 12539 reward -15.455478137623421 loss 47577.0234375
episode 12540 reward -12.462510900626722 loss 7305704.5
episode 12541 reward -12.224910123038356 loss 19331.419921875
episode 12542 reward -11.002897920865689 loss 38351.609375
episode 12543 reward -15.4929158476137 loss 48429.7109375
episode 12544 reward -12.502881859263308 loss 40689.97265625
episode 12545 reward -10.544316788307217 loss 44315.62109375
episode 12546 reward -10.562820730521917 loss 65480.578125
episode 12547 reward -12.411644414078252 loss 12658.716

episode 12671 reward -8.476978573328982 loss 33960.54296875
episode 12672 reward -12.468809772616629 loss 36062.6875
episode 12673 reward -10.386082651652295 loss 22472.525390625
episode 12674 reward -12.44855836701762 loss 12975.27734375
episode 12675 reward -12.476409651937608 loss 44427.625
episode 12676 reward -12.467319316754764 loss 17620.55078125
episode 12677 reward -10.017578456600532 loss 10256.4462890625
episode 12678 reward -12.44855836701762 loss 11957.115234375
episode 12679 reward -10.380073557863604 loss 21752.5
episode 12680 reward -12.486304000478642 loss 13114.7451171875
episode 12681 reward -10.003722966206679 loss 12448.6982421875
episode 12682 reward -12.436589538248427 loss 70460088.0
episode 12683 reward -9.998907018410879 loss 13841.2255859375
episode 12684 reward -12.484374582188725 loss 9854.958984375
episode 12685 reward -12.502881859263308 loss 1346810.625
episode 12686 reward -12.401272430235155 loss 17288.18359375
episode 12687 reward -12.413878951453714 

episode 12809 reward -15.478816535164878 loss 25181.205078125
episode 12810 reward -12.382185628541674 loss 38982.56640625
episode 12811 reward -12.44702878211453 loss 28652.365234375
episode 12812 reward -9.996312217253953 loss 23259.03515625
episode 12813 reward -12.44729318733738 loss 20991.3984375
episode 12814 reward -15.594585222536518 loss 38534976.0
episode 12815 reward -12.502881859263308 loss 32413.439453125
episode 12816 reward -12.44729318733738 loss 148236144.0
episode 12817 reward -12.476082136824306 loss 2227046.75
episode 12818 reward -12.63055614012076 loss 769337.25
episode 12819 reward -12.428902201736342 loss 19025.609375
episode 12820 reward -12.175766458360103 loss 54955.61328125
episode 12821 reward -8.009054135102724 loss 14715.3701171875
episode 12822 reward -12.450238317922487 loss 374750.15625
episode 12823 reward -12.447537069311743 loss 30259.03125
episode 12824 reward -11.576004934485713 loss 25620.84375
episode 12825 reward -12.484357940921353 loss 364032

In [None]:
# Plot the values stored in all_rewards (presumably one reward per training
# episode, since the original x-axis was range(episode_count) -- confirm
# against the training loop).
# The x-axis is derived from len(all_rewards) instead of episode_count so the
# cell still works when fewer rewards were recorded than episodes were planned
# (e.g. training was stopped early); plt.plot raises ValueError when the x and
# y sequences have different lengths.
plt.plot(range(len(all_rewards)), all_rewards)
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.title('Reward per episode')
plt.show()

In [None]:
# Plot every value stored in all_losses against its position in that sequence.
# A logarithmic y-axis is used because the losses printed during training
# range from roughly 1e4 up to 1e9; on a linear axis a handful of spikes
# would flatten the rest of the curve into the baseline.
plt.plot(range(len(all_losses)), all_losses)
plt.yscale('log')
plt.xlabel('Loss record index')
plt.ylabel('Loss (log scale)')
plt.title('Training loss')
plt.show()

In [None]:
# Write the weights of the agent's two models to .h5 files in the current
# working directory. Each model attribute is looked up right before its save
# call, so the files are written one after the other in the listed order.
for model_attr, weights_path in (
    ('q_model', 'q_model.h5'),
    ('target_q_model', 'target_model.h5'),
):
    getattr(agent, model_attr).save_weights(weights_path)