In [1]:
import numpy as np
import matplotlib.pyplot as plt
from time import sleep

class ENV:
    def __init__(self, map_size, obs_pos1, obs_pos2, robot_start_pos, goal_pos):
        self.map_size = map_size # integer
        self.obs_pos1 = obs_pos1 # [a,b], 2by1 list
        self.obs_pos2 = obs_pos2 # [a,b], 2by1 list
        self.goal_pos = goal_pos
        self.robot_pos1 = robot_start_pos[0:2] #[a,b] 2by1 list
        self.robot_pos2 = robot_start_pos[2:4]
        # set the walls
        self.fig = plt.figure()
        ax = plt.axes(xlim=(-0.5,self.map_size), ylim=(-0.5,self.map_size))  
        #self.render_env()
    def render_env(self):        
        # draw the obstacles and goal
        obs1 = plt.scatter(self.obs_pos1[0], self.obs_pos1[1], c='r', marker = 's', linewidths = 5) # have to check whether we can receive <list or np.array>        
        obs2 = plt.scatter(self.obs_pos2[0], self.obs_pos2[1], c='r', marker = 's', linewidths = 5)
        goal = plt.scatter(self.goal_pos[0], self.goal_pos[1], c='g', marker='x', linewidths = 4)
        # draw the robot                
        ro1 = plt.scatter(self.robot_pos1[0], self.robot_pos1[1], c='b', linewidths = 3)
        ro2 = plt.scatter(self.robot_pos2[0], self.robot_pos2[1], c='b', linewidths = 3)        
        self.fig.canvas.draw()   
        sleep(0.2)
        ro1.remove()
        ro2.remove()        
    def update(self, robot_current_pos):
        self.robot_pos1 = robot_current_pos[0:2]
        self.robot_pos2 = robot_current_pos[2:4]        

    

## DQN to cooperate 2 Mobile Manipulator
* made by hotae
* network includes one agent's action
* full observable state

In [2]:
#Plotting setting
%matplotlib notebook
import matplotlib.pyplot as plt
from matplotlib import animation
from time import sleep

import numpy as np
import tensorflow as tf
import random
import dqn_cooperation
from collections import deque

import time
start = time.time()

# Create New environment with transition law
ACTION_NUM = 8
INPUT_SIZE = 10
OUTPUT_SIZE = ACTION_NUM
VEL = 1
TIME_GAP = 1
MAP_SIZE = 7
def annealing_epsilon(episode, min_e, max_e, target_episode):
    slope = (min_e - max_e) / (target_episode)
    intercept = max_e

    return max(min_e, slope * episode + intercept)

class new_env:     
    def create_env(self, arg_state=[0.,0.,1.0,1.0], g_pos=[5,5], obs_pos1=[0,7], obs_pos2=[7,0], obs_size=5):
        self.state = np.array(arg_state+g_pos+obs_pos1+obs_pos2) # reset
        self.n_state = np.array(arg_state+g_pos+obs_pos1+obs_pos2)
        self.obstacle_size = obs_size
        return self.state, self.obstacle_size
        
    def next_step(self, arg_state, arg_action):
        self._fail = False
        self.reward = 0
        # choose the action
        arg_action1 = arg_action 
        
        '''position update through action
        UP = 0, DOWN = 1, LEFT = 2, RIGHT = 3'''        
        # agent located on the first state 
        if arg_action1 == 0:
            self.n_state[0:4] = arg_state[0:4] + np.array([0,1,0,0])*VEL*TIME_GAP # move the 1st agent
        elif arg_action1 == 1:
            self.n_state[0:4] = arg_state[0:4] + np.array([0,-1,0,0])*VEL*TIME_GAP
        elif arg_action1 == 2:
            self.n_state[0:4] = arg_state[0:4] + np.array([-1,0,0,0])*VEL*TIME_GAP
        elif arg_action1 == 3:
            self.n_state[0:4] = arg_state[0:4] + np.array([1,0,0,0])*VEL*TIME_GAP
        elif arg_action1 == 4:
            self.n_state[0:4] = arg_state[0:4] + np.array([0,0,0,1])*VEL*TIME_GAP
        elif arg_action1 == 5:
            self.n_state[0:4] = arg_state[0:4] + np.array([0,0,0,-1])*VEL*TIME_GAP
        elif arg_action1 == 6:
            self.n_state[0:4] = arg_state[0:4] + np.array([0,0,-1,0])*VEL*TIME_GAP
        else:
            self.n_state[0:4] = arg_state[0:4] + np.array([0,0,1,0])*VEL*TIME_GAP      

       
        '''get the reward'''
        if np.linalg.norm((self.n_state[0:2]+self.n_state[2:4])/2-self.n_state[4:6])!=0:
            self.reward = (1/np.linalg.norm((self.n_state[0:2]+self.n_state[2:4])/2-self.n_state[4:6])-\
            1/np.linalg.norm((arg_state[0:2]+arg_state[2:4])/2-self.n_state[4:6]))*800
        if np.linalg.norm(self.n_state[0:2]-self.n_state[6:8])<1 or np.linalg.norm(self.n_state[2:4]-self.n_state[6:8])<1:
            self.reward = self.reward-2 # collision
        if np.linalg.norm(self.n_state[0:2]-self.n_state[8:10])<1 or np.linalg.norm(self.n_state[2:4]-self.n_state[8:10])<1:
            self.reward = self.reward-2 # collision
        if np.linalg.norm((self.n_state[0:2]+self.n_state[2:4])/2-self.n_state[4:6])<1 and np.linalg.norm(self.n_state[0:2]-self.n_state[2:4])<3: # approximately set condition
            self.reward = self.reward + 100 # achieve goal
            self._fail = True
        if np.linalg.norm(self.n_state[0:2]-self.n_state[2:4])>3:
            self.reward = self.reward-6*np.linalg.norm(self.n_state[0:2]-self.n_state[2:4]) # drop the object
            #self._fail = True            
        '''
        if any(x<0 for x in self.n_state):            
            self.reward = -50 # away from the map
            self._fail = True   
        if any(x>7 for x in self.n_state):            
            self.reward = -80 # away from the map
            self._fail = True 
        '''
        return self.n_state, self.reward, self._fail
    
#env = new_env() 
#state, g_pos, o_pos, o_size = env.create_env() # set the enviornment
DISCOUNT_RATE = 0.98
REPLAY_MEMORY = 10000
BATCH_SIZE = 50
MAX_EPI = 10000
MAX_STEP = 300
# minimum epsilon for epsilon greedy
MIN_E = 0.1
# epsilon will be `MIN_E` at `EPSILON_DECAYING_EPISODE`
EPSILON_DECAYING_STEP = MAX_STEP * 0.05
TARGET_UPDATE_FQ = 20

def train_minibatch(mainDQN, targetDQN, minibatch):
    state_array = np.array([x[0] for x in minibatch])
    action_array = np.array([x[1] for x in minibatch]) # [ x among 0~3] * BATCH_SIZE
    reward_array = np.array([x[2] for x in minibatch])
    n_state_array = np.array([x[3] for x in minibatch])
    _fail_array = np.array([x[4] for x in minibatch])    
    
    X_batch = state_array   
    Y_batch = mainDQN.predict(state_array) # 4 elements * BATCH_SIZE 
    
    # consideration for action constraint
    target_q = targetDQN.predict(n_state_array)
    j = 0
    for x in n_state_array:
        t_dqn = targetDQN.predict(x) #[[1 2 3 ...]]
        t_dqn = t_dqn.flatten() # [1 2 3 ...]
        if x[0]<TIME_GAP*VEL:
            t_dqn[2] = -float("inf") # put a large num on action 2(left)
        if x[1]<TIME_GAP*VEL:
            t_dqn[1] = -float("inf") # put a large num on action 1(down)
        if x[0] > MAP_SIZE - TIME_GAP*VEL:
            t_dqn[3] = -float("inf") # remove action 3(right)
        if x[1] > MAP_SIZE - TIME_GAP*VEL:
            t_dqn[0] = -float("inf") # remove action 0(up)  
        if x[2]<TIME_GAP*VEL:
            t_dqn[6] = -float("inf") # put a large num on action 6(left)
        if x[3]<TIME_GAP*VEL:
            t_dqn[5] = -float("inf") # put a large num on action 5(down)
        if x[2] > MAP_SIZE - TIME_GAP*VEL:
            t_dqn[7] = -float("inf") # remove action 7(right)
        if x[3] > MAP_SIZE - TIME_GAP*VEL:
            t_dqn[4] = -float("inf") # remove action 4(up)   
        target_q[j] = t_dqn
        j += 1
        
    Q_target = reward_array + DISCOUNT_RATE*np.max(target_q, axis=1)*~_fail_array # if fail, Q = reward
    
    Y_batch[np.arange(len(X_batch)), action_array] = Q_target
    
    # Train
    cost_batch, _ = mainDQN.update(X_batch, Y_batch)
    return cost_batch

def get_copy_var_ops(dest_scope_name = "target", src_scope_name = "main"):
    op_holder = []
    
    src_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=src_scope_name)
    dest_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=dest_scope_name)

    for src_var, dest_var in zip(src_vars, dest_vars):
        op_holder.append(dest_var.assign(src_var.value()))

    return op_holder


def main():
    replay_buffer = deque(maxlen=REPLAY_MEMORY) # detract element from both sides    
    total_reward_buffer = []
    step_buffer = []
    new_graph = tf.Graph()
    with tf.Session(graph=new_graph)) as sess:
        mainDQN = dqn_cooperation.DQN(sess, INPUT_SIZE, OUTPUT_SIZE)
        mainDQN.build_network(16,32,0.002)
        targetDQN = dqn_cooperation.DQN(sess, INPUT_SIZE, OUTPUT_SIZE)
        targetDQN.build_network(16,32,0.002)
        init = tf.global_variables_initializer()
        sess.run(init)
        
         #restore model
        '''
        new_saver = tf.train.import_meta_graph("./dqn_single_action.ckpt.meta")        
        new_saver.restore(sess,"./dqn_single_action.ckpt")
        '''
        # initial copy main q -> target q
        copy_ops = get_copy_var_ops(dest_scope_name = "target", src_scope_name = "main")
        sess.run(copy_ops)
        
        reward_accum_last100 = 0
        reward_sum = 0


        game = ENV(MAP_SIZE, [0,7], [7,0], [0,0,1,1], [5,5]) # __init__(self, map_size, obs_pos1, obs_pos2, robot_start_pos, goal_pos):
        game.render_env()
        
        for episode in range(MAX_EPI):
            
            _fail = False
            step_count = 0 # how many moves included in an episode
            env1 = new_env()
            state, _= env1.create_env() # get only state, return self.state, self.goal_pos, self.obstacle_pos1, self.obstacle_pos2, self.obstacle_size

            goal_ = True
             
            reward_sum = 0    
            
            while not _fail:
                e = annealing_epsilon(step_count, MIN_E, 1.0, EPSILON_DECAYING_STEP)
                # after sufficient learning, we present the game scene        
                if episode > MAX_EP:
                    game.update(state)
                    game.render_env()
                    
                step_count += 1
                # apply constraint about walls                   
                if np.random.rand()< e:
                    act_candi = range(ACTION_NUM)
                    if state[0]<TIME_GAP*VEL:
                        act_candi.remove(2) # remove action 2(left)
                    if state[1]<TIME_GAP*VEL:
                        act_candi.remove(1) # remove action 1(down)
                    if state[0] > MAP_SIZE - TIME_GAP*VEL:
                        act_candi.remove(3) # remove action 3(right)000000
                    if state[1] > MAP_SIZE - TIME_GAP*VEL:
                        act_candi.remove(0) # remove action 0(up)    
                    if state[2]<TIME_GAP*VEL:
                        act_candi.remove(6) # remove action 6(left)
                    if state[3]<TIME_GAP*VEL:
                        act_candi.remove(5) # remove action 5(down)
                    if state[2] > MAP_SIZE - TIME_GAP*VEL:
                        act_candi.remove(7) # remove action 7(right)
                    if state[3] > MAP_SIZE - TIME_GAP*VEL:
                        act_candi.remove(4) # remove action 4(up)    
                    act_candi1 = np.array(act_candi)
                    action_l = random.sample(act_candi1,1) # choose upL, down, left, right 
                    action = action_l[0]
                else:  
                    act_candi = mainDQN.predict(state)
                    act_candi = act_candi.flatten()
                    
                    if state[0]<TIME_GAP*VEL:
                        act_candi[2] = -float("inf") # put a large num on action 2(left)
                    if state[1]<TIME_GAP*VEL:
                        act_candi[1] = -float("inf") # put a large num on action 1(down)
                    if state[0] > MAP_SIZE - TIME_GAP*VEL:
                        act_candi[3] = -float("inf") # remove action 3(right)
                    if state[1] > MAP_SIZE - TIME_GAP*VEL:
                        act_candi[0] = -float("inf") # remove action 0(up)  
                    if state[2]<TIME_GAP*VEL:
                        act_candi[6] = -float("inf") # put a large num on action 6(left)
                    if state[3]<TIME_GAP*VEL:
                        act_candi[5] = -float("inf") # put a large num on action 5(down)
                    if state[2] > MAP_SIZE - TIME_GAP*VEL:
                        act_candi[7] = -float("inf") # remove action 7(right)
                    if state[3] > MAP_SIZE - TIME_GAP*VEL:
                        act_candi[4] = -float("inf") # remove action 4(up)    
                    action = np.argmax(act_candi)   
                    '''
                    dd_predict = mainDQN.predict(state).flatten()
                    aa = np.max(dd_predict)
                    max_indx, = np.where(dd_predict==aa)                    
                    action = random.sample(max_indx,1)[0]
                    '''
                    
                n_state, reward, _fail = env1.next_step(state, action)
                replay_buffer.append((state, action, reward, n_state, _fail))
                '''
                get the reward once every two steps
                
                if step_count%2 == 1:
                    state_mem = state
                    action_mem = action
                    n_state, reward, _fail = env1.next_step(state, action) # have to input the action 

                    if _fail == False:
                        reward = 0 # do not allocate
                    else:
                        replay_buffer.append((state, action, reward, n_state, _fail))
                if step_count%2 == 0:
                    n_state, reward, _fail = env1.next_step(state, action) # have to input the action 
                    replay_buffer.append((state_mem, action_mem, reward/2, state, _fail)) # each action has responsibility of reward equally
                    replay_buffer.append((state, action, reward/2, n_state, _fail)) 
                # if count >30, stop that episode and start new episode, counted every time one agent moves
                '''
                if step_count >MAX_STEP-1:
                    #reward = -30
                    _fail = True   
                    goal_ = False
                
                reward_sum += reward    # sum total reward and penalty about long time(-0.5)                                  
                state = n_state
             
                if len(replay_buffer)>BATCH_SIZE*4:
                    minibatch = random.sample(replay_buffer, BATCH_SIZE)                    
                    train_minibatch(mainDQN, targetDQN, minibatch)
                if step_count % TARGET_UPDATE_FQ == 0:
                    sess.run(copy_ops)
                    
            total_reward_buffer.append(reward_sum)
            step_buffer.append(step_count)
            if goal_ == True:
                print("[Episode {:>5}]  total reward: {:>5}  steps: {:>5}, success".format(episode, reward_sum, step_count))
            else:
                print("[Episode {:>5}]  total reward: {:>5}  steps: {:>5}, failure".format(episode, reward_sum, step_count))
    
        #print("Success ratio: {}".format(reward_accum_last100/100))
        fig1 =plt.figure()
        plt.plot(range(MAX_EPI), total_reward_buffer)
        plt.show()
        
        # save model        
        save_path = new_saver.save(sess, "./dqn_single_action.ckpt")   
        # save data (reward, step)
        f = open("reward_s.txt", 'w')
        for i in range(len(total_reward_buffer)):
            f.write("{:>5}  {:>5}\n".format(total_reward_buffer[i], step_buffer[i]))
        f.close
        
if __name__ == "__main__":
    main()    
    end = time.time()-start
    print(end)         



  from ._conv import register_converters as _register_converters


<IPython.core.display.Javascript object>

[Episode     0]  total reward: -292.780808811, success
[Episode     1]  total reward: -1662.83510444, failure
[Episode     2]  total reward: -542.835104445, success
[Episode     3]  total reward: -1566.83510444, failure
[Episode     4]  total reward: -1060.83510444, success
[Episode     5]  total reward: -1026.78080881, failure
[Episode     6]  total reward: -770.780808811, failure
[Episode     7]  total reward: -880.835104445, success
[Episode     8]  total reward: -900.780808811, failure
[Episode     9]  total reward: -1014.78080881, failure
[Episode    10]  total reward: -844.835104445, failure
[Episode    11]  total reward: -1146.83510444, failure
[Episode    12]  total reward: -1228.83510444, failure
[Episode    13]  total reward: -1076.78080881, failure
[Episode    14]  total reward: -638.780808811, failure
[Episode    15]  total reward: -780.780808811, failure
[Episode    16]  total reward: -534.835104445, success
[Episode    17]  total reward: -626.835104445, success
[Episode  

[Episode   151]  total reward: -906.835104445, success
[Episode   152]  total reward: -1294.83510444, success
[Episode   153]  total reward: -214.835104445, success
[Episode   154]  total reward: -442.835104445, success
[Episode   155]  total reward: -88.835104445, success
[Episode   156]  total reward: -42.835104445, success
[Episode   157]  total reward: -478.835104445, success
[Episode   158]  total reward: -178.835104445, success
[Episode   159]  total reward: -154.835104445, success
[Episode   160]  total reward: -322.835104445, success
[Episode   161]  total reward: -70.835104445, success
[Episode   162]  total reward: -172.835104445, success
[Episode   163]  total reward: -46.835104445, success
[Episode   164]  total reward: -190.835104445, success
[Episode   165]  total reward: -694.835104445, success
[Episode   166]  total reward: -40.835104445, success
[Episode   167]  total reward: -312.780808811, success
[Episode   168]  total reward: -106.835104445, success
[Episode   169]

[Episode   302]  total reward: -160.835104445, success
[Episode   303]  total reward: -594.835104445, success
[Episode   304]  total reward: -76.835104445, success
[Episode   305]  total reward: -94.835104445, success
[Episode   306]  total reward: -94.835104445, success
[Episode   307]  total reward: -118.835104445, success
[Episode   308]  total reward: -88.835104445, success
[Episode   309]  total reward: -88.835104445, success
[Episode   310]  total reward: -82.835104445, success
[Episode   311]  total reward: -112.835104445, success
[Episode   312]  total reward: -88.835104445, success
[Episode   313]  total reward: -94.835104445, success
[Episode   314]  total reward: -82.835104445, success
[Episode   315]  total reward: -100.835104445, success
[Episode   316]  total reward: -94.835104445, success
[Episode   317]  total reward: 17.164895555, success
[Episode   318]  total reward: -94.835104445, success
[Episode   319]  total reward: -88.835104445, success
[Episode   320]  total r

[Episode   459]  total reward: -76.835104445, success
[Episode   460]  total reward: -94.835104445, success
[Episode   461]  total reward: -88.835104445, success
[Episode   462]  total reward: -100.835104445, success
[Episode   463]  total reward: -178.835104445, success
[Episode   464]  total reward: -88.835104445, success
[Episode   465]  total reward: -94.835104445, success
[Episode   466]  total reward: -114.780808811, success
[Episode   467]  total reward: -100.835104445, success
[Episode   468]  total reward: -88.835104445, success
[Episode   469]  total reward: -94.835104445, success
[Episode   470]  total reward: -88.835104445, success
[Episode   471]  total reward: -100.835104445, success
[Episode   472]  total reward: 47.164895555, success
[Episode   473]  total reward: 65.164895555, success
[Episode   474]  total reward: -360.835104445, success
[Episode   475]  total reward: 83.164895555, success
[Episode   476]  total reward: -112.835104445, success
[Episode   477]  total r

[Episode   615]  total reward: -76.835104445, success
[Episode   616]  total reward: -94.835104445, success
[Episode   617]  total reward: -82.835104445, success
[Episode   618]  total reward: -88.835104445, success
[Episode   619]  total reward: -88.835104445, success
[Episode   620]  total reward: -88.835104445, success
[Episode   621]  total reward: -100.835104445, success
[Episode   622]  total reward: -100.835104445, success
[Episode   623]  total reward: -88.835104445, success
[Episode   624]  total reward: -100.835104445, success
[Episode   625]  total reward: -94.835104445, success
[Episode   626]  total reward: -82.835104445, success
[Episode   627]  total reward: -100.835104445, success
[Episode   628]  total reward: -94.835104445, success
[Episode   629]  total reward: -328.835104445, success
[Episode   630]  total reward: -148.835104445, success
[Episode   631]  total reward: -82.835104445, success
[Episode   632]  total reward: -94.835104445, success
[Episode   633]  total

[Episode   766]  total reward: -1312.83510444, failure
[Episode   767]  total reward: -934.835104445, failure
[Episode   768]  total reward: -1038.83510444, failure
[Episode   769]  total reward: -394.835104445, success
[Episode   770]  total reward: -1164.83510444, failure
[Episode   771]  total reward: -738.780808811, success
[Episode   772]  total reward: -132.780808811, success
[Episode   773]  total reward: -460.780808811, success
[Episode   774]  total reward: -100.835104445, success
[Episode   775]  total reward: -70.835104445, success
[Episode   776]  total reward: -424.835104445, success
[Episode   777]  total reward: -64.835104445, success
[Episode   778]  total reward: -22.835104445, success
[Episode   779]  total reward: -94.835104445, success
[Episode   780]  total reward: -556.835104445, failure
[Episode   781]  total reward: -20.835104445, success
[Episode   782]  total reward: -628.835104445, success
[Episode   783]  total reward: -70.835104445, success
[Episode   784] 

[Episode   918]  total reward: -88.835104445, success
[Episode   919]  total reward: -64.835104445, success
[Episode   920]  total reward: 65.164895555, success
[Episode   921]  total reward: -30.835104445, success
[Episode   922]  total reward: 83.164895555, success
[Episode   923]  total reward: -22.835104445, success
[Episode   924]  total reward: -154.835104445, failure
[Episode   925]  total reward: -42.7808088114, success
[Episode   926]  total reward: -28.835104445, success
[Episode   927]  total reward: 89.164895555, success
[Episode   928]  total reward: -28.835104445, success
[Episode   929]  total reward: -1008.78080881, success
[Episode   930]  total reward: -10.835104445, success
[Episode   931]  total reward: -670.835104445, failure
[Episode   932]  total reward: -112.835104445, success
[Episode   933]  total reward: -658.835104445, failure
[Episode   934]  total reward: -58.835104445, failure
[Episode   935]  total reward: -286.835104445, success
[Episode   936]  total r

[Episode  1070]  total reward: -1146.83510444, success
[Episode  1071]  total reward: -1030.83510444, failure
[Episode  1072]  total reward: -1674.83510444, failure
[Episode  1073]  total reward: -1712.83510444, failure
[Episode  1074]  total reward: -1768.83510444, failure
[Episode  1075]  total reward: -1654.83510444, success
[Episode  1076]  total reward: -1432.83510444, success
[Episode  1077]  total reward: -1738.83510444, failure
[Episode  1078]  total reward: -1546.83510444, failure
[Episode  1079]  total reward: -1434.83510444, failure
[Episode  1080]  total reward: -502.835104445, success
[Episode  1081]  total reward: -372.835104445, success
[Episode  1082]  total reward: -202.835104445, success
[Episode  1083]  total reward: -664.835104445, success
[Episode  1084]  total reward: -472.835104445, failure
[Episode  1085]  total reward: -1280.83510444, success
[Episode  1086]  total reward: -432.780808811, success
[Episode  1087]  total reward: -204.835104445, success
[Episode  

[Episode  1221]  total reward: -46.835104445, success
[Episode  1222]  total reward: -16.835104445, success
[Episode  1223]  total reward: -4.83510444496, failure
[Episode  1224]  total reward: -34.835104445, success
[Episode  1225]  total reward: -94.835104445, success
[Episode  1226]  total reward: -106.835104445, success
[Episode  1227]  total reward: -198.780808811, failure
[Episode  1228]  total reward: -58.835104445, success
[Episode  1229]  total reward: -382.835104445, success
[Episode  1230]  total reward: -46.835104445, success
[Episode  1231]  total reward: -82.835104445, success
[Episode  1232]  total reward: -160.835104445, success
[Episode  1233]  total reward: -378.780808811, failure
[Episode  1234]  total reward: -136.835104445, success
[Episode  1235]  total reward: 9.2191911886, success
[Episode  1236]  total reward: 87.2191911886, success
[Episode  1237]  total reward: -32.7808088114, success
[Episode  1238]  total reward: -12.7808088114, success
[Episode  1239]  tot

[Episode  1372]  total reward: -130.835104445, success
[Episode  1373]  total reward: -52.835104445, success
[Episode  1374]  total reward: -840.780808811, success
[Episode  1375]  total reward: -34.835104445, success
[Episode  1376]  total reward: -52.835104445, success
[Episode  1377]  total reward: 107.164895555, success
[Episode  1378]  total reward: -310.835104445, success
[Episode  1379]  total reward: -88.835104445, success
[Episode  1380]  total reward: -70.835104445, success
[Episode  1381]  total reward: 71.164895555, success
[Episode  1382]  total reward: -148.835104445, success
[Episode  1383]  total reward: -82.835104445, success
[Episode  1384]  total reward: -226.835104445, success
[Episode  1385]  total reward: -52.835104445, success
[Episode  1386]  total reward: -898.835104445, success
[Episode  1387]  total reward: 101.164895555, success
[Episode  1388]  total reward: -952.835104445, success
[Episode  1389]  total reward: 101.164895555, success
[Episode  1390]  total

[Episode  1523]  total reward: -310.835104445, success
[Episode  1524]  total reward: -52.835104445, success
[Episode  1525]  total reward: -814.835104445, success
[Episode  1526]  total reward: -382.835104445, success
[Episode  1527]  total reward: -88.835104445, success
[Episode  1528]  total reward: -262.835104445, success
[Episode  1529]  total reward: -178.835104445, success
[Episode  1530]  total reward: -358.835104445, success
[Episode  1531]  total reward: -256.835104445, success
[Episode  1532]  total reward: -634.835104445, success
[Episode  1533]  total reward: -484.835104445, success
[Episode  1534]  total reward: -366.780808811, success
[Episode  1535]  total reward: -244.835104445, success
[Episode  1536]  total reward: -142.835104445, success
[Episode  1537]  total reward: -106.835104445, success
[Episode  1538]  total reward: -640.835104445, success
[Episode  1539]  total reward: -184.835104445, success
[Episode  1540]  total reward: -190.835104445, success
[Episode  15

[Episode  1679]  total reward: -76.835104445, success
[Episode  1680]  total reward: -70.835104445, success
[Episode  1681]  total reward: -70.835104445, success
[Episode  1682]  total reward: -64.835104445, success
[Episode  1683]  total reward: -76.835104445, success
[Episode  1684]  total reward: -58.835104445, success
[Episode  1685]  total reward: -64.835104445, success
[Episode  1686]  total reward: -70.835104445, success
[Episode  1687]  total reward: -124.835104445, success
[Episode  1688]  total reward: -82.835104445, success
[Episode  1689]  total reward: -70.835104445, success
[Episode  1690]  total reward: -70.835104445, success
[Episode  1691]  total reward: -88.835104445, success
[Episode  1692]  total reward: -70.835104445, success
[Episode  1693]  total reward: -82.835104445, success
[Episode  1694]  total reward: -208.835104445, success
[Episode  1695]  total reward: -76.835104445, success
[Episode  1696]  total reward: -76.835104445, success
[Episode  1697]  total rew

[Episode  1831]  total reward: -64.835104445, success
[Episode  1832]  total reward: -58.835104445, success
[Episode  1833]  total reward: -90.7808088114, success
[Episode  1834]  total reward: -82.835104445, success
[Episode  1835]  total reward: -76.835104445, success
[Episode  1836]  total reward: -76.835104445, success
[Episode  1837]  total reward: -76.835104445, success
[Episode  1838]  total reward: -112.835104445, success
[Episode  1839]  total reward: -70.835104445, success
[Episode  1840]  total reward: -70.835104445, success
[Episode  1841]  total reward: -46.835104445, success
[Episode  1842]  total reward: -76.835104445, success
[Episode  1843]  total reward: -46.835104445, success
[Episode  1844]  total reward: -196.835104445, success
[Episode  1845]  total reward: -64.835104445, success
[Episode  1846]  total reward: -64.835104445, success
[Episode  1847]  total reward: -52.835104445, success
[Episode  1848]  total reward: -70.835104445, success
[Episode  1849]  total re

[Episode  1982]  total reward: -1734.78080881, failure
[Episode  1983]  total reward: -1006.83510444, failure
[Episode  1984]  total reward: -300.780808811, success
[Episode  1985]  total reward: -1652.78080881, failure
[Episode  1986]  total reward: -274.835104445, success
[Episode  1987]  total reward: -1384.83510444, failure
[Episode  1988]  total reward: -546.780808811, failure
[Episode  1989]  total reward: -502.835104445, success
[Episode  1990]  total reward: -586.835104445, success
[Episode  1991]  total reward: -28.835104445, success
[Episode  1992]  total reward: -604.835104445, success
[Episode  1993]  total reward: -94.835104445, success
[Episode  1994]  total reward: -768.835104445, failure
[Episode  1995]  total reward: -214.835104445, success
[Episode  1996]  total reward: -288.780808811, success
[Episode  1997]  total reward: -826.835104445, success
[Episode  1998]  total reward: -208.835104445, success
[Episode  1999]  total reward: -160.835104445, success
[Episode  20

[Episode  2132]  total reward: -154.835104445, success
[Episode  2133]  total reward: -46.835104445, success
[Episode  2134]  total reward: -112.835104445, success
[Episode  2135]  total reward: -360.835104445, success
[Episode  2136]  total reward: 41.164895555, success
[Episode  2137]  total reward: -394.835104445, success
[Episode  2138]  total reward: -230.835104445, success
[Episode  2139]  total reward: -94.835104445, success
[Episode  2140]  total reward: -208.835104445, success
[Episode  2141]  total reward: -102.780808811, success
[Episode  2142]  total reward: -86.835104445, success
[Episode  2143]  total reward: -76.835104445, success
[Episode  2144]  total reward: -64.835104445, success
[Episode  2145]  total reward: -66.835104445, success
[Episode  2146]  total reward: -148.835104445, success
[Episode  2147]  total reward: -222.835104445, success
[Episode  2148]  total reward: -228.835104445, success
[Episode  2149]  total reward: -102.835104445, success
[Episode  2150]  t

[Episode  2284]  total reward: -268.835104445, success
[Episode  2285]  total reward: -1702.83510444, success
[Episode  2286]  total reward: -920.835104445, success
[Episode  2287]  total reward: -294.835104445, success
[Episode  2288]  total reward: -652.835104445, success
[Episode  2289]  total reward: -334.835104445, success
[Episode  2290]  total reward: -130.835104445, success
[Episode  2291]  total reward: -1120.83510444, success
[Episode  2292]  total reward: -232.835104445, success
[Episode  2293]  total reward: -558.835104445, success
[Episode  2294]  total reward: -214.835104445, success
[Episode  2295]  total reward: -406.835104445, success
[Episode  2296]  total reward: -970.835104445, failure
[Episode  2297]  total reward: -370.835104445, success
[Episode  2298]  total reward: -12.835104445, success
[Episode  2299]  total reward: -1348.83510444, failure
[Episode  2300]  total reward: -124.835104445, success
[Episode  2301]  total reward: -382.835104445, success
[Episode  2

[Episode  2436]  total reward: -550.835104445, failure
[Episode  2437]  total reward: -220.835104445, success
[Episode  2438]  total reward: -244.835104445, success
[Episode  2439]  total reward: -852.835104445, success
[Episode  2440]  total reward: -634.835104445, failure
[Episode  2441]  total reward: -844.835104445, success
[Episode  2442]  total reward: -40.835104445, success
[Episode  2443]  total reward: -34.835104445, success
[Episode  2444]  total reward: -220.835104445, success
[Episode  2445]  total reward: -292.835104445, failure
[Episode  2446]  total reward: 35.164895555, success
[Episode  2447]  total reward: -292.835104445, success
[Episode  2448]  total reward: -196.835104445, failure
[Episode  2449]  total reward: -10.835104445, success
[Episode  2450]  total reward: -234.780808811, success
[Episode  2451]  total reward: -100.835104445, success
[Episode  2452]  total reward: 95.164895555, success
[Episode  2453]  total reward: -102.835104445, success
[Episode  2454]  

[Episode  2593]  total reward: -10.835104445, success
[Episode  2594]  total reward: 101.164895555, success
[Episode  2595]  total reward: -28.835104445, success
[Episode  2596]  total reward: 95.164895555, success
[Episode  2597]  total reward: -28.835104445, success
[Episode  2598]  total reward: -34.835104445, success
[Episode  2599]  total reward: -120.835104445, success
[Episode  2600]  total reward: -52.835104445, success
[Episode  2601]  total reward: -136.835104445, success
[Episode  2602]  total reward: 41.164895555, success
[Episode  2603]  total reward: 89.164895555, success
[Episode  2604]  total reward: -44.835104445, success
[Episode  2605]  total reward: -124.835104445, success
[Episode  2606]  total reward: 107.164895555, success
[Episode  2607]  total reward: -16.835104445, success
[Episode  2608]  total reward: -46.835104445, success
[Episode  2609]  total reward: 1.16489555504, success
[Episode  2610]  total reward: -10.835104445, success
[Episode  2611]  total rewar

[Episode  2745]  total reward: -100.835104445, success
[Episode  2746]  total reward: -286.835104445, success
[Episode  2747]  total reward: -1012.83510444, success
[Episode  2748]  total reward: -66.835104445, success
[Episode  2749]  total reward: -124.835104445, success
[Episode  2750]  total reward: -118.835104445, success
[Episode  2751]  total reward: -532.835104445, success
[Episode  2752]  total reward: -154.835104445, success
[Episode  2753]  total reward: -70.835104445, success
[Episode  2754]  total reward: -178.835104445, success
[Episode  2755]  total reward: -64.835104445, success
[Episode  2756]  total reward: -166.835104445, success
[Episode  2757]  total reward: -130.835104445, success
[Episode  2758]  total reward: -64.835104445, success
[Episode  2759]  total reward: -340.835104445, success
[Episode  2760]  total reward: -88.835104445, success
[Episode  2761]  total reward: -88.835104445, success
[Episode  2762]  total reward: 95.164895555, success
[Episode  2763]  t

[Episode  2901]  total reward: 15.164895555, success
[Episode  2902]  total reward: 47.164895555, success
[Episode  2903]  total reward: -4.7808088114, success
[Episode  2904]  total reward: 47.164895555, success
[Episode  2905]  total reward: 33.164895555, success
[Episode  2906]  total reward: 29.164895555, success
[Episode  2907]  total reward: -204.835104445, success
[Episode  2908]  total reward: 3.16489555504, success
[Episode  2909]  total reward: 9.16489555504, success
[Episode  2910]  total reward: 13.164895555, success
[Episode  2911]  total reward: -122.835104445, success
[Episode  2912]  total reward: 17.164895555, success
[Episode  2913]  total reward: 17.164895555, success
[Episode  2914]  total reward: -240.835104445, success
[Episode  2915]  total reward: -12.835104445, success
[Episode  2916]  total reward: -8.83510444496, success
[Episode  2917]  total reward: 5.16489555504, success
[Episode  2918]  total reward: -0.835104444958, success
[Episode  2919]  total reward:

[Episode  3053]  total reward: -904.835104445, success
[Episode  3054]  total reward: -124.835104445, success
[Episode  3055]  total reward: -76.835104445, success
[Episode  3056]  total reward: -88.835104445, success
[Episode  3057]  total reward: -226.835104445, success
[Episode  3058]  total reward: -184.835104445, success
[Episode  3059]  total reward: 35.164895555, success
[Episode  3060]  total reward: -220.835104445, success
[Episode  3061]  total reward: -82.835104445, success
[Episode  3062]  total reward: -148.835104445, success
[Episode  3063]  total reward: -112.835104445, success
[Episode  3064]  total reward: -58.835104445, success
[Episode  3065]  total reward: -70.835104445, success
[Episode  3066]  total reward: -52.835104445, success
[Episode  3067]  total reward: -52.835104445, success
[Episode  3068]  total reward: -54.7808088114, success
[Episode  3069]  total reward: -48.7808088114, success
[Episode  3070]  total reward: -30.7808088114, success
[Episode  3071]  to

[Episode  3205]  total reward: -34.835104445, success
[Episode  3206]  total reward: -28.835104445, failure
[Episode  3207]  total reward: -298.835104445, failure
[Episode  3208]  total reward: -310.835104445, failure
[Episode  3209]  total reward: -34.835104445, failure
[Episode  3210]  total reward: -12.835104445, failure
[Episode  3211]  total reward: 1.16489555504, failure
[Episode  3212]  total reward: -26.835104445, failure
[Episode  3213]  total reward: -308.835104445, failure
[Episode  3214]  total reward: 7.16489555504, failure
[Episode  3215]  total reward: 7.16489555504, failure
[Episode  3216]  total reward: -214.835104445, failure
[Episode  3217]  total reward: 7.16489555504, failure
[Episode  3218]  total reward: -34.835104445, failure
[Episode  3219]  total reward: -592.835104445, success
[Episode  3220]  total reward: 1.16489555504, failure
[Episode  3221]  total reward: 7.16489555504, failure
[Episode  3222]  total reward: -4.83510444496, failure
[Episode  3223]  total

[Episode  3355]  total reward: -1012.83510444, failure
[Episode  3356]  total reward: -100.835104445, success
[Episode  3357]  total reward: -322.835104445, success
[Episode  3358]  total reward: -1322.83510444, failure
[Episode  3359]  total reward: -942.835104445, failure
[Episode  3360]  total reward: -1138.83510444, failure
[Episode  3361]  total reward: -24.835104445, success
[Episode  3362]  total reward: -610.835104445, success
[Episode  3363]  total reward: -1410.83510444, success
[Episode  3364]  total reward: -924.835104445, success
[Episode  3365]  total reward: -226.835104445, success
[Episode  3366]  total reward: -406.835104445, success
[Episode  3367]  total reward: -228.835104445, success
[Episode  3368]  total reward: -114.835104445, success
[Episode  3369]  total reward: -1594.83510444, failure
[Episode  3370]  total reward: -640.835104445, success
[Episode  3371]  total reward: -436.835104445, success
[Episode  3372]  total reward: -1726.83510444, failure
[Episode  3

[Episode  3505]  total reward: -1036.83510444, failure
[Episode  3506]  total reward: -1108.83510444, success
[Episode  3507]  total reward: -58.835104445, success
[Episode  3508]  total reward: -272.835104445, failure
[Episode  3509]  total reward: -568.835104445, success
[Episode  3510]  total reward: -94.835104445, success
[Episode  3511]  total reward: -1414.83510444, success
[Episode  3512]  total reward: -826.835104445, success
[Episode  3513]  total reward: -448.835104445, failure
[Episode  3514]  total reward: -24.835104445, success
[Episode  3515]  total reward: -628.835104445, failure
[Episode  3516]  total reward: -742.835104445, failure
[Episode  3517]  total reward: -238.835104445, success
[Episode  3518]  total reward: -88.835104445, failure
[Episode  3519]  total reward: -1222.83510444, success
[Episode  3520]  total reward: -1210.83510444, success
[Episode  3521]  total reward: -208.835104445, success
[Episode  3522]  total reward: -436.835104445, failure
[Episode  3523

[Episode  3655]  total reward: -688.835104445, success
[Episode  3656]  total reward: -352.835104445, success
[Episode  3657]  total reward: -1004.83510444, failure
[Episode  3658]  total reward: -994.835104445, failure
[Episode  3659]  total reward: -490.835104445, success
[Episode  3660]  total reward: -906.835104445, failure
[Episode  3661]  total reward: -906.835104445, failure
[Episode  3662]  total reward: -594.835104445, success
[Episode  3663]  total reward: -262.835104445, success
[Episode  3664]  total reward: -1396.83510444, success
[Episode  3665]  total reward: -1138.83510444, success
[Episode  3666]  total reward: -346.835104445, failure
[Episode  3667]  total reward: -304.835104445, success
[Episode  3668]  total reward: -1102.83510444, success
[Episode  3669]  total reward: -1240.83510444, failure
[Episode  3670]  total reward: -1074.83510444, failure
[Episode  3671]  total reward: -1764.78080881, failure
[Episode  3672]  total reward: -926.835104445, failure
[Episode  

[Episode  3805]  total reward: -124.835104445, failure
[Episode  3806]  total reward: -682.835104445, failure
[Episode  3807]  total reward: -0.835104444958, success
[Episode  3808]  total reward: -952.835104445, failure
[Episode  3809]  total reward: -772.835104445, failure
[Episode  3810]  total reward: -316.835104445, failure
[Episode  3811]  total reward: -1252.83510444, failure
[Episode  3812]  total reward: -1138.83510444, success
[Episode  3813]  total reward: -52.835104445, success
[Episode  3814]  total reward: -310.835104445, success
[Episode  3815]  total reward: -118.835104445, failure
[Episode  3816]  total reward: -240.835104445, success
[Episode  3817]  total reward: -214.835104445, success
[Episode  3818]  total reward: -376.835104445, success
[Episode  3819]  total reward: -1072.83510444, failure
[Episode  3820]  total reward: -496.835104445, failure
[Episode  3821]  total reward: -88.835104445, failure
[Episode  3822]  total reward: -266.780808811, failure
[Episode  3

[Episode  3955]  total reward: -296.835104445, failure
[Episode  3956]  total reward: -624.780808811, failure
[Episode  3957]  total reward: -720.835104445, failure
[Episode  3958]  total reward: -578.835104445, failure
[Episode  3959]  total reward: -226.835104445, failure
[Episode  3960]  total reward: -658.835104445, success
[Episode  3961]  total reward: -118.835104445, success
[Episode  3962]  total reward: -550.835104445, failure
[Episode  3963]  total reward: -154.835104445, success
[Episode  3964]  total reward: -574.835104445, failure
[Episode  3965]  total reward: -328.835104445, success
[Episode  3966]  total reward: -810.835104445, failure
[Episode  3967]  total reward: -250.835104445, failure
[Episode  3968]  total reward: -532.835104445, success
[Episode  3969]  total reward: -256.835104445, failure
[Episode  3970]  total reward: -46.835104445, success
[Episode  3971]  total reward: -292.835104445, failure
[Episode  3972]  total reward: -142.835104445, success
[Episode  3

[Episode  4105]  total reward: -356.835104445, failure
[Episode  4106]  total reward: -646.835104445, success
[Episode  4107]  total reward: -744.780808811, failure
[Episode  4108]  total reward: -334.835104445, success
[Episode  4109]  total reward: -706.835104445, failure
[Episode  4110]  total reward: -568.835104445, failure
[Episode  4111]  total reward: -340.835104445, failure
[Episode  4112]  total reward: -814.835104445, failure
[Episode  4113]  total reward: -100.835104445, success
[Episode  4114]  total reward: -1240.83510444, failure
[Episode  4115]  total reward: -270.835104445, success
[Episode  4116]  total reward: -898.835104445, success
[Episode  4117]  total reward: -94.835104445, success
[Episode  4118]  total reward: -570.835104445, failure
[Episode  4119]  total reward: -406.835104445, failure
[Episode  4120]  total reward: -710.835104445, failure
[Episode  4121]  total reward: -560.780808811, success
[Episode  4122]  total reward: -634.835104445, failure
[Episode  4

[Episode  4255]  total reward: -436.835104445, failure
[Episode  4256]  total reward: -346.835104445, failure
[Episode  4257]  total reward: -194.835104445, success
[Episode  4258]  total reward: -364.835104445, success
[Episode  4259]  total reward: -1054.83510444, failure
[Episode  4260]  total reward: -1000.83510444, failure
[Episode  4261]  total reward: -160.835104445, success
[Episode  4262]  total reward: -456.835104445, success
[Episode  4263]  total reward: -108.835104445, success
[Episode  4264]  total reward: -76.835104445, failure
[Episode  4265]  total reward: -198.780808811, failure
[Episode  4266]  total reward: -92.835104445, success
[Episode  4267]  total reward: -164.835104445, failure
[Episode  4268]  total reward: -28.835104445, failure
[Episode  4269]  total reward: 1.16489555504, failure
[Episode  4270]  total reward: 73.164895555, success
[Episode  4271]  total reward: -136.835104445, success
[Episode  4272]  total reward: -4.83510444496, success
[Episode  4273] 

[Episode  4405]  total reward: -1684.83510444, success
[Episode  4406]  total reward: -1744.83510444, failure
[Episode  4407]  total reward: -958.835104445, success
[Episode  4408]  total reward: -460.835104445, success
[Episode  4409]  total reward: -1468.83510444, success
[Episode  4410]  total reward: -106.835104445, success
[Episode  4411]  total reward: -1684.83510444, failure
[Episode  4412]  total reward: -814.835104445, failure
[Episode  4413]  total reward: -1414.83510444, failure
[Episode  4414]  total reward: -1702.83510444, failure
[Episode  4415]  total reward: -1672.83510444, failure
[Episode  4416]  total reward: -850.835104445, failure
[Episode  4417]  total reward: -1126.83510444, failure
[Episode  4418]  total reward: -706.780808811, failure
[Episode  4419]  total reward: -1264.83510444, failure
[Episode  4420]  total reward: -216.780808811, failure
[Episode  4421]  total reward: -76.835104445, failure
[Episode  4422]  total reward: -40.835104445, failure
[Episode  44

[Episode  4555]  total reward: -634.835104445, failure
[Episode  4556]  total reward: 1.16489555504, failure
[Episode  4557]  total reward: -998.835104445, failure
[Episode  4558]  total reward: -716.835104445, success
[Episode  4559]  total reward: -1336.83510444, failure
[Episode  4560]  total reward: -226.835104445, success
[Episode  4561]  total reward: -778.780808811, failure
[Episode  4562]  total reward: -396.780808811, success
[Episode  4563]  total reward: -1116.78080881, failure
[Episode  4564]  total reward: -88.835104445, success
[Episode  4565]  total reward: 93.2191911886, success
[Episode  4566]  total reward: -698.780808811, failure
[Episode  4567]  total reward: -474.780808811, failure
[Episode  4568]  total reward: -1026.83510444, failure
[Episode  4569]  total reward: -808.835104445, success
[Episode  4570]  total reward: -60.7808088114, success
[Episode  4571]  total reward: -626.780808811, success
[Episode  4572]  total reward: -40.835104445, failure
[Episode  4573

[Episode  4707]  total reward: -52.835104445, success
[Episode  4708]  total reward: -202.835104445, success
[Episode  4709]  total reward: -82.835104445, success
[Episode  4710]  total reward: -46.835104445, success
[Episode  4711]  total reward: -154.835104445, success
[Episode  4712]  total reward: -166.835104445, success
[Episode  4713]  total reward: 95.164895555, success
[Episode  4714]  total reward: -250.835104445, success
[Episode  4715]  total reward: -196.835104445, failure
[Episode  4716]  total reward: -430.835104445, failure
[Episode  4717]  total reward: -496.835104445, success
[Episode  4718]  total reward: -412.835104445, success
[Episode  4719]  total reward: 47.164895555, success
[Episode  4720]  total reward: -58.835104445, success
[Episode  4721]  total reward: -82.835104445, success
[Episode  4722]  total reward: -52.835104445, success
[Episode  4723]  total reward: -748.835104445, success
[Episode  4724]  total reward: -178.835104445, success
[Episode  4725]  tot

[Episode  4857]  total reward: -448.835104445, success
[Episode  4858]  total reward: -268.835104445, success
[Episode  4859]  total reward: -1240.83510444, failure
[Episode  4860]  total reward: -328.835104445, success
[Episode  4861]  total reward: -334.835104445, failure
[Episode  4862]  total reward: -148.835104445, success
[Episode  4863]  total reward: -52.835104445, success
[Episode  4864]  total reward: -130.835104445, success
[Episode  4865]  total reward: -40.835104445, success
[Episode  4866]  total reward: -22.835104445, success
[Episode  4867]  total reward: -274.835104445, success
[Episode  4868]  total reward: -178.835104445, success
[Episode  4869]  total reward: -160.835104445, success
[Episode  4870]  total reward: -94.835104445, success
[Episode  4871]  total reward: -226.835104445, success
[Episode  4872]  total reward: -262.835104445, success
[Episode  4873]  total reward: -46.835104445, success
[Episode  4874]  total reward: 107.164895555, success
[Episode  4875] 

[Episode  5008]  total reward: -448.835104445, failure
[Episode  5009]  total reward: -78.835104445, success
[Episode  5010]  total reward: -438.780808811, success
[Episode  5011]  total reward: -76.835104445, success
[Episode  5012]  total reward: -124.835104445, success
[Episode  5013]  total reward: -166.835104445, success
[Episode  5014]  total reward: -166.835104445, success
[Episode  5015]  total reward: -124.835104445, success
[Episode  5016]  total reward: -190.835104445, failure
[Episode  5017]  total reward: -574.835104445, success
[Episode  5018]  total reward: -100.835104445, success
[Episode  5019]  total reward: -934.835104445, success
[Episode  5020]  total reward: -402.835104445, success
[Episode  5021]  total reward: -430.835104445, failure
[Episode  5022]  total reward: -1600.83510444, failure
[Episode  5023]  total reward: -826.835104445, success
[Episode  5024]  total reward: -450.780808811, success
[Episode  5025]  total reward: -76.835104445, success
[Episode  502

[Episode  5158]  total reward: -194.780808811, success
[Episode  5159]  total reward: -286.835104445, success
[Episode  5160]  total reward: -158.780808811, success
[Episode  5161]  total reward: -178.835104445, success
[Episode  5162]  total reward: -808.835104445, success
[Episode  5163]  total reward: -178.835104445, success
[Episode  5164]  total reward: -40.835104445, success
[Episode  5165]  total reward: -112.835104445, success
[Episode  5166]  total reward: -112.835104445, success
[Episode  5167]  total reward: -364.835104445, success
[Episode  5168]  total reward: -450.835104445, success
[Episode  5169]  total reward: -538.835104445, success
[Episode  5170]  total reward: -400.835104445, success
[Episode  5171]  total reward: -184.835104445, success
[Episode  5172]  total reward: -478.835104445, success
[Episode  5173]  total reward: -108.835104445, success
[Episode  5174]  total reward: 11.164895555, success
[Episode  5175]  total reward: 35.164895555, success
[Episode  5176]

[Episode  5308]  total reward: -10.835104445, success
[Episode  5309]  total reward: -220.835104445, success
[Episode  5310]  total reward: 65.164895555, success
[Episode  5311]  total reward: -12.835104445, success
[Episode  5312]  total reward: -78.835104445, success
[Episode  5313]  total reward: -208.835104445, success
[Episode  5314]  total reward: -268.835104445, failure
[Episode  5315]  total reward: -106.835104445, success
[Episode  5316]  total reward: -448.835104445, success
[Episode  5317]  total reward: -274.835104445, failure
[Episode  5318]  total reward: -18.835104445, success
[Episode  5319]  total reward: -36.835104445, success
[Episode  5320]  total reward: -12.835104445, success
[Episode  5321]  total reward: -24.835104445, success
[Episode  5322]  total reward: -174.835104445, success
[Episode  5323]  total reward: -12.835104445, success
[Episode  5324]  total reward: -600.835104445, success
[Episode  5325]  total reward: -54.835104445, success
[Episode  5326]  tota

[Episode  5459]  total reward: -672.835104445, success
[Episode  5460]  total reward: -928.835104445, success
[Episode  5461]  total reward: -1000.83510444, success
[Episode  5462]  total reward: -120.835104445, success
[Episode  5463]  total reward: -256.835104445, success
[Episode  5464]  total reward: -78.835104445, success
[Episode  5465]  total reward: -160.835104445, success
[Episode  5466]  total reward: -826.835104445, failure
[Episode  5467]  total reward: -412.835104445, success
[Episode  5468]  total reward: -280.835104445, success
[Episode  5469]  total reward: -214.835104445, success
[Episode  5470]  total reward: -114.835104445, success
[Episode  5471]  total reward: 71.164895555, success
[Episode  5472]  total reward: -586.835104445, success
[Episode  5473]  total reward: -682.835104445, success
[Episode  5474]  total reward: -586.835104445, success
[Episode  5475]  total reward: -106.835104445, success
[Episode  5476]  total reward: -40.835104445, success
[Episode  5477

[Episode  5612]  total reward: -456.835104445, success
[Episode  5613]  total reward: -256.835104445, success
[Episode  5614]  total reward: -496.835104445, success
[Episode  5615]  total reward: -406.835104445, success
[Episode  5616]  total reward: -130.835104445, success
[Episode  5617]  total reward: -70.835104445, success
[Episode  5618]  total reward: -172.835104445, success
[Episode  5619]  total reward: -64.835104445, success
[Episode  5620]  total reward: -64.835104445, success
[Episode  5621]  total reward: -304.835104445, success
[Episode  5622]  total reward: -118.835104445, success
[Episode  5623]  total reward: -52.835104445, success
[Episode  5624]  total reward: -22.835104445, success
[Episode  5625]  total reward: -82.835104445, failure
[Episode  5626]  total reward: -118.835104445, success
[Episode  5627]  total reward: -574.835104445, failure
[Episode  5628]  total reward: -904.835104445, success
[Episode  5629]  total reward: -562.835104445, success
[Episode  5630] 

[Episode  5764]  total reward: -1570.83510444, success
[Episode  5765]  total reward: -118.835104445, success
[Episode  5766]  total reward: 41.164895555, success
[Episode  5767]  total reward: -136.835104445, success
[Episode  5768]  total reward: -58.835104445, success
[Episode  5769]  total reward: 35.164895555, success
[Episode  5770]  total reward: -64.835104445, success
[Episode  5771]  total reward: -1558.83510444, failure
[Episode  5772]  total reward: -928.835104445, failure
[Episode  5773]  total reward: -82.835104445, success
[Episode  5774]  total reward: -244.835104445, failure
[Episode  5775]  total reward: 89.164895555, success
[Episode  5776]  total reward: -166.835104445, failure
[Episode  5777]  total reward: -42.835104445, success
[Episode  5778]  total reward: 59.164895555, success
[Episode  5779]  total reward: -34.835104445, success
[Episode  5780]  total reward: -94.835104445, success
[Episode  5781]  total reward: -286.835104445, failure
[Episode  5782]  total r

[Episode  5917]  total reward: -706.835104445, success
[Episode  5918]  total reward: -1786.83510444, failure
[Episode  5919]  total reward: -616.835104445, success
[Episode  5920]  total reward: -124.835104445, success
[Episode  5921]  total reward: -1270.83510444, success
[Episode  5922]  total reward: -154.835104445, success
[Episode  5923]  total reward: -1786.83510444, failure
[Episode  5924]  total reward: -1054.83510444, success
[Episode  5925]  total reward: -880.835104445, success
[Episode  5926]  total reward: -820.835104445, success
[Episode  5927]  total reward: -262.835104445, success
[Episode  5928]  total reward: -922.835104445, success
[Episode  5929]  total reward: -1096.83510444, success
[Episode  5930]  total reward: -994.835104445, success
[Episode  5931]  total reward: -1006.83510444, success
[Episode  5932]  total reward: -394.835104445, success
[Episode  5933]  total reward: -250.835104445, success
[Episode  5934]  total reward: -178.835104445, success
[Episode  

[Episode  6073]  total reward: 71.164895555, success
[Episode  6074]  total reward: -28.835104445, success
[Episode  6075]  total reward: -22.835104445, success
[Episode  6076]  total reward: -748.835104445, failure
[Episode  6077]  total reward: -28.835104445, success
[Episode  6078]  total reward: -22.835104445, success
[Episode  6079]  total reward: -34.835104445, success
[Episode  6080]  total reward: 71.164895555, success
[Episode  6081]  total reward: -34.835104445, success
[Episode  6082]  total reward: -28.835104445, success
[Episode  6083]  total reward: -28.835104445, success
[Episode  6084]  total reward: -1378.83510444, success
[Episode  6085]  total reward: -70.835104445, success
[Episode  6086]  total reward: -34.835104445, success
[Episode  6087]  total reward: -22.835104445, success
[Episode  6088]  total reward: -34.835104445, success
[Episode  6089]  total reward: -28.835104445, success
[Episode  6090]  total reward: -22.835104445, success
[Episode  6091]  total rewar

[Episode  6224]  total reward: -168.835104445, success
[Episode  6225]  total reward: -242.835104445, success
[Episode  6226]  total reward: -274.835104445, success
[Episode  6227]  total reward: -100.835104445, success
[Episode  6228]  total reward: -162.835104445, success
[Episode  6229]  total reward: -342.835104445, success
[Episode  6230]  total reward: -138.835104445, success
[Episode  6231]  total reward: -206.835104445, success
[Episode  6232]  total reward: -1024.83510444, success
[Episode  6233]  total reward: -660.835104445, success
[Episode  6234]  total reward: -700.835104445, success
[Episode  6235]  total reward: -246.835104445, success
[Episode  6236]  total reward: -74.835104445, success
[Episode  6237]  total reward: -134.835104445, success
[Episode  6238]  total reward: -1072.83510444, success
[Episode  6239]  total reward: -244.835104445, success
[Episode  6240]  total reward: -304.835104445, success
[Episode  6241]  total reward: -544.835104445, success
[Episode  6

[Episode  6374]  total reward: -88.835104445, success
[Episode  6375]  total reward: -766.835104445, success
[Episode  6376]  total reward: -232.835104445, success
[Episode  6377]  total reward: 107.164895555, success
[Episode  6378]  total reward: -16.835104445, success
[Episode  6379]  total reward: -490.835104445, success
[Episode  6380]  total reward: 107.164895555, success
[Episode  6381]  total reward: -10.835104445, success
[Episode  6382]  total reward: -124.835104445, success
[Episode  6383]  total reward: -276.835104445, success
[Episode  6384]  total reward: -102.780808811, success
[Episode  6385]  total reward: -136.835104445, success
[Episode  6386]  total reward: -120.835104445, failure
[Episode  6387]  total reward: -410.835104445, failure
[Episode  6388]  total reward: -640.835104445, failure
[Episode  6389]  total reward: -94.835104445, success
[Episode  6390]  total reward: -176.835104445, failure
[Episode  6391]  total reward: -464.835104445, failure
[Episode  6392] 

[Episode  6524]  total reward: -584.780808811, failure
[Episode  6525]  total reward: -870.780808811, failure
[Episode  6526]  total reward: -168.780808811, success
[Episode  6527]  total reward: -1084.83510444, failure
[Episode  6528]  total reward: -484.835104445, failure
[Episode  6529]  total reward: -1206.78080881, failure
[Episode  6530]  total reward: -1482.78080881, failure
[Episode  6531]  total reward: -1066.83510444, failure
[Episode  6532]  total reward: -1342.83510444, failure
[Episode  6533]  total reward: -160.835104445, success
[Episode  6534]  total reward: -664.835104445, success
[Episode  6535]  total reward: -388.835104445, failure
[Episode  6536]  total reward: -1186.83510444, failure
[Episode  6537]  total reward: -652.835104445, success
[Episode  6538]  total reward: -1300.83510444, failure
[Episode  6539]  total reward: -1186.83510444, success
[Episode  6540]  total reward: -664.835104445, failure
[Episode  6541]  total reward: -262.835104445, success
[Episode  

[Episode  6674]  total reward: -856.835104445, failure
[Episode  6675]  total reward: -1256.83510444, failure
[Episode  6676]  total reward: -1000.83510444, failure
[Episode  6677]  total reward: -1510.83510444, failure
[Episode  6678]  total reward: -244.835104445, failure
[Episode  6679]  total reward: -1062.83510444, failure
[Episode  6680]  total reward: 7.16489555504, failure
[Episode  6681]  total reward: -292.835104445, failure
[Episode  6682]  total reward: -124.835104445, failure
[Episode  6683]  total reward: -502.835104445, success
[Episode  6684]  total reward: -574.835104445, failure
[Episode  6685]  total reward: -1392.83510444, failure
[Episode  6686]  total reward: -1372.83510444, failure
[Episode  6687]  total reward: -238.835104445, success
[Episode  6688]  total reward: -124.835104445, success
[Episode  6689]  total reward: -1408.83510444, failure
[Episode  6690]  total reward: -1306.83510444, failure
[Episode  6691]  total reward: -1762.83510444, failure
[Episode  6

[Episode  6824]  total reward: -72.7808088114, success
[Episode  6825]  total reward: -312.780808811, failure
[Episode  6826]  total reward: -6.7808088114, failure
[Episode  6827]  total reward: -444.780808811, failure
[Episode  6828]  total reward: -168.780808811, failure
[Episode  6829]  total reward: -666.780808811, success
[Episode  6830]  total reward: -570.780808811, success
[Episode  6831]  total reward: -672.780808811, failure
[Episode  6832]  total reward: -42.7808088114, failure
[Episode  6833]  total reward: -690.780808811, failure
[Episode  6834]  total reward: -120.780808811, failure
[Episode  6835]  total reward: -622.835104445, success
[Episode  6836]  total reward: -810.780808811, failure
[Episode  6837]  total reward: -612.780808811, failure
[Episode  6838]  total reward: -636.780808811, failure
[Episode  6839]  total reward: -1284.78080881, failure
[Episode  6840]  total reward: -102.780808811, success
[Episode  6841]  total reward: -354.780808811, success
[Episode  6

[Episode  6975]  total reward: -358.835104445, failure
[Episode  6976]  total reward: -28.835104445, success
[Episode  6977]  total reward: -496.835104445, success
[Episode  6978]  total reward: -22.835104445, success
[Episode  6979]  total reward: -652.835104445, success
[Episode  6980]  total reward: -28.835104445, success
[Episode  6981]  total reward: -22.835104445, success
[Episode  6982]  total reward: -22.835104445, success
[Episode  6983]  total reward: -28.835104445, failure
[Episode  6984]  total reward: -40.835104445, success
[Episode  6985]  total reward: -28.835104445, success
[Episode  6986]  total reward: -130.835104445, success
[Episode  6987]  total reward: -46.835104445, success
[Episode  6988]  total reward: 1.16489555504, failure
[Episode  6989]  total reward: -40.835104445, success
[Episode  6990]  total reward: -28.835104445, success
[Episode  6991]  total reward: -46.835104445, success
[Episode  6992]  total reward: -34.835104445, success
[Episode  6993]  total r

[Episode  7126]  total reward: -216.835104445, success
[Episode  7127]  total reward: -1788.78080881, failure
[Episode  7128]  total reward: -916.835104445, failure
[Episode  7129]  total reward: -88.835104445, success
[Episode  7130]  total reward: -1480.83510444, failure
[Episode  7131]  total reward: -1280.83510444, failure
[Episode  7132]  total reward: -538.835104445, success
[Episode  7133]  total reward: -778.835104445, success
[Episode  7134]  total reward: -948.835104445, failure
[Episode  7135]  total reward: -706.835104445, success
[Episode  7136]  total reward: -744.835104445, success
[Episode  7137]  total reward: -1330.83510444, failure
[Episode  7138]  total reward: -1396.83510444, failure
[Episode  7139]  total reward: -100.835104445, success
[Episode  7140]  total reward: -454.835104445, success
[Episode  7141]  total reward: -10.835104445, success
[Episode  7142]  total reward: -70.835104445, success
[Episode  7143]  total reward: 107.164895555, success
[Episode  7144

[Episode  7279]  total reward: -478.835104445, success
[Episode  7280]  total reward: -172.835104445, success
[Episode  7281]  total reward: -256.835104445, success
[Episode  7282]  total reward: -694.835104445, success
[Episode  7283]  total reward: -10.835104445, success
[Episode  7284]  total reward: -46.835104445, success
[Episode  7285]  total reward: -796.835104445, success
[Episode  7286]  total reward: -16.835104445, success
[Episode  7287]  total reward: -60.835104445, success
[Episode  7288]  total reward: -28.835104445, success
[Episode  7289]  total reward: -88.835104445, failure
[Episode  7290]  total reward: -580.835104445, success
[Episode  7291]  total reward: -22.835104445, success
[Episode  7292]  total reward: 107.164895555, success
[Episode  7293]  total reward: -4.83510444496, success
[Episode  7294]  total reward: -34.835104445, success
[Episode  7295]  total reward: -418.835104445, success
[Episode  7296]  total reward: -216.835104445, success
[Episode  7297]  to

[Episode  7429]  total reward: -1608.83510444, failure
[Episode  7430]  total reward: -964.835104445, failure
[Episode  7431]  total reward: -294.835104445, success
[Episode  7432]  total reward: -1408.83510444, failure
[Episode  7433]  total reward: -1054.83510444, failure
[Episode  7434]  total reward: -864.835104445, failure
[Episode  7435]  total reward: -788.780808811, failure
[Episode  7436]  total reward: -878.780808811, failure
[Episode  7437]  total reward: -1068.78080881, failure
[Episode  7438]  total reward: -1408.83510444, success
[Episode  7439]  total reward: -460.835104445, success
[Episode  7440]  total reward: -274.835104445, success
[Episode  7441]  total reward: -1030.83510444, success
[Episode  7442]  total reward: -1722.83510444, failure
[Episode  7443]  total reward: -900.835104445, failure
[Episode  7444]  total reward: -1138.83510444, failure
[Episode  7445]  total reward: -726.780808811, success
[Episode  7446]  total reward: -882.835104445, failure
[Episode  

[Episode  7579]  total reward: -854.835104445, failure
[Episode  7580]  total reward: -338.835104445, failure
[Episode  7581]  total reward: -1270.83510444, failure
[Episode  7582]  total reward: -4.83510444496, failure
[Episode  7583]  total reward: -550.835104445, success
[Episode  7584]  total reward: -292.835104445, failure
[Episode  7585]  total reward: -28.835104445, success
[Episode  7586]  total reward: -156.780808811, success
[Episode  7587]  total reward: -36.835104445, success
[Episode  7588]  total reward: -148.835104445, success
[Episode  7589]  total reward: -172.835104445, success
[Episode  7590]  total reward: -124.835104445, success
[Episode  7591]  total reward: -196.835104445, success
[Episode  7592]  total reward: -106.835104445, failure
[Episode  7593]  total reward: -124.835104445, success
[Episode  7594]  total reward: -496.835104445, success
[Episode  7595]  total reward: -88.835104445, success
[Episode  7596]  total reward: -34.835104445, success
[Episode  7597

[Episode  7732]  total reward: -352.835104445, success
[Episode  7733]  total reward: -238.835104445, success
[Episode  7734]  total reward: -100.835104445, success
[Episode  7735]  total reward: -214.835104445, success
[Episode  7736]  total reward: -402.780808811, success
[Episode  7737]  total reward: -82.835104445, success
[Episode  7738]  total reward: -76.835104445, success
[Episode  7739]  total reward: -322.835104445, success
[Episode  7740]  total reward: -358.835104445, success
[Episode  7741]  total reward: -286.835104445, success
[Episode  7742]  total reward: -106.835104445, success
[Episode  7743]  total reward: -178.835104445, success
[Episode  7744]  total reward: -118.835104445, success
[Episode  7745]  total reward: -262.835104445, success
[Episode  7746]  total reward: -148.835104445, success
[Episode  7747]  total reward: -100.835104445, success
[Episode  7748]  total reward: -76.835104445, success
[Episode  7749]  total reward: -454.835104445, success
[Episode  775

[Episode  7882]  total reward: -202.835104445, success
[Episode  7883]  total reward: -190.835104445, success
[Episode  7884]  total reward: -538.835104445, success
[Episode  7885]  total reward: -1066.83510444, failure
[Episode  7886]  total reward: -1164.83510444, success
[Episode  7887]  total reward: -1606.83510444, failure
[Episode  7888]  total reward: -64.835104445, success
[Episode  7889]  total reward: -1252.83510444, success
[Episode  7890]  total reward: -1282.83510444, failure
[Episode  7891]  total reward: -274.835104445, success
[Episode  7892]  total reward: -88.835104445, success
[Episode  7893]  total reward: -148.835104445, success
[Episode  7894]  total reward: -682.835104445, failure
[Episode  7895]  total reward: 75.164895555, success
[Episode  7896]  total reward: -382.835104445, success
[Episode  7897]  total reward: -94.835104445, success
[Episode  7898]  total reward: -628.835104445, success
[Episode  7899]  total reward: -100.835104445, success
[Episode  7900]

[Episode  8033]  total reward: -334.835104445, success
[Episode  8034]  total reward: -232.835104445, success
[Episode  8035]  total reward: -316.835104445, success
[Episode  8036]  total reward: -94.835104445, success
[Episode  8037]  total reward: -130.835104445, success
[Episode  8038]  total reward: -172.835104445, failure
[Episode  8039]  total reward: 63.164895555, success
[Episode  8040]  total reward: -490.835104445, success
[Episode  8041]  total reward: -320.835104445, failure
[Episode  8042]  total reward: -1768.83510444, failure
[Episode  8043]  total reward: -1750.83510444, failure
[Episode  8044]  total reward: -100.835104445, success
[Episode  8045]  total reward: -94.835104445, success
[Episode  8046]  total reward: -1012.83510444, success
[Episode  8047]  total reward: -1360.83510444, failure
[Episode  8048]  total reward: -94.835104445, success
[Episode  8049]  total reward: -208.835104445, success
[Episode  8050]  total reward: -430.835104445, success
[Episode  8051]

[Episode  8184]  total reward: -562.835104445, success
[Episode  8185]  total reward: -956.835104445, failure
[Episode  8186]  total reward: -1222.83510444, success
[Episode  8187]  total reward: -238.835104445, success
[Episode  8188]  total reward: -130.835104445, success
[Episode  8189]  total reward: -508.835104445, success
[Episode  8190]  total reward: -72.7808088114, success
[Episode  8191]  total reward: -82.835104445, success
[Episode  8192]  total reward: -472.835104445, success
[Episode  8193]  total reward: -130.835104445, success
[Episode  8194]  total reward: -130.835104445, success
[Episode  8195]  total reward: -346.835104445, success
[Episode  8196]  total reward: -124.835104445, success
[Episode  8197]  total reward: -100.835104445, success
[Episode  8198]  total reward: -1720.83510444, failure
[Episode  8199]  total reward: -1018.83510444, success
[Episode  8200]  total reward: -1120.83510444, success
[Episode  8201]  total reward: -88.835104445, success
[Episode  82

[Episode  8334]  total reward: -322.835104445, failure
[Episode  8335]  total reward: -58.835104445, failure
[Episode  8336]  total reward: -184.835104445, failure
[Episode  8337]  total reward: -102.835104445, failure
[Episode  8338]  total reward: -1080.83510444, failure
[Episode  8339]  total reward: -482.835104445, failure
[Episode  8340]  total reward: -98.7808088114, failure
[Episode  8341]  total reward: -602.835104445, failure
[Episode  8342]  total reward: -942.780808811, failure
[Episode  8343]  total reward: -1092.78080881, failure
[Episode  8344]  total reward: -1014.78080881, success
[Episode  8345]  total reward: -856.835104445, failure
[Episode  8346]  total reward: -518.780808811, failure
[Episode  8347]  total reward: -190.835104445, failure
[Episode  8348]  total reward: -1160.78080881, failure
[Episode  8349]  total reward: -226.835104445, failure
[Episode  8350]  total reward: -498.780808811, failure
[Episode  8351]  total reward: -446.835104445, failure
[Episode  8

[Episode  8484]  total reward: -1666.83510444, failure
[Episode  8485]  total reward: -1396.83510444, failure
[Episode  8486]  total reward: -1720.83510444, failure
[Episode  8487]  total reward: -1602.83510444, success
[Episode  8488]  total reward: -292.835104445, success
[Episode  8489]  total reward: -944.835104445, failure
[Episode  8490]  total reward: -1220.83510444, failure
[Episode  8491]  total reward: -606.835104445, success
[Episode  8492]  total reward: -1158.83510444, failure
[Episode  8493]  total reward: -1552.83510444, failure
[Episode  8494]  total reward: -1660.83510444, failure
[Episode  8495]  total reward: -392.835104445, success
[Episode  8496]  total reward: -184.835104445, success
[Episode  8497]  total reward: -76.835104445, success
[Episode  8498]  total reward: -46.835104445, success
[Episode  8499]  total reward: -58.835104445, success
[Episode  8500]  total reward: -40.835104445, success
[Episode  8501]  total reward: -52.835104445, success
[Episode  8502]

[Episode  8634]  total reward: -736.835104445, failure
[Episode  8635]  total reward: -1116.83510444, failure
[Episode  8636]  total reward: -1248.78080881, failure
[Episode  8637]  total reward: -1090.83510444, failure
[Episode  8638]  total reward: -1098.83510444, failure
[Episode  8639]  total reward: -432.780808811, failure
[Episode  8640]  total reward: -730.835104445, failure
[Episode  8641]  total reward: -540.835104445, failure
[Episode  8642]  total reward: -780.780808811, failure
[Episode  8643]  total reward: -312.780808811, failure
[Episode  8644]  total reward: -1080.78080881, failure
[Episode  8645]  total reward: -904.780808811, failure
[Episode  8646]  total reward: -814.780808811, failure
[Episode  8647]  total reward: -1270.83510444, failure
[Episode  8648]  total reward: -892.835104445, failure
[Episode  8649]  total reward: -196.835104445, success
[Episode  8650]  total reward: -142.835104445, success
[Episode  8651]  total reward: -1014.83510444, failure
[Episode  

[Episode  8784]  total reward: -812.835104445, failure
[Episode  8785]  total reward: -238.835104445, success
[Episode  8786]  total reward: -76.835104445, success
[Episode  8787]  total reward: -940.835104445, failure
[Episode  8788]  total reward: -884.835104445, failure
[Episode  8789]  total reward: -118.835104445, failure
[Episode  8790]  total reward: -784.835104445, failure
[Episode  8791]  total reward: -996.835104445, failure
[Episode  8792]  total reward: -1164.83510444, failure
[Episode  8793]  total reward: -1342.83510444, failure
[Episode  8794]  total reward: -1636.83510444, failure
[Episode  8795]  total reward: -250.835104445, success
[Episode  8796]  total reward: -1586.83510444, failure
[Episode  8797]  total reward: -946.835104445, success
[Episode  8798]  total reward: -1378.83510444, success
[Episode  8799]  total reward: -1384.83510444, failure
[Episode  8800]  total reward: -820.835104445, success
[Episode  8801]  total reward: -462.835104445, success
[Episode  8

[Episode  8934]  total reward: -366.780808811, failure
[Episode  8935]  total reward: -378.780808811, success
[Episode  8936]  total reward: -588.780808811, failure
[Episode  8937]  total reward: -1596.78080881, failure
[Episode  8938]  total reward: -1656.78080881, failure
[Episode  8939]  total reward: -1428.78080881, failure
[Episode  8940]  total reward: -732.780808811, success
[Episode  8941]  total reward: -1644.78080881, failure
[Episode  8942]  total reward: -1650.78080881, failure
[Episode  8943]  total reward: -616.780808811, success
[Episode  8944]  total reward: -1222.83510444, failure
[Episode  8945]  total reward: -1654.78080881, failure
[Episode  8946]  total reward: -1302.78080881, failure
[Episode  8947]  total reward: -704.780808811, failure
[Episode  8948]  total reward: -996.780808811, failure
[Episode  8949]  total reward: -798.780808811, failure
[Episode  8950]  total reward: -782.780808811, failure
[Episode  8951]  total reward: -1328.78080881, failure
[Episode  

[Episode  9084]  total reward: -1122.83510444, failure
[Episode  9085]  total reward: -1232.83510444, failure
[Episode  9086]  total reward: -892.835104445, failure
[Episode  9087]  total reward: -162.835104445, success
[Episode  9088]  total reward: -94.835104445, success
[Episode  9089]  total reward: -856.835104445, success
[Episode  9090]  total reward: -1132.83510444, failure
[Episode  9091]  total reward: -1672.83510444, failure
[Episode  9092]  total reward: -1528.83510444, failure
[Episode  9093]  total reward: -1636.83510444, failure
[Episode  9094]  total reward: -1678.83510444, failure
[Episode  9095]  total reward: -1570.83510444, failure
[Episode  9096]  total reward: -1652.83510444, failure
[Episode  9097]  total reward: -1546.83510444, failure
[Episode  9098]  total reward: -1564.83510444, failure
[Episode  9099]  total reward: -766.835104445, success
[Episode  9100]  total reward: -1258.83510444, failure
[Episode  9101]  total reward: -1386.83510444, failure
[Episode  9

[Episode  9233]  total reward: -1594.83510444, failure
[Episode  9234]  total reward: -88.835104445, success
[Episode  9235]  total reward: -100.835104445, failure
[Episode  9236]  total reward: -1270.83510444, failure
[Episode  9237]  total reward: -292.835104445, success
[Episode  9238]  total reward: -1252.83510444, failure
[Episode  9239]  total reward: -1294.83510444, failure
[Episode  9240]  total reward: -802.835104445, failure
[Episode  9241]  total reward: -778.835104445, failure
[Episode  9242]  total reward: -964.835104445, failure
[Episode  9243]  total reward: -1402.83510444, failure
[Episode  9244]  total reward: -1396.83510444, failure
[Episode  9245]  total reward: -1538.83510444, failure
[Episode  9246]  total reward: -904.835104445, success
[Episode  9247]  total reward: -1214.83510444, failure
[Episode  9248]  total reward: -1618.83510444, failure
[Episode  9249]  total reward: -1756.83510444, failure
[Episode  9250]  total reward: -1650.83510444, failure
[Episode  9

[Episode  9383]  total reward: -16.835104445, failure
[Episode  9384]  total reward: -256.835104445, success
[Episode  9385]  total reward: -766.835104445, failure
[Episode  9386]  total reward: -1078.83510444, failure
[Episode  9387]  total reward: -298.835104445, failure
[Episode  9388]  total reward: -556.835104445, failure
[Episode  9389]  total reward: -970.835104445, failure
[Episode  9390]  total reward: -1204.83510444, failure
[Episode  9391]  total reward: -730.835104445, failure
[Episode  9392]  total reward: -1390.83510444, failure
[Episode  9393]  total reward: -112.835104445, success
[Episode  9394]  total reward: -850.835104445, success
[Episode  9395]  total reward: -1020.83510444, success
[Episode  9396]  total reward: -1030.83510444, failure
[Episode  9397]  total reward: -268.835104445, success
[Episode  9398]  total reward: -432.835104445, success
[Episode  9399]  total reward: -798.835104445, failure
[Episode  9400]  total reward: -1260.83510444, success
[Episode  9

[Episode  9536]  total reward: -94.835104445, success
[Episode  9537]  total reward: -106.835104445, success
[Episode  9538]  total reward: -94.835104445, success
[Episode  9539]  total reward: -202.835104445, success
[Episode  9540]  total reward: -94.835104445, success
[Episode  9541]  total reward: -220.835104445, success
[Episode  9542]  total reward: -88.835104445, success
[Episode  9543]  total reward: -94.835104445, success
[Episode  9544]  total reward: -76.835104445, success
[Episode  9545]  total reward: -88.835104445, success
[Episode  9546]  total reward: -82.835104445, success
[Episode  9547]  total reward: -82.835104445, success
[Episode  9548]  total reward: -496.835104445, success
[Episode  9549]  total reward: -94.835104445, success
[Episode  9550]  total reward: -46.835104445, success
[Episode  9551]  total reward: -536.835104445, success
[Episode  9552]  total reward: -94.835104445, success
[Episode  9553]  total reward: -94.835104445, success
[Episode  9554]  total 

[Episode  9687]  total reward: -340.835104445, failure
[Episode  9688]  total reward: -450.835104445, success
[Episode  9689]  total reward: -36.835104445, success
[Episode  9690]  total reward: 23.164895555, success
[Episode  9691]  total reward: 29.164895555, success
[Episode  9692]  total reward: 29.164895555, success
[Episode  9693]  total reward: -28.835104445, success
[Episode  9694]  total reward: -994.835104445, failure
[Episode  9695]  total reward: -1276.83510444, failure
[Episode  9696]  total reward: 71.164895555, success
[Episode  9697]  total reward: -64.835104445, success
[Episode  9698]  total reward: -1144.83510444, success
[Episode  9699]  total reward: -112.835104445, success
[Episode  9700]  total reward: 83.164895555, success
[Episode  9701]  total reward: 89.164895555, success
[Episode  9702]  total reward: 77.164895555, success
[Episode  9703]  total reward: -400.835104445, success
[Episode  9704]  total reward: 83.164895555, success
[Episode  9705]  total reward

[Episode  9838]  total reward: -1560.83510444, failure
[Episode  9839]  total reward: -364.835104445, success
[Episode  9840]  total reward: -256.835104445, success
[Episode  9841]  total reward: -1498.83510444, failure
[Episode  9842]  total reward: 71.164895555, success
[Episode  9843]  total reward: 71.164895555, success
[Episode  9844]  total reward: -94.835104445, success
[Episode  9845]  total reward: -106.835104445, success
[Episode  9846]  total reward: -1276.83510444, success
[Episode  9847]  total reward: -148.835104445, success
[Episode  9848]  total reward: -244.835104445, success
[Episode  9849]  total reward: -82.835104445, success
[Episode  9850]  total reward: -82.835104445, success
[Episode  9851]  total reward: -160.835104445, success
[Episode  9852]  total reward: -118.835104445, success
[Episode  9853]  total reward: -856.835104445, success
[Episode  9854]  total reward: -70.835104445, success
[Episode  9855]  total reward: -82.835104445, success
[Episode  9856]  to

[Episode  9989]  total reward: -88.835104445, success
[Episode  9990]  total reward: -88.835104445, success
[Episode  9991]  total reward: -112.835104445, success
[Episode  9992]  total reward: -106.835104445, success
[Episode  9993]  total reward: -88.835104445, success
[Episode  9994]  total reward: -94.835104445, success
[Episode  9995]  total reward: -82.835104445, success
[Episode  9996]  total reward: -82.835104445, success
[Episode  9997]  total reward: -100.835104445, success
[Episode  9998]  total reward: -810.835104445, failure
[Episode  9999]  total reward: -82.835104445, success
2190.31798697


## DQN to cooperate 2 Mobile Manipulator
* made by hotae
* network includes one agent's action
* hidden state(goal, obs1, obs2)

In [None]:
#Plotting setting
%matplotlib notebook
import matplotlib.pyplot as plt
from matplotlib import animation
from time import sleep

import numpy as np
import tensorflow as tf
import random
import dqn_cooperation
from collections import deque

import time
start = time.time()

# Create New environment with transition law
ACTION_NUM = 4
INPUT_SIZE = 4
OUTPUT_SIZE = ACTION_NUM
VEL = 0.5
TIME_GAP = 1

def annealing_epsilon(episode, min_e, max_e, target_episode):
    slope = (min_e - max_e) / (target_episode)
    intercept = max_e

    return max(min_e, slope * episode + intercept)

class new_env:     
    def create_env(self, arg_state=[0.,0.,1.0,1.0], g_pos=[5,5], obs_pos1=[2,2], obs_pos2=[3,4], obs_size=5):
        self.state = np.array(arg_state) # reset
        self.n_state = np.array(arg_state) #reset
        self.goal_pos = np.array(g_pos)
        self.obstacle_pos1 = np.array(obs_pos1)
        self.obstacle_pos2 = np.array(obs_pos2)
        self.obstacle_size = obs_size
        return self.state, self.goal_pos, self.obstacle_pos1, self.obstacle_pos2, self.obstacle_size
        
    def next_step(self, arg_state, arg_action):
        self._fail = False
        self.reward = 0
        # choose the action
        arg_action1 = arg_action 
        
        '''position update through action
        UP = 0, DOWN = 1, LEFT = 2, RIGHT = 3'''        
        # agent located on the first state 
        if arg_action1 == 0:
            mid_state = arg_state + np.array([0,1,0,0])*VEL*TIME_GAP # move the 1st agent
        elif arg_action1 == 1:
            mid_state = arg_state + np.array([0,-1,0,0])*VEL*TIME_GAP
        elif arg_action1 == 2:
            mid_state = arg_state + np.array([-1,0,0,0])*VEL*TIME_GAP
        else:
            mid_state = arg_state + np.array([1,0,0,0])*VEL*TIME_GAP
        
        self.n_state[0:2] = mid_state[2:4] # change 1 and 2
        self.n_state[2:4] = mid_state[0:2]        
       
        '''get the reward'''
        if np.linalg.norm((self.n_state[0:2]+self.n_state[2:4])/2-self.goal_pos)!=0:
            self.reward = (1/np.linalg.norm((self.n_state[0:2]+self.n_state[2:4])/2-self.goal_pos)-\
            1/np.linalg.norm((arg_state[0:2]+arg_state[2:4])/2-self.goal_pos))*30
        if np.linalg.norm(self.n_state[0:2]-self.obstacle_pos1)<1 or np.linalg.norm(self.n_state[2:4]-self.obstacle_pos1)<1:
            self.reward = -2 # collision
        if np.linalg.norm(self.n_state[0:2]-self.obstacle_pos2)<1 or np.linalg.norm(self.n_state[2:4]-self.obstacle_pos2)<1:
            self.reward = -2 # collision
        if np.linalg.norm((self.n_state[0:2]+self.n_state[2:4])/2-self.goal_pos)<1: # approximately set condition
            self.reward =  100 # achieve goal
            self._fail = True
        if np.linalg.norm(self.n_state[0:2]-self.n_state[2:4])>2.5:
            self.reward = -60 # drop the object
            self._fail = True       
        '''
        if any(x<0 for x in self.n_state):            
            self.reward = -50 # away from the map
            self._fail = True   
        if any(x>7 for x in self.n_state):            
            self.reward = -80 # away from the map
            self._fail = True 
        '''
        return self.n_state, self.reward, self._fail
    
#env = new_env() 
#state, g_pos, o_pos, o_size = env.create_env() # set the enviornment
DISCOUNT_RATE = 0.98
REPLAY_MEMORY = 10000
BATCH_SIZE = 50
MAX_EPI = 120000

# minimum epsilon for epsilon greedy
MIN_E = 0.0
# epsilon will be `MIN_E` at `EPSILON_DECAYING_EPISODE`
EPSILON_DECAYING_EPISODE = MAX_EPI * 0.5

def train_minibatch(DQN, minibatch):
    state_array = np.array([x[0] for x in minibatch])
    action_array = np.array([x[1] for x in minibatch]) # [ x among 0~3] * BATCH_SIZE
    reward_array = np.array([x[2] for x in minibatch])
    n_state_array = np.array([x[3] for x in minibatch])
    _fail_array = np.array([x[4] for x in minibatch])    
    
    X_batch = state_array   
    Y_batch = DQN.predict(state_array) # 4 elements * BATCH_SIZE 
    Q_target = reward_array + DISCOUNT_RATE*np.max(DQN.predict(n_state_array),axis=1)*~_fail_array # if fail, Q = reward
    
    Y_batch[np.arange(len(X_batch)), action_array] = Q_target
    
    # Train
    cost_batch, _ = DQN.update(X_batch, Y_batch)
    return cost_batch

def main():
    replay_buffer = deque(maxlen=REPLAY_MEMORY) # detract element from both sides    
    
    with tf.Session() as sess:
        mainDQN = dqn_cooperation.DQN(sess, INPUT_SIZE, OUTPUT_SIZE)
        mainDQN.build_network(8,32,0.002)
        init = tf.global_variables_initializer()
        sess.run(init)
        reward_accum_last100 = 0
        reward_sum = 0
        '''
        #set list for drawing graph
        arrX = []
        arrY = []
        # draw the graph
        fig = plt.figure()
        ax = plt.axes(xlim=(0,1000), ylim=(-50,100))
        '''
        MAP_SIZE = 7
        game = ENV(MAP_SIZE, [2,2], [3,4], [0,0,1,1], [5,5]) # __init__(self, map_size, obs_pos1, obs_pos2, robot_start_pos, goal_pos):
        game.render_env()
        
        for episode in range(MAX_EPI):
            e = annealing_epsilon(episode, MIN_E, 1.0, EPSILON_DECAYING_EPISODE)
            _fail = False
            count = 0 # how many moves included in an episode
            env1 = new_env()
            state, _, _, _, _= env1.create_env() # get only state, return self.state, self.goal_pos, self.obstacle_pos1, self.obstacle_pos2, self.obstacle_size
            if episode >MAX_EPI-100:
                reward_accum_last100 += reward_sum # to calculate success ratio, accumulate reward_sum
                                    
            '''
            # draw the graph lively
            x = episode
            y = reward_sum
            arrX.append(x)
            arrY.append(y)
            a= plt.plot(arrX,arrY,'g')
            fig.canvas.draw()    
            sleep(0.2)
            #a.remove()
            '''
            reward_sum = 0            
            while not _fail:
                # after sufficient learning, we present the game scene
                if episode >MAX_EPI-20:
                    game.update(state)
                    game.render_env()
                    
                count += 1
                # apply constraint about walls                   
                if np.random.rand()< e:
                    act_candi = range(ACTION_NUM)
                    if state[0]<TIME_GAP*VEL:
                        act_candi.remove(2) # remove action 2(left)
                    if state[1]<TIME_GAP*VEL:
                        act_candi.remove(1) # remove action 1(down)
                    if state[0] > MAP_SIZE - TIME_GAP*VEL:
                        act_candi.remove(3) # remove action 3(right)
                    if state[1] > MAP_SIZE - TIME_GAP*VEL:
                        act_candi.remove(0) # remove action 0(up)    
                    act_candi1 = np.array(act_candi)
                    action_l = random.sample(act_candi1,1) # choose upL, down, left, right 
                    action = action_l[0]
                else:  
                    act_candi = mainDQN.predict(state)
                    act_candi = act_candi.flatten()
                    
                    if state[0]<TIME_GAP*VEL:
                        act_candi[2] = -1000 # put a large num on action 2(left)
                    if state[1]<TIME_GAP*VEL:
                        act_candi[1] = -1000 # put a large num on action 1(down)
                    if state[0] > MAP_SIZE - TIME_GAP*VEL:
                        act_candi[3] = -1000 # remove action 3(right)
                    if state[1] > MAP_SIZE - TIME_GAP*VEL:
                        act_candi[0] = -1000 # remove action 0(up)    
                    action = np.argmax(act_candi)   
                    '''
                    dd_predict = mainDQN.predict(state).flatten()
                    aa = np.max(dd_predict)
                    max_indx, = np.where(dd_predict==aa)                    
                    action = random.sample(max_indx,1)[0]
                    '''
                n_state, reward, _fail = env1.next_step(state, action) # have to input the action 
                # if count >30, stop that episode and start new episode, counted every time one agent moves
                if count >40:
                    reward = -30
                    _fail = True
                    
                reward_sum += reward    # sum total reward and penalty about long time(-0.5)          
                replay_buffer.append((state, action, reward, n_state, _fail)) #resolve the correlation
                
                state = n_state
                           
                if len(replay_buffer)>BATCH_SIZE:
                    minibatch = random.sample(replay_buffer, BATCH_SIZE)                    
                    train_minibatch(mainDQN, minibatch)
            
            print("[Episode {:>5}]  total reward: {:>5}".format(episode, reward_sum))
    print("Success ratio: {}".format(reward_accum_last100/100))
    
        
if __name__ == "__main__":
    main()    
    end = time.time()-start
    print(end)         

