In [0]:
import tensorflow as tf      
import numpy as np           
from vizdoom import *        

import random                
import time                  
from skimage import transform

from collections import deque
import matplotlib.pyplot as plt 

import warnings 
warnings.filterwarnings('ignore') 

  from ._conv import register_converters as _register_converters


In [0]:
def create_environment():
    """Create, configure and start the ViZDoom "basic" scenario.

    Loads ``basic.cfg`` / ``basic.wad`` and then applies the screen, rendering,
    button, timing and reward settings below before calling ``game.init()``.

    Returns:
        tuple: ``(game, possible_actions)`` where ``game`` is the initialised
        ``DoomGame`` and ``possible_actions`` is the list of one-hot button
        vectors ``[left, right, shoot]``.
    """
    game = DoomGame()

    # Scenario files. Every setter below runs after the config file is loaded.
    game.load_config("basic.cfg")
    game.set_doom_scenario_path("basic.wad")
    game.set_doom_map("map01")

    # Screen and auxiliary buffers.
    # The enums are used through the names brought in by `from vizdoom import *`
    # (exactly like DoomGame above) instead of a `vizdoom.` prefix that the
    # notebook never imports explicitly.
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_screen_format(ScreenFormat.RGB24)
    game.set_depth_buffer_enabled(True)
    game.set_labels_buffer_enabled(True)
    game.set_automap_buffer_enabled(True)

    # Rendering options.
    game.set_render_hud(False)
    game.set_render_minimal_hud(False)
    game.set_render_crosshair(False)
    game.set_render_weapon(True)
    game.set_render_decals(False)
    game.set_render_particles(False)
    game.set_render_effects_sprites(False)
    game.set_render_messages(False)
    game.set_render_corpses(False)
    game.set_render_screen_flashes(True)

    # Buttons the agent may press; their order defines the action vector layout.
    game.add_available_button(Button.MOVE_LEFT)
    game.add_available_button(Button.MOVE_RIGHT)
    game.add_available_button(Button.ATTACK)
    game.add_available_game_variable(GameVariable.AMMO2)

    # Episode timing, window/sound, per-step reward and control mode.
    game.set_episode_timeout(200)
    game.set_episode_start_time(10)
    # Pass False here instead to run without a visible game window.
    game.set_window_visible(True)
    game.set_sound_enabled(True)
    game.set_living_reward(-1)
    game.set_mode(Mode.PLAYER)

    game.init()

    # One-hot encodings matching the button order registered above.
    left = [1, 0, 0]
    right = [0, 1, 0]
    shoot = [0, 0, 1]
    possible_actions = [left, right, shoot]

    return game, possible_actions
       
def test_environment(episodes=10):
    """Play random actions in the basic scenario as a smoke test.

    Starts its own ``DoomGame`` from ``basic.cfg`` / ``basic.wad``, plays
    ``episodes`` episodes choosing a random action at every step, and prints
    each action, its reward and the total reward of every episode.

    Args:
        episodes: Number of episodes to play (defaults to 10).
    """
    game = DoomGame()
    game.load_config("basic.cfg")
    game.set_doom_scenario_path("basic.wad")
    game.init()

    shoot = [0, 0, 1]
    left = [1, 0, 0]
    right = [0, 1, 0]
    actions = [shoot, left, right]

    try:
        for _ in range(episodes):
            game.new_episode()
            while not game.is_episode_finished():
                action = random.choice(actions)
                print(action)
                reward = game.make_action(action)
                print ("\treward:", reward)
                # Short pause between steps.
                time.sleep(0.02)
            print ("Result:", game.get_total_reward())
            time.sleep(2)
    finally:
        # Always shut the game down, even if a step above raised.
        game.close()

In [0]:
# Start the Doom environment once; the game handle and the one-hot action list
# are kept as notebook globals and used by the cells below.
game, possible_actions = create_environment()

In [0]:

def preprocess_frame(frame):
    """Crop a screen buffer, divide it by 255 and resize it to 84x84.

    Args:
        frame: Array indexed as (rows, cols) or (rows, cols, channels). For
            inputs that are not 2-D only channel 0 is kept.

    Returns:
        The 84x84 array produced by ``skimage.transform.resize``.
    """
    # Drop 30 rows from the top, 10 from the bottom and 30 columns on each side.
    rows, cols = slice(30, -10), slice(30, -30)

    if len(frame.shape) == 2:
        region = frame[rows, cols]
    else:
        region = frame[rows, cols, 0]

    return transform.resize(region / 255.0, [84, 84])

In [0]:
# Number of consecutive frames stacked together to form one state.
stack_size = 4

# Frame stack pre-filled with blank 84x84 frames.
# `np.int` was only an alias of the builtin `int` and is gone from current
# NumPy releases, so the builtin is used directly; maxlen follows stack_size
# instead of repeating the literal 4.
stacked_frames = deque([np.zeros((84, 84), dtype=int) for _ in range(stack_size)], maxlen=stack_size)

def stack_frames(stacked_frames, state, is_new_episode):
    """Preprocess a frame and fold it into the rolling frame stack.

    Args:
        stacked_frames: Deque holding the most recent preprocessed frames.
        state: Raw frame, passed through ``preprocess_frame``.
        is_new_episode: When true, the stack is rebuilt from this frame alone;
            otherwise the frame is appended to the stack that was passed in.

    Returns:
        tuple: ``(stacked_state, stacked_frames)`` where ``stacked_state`` is
        the frames stacked along axis 2 and ``stacked_frames`` is the deque to
        pass to the next call.
    """
    frame = preprocess_frame(state)

    if is_new_episode:
        # A new episode has no history yet: fill the whole stack with the first
        # frame. The length comes from the module-level `stack_size`.
        stacked_frames = deque([frame] * stack_size, maxlen=stack_size)
    else:
        # The deque's maxlen drops the oldest frame automatically.
        stacked_frames.append(frame)

    stacked_state = np.stack(stacked_frames, axis=2)

    return stacked_state, stacked_frames

In [0]:

# --- Model hyperparameters ---
state_size = [84,84,4]      # network input shape: 84x84 frames, 4 of them stacked on the last axis
action_size = game.get_available_buttons_size()              # number of buttons the game exposes
learning_rate =  0.0002     # passed to the networks' optimizer


# --- Training hyperparameters ---
total_episodes = 500        # episodes run by the training loop
max_steps = 100              # step cap per episode in the training loop
batch_size = 64             # transitions sampled from memory per training step


# --- Exploration parameters (consumed by predict_action) ---
explore_start = 1.0           # exploration probability at decay step 0
explore_stop = 0.01             # value the exploration probability decays towards
decay_rate = 0.0001            # exponential decay rate per decay step


# --- Q-learning ---
gamma = 0.95               # discount applied to the next state's max Q-value


# --- Replay memory ---
pretrain_length = batch_size   # random-action transitions stored before training starts
memory_size = 1000000         # maximum number of transitions the memory keeps


# The training cell only runs when this is true.
training = True


# NOTE(review): not referenced by any other visible cell — confirm it is still needed.
episode_render = False

In [0]:
class DQNetwork:
    """Convolutional Q-network: three conv + batch-norm + ELU stages and one hidden dense layer.

    Builds, inside ``tf.variable_scope(name)``, the input placeholders, the
    forward pass, a mean-squared-error loss between ``target_Q`` and the
    Q-value of the chosen action, and an RMSProp training op.

    Args:
        state_size: Shape of a single input state, e.g. ``[84, 84, 4]``.
        action_size: Number of discrete actions. Sets the width of the one-hot
            ``actions_`` placeholder and of the Q-value output layer.
        learning_rate: Learning rate handed to ``tf.train.RMSPropOptimizer``.
        name: Variable scope the graph is created under.
    """
    def __init__(self, state_size, action_size, learning_rate, name='DQNetwork'):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate

        with tf.variable_scope(name):
            # Batch of states, batch of one-hot actions, batch of scalar targets.
            self.inputs_ = tf.placeholder(tf.float32, [None, *state_size], name="inputs")
            # Width follows action_size instead of a hard-coded 3.
            self.actions_ = tf.placeholder(tf.float32, [None, self.action_size], name="actions_")
            self.target_Q = tf.placeholder(tf.float32, [None], name="target")

            # The shape comments below assume an 84x84 spatial input.
            # NOTE: every batch-norm layer is built with training=True, so batch
            # statistics are used on every run of the graph.
            self.conv1 = tf.layers.conv2d(inputs=self.inputs_,
                                          filters=32,
                                          kernel_size=[8, 8],
                                          strides=[4, 4],
                                          padding="VALID",
                                          kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                          name="conv1")

            self.conv1_batchnorm = tf.layers.batch_normalization(self.conv1,
                                                                 training=True,
                                                                 epsilon=1e-5,
                                                                 name='batch_norm1')

            self.conv1_out = tf.nn.elu(self.conv1_batchnorm, name="conv1_out")
            ## --> [20, 20, 32]

            self.conv2 = tf.layers.conv2d(inputs=self.conv1_out,
                                          filters=64,
                                          kernel_size=[4, 4],
                                          strides=[2, 2],
                                          padding="VALID",
                                          kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                          name="conv2")

            self.conv2_batchnorm = tf.layers.batch_normalization(self.conv2,
                                                                 training=True,
                                                                 epsilon=1e-5,
                                                                 name='batch_norm2')

            self.conv2_out = tf.nn.elu(self.conv2_batchnorm, name="conv2_out")
            ## --> [9, 9, 64]

            self.conv3 = tf.layers.conv2d(inputs=self.conv2_out,
                                          filters=128,
                                          kernel_size=[4, 4],
                                          strides=[2, 2],
                                          padding="VALID",
                                          kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                          name="conv3")

            self.conv3_batchnorm = tf.layers.batch_normalization(self.conv3,
                                                                 training=True,
                                                                 epsilon=1e-5,
                                                                 name='batch_norm3')

            self.conv3_out = tf.nn.elu(self.conv3_batchnorm, name="conv3_out")
            ## --> [3, 3, 128]

            self.flatten = tf.layers.flatten(self.conv3_out)
            ## --> [1152]

            self.fc = tf.layers.dense(inputs=self.flatten,
                                      units=512,
                                      activation=tf.nn.elu,
                                      kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                      name="fc1")

            # One Q-value per action; width follows action_size instead of a hard-coded 3.
            self.output = tf.layers.dense(inputs=self.fc,
                                          kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                          units=self.action_size,
                                          activation=None)

            # The one-hot mask keeps only the Q-value of the action that was taken.
            self.Q = tf.reduce_sum(tf.multiply(self.output, self.actions_), axis=1)

            # Mean squared difference between the target and the predicted Q-value.
            self.loss = tf.reduce_mean(tf.square(self.target_Q - self.Q))

            self.optimizer = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss)

In [0]:
# Clear the default graph before building the network.
tf.reset_default_graph()

print(state_size)
# NOTE(review): this rebinds the name `DQNetwork` from the class to an instance, so the
# class is unreachable afterwards and running this cell a second time fails (the
# instance is not callable). Consider a distinct instance name; later cells read the
# global `DQNetwork`, so any rename has to be applied there as well.
DQNetwork = DQNetwork(state_size, action_size, learning_rate)

[84, 84, 4]

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.batch_normalization instead.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.cast instead.


In [0]:
class DDDQNNet:
    """Dueling Q-network with separate value and advantage streams.

    Builds, inside ``tf.variable_scope(name)``, three conv + ELU stages that
    feed a value stream (one output) and an advantage stream (``action_size``
    outputs). They are combined as ``value + (advantage - mean(advantage))``.
    The loss is the importance-weighted mean squared error between
    ``target_Q`` and the Q-value of the chosen action, minimised with RMSProp.

    Args:
        state_size: Shape of a single input state, e.g. ``[84, 84, 4]``.
        action_size: Number of discrete actions.
        learning_rate: Learning rate handed to ``tf.train.RMSPropOptimizer``.
        name: Variable scope the graph is created under.
    """
    def __init__(self, state_size, action_size, learning_rate, name):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.name = name

        with tf.variable_scope(self.name):
            self.inputs_ = tf.placeholder(tf.float32, [None, *state_size], name="inputs")

            # Per-sample importance-sampling weights, shape [batch, 1].
            # They default to 1.0 so the loss can be evaluated without feeding
            # them (plain, unweighted replay); feeding a [batch, 1] array still
            # works exactly as with an ordinary placeholder.
            self.ISWeights_ = tf.placeholder_with_default(tf.ones([1, 1], dtype=tf.float32),
                                                          shape=[None, 1],
                                                          name='IS_weights')

            self.actions_ = tf.placeholder(tf.float32, [None, action_size], name="actions_")

            self.target_Q = tf.placeholder(tf.float32, [None], name="target")

            # --- Shared convolutional trunk ---
            self.conv1 = tf.layers.conv2d(inputs=self.inputs_,
                                          filters=32,
                                          kernel_size=[8, 8],
                                          strides=[4, 4],
                                          padding="VALID",
                                          kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                          name="conv1")

            self.conv1_out = tf.nn.elu(self.conv1, name="conv1_out")

            self.conv2 = tf.layers.conv2d(inputs=self.conv1_out,
                                          filters=64,
                                          kernel_size=[4, 4],
                                          strides=[2, 2],
                                          padding="VALID",
                                          kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                          name="conv2")

            self.conv2_out = tf.nn.elu(self.conv2, name="conv2_out")

            self.conv3 = tf.layers.conv2d(inputs=self.conv2_out,
                                          filters=128,
                                          kernel_size=[4, 4],
                                          strides=[2, 2],
                                          padding="VALID",
                                          kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                          name="conv3")

            self.conv3_out = tf.nn.elu(self.conv3, name="conv3_out")

            self.flatten = tf.layers.flatten(self.conv3_out)

            # --- Value stream: one scalar per state ---
            self.value_fc = tf.layers.dense(inputs=self.flatten,
                                            units=512,
                                            activation=tf.nn.elu,
                                            kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                            name="value_fc")

            self.value = tf.layers.dense(inputs=self.value_fc,
                                         units=1,
                                         activation=None,
                                         kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                         name="value")

            # --- Advantage stream: one value per action ---
            self.advantage_fc = tf.layers.dense(inputs=self.flatten,
                                                units=512,
                                                activation=tf.nn.elu,
                                                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                                name="advantage_fc")

            self.advantage = tf.layers.dense(inputs=self.advantage_fc,
                                             units=self.action_size,
                                             activation=None,
                                             kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                             name="advantages")

            # Combine the streams: value + (advantage - mean advantage over actions).
            self.output = self.value + tf.subtract(self.advantage, tf.reduce_mean(self.advantage, axis=1, keepdims=True))

            # The one-hot mask keeps only the Q-value of the action that was taken.
            self.Q = tf.reduce_sum(tf.multiply(self.output, self.actions_), axis=1)

            # Absolute TD error per sample.
            self.absolute_errors = tf.abs(self.target_Q - self.Q)

            # Flatten the [batch, 1] weights to [batch] before weighting the
            # [batch] squared errors. Multiplying the two shapes directly
            # broadcasts to a [batch, batch] matrix, pairing every weight with
            # every sample's error instead of its own.
            is_weights = tf.reshape(self.ISWeights_, [-1])
            self.loss = tf.reduce_mean(is_weights * tf.squared_difference(self.target_Q, self.Q))

            self.optimizer = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss)

In [0]:
# Clear the default graph, discarding whatever was built in it before this cell.
tf.reset_default_graph()


# Network used for action selection and training in the cells below.
# NOTE(review): this assignment binds the name `DQNetwork` to a DDDQNNet instance,
# shadowing the `DQNetwork` class defined earlier.
DQNetwork = DDDQNNet(state_size, action_size, learning_rate, name="DQNetwork")


# Second network with the same architecture under its own variable scope.
# NOTE(review): no visible cell reads or updates TargetNetwork — confirm where it is used.
TargetNetwork = DDDQNNet(state_size, action_size, learning_rate, name="TargetNetwork")


In [0]:
class Memory():
    """Bounded replay buffer backed by a ``collections.deque``.

    Once ``max_size`` entries are stored, adding a new one drops the oldest.
    """

    def __init__(self, max_size):
        """Create an empty buffer that keeps at most ``max_size`` experiences."""
        self.buffer = deque(maxlen=max_size)

    def add(self, experience):
        """Append one experience to the end of the buffer."""
        self.buffer.append(experience)

    def sample(self, batch_size):
        """Return ``batch_size`` distinct experiences picked uniformly at random.

        Sampling is without replacement, so ``np.random.choice`` raises
        ``ValueError`` when ``batch_size`` exceeds the number of stored items.
        """
        positions = np.arange(len(self.buffer))
        chosen = np.random.choice(positions, size=batch_size, replace=False)
        return [self.buffer[pos] for pos in chosen]

In [0]:

# Replay memory shared by this pre-fill loop and the training cell.
memory = Memory(max_size = memory_size)

# Pre-fill the memory with transitions produced by random actions.
game.new_episode()

for i in range(pretrain_length):
    # On the very first step, build the initial stacked state from the first frame.
    if i == 0:
        state = game.get_state().screen_buffer
        state, stacked_frames = stack_frames(stacked_frames, state, True)
    
    # Pick a random action.
    action = random.choice(possible_actions)
    
    # Apply it and collect the reward.
    reward = game.make_action(action)
    
    # Check whether the episode ended with this action.
    done = game.is_episode_finished()
    
    
    if done:
        # Terminal transition: the next state is an all-zero array of the same shape.
        next_state = np.zeros(state.shape)
        
        
        memory.add((state, action, reward, next_state, done))
        
        # Start a new episode and rebuild the stacked state from its first frame.
        game.new_episode()
        
        
        state = game.get_state().screen_buffer
        
        
        state, stacked_frames = stack_frames(stacked_frames, state, True)
        
    else:
        # Non-terminal transition: push the new frame onto the existing stack.
        next_state = game.get_state().screen_buffer
        next_state, stacked_frames = stack_frames(stacked_frames, next_state, False)

        memory.add((state, action, reward, next_state, done))

        # The next state becomes the current state for the following step.
        state = next_state

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


In [0]:
# Writer for the summary event files under ./tensorboard/dqn/1.
writer = tf.summary.FileWriter("./tensorboard/dqn/1")

# Register the network's loss tensor as a scalar summary named "Loss".
tf.summary.scalar("Loss", DQNetwork.loss)

# Single op that merges every summary registered so far.
write_op = tf.summary.merge_all()

In [0]:
def predict_action(explore_start, explore_stop, decay_rate, decay_step, state, actions):
    """Choose an action epsilon-greedily with an exponentially decaying epsilon.

    The exploration probability is
    ``explore_stop + (explore_start - explore_stop) * exp(-decay_rate * decay_step)``.

    Args:
        explore_start: Exploration probability at ``decay_step`` 0.
        explore_stop: Value the exploration probability decays towards.
        decay_rate: Exponential decay rate.
        decay_step: Number of decay steps taken so far.
        state: Current stacked state; only used on the exploitation branch,
            where it is reshaped into a batch of one.
        actions: List of candidate actions to choose from.

    Returns:
        tuple: ``(action, explore_probability)``.

    Note:
        The exploitation branch reads the notebook globals ``sess`` and
        ``DQNetwork``.
    """
    # Random number compared against the exploration probability below.
    exp_exp_tradeoff = np.random.rand()

    explore_probability = explore_stop + (explore_start - explore_stop) * np.exp(-decay_rate * decay_step)

    if explore_probability > exp_exp_tradeoff:
        # Explore: pick from the `actions` argument rather than a global list.
        action = random.choice(actions)
    else:
        # Exploit: take the action with the largest predicted Q-value.
        Qs = sess.run(DQNetwork.output, feed_dict={DQNetwork.inputs_: state.reshape((1, *state.shape))})
        choice = np.argmax(Qs)
        action = actions[int(choice)]

    return action, explore_probability

In [0]:
# Saver object for writing model checkpoints.
# NOTE(review): the training cell below creates a Saver again, so this cell looks redundant.
saver = tf.train.Saver()

In [0]:

# Saver used for the periodic checkpoints written at the end of this cell.
saver = tf.train.Saver()

if training:
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Step counter across all episodes; drives the epsilon decay in predict_action.
        decay_step = 0

        # Most recent training loss. Starts as NaN so the episode summary below
        # never reads an unassigned name.
        loss = float("nan")

        game.init()

        for episode in range(total_episodes):
            step = 0
            episode_rewards = []

            # Begin a new episode and build the first stacked state.
            game.new_episode()
            state = game.get_state().screen_buffer
            state, stacked_frames = stack_frames(stacked_frames, state, True)

            while step < max_steps:
                step += 1
                decay_step += 1

                # Choose an action, apply it, and observe reward / termination.
                action, explore_probability = predict_action(explore_start, explore_stop, decay_rate, decay_step, state, possible_actions)
                reward = game.make_action(action)
                done = game.is_episode_finished()
                episode_rewards.append(reward)

                if done:
                    # Terminal step: push an all-zero frame through the normal
                    # stacking path so next_state keeps the stacked shape.
                    next_state = np.zeros((84, 84), dtype=int)
                    next_state, stacked_frames = stack_frames(stacked_frames, next_state, False)

                    # Force the while loop to end after this iteration.
                    step = max_steps

                    total_reward = np.sum(episode_rewards)

                    print('Episode: {}'.format(episode),
                          'Total reward: {}'.format(total_reward),
                          'Training loss: {:.4f}'.format(loss),
                          'Explore P: {:.4f}'.format(explore_probability))

                    memory.add((state, action, reward, next_state, done))

                else:
                    next_state = game.get_state().screen_buffer
                    next_state, stacked_frames = stack_frames(stacked_frames, next_state, False)

                    memory.add((state, action, reward, next_state, done))

                    state = next_state

                # ---- Learning step on a random mini-batch from memory ----
                batch = memory.sample(batch_size)
                states_mb = np.array([each[0] for each in batch], ndmin=3)
                actions_mb = np.array([each[1] for each in batch])
                rewards_mb = np.array([each[2] for each in batch])
                next_states_mb = np.array([each[3] for each in batch], ndmin=3)
                dones_mb = np.array([each[4] for each in batch])

                # Q-values predicted for the next states.
                Qs_next_state = sess.run(DQNetwork.output, feed_dict={DQNetwork.inputs_: next_states_mb})

                # Target is r for terminal transitions, r + gamma * max_a Q(s', a) otherwise.
                targets_mb = np.array([
                    r if terminal else r + gamma * np.max(q_next)
                    for r, terminal, q_next in zip(rewards_mb, dones_mb, Qs_next_state)
                ])

                train_feed = {DQNetwork.inputs_: states_mb,
                              DQNetwork.target_Q: targets_mb,
                              DQNetwork.actions_: actions_mb}

                # Memory samples uniformly, so every transition gets unit
                # importance weight. Networks without an IS-weight input are
                # left untouched.
                if hasattr(DQNetwork, "ISWeights_"):
                    train_feed[DQNetwork.ISWeights_] = np.ones((len(batch), 1), dtype=np.float32)

                # One run fetches the loss, applies the update and produces the
                # summary, so the logged loss is the one used for this update.
                loss, _, summary = sess.run([DQNetwork.loss, DQNetwork.optimizer, write_op],
                                            feed_dict=train_feed)

                writer.add_summary(summary, episode)
                writer.flush()

            # Checkpoint every 5 episodes.
            if episode % 5 == 0:
                save_path = saver.save(sess, "./models/model.ckpt")
                print("Model Saved")
                print("Model Saved")

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 22 Total reward: 27.0 T

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
Model Saved
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 48 Total reward: 52.0 Training loss: 3.3715 Explore P: 0.6822
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 49 Total reward: 91.0 Training loss: 4.3172 Explore P: 0.68

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
Model Saved
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 59 Total reward: 10.0 Training loss: 4.6223 Explore P: 0.6416
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 66 Total reward: 91.0 Training loss: 7.4085 Explore P: 0.6238
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 71 Total reward: 45.0 Training loss: 2.8194 Explore P: 0.6034
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 95 Total reward: 60.0 Training loss: 8.8471 Explore P: 0.5338
Model Saved

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 102 Total reward: 26.0 Training loss: 5.7208 Explore P: 0.5196
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 110 Total reward: 53.0 Training loss: 6.4330 Explore P: 0.5042
Model Saved
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 117 Total reward: 3.0 Training loss: 8.6053 Explore P: 0.4895
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 118 Total reward: 89.0 Training loss: 14.6967 Explore P: 0.4889
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 127 Total reward: 59.0 Training loss: 4.7437 Explore P: 0.4756
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 135 Total reward: 28.0 Training loss: 5.3048 Explore P: 0.4623
Model Saved
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 142 Total reward: 60.0 Training loss: 5.3311 Explore P: 0.4486
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 149 Total reward: 89.0 Training loss: 9.9605 Explore P: 0.4358
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 157 Total reward: 56.0 Training loss: 14.0050 Explore P: 0.4228
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640

(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 164 Total reward: 77.0 Training loss: 11.3887 Explore P: 0.4113
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 165 Total reward: 90.0 Training loss: 5.9001 Explore P: 0.4109
Model Saved
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 6

(84, 84)
(44, 24)
Episode: 171 Total reward: 39.0 Training loss: 3.7736 Explore P: 0.3994
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 196 Total reward: 48.0 Training loss: 26.3125 Explore P: 0.3656
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 206 Total reward: 63.0 Training loss: 5.9469 Explore P: 0.3548
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 216 Total reward: 63.0 Training loss: 4.2995 Explore P: 0.3459
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 234 Total reward: 48.0 Training loss: 6.4232 Explore P: 0.3259
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 243 Total reward: 44.0 Training loss: 6.4814 Explore P: 0.3173
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

Model Saved
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 251 Total reward: 64.0 Training loss: 4.3040 Explore P: 0.3075
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580

(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 259 Total reward: 44.0 Training loss: 5.0174 Explore P: 0.2996
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 267 Total reward: 16.0 Training loss: 3.1992 Explore P: 0.2907
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 268 Total reward: 91.0 Training loss: 6.0912 Explore P: 0.2904
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 276 Total reward: 85.0 Training loss: 3.6016 Explore P: 0.2828
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 285 Total reward: 27.0 Training loss: 6.3637 Explore P: 0.2749
Model Saved
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 286 Total reward: 86.0 Training loss: 13.2995 Explore P: 0.2745
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 6

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 295 Total reward: 67.0 Training loss: 4.3453 Explore P: 0.2669
Model Saved
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 296 Total reward: 91.0 Training loss: 7.7180 Explore P: 0.2666
(480, 640, 3)
(440, 580)
(480, 64

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 305 Total reward: 80.0 Training loss: 5.3167 Explore P: 0.2600
Model Saved
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 306 Total reward: 85.0 Training loss: 4.9090 Explore P: 0.2596
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 64

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 317 Total reward: 69.0 Training loss: 4.4212 Explore P: 0.2529
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 328 Total reward: 68.0 Training loss: 8.0641 Explore P: 0.2457
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 337 Total reward: 54.0 Training loss: 6.2843 Explore P: 0.2390
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 338 Total reward: 81.0 Training loss: 6.8720 Explore P: 0.

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 346 Total reward: 65.0 Training loss: 3.5062 Explore P: 0.2320
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 357 Total reward: 91.0 Training loss: 5.5025 Explore P: 0.2261
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 358 Total reward: 82.0 Training loss: 3.7850 Explore P: 0.2257
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 367 Total reward: 58.0 Training loss: 4.0219 Explore P: 0.2196
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 368 Total reward: 91.0 Training l

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 375 Total reward: -16.0 Training loss: 6.7987 Explore P: 0.2137
Model Saved
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 58

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 383 Total reward: 63.0 Training loss: 4.5497 Explore P: 0.2077
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 391 Total reward: 48.0 Training loss: 8.5493 Explore P: 0.2016
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

Model Saved
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 401 Total reward: 69.0 Training loss: 3.0075 Explore P: 0.1960
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580

Model Saved
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 411 Total reward: 61.0 Training loss: 3.6439 Explore P: 0.1905
(480, 640, 3)
(440, 580

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 421 Total reward: 15.0 Training loss: 11.1848 Explore P: 0.1858
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)


(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 430 Total reward: 64.0 Training loss: 4.3108 Explore P: 0.1806
Model Saved
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 431 Total reward: 81.0 Training loss: 8.7033 Explore P: 0.1803
(480, 64

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 441 Total reward: 24.0 Training loss: 3.8546 Explore P: 0.1757
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 442 Total reward: 88.0 Training loss: 5.1525 Explore P: 0.1755
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 452 Total reward: 40.0 Training loss: 8.9069 Explore P: 0.1707
(480, 640,

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 464 Total reward: 91.0 Training loss: 4.8019 Explore P: 0.1667
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640,

Model Saved
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 476 Total reward: 66.0 Training loss: 10.1523 Explore P: 0.1619
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 58

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 485 Total reward: 62.0 Training loss: 8.7027 Explore P: 0.1578
Model Saved
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 486 Total reward: 91.0 Training loss: 7.1437 Explore P: 0.1577
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 64

(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(44, 24)
Episode: 497 Total reward: 64.0 Training loss: 6.6021 Explore P: 0.1534
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(480, 640, 3)
(440, 580)
(84, 84)
(

In [0]:
# Evaluation: restore the trained weights and let the agent play greedily
# (always the highest-Q action, no exploration), printing each episode's score.
#
# Relies on names defined in earlier cells: `saver`, `DQNetwork`,
# `stack_frames`, `stacked_frames` and `create_environment`.
NUM_TEST_EPISODES = 1  # how many evaluation episodes to play

with tf.Session() as sess:
    # create_environment() already calls game.init(), so the game is not
    # initialised a second time here.
    game, possible_actions = create_environment()

    try:
        saver.restore(sess, "./models/model.ckpt")

        for _ in range(NUM_TEST_EPISODES):
            game.new_episode()

            # Start the episode with a fresh frame stack (is_new_episode=True).
            frame = game.get_state().screen_buffer
            state, stacked_frames = stack_frames(stacked_frames, frame, True)

            while not game.is_episode_finished():
                # Add a leading batch axis of 1 before feeding the network.
                q_values = sess.run(
                    DQNetwork.output,
                    feed_dict={DQNetwork.inputs_: state.reshape((1, *state.shape))},
                )

                # Greedy policy: pick the action with the largest Q-value.
                action = possible_actions[int(np.argmax(q_values))]
                game.make_action(action)

                # The original code only reads the next state while the
                # episode is still running, so leave the loop first.
                if game.is_episode_finished():
                    break

                frame = game.get_state().screen_buffer
                state, stacked_frames = stack_frames(stacked_frames, frame, False)

            score = game.get_total_reward()
            print("Score: ", score)
    finally:
        # Always shut the Doom engine down, even if restoring or playing fails.
        game.close()

INFO:tensorflow:Restoring parameters from ./models/model.ckpt
Score:  83.0
