**Proximal Policy Optimization (PPO)**

Implementation on Survival Gridworld Environment

In [1]:
import tensorflow as tf
import numpy as np
import cv2
import math
import collections
import environment as _env
import netfunctions as nf
import os

In [2]:
# Value-clipping switch and its hyperparameter.
# When use_clip is True, Worker.get_target clamps the running return from
# below with max(clip_val, reward) before each discounted backup step, so
# clip_val acts as a lower bound on the bootstrapped value.
use_clip = True
clip_val = -0.80

In [3]:
# Flag selecting an environment with infinite resource (False = finite).
# NOTE(review): not read anywhere in the visible part of this file --
# presumably consumed where the environment is constructed; confirm.
env_inf_resource = False

In [4]:
def network_model(net_input, input_shape, num_output, alias='model'):
    """Build the shared conv/dense stack and return its output tensor.

    Layer order: reshape -> conv(16) -> conv(32) -> flatten -> dense(64)
    -> dense(128) -> dense(num_output, no ReLU).

    Args:
        net_input: Tensor that is reshaped to [-1, width, height, channels].
        input_shape: Indexable whose first three entries are width, height
            and channel count.
        num_output: Number of units in the final dense layer.
        alias: Name handed to nf.NetFunctions.

    Returns:
        The tensor produced by the last dense layer (no ReLU applied).
    """
    width, height, channels = input_shape[0], input_shape[1], input_shape[2]

    # Layer factory from the project's netfunctions module
    builder = nf.NetFunctions(alias)

    # Input layer
    layer = tf.reshape(net_input, shape=[-1, width, height, channels])

    # Two convolutional layers (pooling enabled), 16 then 32 filters
    for filters in (16, 32):
        layer = builder.conv_layer(layer, num_filters=filters, filter_size=4, use_pooling=True)

    # Flatten before the fully connected part
    layer = builder.flatten(layer)

    # Two hidden dense layers with ReLU, 64 then 128 units
    for units in (64, 128):
        layer = builder.dense_layer(layer, num_units=units, use_relu=True)

    # Output layer without ReLU
    return builder.dense_layer(layer, num_units=num_output, use_relu=False)

In [5]:
class PolicyEstimator:
    """PPO actor: a trainable policy network plus a frozen "old" copy.

    The clipped surrogate loss compares `net_policy` against `old_policy`;
    `get_estimate` samples from `old_policy`, and `optimize` copies the
    trained weights into `old_policy` after each optimization call.

    Args:
        input_shape: [width, height, channels] of one state.
        num_output: Number of discrete actions (softmax width).
        epsilon: PPO ratio clipping range (ratio clipped to 1 +/- epsilon).
        beta: Weight of the entropy bonus added to the surrogate objective.
        grad_clip: Global-norm threshold for gradient clipping.
        checkpoint_dir: Directory to restore from / save to, or None to
            always start from freshly initialized variables.
    """

    # File-name prefix used inside the checkpoint directory
    _CHECKPOINT_PREFIX = 'model'

    def __init__(self, input_shape, num_output, epsilon=0.1, beta=0.01, grad_clip=5.0, checkpoint_dir=None):

        # Get checkpoint directory
        self.checkpoint_dir = checkpoint_dir

        # Initialize input place holder
        self.states = tf.placeholder(tf.float32,
                                     shape=[None, input_shape[0], input_shape[1], input_shape[2]],
                                     name='input')

        # Initialize target placeholder (fed with advantages by optimize)
        self.targets = tf.placeholder(tf.float32, shape=[None], name='target_value')

        # Initialize policy action placeholder
        self.actions = tf.placeholder(tf.int32, shape=[None], name='actions')

        # Initialize batch size for the policy loss
        self.batch_size = tf.placeholder(tf.int32, shape=None, name='batch_size')

        # Initialize learning rate placeholder
        self.learning_rate = tf.placeholder(tf.float32, shape=None, name='alpha')

        # Initialize load or save global counter
        self.count_states = tf.Variable(initial_value=0, dtype=tf.int64, name='count_states')

        # Tensorflow operation for increasing count_states
        self.count_states_increase = tf.assign(self.count_states, self.count_states + 1)

        # Get size of the output layer
        self.num_output = num_output

        # Initialize policy network
        self.net_policy, self.net_params = self.policy_model(net_input=self.states,
                                                             input_shape=input_shape,
                                                             num_output=self.num_output,
                                                             alias='policy_model')
        # Initialize old policy
        self.old_policy, self.old_params = self.policy_model(net_input=self.states,
                                                             input_shape=input_shape,
                                                             num_output=self.num_output,
                                                             alias='old_policy')

        # Copy parameters from net_policy to old_policy
        self.copy_params_op = [oldpi.assign(pi) for pi, oldpi in zip(self.net_params, self.old_params)]

        # Entropy bonus for exploration (probabilities are clipped to
        # >= 1e-6 in policy_model, so the log is finite)
        entropy = -tf.reduce_sum(self.net_policy * tf.log(self.net_policy), 1)

        # Get prediction for the chosen action from current and old policy:
        # flatten [batch, num_output] and index row * num_output + action
        gather_idx = tf.range(self.batch_size) * self.num_output + self.actions
        pi_probs = tf.gather(tf.reshape(self.net_policy, [-1]), gather_idx)
        oldpi_probs = tf.gather(tf.reshape(self.old_policy, [-1]), gather_idx)

        # Set proximal policy optimization loss (clipped surrogate objective)
        ratio = pi_probs / (oldpi_probs + 1e-6)
        loss_cpi = ratio * self.targets
        loss_ppo = tf.minimum(loss_cpi, tf.clip_by_value(ratio, 1.0 - epsilon, 1.0 + epsilon) * self.targets)
        loss_ppo_s = loss_ppo + beta * entropy
        # Negated because the optimizer minimizes
        self.loss = -tf.reduce_sum(loss_ppo_s)

        # Optimizer for the loss function
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
        # Variables that do not influence the loss have no gradient
        self.grads_and_vars = [[grad, var] for grad, var in self.grads_and_vars if grad is not None]
        gradients, variables = zip(*self.grads_and_vars)
        # Clip gradients
        gradients, _ = tf.clip_by_global_norm(gradients, grad_clip)
        self.opt_train = self.optimizer.apply_gradients(list(zip(gradients, variables)))

        # Used for saving and loading checkpoints.
        # NOTE: tf.global_variables() covers every global variable present in
        # the default graph at this point, not only this estimator's.
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

        # Create a tensorflow session
        self.session = tf.Session()

        # Initialize tensorflow variables
        self.initialize()

    def initialize(self):
        """Restore from `checkpoint_dir` when one is set, else initialize fresh."""

        if self.checkpoint_dir is not None:
            # Load the most recent checkpoint if it exists,
            # otherwise initialize all the variables in the tensorflow graph
            self.load_checkpoint(self.checkpoint_dir)
        else:
            self._initialize_variables()

    def _initialize_variables(self):
        """Initialize all graph variables and sync old_policy with net_policy."""

        self.session.run(tf.global_variables_initializer())

        # The two networks are initialized independently; without this copy
        # the first PPO ratio would compare two unrelated random policies.
        self.session.run(self.copy_params_op)

    def policy_model(self, net_input, input_shape, num_output, alias):
        """Build one policy network under variable scope `alias`.

        Returns:
            (output, params): the softmax probabilities clipped to
            [1e-6, 1.0], and the global variables collected under `alias`.
        """

        with tf.variable_scope(alias):
            # Get network model output layer
            net_logits = network_model(net_input, input_shape, num_output, alias)
            softmax = tf.nn.softmax(net_logits)
            output = tf.clip_by_value(softmax, 1e-6, 1.0)

        params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=alias)

        return output, params

    def load_checkpoint(self, checkpoint_dir):
        """Restore the latest checkpoint in `checkpoint_dir`.

        If nothing can be restored, the variables are initialized instead
        and the directory is created when missing.
        """

        print("Trying to restore last checkpoint ...")

        try:
            # Use TensorFlow to find the latest checkpoint if any
            last_check_path = tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir)

            # Try and load the data in the checkpoint
            # (raises when last_check_path is None, i.e. nothing was saved yet)
            self.saver.restore(self.session, save_path=last_check_path)
            print("Restored checkpoint from:", last_check_path)
        except Exception as err:
            # Best effort: fall back to fresh variables, but report why
            # instead of silently discarding the failure.
            print("Failed to restore checkpoint from:", checkpoint_dir)
            print("Reason:", err)
            print("Initializing variables instead.")
            self._initialize_variables()

            # Create the checkpoint directory if it does not exist
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)

    def save_checkpoint(self, checkpoint_dir, current_iteration):
        """Save all variables to a checkpoint inside `checkpoint_dir`."""

        # Create the checkpoint directory if it does not exist
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)

        # Saver.save treats save_path as a file-name *prefix*. The prefix must
        # live inside checkpoint_dir, otherwise the files (and the checkpoint
        # state file) are written next to the directory and
        # tf.train.latest_checkpoint(checkpoint_dir) never finds them.
        save_path = os.path.join(checkpoint_dir, self._CHECKPOINT_PREFIX)
        self.saver.save(self.session, save_path=save_path, global_step=current_iteration)

        print("*****Policy_Net Model Saved Checkpoint...")

    def increase_count_states(self):
        """Increment the persistent optimization-step counter and return it."""

        return self.session.run(self.count_states_increase)

    def optimize(self, training_data, training_label, action_label, learning_rate, batch_size, epoch=1, checkpoint_dir=None):
        """Run `epoch` passes of mini-batch updates, then refresh old_policy.

        Only full batches are used: len(training_data) // batch_size batches
        per epoch, any remainder is ignored.

        Args:
            training_data: States, indexable by slice.
            training_label: Policy targets aligned with training_data.
            action_label: Chosen action indices aligned with training_data.
            learning_rate: Learning rate fed to the Adam optimizer.
            batch_size: Number of samples per update.
            epoch: Number of passes over the batches.
            checkpoint_dir: When not None, a checkpoint is saved there.

        Returns:
            The counter value after the last update, minus one.
        """

        # Current value of the persistent counter
        count_states = self.session.run(self.count_states)

        # Get total batch
        batch_num = 0
        total_batch = len(training_data) // batch_size

        for _ in range(total_batch * epoch):

            # Get batch per iteration
            batch_start = batch_size * batch_num
            batch_end = batch_start + batch_size

            # Placeholder for variables in the tensorflow graph
            feed_dict_train = {self.states: training_data[batch_start:batch_end],
                               self.targets: training_label[batch_start:batch_end],
                               self.actions: action_label[batch_start:batch_end],
                               self.batch_size: batch_size,
                               self.learning_rate: learning_rate}

            # Run optimizer
            self.session.run(self.opt_train, feed_dict=feed_dict_train)

            # Advance to the next batch, wrapping around at the end of an epoch
            batch_num = (batch_num + 1) % total_batch

            # Increase the counter for the number of states that have been processed
            count_states = self.increase_count_states()

        # Update old_policy
        self.session.run(self.copy_params_op)

        if checkpoint_dir is not None:
            # Save a checkpoint of the Neural Network so we can reload it
            self.save_checkpoint(checkpoint_dir, count_states - 1)

        return count_states - 1

    def get_estimate(self, test_input):
        """Return the old policy's action probabilities for a single state."""

        # The network expects a batch, so wrap the single state
        state_input = [test_input]

        # Get output estimate
        policy_output = self.session.run(self.old_policy, feed_dict={self.states: state_input})

        return policy_output[0]

    # END OF POLICYESTIMATOR CLASS

In [6]:
class ValueEstimator:
    """Critic network trained with a mean-squared-error loss.

    Args:
        input_shape: [width, height, channels] of one state.
        num_output: Width of the output layer. The loss squeezes axis 1 of
            the output, so this must be 1.
        checkpoint_dir: Directory to restore from / save to, or None to
            always start from freshly initialized variables.
    """

    # File-name prefix used inside the checkpoint directory
    _CHECKPOINT_PREFIX = 'model'

    def __init__(self, input_shape, num_output, checkpoint_dir=None):

        # Get checkpoint directory
        self.checkpoint_dir = checkpoint_dir

        # Initialize input place holder
        self.states = tf.placeholder(tf.float32,
                                     shape=[None, input_shape[0], input_shape[1], input_shape[2]],
                                     name='input')

        # Initialize target placeholder
        self.targets = tf.placeholder(tf.float32, shape=[None], name='target_value')

        # Initialize learning rate placeholder
        self.learning_rate = tf.placeholder(tf.float32, shape=None, name='alpha')

        # Initialize load or save global counter
        self.count_states = tf.Variable(initial_value=0, dtype=tf.int64, name='count_states')

        # Tensorflow operation for increasing count_states
        self.count_states_increase = tf.assign(self.count_states, self.count_states + 1)

        # Get size of the output layer
        self.num_output = num_output

        # Initialize value network
        self.net_value = self.value_model(net_input=self.states,
                                          input_shape=input_shape,
                                          num_output=self.num_output,
                                          alias='value_model')

        # Define loss function: mean squared error between the squeezed
        # network output and the targets
        logits = tf.squeeze(self.net_value, axis=[1])
        squared_error = tf.squared_difference(logits, self.targets)
        self.loss = tf.reduce_mean(squared_error)

        # Optimizer for the loss function
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.opt_train = self.optimizer.minimize(self.loss)

        # Used for saving and loading checkpoints.
        # NOTE: tf.global_variables() covers every global variable present in
        # the default graph at this point, not only this estimator's.
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

        # Create a tensorflow session
        self.session = tf.Session()

        # Initialize tensorflow variables
        self.initialize()

    def initialize(self):
        """Restore from `checkpoint_dir` when one is set, else initialize fresh."""

        if self.checkpoint_dir is not None:
            # Load the most recent checkpoint if it exists,
            # otherwise initialize all the variables in the tensorflow graph
            self.load_checkpoint(self.checkpoint_dir)
        else:
            # Initialize all the variables for the TensorFlow graph
            self.session.run(tf.global_variables_initializer())

    def value_model(self, net_input, input_shape, num_output, alias):
        """Build the value network under variable scope `alias`.

        Returns:
            The raw output of the last layer of network_model.
        """

        with tf.variable_scope(alias):
            # Get network model output layer
            output = network_model(net_input, input_shape, num_output, alias)

        return output

    def load_checkpoint(self, checkpoint_dir):
        """Restore the latest checkpoint in `checkpoint_dir`.

        If nothing can be restored, the variables are initialized instead
        and the directory is created when missing.
        """

        print("Trying to restore last checkpoint ...")

        try:
            # Use TensorFlow to find the latest checkpoint if any
            last_check_path = tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir)

            # Try and load the data in the checkpoint
            # (raises when last_check_path is None, i.e. nothing was saved yet)
            self.saver.restore(self.session, save_path=last_check_path)
            print("Restored checkpoint from:", last_check_path)
        except Exception as err:
            # Best effort: fall back to fresh variables, but report why
            # instead of silently discarding the failure.
            print("Failed to restore checkpoint from:", checkpoint_dir)
            print("Reason:", err)
            print("Initializing variables instead.")
            self.session.run(tf.global_variables_initializer())

            # Create the checkpoint directory if it does not exist
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)

    def save_checkpoint(self, checkpoint_dir, current_iteration):
        """Save all variables to a checkpoint inside `checkpoint_dir`."""

        # Create the checkpoint directory if it does not exist
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)

        # Saver.save treats save_path as a file-name *prefix*. The prefix must
        # live inside checkpoint_dir, otherwise the files (and the checkpoint
        # state file) are written next to the directory and
        # tf.train.latest_checkpoint(checkpoint_dir) never finds them.
        save_path = os.path.join(checkpoint_dir, self._CHECKPOINT_PREFIX)
        self.saver.save(self.session, save_path=save_path, global_step=current_iteration)

        print("*****Value_Net Model Saved Checkpoint...")

    def increase_count_states(self):
        """Increment the persistent optimization-step counter and return it."""

        return self.session.run(self.count_states_increase)

    def optimize(self, training_data, training_label, learning_rate, batch_size, epoch=1, checkpoint_dir=None):
        """Run `epoch` passes of mini-batch regression updates.

        Only full batches are used: len(training_data) // batch_size batches
        per epoch, any remainder is ignored.

        Args:
            training_data: States, indexable by slice.
            training_label: Value targets aligned with training_data.
            learning_rate: Learning rate fed to the Adam optimizer.
            batch_size: Number of samples per update.
            epoch: Number of passes over the batches.
            checkpoint_dir: When not None, a checkpoint is saved there.

        Returns:
            The counter value after the last update, minus one.
        """

        # Current value of the persistent counter
        count_states = self.session.run(self.count_states)

        # Get total batch
        batch_num = 0
        total_batch = len(training_data) // batch_size

        for _ in range(total_batch * epoch):

            # Get batch per iteration
            batch_start = batch_size * batch_num
            batch_end = batch_start + batch_size

            # Placeholder for variables in the tensorflow graph.
            feed_dict_train = {self.states: training_data[batch_start:batch_end],
                               self.targets: training_label[batch_start:batch_end],
                               self.learning_rate: learning_rate}

            # Run optimizer
            self.session.run(self.opt_train, feed_dict=feed_dict_train)

            # Advance to the next batch, wrapping around at the end of an epoch
            batch_num = (batch_num + 1) % total_batch

            # Increase the counter for the number of states that have been processed
            count_states = self.increase_count_states()

        if checkpoint_dir is not None:
            # Save a checkpoint of the Neural Network so we can reload it
            self.save_checkpoint(checkpoint_dir, count_states - 1)

        return count_states - 1

    def get_estimate(self, test_input):
        """Return the network output row for a single state."""

        # The network expects a batch, so wrap the single state
        state_input = [test_input]

        # Get output estimate
        value_output = self.session.run(self.net_value, feed_dict={self.states: state_input})

        return value_output[0]

    # END OF VALUEESTIMATOR CLASS

In [7]:
class LogFile:
    """Plain-text training log stored under `save_dir`.

    File layout: two header lines ("Current Global Steps=N" and
    "Current Optimization=N") followed by one space-separated record line
    per call to save_data.
    """

    def __init__(self, save_dir):

        # Initialize save directory folder
        self.save_dir = save_dir

        # Initialize save directory
        self.create_save_dir()

    def create_save_dir(self):
        """Create the save directory if it does not already exist."""

        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)

    def _log_path(self, file_name):
        """Return the on-disk path used for `file_name`."""

        # NOTE: the separator is '/ ' (slash + space), so files are stored
        # with a leading space in their name. Kept as-is so that logs
        # written by earlier runs are still found.
        return self.save_dir + '/ ' + file_name

    def create_file(self, file_name):
        """Create the log file with a zeroed header unless it already exists.

        Returns:
            True if the file already existed (it is left untouched),
            False if it was created by this call.
        """

        save_filename = self._log_path(file_name)

        # An existing file is never overwritten
        if os.path.isfile(save_filename):
            return True

        with open(save_filename, 'w') as wf:
            wf.writelines(['Current Global Steps=0\n',
                           'Current Optimization=0\n'])

        return False

    def save_data(self, file_name, data):
        """Update the header and append one record line to the log file.

        Data structure:
            data[0] = Global steps
            data[1] = Optimization number
            data[2] = Number of success
            data[3] = Number of attempts
            data[4] = Episode score
            data[5] = Rewards mean
            data[6] = Mean of minimum distance to finish
            data[7] = Episode length

        On any failure an error is printed and the file is left unchanged.
        """

        save_filename = self._log_path(file_name)

        try:
            with open(save_filename, 'r') as rf:
                get_lines = rf.readlines()

            # Build the complete new content BEFORE opening the file for
            # writing: opening in 'w' mode truncates it, so a failure while
            # formatting would otherwise wipe the whole log.
            get_lines[0] = 'Current Global Steps=' + str(int(data[0])) + '\n'
            get_lines[1] = 'Current Optimization=' + str(int(data[1])) + '\n'
            stp_msg = 'Global_Steps=' + str(int(data[0])) + ' '
            opt_msg = 'Optimization=' + str(int(data[1])) + ' '
            suc_msg = 'Success_Rate=' + str(int(data[2])) + '/' + str(int(data[3])) + ' '
            scr_msg = 'Score_Mean=' + str(data[4]) + ' '
            rwd_msg = 'Rewards_Mean=' + str(data[5]) + ' '
            dst_msg = 'Min_Dist_to_Finish_Mean=' + str(data[6]) + ' '
            lgt_msg = 'Episode_Length=' + str(data[7]) + '\n'
            get_lines.append(stp_msg + opt_msg + suc_msg + scr_msg + rwd_msg + dst_msg + lgt_msg)

            with open(save_filename, 'w') as wf:
                wf.writelines(get_lines)

            print('logfile on global steps=' + str(data[0]) + ' saved...')

        except OSError:
            # File missing or not accessible
            print('ERROR: File object does not exist')
        except (IndexError, TypeError, ValueError) as err:
            # Too few data entries, non-numeric values or a file without header
            print('ERROR: Could not save log data:', err)

    def get_header_info(self, file_name):
        """Read the two header counters of the log file.

        Returns:
            (global_steps, optimization) as ints, or (-1, -1) when the file
            cannot be read or its header cannot be parsed.
        """

        save_filename = self._log_path(file_name)

        try:
            # Only the first two lines carry header information
            with open(save_filename, 'r') as rf:
                stp_line = rf.readline()
                opt_line = rf.readline()

            return (int(stp_line.split("=")[1]), int(opt_line.split("=")[1]))

        except OSError:
            print('ERROR: File object does not exist')
        except (IndexError, ValueError) as err:
            print('ERROR: Could not parse log file header:', err)

        return (-1, -1)

    def get_saved_data(self, file_name):
        """Parse every record line of the log file.

        Returns:
            A list with one 8-element row per record, in save_data order:
            [steps, optimization, successes, attempts, score, rewards,
            min distance, episode length]. Returns [-1] when the file
            cannot be read. If a record fails to parse, parsing stops there
            and the remaining entries keep their initial value 0.
        """

        save_filename = self._log_path(file_name)

        # Sentinel returned when the file cannot be read
        data = [-1]

        try:
            with open(save_filename, 'r') as rf:
                get_lines = rf.readlines()

            # Skip the two header lines
            records = get_lines[2:]

            # Reinitialize return array
            data = [[0 for _ in range(8)] for _ in records]

            for row, line in zip(data, records):
                split_line = line.split(" ")

                # Get individual data ("Name=value" fields)
                stp = split_line[0].split('=')
                opt = split_line[1].split('=')
                srt = split_line[2].split('=')[1].split("/")
                scr = split_line[3].split('=')
                rwd = split_line[4].split('=')
                mdf = split_line[5].split('=')
                lgt = split_line[6].split('=')

                # Store data in the return array
                row[0] = int(stp[1])
                row[1] = int(opt[1])
                row[2] = int(srt[0])
                row[3] = int(srt[1])
                row[4] = float(scr[1])
                row[5] = float(rwd[1])
                row[6] = float(mdf[1])
                row[7] = float(lgt[1])

        except OSError:
            print('ERROR: File object does not exist')
        except (IndexError, ValueError) as err:
            print('ERROR: Could not parse log file record:', err)

        return data

    # END OF LOGFILE CLASS

In [8]:
# One environment transition as recorded by Worker.run_steps
Experience = collections.namedtuple(
    "Experience",
    "state action reward next_state end_episode",
)

In [9]:
# Training batch produced by Worker.get_target
Target = collections.namedtuple(
    "Target",
    "states policy_targets value_targets actions",
)

In [10]:
class Worker:
    """Rolls out the policy in an environment and builds training targets.

    Args:
        env: Environment object; this class calls reset, step, render,
            close_render, get_success, get_score and reads step_count,
            endpoint, control_position and gridworld on it.
        policy_net: Object whose get_estimate(state) returns the action
            probabilities.
        value_net: Object whose get_estimate(state) returns the value
            estimate.
        steps: Number of environment steps per training rollout.
        discount_factor: Discount applied when backing up returns.
    """

    def __init__(self, env, policy_net, value_net, steps=5, discount_factor=0.99):

        # Set policy network
        self.policy_net = policy_net

        # Set value network
        self.value_net = value_net

        # Set action size
        self.action_size = 4

        # Initialize environment
        self.env = env

        # Set agent start energy
        self.start_energy = 10.0

        # Current state; stays empty until the first environment reset
        self.state_array = []

        # Define model input shape
        self.input_shape = [16, 16, 4]

        # Set step size for training
        self.steps = steps

        # Set discount factor for experience evaluation
        self.discount_factor = discount_factor

    def set_env(self, setup_tup):
        """Configure a custom gridworld from (gridmatrix, delta_s, start, end)."""

        gridmatrix = setup_tup[0]
        delta_s = setup_tup[1]
        start = setup_tup[2]
        end = setup_tup[3]

        self.env.gridworld.custom_environment(gridmatrix, delta_s, start, end)

    def run_steps(self, is_training, render=False):
        """Collect one rollout.

        In training mode exactly `self.steps` transitions are collected,
        continuing from the current state and resetting the environment
        whenever an episode ends. In test mode the environment is reset
        first and the rollout runs until the episode ends.

        Returns:
            (experience, stats): the list of Experience tuples and
            [success, score, rewards, distance, length]; the statistics
            are only updated in test mode.
        """

        # Initialize experience array
        experience = []

        # Initialize step counter (only advanced in training mode)
        step_counter = 0

        # Initialize test statistics
        rewards = 0
        distance = 1000
        score = 0
        success = 0
        length = 0

        # Reset environment and get initial state
        if not is_training:
            self.state_array = self.env.reset(initial_energy=self.start_energy)

        while step_counter < self.steps:

            # Render game environment during testing
            if not is_training and render:
                self.env.render(delay=300)

            # Using the policy network to estimate the action probabilities
            p_value = self.policy_net.get_estimate(self.state_array)

            # Choose action according to p_value probabilities
            action = np.random.choice(a=np.arange(self.action_size), p=p_value)

            # Take a step in the environment (second return value is unused)
            next_state, _, reward, end_episode = self.env.step(action)

            # Store to experience array
            experience.append(Experience(self.state_array, action, reward, next_state, end_episode))

            if is_training:
                # Append step_counter
                step_counter += 1
            else:
                # Get statistics
                rewards += reward
                length = self.env.step_count

                # Track the minimum (truncated) Euclidean distance to the finish
                dsx = math.pow(self.env.endpoint[0] - self.env.control_position[0], 2)
                dsy = math.pow(self.env.endpoint[1] - self.env.control_position[1], 2)
                dst = math.sqrt(dsx + dsy)
                if dst < distance:
                    distance = int(dst)

            if end_episode:
                if not is_training:
                    if self.env.get_success():
                        success = 1

                    # Get final score
                    score = self.env.get_score()

                    if render:
                        self.env.render(delay=1000)
                        self.env.close_render()

                    # A test rollout ends with the episode
                    break
                else:
                    # Reset environment and keep collecting
                    self.state_array = self.env.reset(initial_energy=self.start_energy)
            else:
                self.state_array = next_state

        return experience, [success, score, rewards, distance, length]

    def get_target(self, experience):
        """Turn a rollout into network training inputs.

        Returns are backed up from the end of the rollout, bootstrapped
        with the value estimate of the last next_state and reset to 0 at
        episode ends. When the module-level `use_clip` is set, the running
        return is clamped from below by `clip_val` before each backup.

        Returns:
            Target(states, policy_targets, value_targets, actions) where
            policy_targets are the advantages (return - value estimate) and
            value_targets are the returns. All arrays are empty for an
            empty rollout.
        """

        exp_len = len(experience)

        # Initialize containers (builtin float/int replace the np.float /
        # np.int aliases, which were removed from NumPy)
        states = np.zeros(shape=[exp_len] + self.input_shape, dtype=float)
        policy_targets = np.zeros(shape=[exp_len], dtype=float)
        value_targets = np.zeros(shape=[exp_len], dtype=float)
        actions = np.zeros(shape=[exp_len], dtype=int)

        # Nothing to bootstrap from
        if exp_len == 0:
            return Target(states, policy_targets, value_targets, actions)

        # Bootstrap the return with the value estimate of the final next state
        reward = self.value_net.get_estimate(experience[-1].next_state)

        # Accumulate minibatch, walking backwards through the rollout
        for i in reversed(range(exp_len)):
            step = experience[i]

            # No future return beyond the end of an episode
            if step.end_episode:
                reward = 0.0
            # Value clipping
            if use_clip:
                reward = max(clip_val, reward)
            reward = step.reward + self.discount_factor * reward
            value = self.value_net.get_estimate(step.state)
            advantage = reward - value

            # Append network optimization inputs
            states[i] = step.state
            policy_targets[i] = advantage
            value_targets[i] = reward
            actions[i] = step.action

        return Target(states, policy_targets, value_targets, actions)

    def run(self, iterations, is_training, render=False):
        """Run `iterations` rollouts.

        Returns:
            (targets, test_stats): in training mode `targets` holds one
            Target per rollout and `test_stats` is empty; in test mode
            `targets` is empty and `test_stats` holds one statistics list
            per rollout (each result is also printed).
        """

        # Initialize state for training
        if is_training and np.size(self.state_array, 0) == 0:
            self.state_array = self.env.reset(initial_energy=self.start_energy)

        # Initialize target array
        targets = []

        # Initialize stats array
        test_stats = []

        for t in range(iterations):

            # Collect experience
            experience, stats = self.run_steps(is_training=is_training, render=render)

            if is_training:
                # Targets are only needed (and only computed) for training
                targets.append(self.get_target(experience))
            else:
                # Print test result
                result_msg = "Episode: {0:3}\t Final Score: {1:5.2f}\t Acc. Reward: {2:5.2f}\t Min_Dist: {3:6.2f}\t Epi_Len: {4:6.2f}"
                print(result_msg.format(t, stats[1], stats[2], stats[3], stats[4]))
                test_stats.append(stats)

        return targets, test_stats

    # END OF WORKER CLASS

In [11]:
class Play:
    """Drive PPO training and evaluation of a policy/value network pair.

    A Play instance owns one PolicyEstimator (actor), one ValueEstimator
    (critic), `num_workers` training Worker objects and one extra test
    Worker, each with its own environment built by make_env().
    """

    def __init__(self, num_workers=4, checkpoint_dir=None, custom_env=None):
        """Build the models, the workers and, optionally, the test log.

        Args:
            num_workers: Number of training workers to create.
            checkpoint_dir: Base directory; "/actor", "/critic" and
                "/test_log" are appended to it. With None no sub-directory
                is derived and no test log file is created.
            custom_env: Optional indexable environment description laid out
                as (gridmatrix, delta_s, start energy, start, end, fstate
                size); None keeps the default environment.
        """
        # Get check point directory
        self.checkpoint_dir = checkpoint_dir

        # Set discount factor for experience evaluation
        self.discount_factor = 0.99

        # Set step size for training
        self.steps = 5

        # Initialize optimization and step counters
        self.opt_counter = 0
        self.step_counter = 0

        #################LEARNING#MODEL#################

        # Set training variables
        self.input_shape = [16, 16, 4]
        self.action_size = 4
        self.policy_lr = 1e-3
        self.value_lr = 1e-3
        self.batch_size = self.steps
        self.epoch = 3
        self.epsilon = 0.2
        self.beta = 0.03
        self.grad_clip = 5.0

        # Initialize training counter
        self.train_counter = 0

        # Initialize policy model
        self.policy_model = PolicyEstimator(input_shape=self.input_shape,
                                            num_output=self.action_size,
                                            epsilon=self.epsilon,
                                            beta=self.beta,
                                            grad_clip=self.grad_clip,
                                            checkpoint_dir=self._checkpoint_path("/actor"))

        # Initialize value model
        self.value_model = ValueEstimator(input_shape=self.input_shape,
                                          num_output=1,
                                          checkpoint_dir=self._checkpoint_path("/critic"))

        ##################AGENT#WORKER##################

        # Get number of workers
        self.num_workers = num_workers

        # Initialize workers and test worker
        self.workers = []
        self.test_worker = None
        self.init_worker(custom_env)

        ####################TEST#LOG####################

        # Initialize test data log array
        self.test_log = np.zeros(shape=8, dtype=float)

        # Initialize test data log filename
        self.log_filename = 'logfile'

        # Initialize test data logfile
        self.logfile = None
        if checkpoint_dir is not None:
            self.logfile = LogFile(self._checkpoint_path("/test_log"))
            self.logfile.create_file(self.log_filename)

            # Resume the counters stored in the log header
            stp, opt = self.logfile.get_header_info(self.log_filename)
            self.opt_counter = opt
            self.step_counter = stp

    def _checkpoint_path(self, suffix):
        """Return checkpoint_dir + suffix, or None when no directory is set."""
        if self.checkpoint_dir is None:
            return None
        return self.checkpoint_dir + suffix

    def _sync_workers(self):
        """Copy the current steps and discount factor to the training workers.

        These attributes are only handed to the workers when they are
        constructed, so values assigned on Play afterwards would otherwise
        be ignored while step_counter is still advanced with self.steps.
        """
        for worker in self.workers:
            worker.steps = self.steps
            worker.discount_factor = self.discount_factor

    def make_env(self, env_id, is_default=True, custom_env=None):
        """Create one environment, optionally configured from custom_env.

        Args:
            env_id: Identifier forwarded to the Environment constructor.
            is_default: Forwarded to the Environment constructor.
            custom_env: Optional environment description; indices 0, 1, 3
                and 4 feed custom_environment() and index 5 is stored as
                fstate_size.

        Returns:
            The configured environment instance.
        """
        env = _env.Environment(env_id=env_id, is_default=is_default, grid_size=[10, 10])
        if custom_env is not None:
            env.custom_environment(gridmatrix=custom_env[0], delta_s=custom_env[1], start=custom_env[3], end=custom_env[4])
            env.fstate_size = custom_env[5]

        # Set environment with infinite resource
        if env_inf_resource:
            env.set_inf_resource()

        return env

    def init_worker(self, custom_env):
        """Create the training workers (env ids 1..num_workers) and the test worker (env id 0).

        Args:
            custom_env: Optional environment description; index 2 is used
                as every worker's starting energy.
        """
        for i in range(self.num_workers):
            worker = Worker(env=self.make_env(env_id=i + 1, custom_env=custom_env),
                            policy_net=self.policy_model,
                            value_net=self.value_model,
                            steps=self.steps,
                            discount_factor=self.discount_factor)

            # Set worker steps and action size
            worker.steps = self.steps
            worker.action_size = self.action_size

            # Set worker starting energy
            if custom_env is not None:
                worker.start_energy = custom_env[2]

            self.workers.append(worker)

        # Initialize test worker
        self.test_worker = Worker(env=self.make_env(env_id=0, custom_env=custom_env),
                                  policy_net=self.policy_model,
                                  value_net=self.value_model,
                                  steps=self.steps,
                                  discount_factor=self.discount_factor)

        # Set test worker action size
        self.test_worker.action_size = self.action_size

        # Set test worker starting energy
        if custom_env is not None:
            self.test_worker.start_energy = custom_env[2]

    def update(self, targets, is_save=False):
        """Shuffle the collected targets and optimize both networks on them.

        Args:
            targets: Nested list indexed as targets[worker][iteration]; each
                entry exposes states, policy_targets, value_targets and
                actions sequences of equal length. Entries may differ in
                length from one another.
            is_save: When True and a checkpoint directory is configured,
                the "/actor/" and "/critic/" directories are passed to the
                optimizers.

        Returns:
            The value returned by the policy model's optimize() call.
        """
        # Unpack targets: arr[arr[tup(np_arr[])]]
        flat_targets = [target for group in targets for target in group]
        t_size = sum(len(target.states) for target in flat_targets)

        # Initialize containers
        states_train = np.zeros(shape=[t_size] + self.input_shape, dtype=float)
        policy_train = np.zeros(shape=[t_size], dtype=float)
        value_train = np.zeros(shape=[t_size], dtype=float)
        actions_train = np.zeros(shape=[t_size], dtype=int)

        # Scatter every sample to a random slot; all four arrays share the
        # same permutation so samples stay aligned
        order = np.random.permutation(t_size)
        offset = 0
        for target in flat_targets:
            count = len(target.states)
            idx = order[offset:offset + count]
            states_train[idx] = target.states
            policy_train[idx] = target.policy_targets
            value_train[idx] = target.value_targets
            actions_train[idx] = target.actions
            offset += count

        # Optimize policy model
        actor_dir = self._checkpoint_path("/actor/") if is_save else None
        policy_iteration = self.policy_model.optimize(training_data=states_train,
                                                      training_label=policy_train,
                                                      action_label=actions_train,
                                                      learning_rate=self.policy_lr,
                                                      batch_size=self.batch_size,
                                                      epoch=self.epoch,
                                                      checkpoint_dir=actor_dir)

        # Optimize value model
        critic_dir = self._checkpoint_path("/critic/") if is_save else None
        self.value_model.optimize(training_data=states_train,
                                  training_label=value_train,
                                  learning_rate=self.value_lr,
                                  batch_size=self.batch_size,
                                  epoch=self.epoch,
                                  checkpoint_dir=critic_dir)

        return policy_iteration

    def train(self, max_global_steps, step_iter=1, eval_every=1000, test_epi=10, log_test=False):
        """Alternate experience collection and network updates.

        Args:
            max_global_steps: Training stops once step_counter reaches it.
            step_iter: Rollouts each worker performs per update.
            eval_every: Desired evaluation interval in global steps; rounded
                up to a multiple of steps * num_workers * step_iter.
            test_epi: Number of test iterations per evaluation.
            log_test: When True and a checkpoint directory is set, each
                evaluation result is written to the test log.
        """
        # Make sure attributes changed after construction reach the workers
        self._sync_workers()

        # Correct eval_every based on number of workers, step_size, and step_iter
        steps_per_update = self.steps * self.num_workers * step_iter
        # At least one interval, so the modulo below never divides by zero
        eval_every = max(1, int(np.ceil(eval_every / steps_per_update))) * steps_per_update

        # Bool variable to avoid testing and saving at the beginning of the training
        allow_test = False

        while self.step_counter < max_global_steps:
            # Run multiple workers
            targets = []
            for worker in self.workers:
                target, _ = worker.run(iterations=step_iter, is_training=True)
                targets.append(target)

            print("Worker Experience @ Global Step {}".format(self.step_counter))

            # Checkpointing and testing happen on the same global steps
            is_eval = allow_test and self.step_counter % eval_every == 0

            # Update policy and value networks
            self.opt_counter = self.update(targets, is_save=is_eval)

            # Test the current policy
            if is_eval:
                print("***Testing the Policy @ Global Step {}".format(self.step_counter))
                stats = self.test(iterations=test_epi, render=False)

                # Log history of test results
                if log_test and self.checkpoint_dir is not None:
                    self.log(stats)

            # Update allow_test
            allow_test = True

            # Append global counters
            self.step_counter += steps_per_update

    def log(self, stat):
        """Store one evaluation result in the test log file.

        Args:
            stat: The list returned by test().
        """
        # Nothing to write to when no checkpoint directory was configured
        if self.logfile is None:
            return

        # Log test results
        self.test_log[0] = float(self.step_counter)
        self.test_log[1] = math.floor(self.opt_counter)
        self.test_log[2] = math.floor(stat[0])
        self.test_log[3] = math.floor(stat[1])
        self.test_log[4] = round(stat[2], 2)
        self.test_log[5] = round(stat[3], 2)
        self.test_log[6] = round(stat[4], 2)
        self.test_log[7] = round(stat[5], 2)

        # Save test results
        self.logfile.save_data(self.log_filename, self.test_log)

    def test(self, iterations, render=False):
        """Run the test worker and print/return the aggregated results.

        Args:
            iterations: Number of test iterations; must be at least 1.
            render: Forwarded to the test worker.

        Returns:
            [successes, iterations, average score, average reward,
            average minimum distance, average episode length].

        Raises:
            ValueError: If iterations is smaller than 1.
        """
        if iterations < 1:
            raise ValueError("iterations must be at least 1")

        _, stats = self.test_worker.run(iterations=iterations, is_training=False, render=render)

        # Get total
        t_success  = sum(s[0] for s in stats)
        t_score    = sum(s[1] for s in stats)
        t_reward   = sum(s[2] for s in stats)
        t_distance = sum(s[3] for s in stats)
        t_length   = sum(s[4] for s in stats)

        # Get average
        ave_score    = t_score / iterations
        ave_reward   = t_reward / iterations
        ave_distance = t_distance / iterations
        ave_length   = t_length / iterations

        # Print overall test results
        test_msg = "Overall Results: Success Rate: {0:2}/{1}, Ave. Score: {2:5.2f}, Ave. Rewards: {3:5.2f}, Ave. Min_Dist {4:5.2f}, Ave. Epi_Length {5:6.2f}"
        print(test_msg.format(t_success, iterations, ave_score, ave_reward, ave_distance, ave_length))

        return [t_success, iterations, ave_score, ave_reward, ave_distance, ave_length]

    # END OF PLAY CLASS

# Select Prebuilt Test Environment

In [12]:
# Immutable record describing one custom gridworld configuration
_Gridworld = collections.namedtuple("_Gridworld", ["gmatrix", "delta_s", "energy", "start", "end", "fstate"])
def Gridworld(gmatrix, delta_s, energy, start, end, fstate=None):
    """Bundle a custom gridworld description into a _Gridworld tuple.

    Args:
        gmatrix: Grid matrix given as a list of rows.
        delta_s: Stored in the `delta_s` field.
        energy: Stored in the `energy` field.
        start: Stored in the `start` field.
        end: Stored in the `end` field.
        fstate: Stored in the `fstate` field; defaults to a fresh [5, 5].

    Returns:
        A _Gridworld namedtuple holding the arguments in field order.
    """
    # Build the default per call so tuples never share one mutable list
    if fstate is None:
        fstate = [5, 5]
    return _Gridworld(gmatrix, delta_s, energy, start, end, fstate)

In [13]:
# Custom GRIDWORLD A (SPARSE REWARDS TEST)
# Create custom gridworld matrix (9 rows x 14 columns)
# Maximum Score = 2
# This cell binds the module-level names gmatrix, env and _max_global_steps
# that the training cells read.
gmatrix = [[  0,   0,   0, -10, -20, -20, -20, -20, -20, -20, -20, -10,   0,   0],
           [  0,   0,   0, -10, -10, -10, -10, -10, -10, -10, -10, -10,   0,   0],
           [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
           [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
           [  0,   0,   0,  10,   0,   0,   0,   0,   0,   0,   5,   0,   7,   0],
           [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
           [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
           [  0,   0,   0, -10, -10, -10, -10, -10, -10, -10, -10, -10,   0,   0],
           [  0,   0,   0, -10, -20, -20, -20, -20, -20, -20, -20, -10,   0,   0]]
           
# NOTE(review): start/end look like [column, row] pairs - end=[13, 8] only
# fits inside a 9 x 14 matrix in that order; confirm against the environment.
env = Gridworld(gmatrix=gmatrix, delta_s=1.0, energy=10.0, start=[0, 8], end=[13, 8], fstate=[7, 7])
# Global step budget later passed to play.train(max_global_steps=...)
_max_global_steps = 1000000

In [None]:
# Custom GRIDWORLD B (REWARD TRACING TEST)
# Create custom gridworld matrix (12 rows x 12 columns)
# Maximum Score = 18
# Running this cell rebinds gmatrix, env and _max_global_steps, replacing
# whichever GRIDWORLD cell was run before it.
gmatrix = [[  0,   0,   0,   5,   0,   0,   0,   0,   0,   0,   0,   0], 
           [  0,   0,   0,   0,   0,   0,   3,   0,   0,   0,   0,   0],  
           [  0,  10,   0,   0, -20, -20, -20, -20, -20, -20, -20, -20],  
           [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],  
           [  0,   0,   0,   0,   5,   0,   0,   0,   5,   0,   0,   0],  
           [  0,   0,   0, -10, -10, -20, -20, -20,   0,   0,  10,   0],  
           [-20, -20, -20, -10, -10,   0,   0,   0,   0,   0,   0,   0],  
           [  0,   0,   0,   0,   5,   0,   0,   0,   5,   0,   0,   0],  
           [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],  
           [  0,  10,   0,   0, -20, -20, -20, -20, -20, -20, -20, -20],  
           [  0,   0,   0,   0,   0,   0,   3,   0,   0,   0,   0,   0],  
           [  0,   0,   0,   5,   0,   0,   0,   0,   0,   0,   0,   0]]

# NOTE(review): start/end are presumably [column, row] pairs - confirm
# against the environment.
env = Gridworld(gmatrix=gmatrix, delta_s=1.0, energy=10.0, start=[9, 11], end=[9, 0], fstate=[7, 7])
# Global step budget later passed to play.train(max_global_steps=...)
_max_global_steps = 1000000

In [None]:
# Custom GRIDWORLD C (MULTIPATH SELECTION TEST)
# Create custom gridworld matrix (12 rows x 13 columns)
# Maximum Score = 24
# Running this cell rebinds gmatrix, env and _max_global_steps, replacing
# whichever GRIDWORLD cell was run before it.
gmatrix = [[  0, -10,   0,   0,   0,   0,   0,   0,   0,   3,   0,   0,   0],
           [  0, -10,   0,   0,   5,   0,   0,  10,   0,   0,   0,   0,   0],
           [  0, -10,   0,   5, -10,   0,   3, -10,   0,   5, -20,   5,   0],
           [  0, -10,   0,   0, -10,   0,   0, -10,   0,   0, -20,   0,   0],
           [-20, -20,   0,   0, -20,   0,   0, -20, -20, -20, -20,   3,   0],
           [ 10,   0,   3,   0, -20,   5,   0,   0,   0,   0, -20,   0,   5],
           [  0,   0,   0,   0, -20,   0,   0,   5,   0,   3, -20,   0,   0],
           [  0,   0, -20, -20, -20, -20, -20, -20,   0,   0, -20,   0,   0],
           [  0,   0,   0,   0, -20,   0,  10,   0,   0,   0,   0,   3,   0],
           [  0,   5,   0,   0, -20,   0,   0,   0,   0,   5,   0,   0,   0],
           [  0,   0,   0,   0,   0,   0,   0, -20, -10, -10, -10, -10, -10],
           [  0,   0,   0,   0,   5,   0,   0, -20,   0,   0,   0,   0,   0]]

# NOTE(review): start/end are presumably [column, row] pairs - confirm
# against the environment.
env = Gridworld(gmatrix=gmatrix, delta_s=1.0, energy=5.0, start=[2, 11], end=[9, 3], fstate=[7, 7])
# Global step budget later passed to play.train(max_global_steps=...)
_max_global_steps = 1000000

In [None]:
# Custom GRIDWORLD D (GOAL PERSEVERANCE TEST)
# Create custom gridworld matrix (14 rows x 14 columns)
# Maximum Score = 7
# Running this cell rebinds gmatrix, env and _max_global_steps, replacing
# whichever GRIDWORLD cell was run before it.
gmatrix = [[  0,   0,   0,   0,   0,   0,   0,   0,   5,   0,   0,   0,   0,   0],
           [  0,  10,   0,   0,   0,   5,   0,   0,   0,   0,   0,   0,   0,   0],
           [  0,   0,   0,   0,   0, -20, -20, -20, -20, -20, -20, -20,   5,   0],
           [  0,   0,   0, -20, -20, -20,   0,   0,   5,   0,   0, -20,   0,   0],
           [  0,   0,   0, -20,   0,   0,   0,   0,   0,   0,   0, -20,   0,   0],
           [  0,   5,   0, -20,   0,   5,   0, -20, -20,   0,   0, -20,   0,   0],
           [  0,   0,   0, -20,   0,   0,   0,   0, -20,   0,   0, -20,   0,   0],
           [  0,   0,   0, -20, -20, -20,   0, -20, -20,   0,   5, -20,   0,   5],
           [  0,   0,   0,   0,   0, -20, -20, -20,   0,   0,   0, -20,   0,   0],
           [  0,   0,   5,   0,   0,   0,   0,   0,   0,   0,   0, -20,   0,   0],
           [-20, -20, -20, -20,   0,   0,   5,   0,   0, -20, -20, -20,   0,   0],
           [  0,   0,   0, -20, -20, -20, -20, -20, -20, -20,   0,   0,   0,   0],
           [  0,   0,   0,   0,   0,   0,   0,   0,   0,   5,   0,   0,  10,   0],
           [  0,   0,   0,   0,   0,   5,   0,   0,   0,   0,   0,   0,   0,   0]]

# NOTE(review): start/end are presumably [column, row] pairs - confirm
# against the environment.
env = Gridworld(gmatrix=gmatrix, delta_s=1.0, energy=5.0, start=[0, 13], end=[6, 7], fstate=[7, 7])
# Larger global step budget than most other maps (1.5M instead of 1M)
_max_global_steps = 1500000

In [None]:
# Custom GRIDWORLD E (ROUTINE DISCOVERY TEST)
# Create custom gridworld matrix (10 rows x 12 columns)
# Maximum Score = 16
# Running this cell rebinds gmatrix, env and _max_global_steps, replacing
# whichever GRIDWORLD cell was run before it.
gmatrix = [[  0,   0,   0,   0,   0, -20,   0,   0,   0,   0,   7,  10],
           [  0,   0,   0,   0,   0, -20,   0,   7,   0,   0,   0,   0],
           [  0,   0,   0,   0,   0, -20,   0,   0,   0,   0,   0,   0],
           [  0,   5,   0,   0,   0,  -9,   0,   0, -20,  -9,  -9, -20],
           [  0,   0,   0,   0,   0,  -9,   0,   0, -20,   0,   0,   0],
           [  0,   0,   0,   0,   0,  -9,   0,   5, -20,   0,   0,   0],
           [  0,   0,   0,   0,   0,  -9,   0,   0, -20,   0,   5,   0],
           [  0,   0,   0,   0,   0, -20, -20, -20, -20,   0,   0,   0],
           [  0,   5,   0,  10,   0, -20,   0,   0,   0,   0,   5,   0],
           [  0,   0,   0,   0,   0, -20,   0,  10,   0,   0,   0,   0]]

# NOTE(review): start/end look like [column, row] pairs - end=[11, 9] only
# fits inside a 10 x 12 matrix in that order; confirm against the environment.
env = Gridworld(gmatrix=gmatrix, delta_s=1.0, energy=10.0, start=[0, 0], end=[11, 9], fstate=[7, 7])
# Global step budget later passed to play.train(max_global_steps=...)
_max_global_steps = 1000000

In [None]:
# Custom GRIDWORLD F (TIGHT NAVIGATION TEST)
# Create custom gridworld matrix (13 rows x 10 columns)
# Maximum Score = 7
# Running this cell rebinds gmatrix, env and _max_global_steps, replacing
# whichever GRIDWORLD cell was run before it.
gmatrix = [[  0, -10, -20, -20, -20, -20, -20, -20, -20, -20],
           [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
           [  0,   5,   0,   0,   5,   0,   0,   0,   0,   0],
           [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
           [-20, -20, -20, -20, -20, -20, -20, -10,   0,   0],
           [  0,   0,   0,   0,   5,   0,   0,   0,   0,   0],
           [  0,   0,   0,   0,   0,   0,   0,   0,   0,  10],
           [ 10,   0, -20, -20, -20, -20, -20, -20, -20, -20],
           [  0,   0, -20,   0,   0,   0,   0,   0,   0,   0],
           [  0,   0, -20,   0,   0,   5,   0, -10,   0,   0],
           [  0,   0, -20,   0,   0,   0,   0, -20,   5,   0],
           [  0,   5, -10,   0,   5,   0,   0, -20,   0,   0],
           [  0,   0,   0,   0,   0,   0,   0, -20,   0,   0]]

# NOTE(review): start/end look like [column, row] pairs - start=[9, 12] only
# fits inside a 13 x 10 matrix in that order; confirm against the environment.
env = Gridworld(gmatrix=gmatrix, delta_s=1.0, energy=5.0, start=[9, 12], end=[0, 0], fstate=[7, 7])
# Larger global step budget than most other maps (1.5M instead of 1M)
_max_global_steps = 1500000

In [None]:
# Custom GRIDWORLD G (OPTIMAL POLICY DISCOVERY TEST)
# Create custom gridworld matrix (16 rows x 16 columns)
# Maximum Observed Machine Score = 77
# Running this cell rebinds gmatrix, env and _max_global_steps, replacing
# whichever GRIDWORLD cell was run before it.
gmatrix = [[-18,   0,   0,   0,   0,   0, -10,   5,   3,   0,  10,   0,   0,   0,   3,   0],
           [  0,   0,  -9,   3,   0,   0,   0,   0,  -5,   9,  -8,  -4, -19,   0,   0,   0],
           [  6,   5,   7,   1,   0,   4,   0, -17, -11,  -3,   2,   8,   0,   4,   1, -15],
           [-10,   0,  -7,   1, -13,   0,   0,   6,   0, -11, -10,   0,   0,  -7,   0,  -8],
           [  0, -18,   0, -10,  10,  -1,  -9,   0, -16, -18,  -2,   0,   0, -12, -13,   2],
           [ -2,  -2,   8,   0,   0, -10,   0,   0,   0,  -5,   0,   7,   0,   0,   0,   0],
           [ -8, -16,   0,   0,   0,  -9, -13,   5,  -8,   0,   0,   8,   2,   0,   6, -20],
           [  0,   0, -12,   4, -19,   8, -15,  10, -13, -18,   0, -17, -10,   0,   0,   0],
           [-19,   0,   7,   1,   0,  -5,   0,  -9,   0,   5,   3,   0,   0,  -9,  10,   0],
           [ -3,   8,   0,  -5,   0,   0,   5,   0,   0,   0,   0,   0, -13,   0,   0, -12],
           [-14,   0,   0,   0,   9,   0,   0, -15,   0,  -6,   4,   0,   0,   0,  -5, -20],
           [-17,   1,   0,   0,   4, -10,   0,   8, -19,   0,   0,   0,   0,  -4,   9,   0],
           [  0,   7,   0,   0,   0,   0,   4,   0,  -5, -19,   0,   8,   0,  -5, -12,  -8],
           [ -9,   0,  10,   0, -18,   0, -14,   0,   0,  -1,  -5,  -1,  -9,   0, -18, -10],
           [  1,  -3,   0,  -5,   0,   4,  -3,  10,   5,   2,   0,  -5,   0,   6, -12,   0],
           [  0,   0,   0,   4,  -9,   0,  -2,   0, -15, -10,   0,   0, -15,   0,  -2,   0]]

# The only map in this group that uses delta_s=1.5 instead of 1.0.
# NOTE(review): start/end are presumably [column, row] pairs - confirm
# against the environment.
env = Gridworld(gmatrix=gmatrix, delta_s=1.5, energy=10.0, start=[13, 15], end=[2, 0], fstate=[7, 7])
# Global step budget later passed to play.train(max_global_steps=...)
_max_global_steps = 1000000

# Training and Testing

In [14]:
# Create checkpoints save folder
# checkpoint_base is handed to Play(checkpoint_dir=...), which appends
# "/actor", "/critic" and "/test_log" to it.
# NOTE(review): presumably create_savefolder returns the folder path as a
# string - confirm in netfunctions.
checkpoint_base = nf.create_savefolder(base_directory="ppo_models/env_A/training")

In [15]:
# Initialize play
# Builds the actor and critic models plus 10 training workers and one test
# worker, all configured from the gridworld currently bound to `env`.
play = Play(num_workers=10, checkpoint_dir=checkpoint_base, custom_env=env)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Use keras.layers.max_pooling2d instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.cast instead.


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Trying to restore last checkpoint ...
Failed to restore checkpoint from: ppo_models/env_A/training/actor
Initializing variables instead.
Trying to restore last checkpoint ...
Failed to restore checkpoint from: ppo_models/env_A/training/critic
Initializing variables instead.


In [16]:
# Set training parameters
# NOTE(review): Play.__init__ already built the workers with the previous
# steps/discount_factor values (init_worker copies them into each Worker),
# so confirm these overrides actually reach the workers before training.
play.discount_factor = 0.98
play.policy_lr = 4e-4 #5e-4
play.value_lr = 5e-4  #6e-4
play.steps = 8
play.batch_size = 40

# Train and improve the policy
# train() rounds eval_every up to a multiple of steps * num_workers * step_iter;
# each evaluation runs test_epi test iterations and, with log_test=True and a
# checkpoint directory set, writes the averaged result to the test log.
play.train(max_global_steps=_max_global_steps, step_iter=1, eval_every=10000, test_epi=30, log_test=True)

Worker Experience @ Global Step 0
Worker Experience @ Global Step 80
Worker Experience @ Global Step 160
Worker Experience @ Global Step 240
Worker Experience @ Global Step 320
Worker Experience @ Global Step 400
Worker Experience @ Global Step 480
Worker Experience @ Global Step 560
Worker Experience @ Global Step 640
Worker Experience @ Global Step 720
Worker Experience @ Global Step 800
Worker Experience @ Global Step 880
Worker Experience @ Global Step 960
Worker Experience @ Global Step 1040
Worker Experience @ Global Step 1120
Worker Experience @ Global Step 1200
Worker Experience @ Global Step 1280
Worker Experience @ Global Step 1360
Worker Experience @ Global Step 1440
Worker Experience @ Global Step 1520
Worker Experience @ Global Step 1600
Worker Experience @ Global Step 1680
Worker Experience @ Global Step 1760
Worker Experience @ Global Step 1840
Worker Experience @ Global Step 1920
Worker Experience @ Global Step 2000
Worker Experience @ Global Step 2080
Worker Experience

Worker Experience @ Global Step 11440
Worker Experience @ Global Step 11520
Worker Experience @ Global Step 11600
Worker Experience @ Global Step 11680
Worker Experience @ Global Step 11760
Worker Experience @ Global Step 11840
Worker Experience @ Global Step 11920
Worker Experience @ Global Step 12000
Worker Experience @ Global Step 12080
Worker Experience @ Global Step 12160
Worker Experience @ Global Step 12240
Worker Experience @ Global Step 12320
Worker Experience @ Global Step 12400
Worker Experience @ Global Step 12480
Worker Experience @ Global Step 12560
Worker Experience @ Global Step 12640
Worker Experience @ Global Step 12720
Worker Experience @ Global Step 12800
Worker Experience @ Global Step 12880
Worker Experience @ Global Step 12960
Worker Experience @ Global Step 13040
Worker Experience @ Global Step 13120
Worker Experience @ Global Step 13200
Worker Experience @ Global Step 13280
Worker Experience @ Global Step 13360
Worker Experience @ Global Step 13440
Worker Exper

Worker Experience @ Global Step 21840
Worker Experience @ Global Step 21920
Worker Experience @ Global Step 22000
Worker Experience @ Global Step 22080
Worker Experience @ Global Step 22160
Worker Experience @ Global Step 22240
Worker Experience @ Global Step 22320
Worker Experience @ Global Step 22400
Worker Experience @ Global Step 22480
Worker Experience @ Global Step 22560
Worker Experience @ Global Step 22640
Worker Experience @ Global Step 22720
Worker Experience @ Global Step 22800
Worker Experience @ Global Step 22880
Worker Experience @ Global Step 22960
Worker Experience @ Global Step 23040
Worker Experience @ Global Step 23120
Worker Experience @ Global Step 23200
Worker Experience @ Global Step 23280
Worker Experience @ Global Step 23360
Worker Experience @ Global Step 23440
Worker Experience @ Global Step 23520
Worker Experience @ Global Step 23600
Worker Experience @ Global Step 23680
Worker Experience @ Global Step 23760
Worker Experience @ Global Step 23840
Worker Exper

Worker Experience @ Global Step 32960
Worker Experience @ Global Step 33040
Worker Experience @ Global Step 33120
Worker Experience @ Global Step 33200
Worker Experience @ Global Step 33280
Worker Experience @ Global Step 33360
Worker Experience @ Global Step 33440
Worker Experience @ Global Step 33520
Worker Experience @ Global Step 33600
Worker Experience @ Global Step 33680
Worker Experience @ Global Step 33760
Worker Experience @ Global Step 33840
Worker Experience @ Global Step 33920
Worker Experience @ Global Step 34000
Worker Experience @ Global Step 34080
Worker Experience @ Global Step 34160
Worker Experience @ Global Step 34240
Worker Experience @ Global Step 34320
Worker Experience @ Global Step 34400
Worker Experience @ Global Step 34480
Worker Experience @ Global Step 34560
Worker Experience @ Global Step 34640
Worker Experience @ Global Step 34720
Worker Experience @ Global Step 34800
Worker Experience @ Global Step 34880
Worker Experience @ Global Step 34960
Worker Exper

Worker Experience @ Global Step 44080
Worker Experience @ Global Step 44160
Worker Experience @ Global Step 44240
Worker Experience @ Global Step 44320
Worker Experience @ Global Step 44400
Worker Experience @ Global Step 44480
Worker Experience @ Global Step 44560
Worker Experience @ Global Step 44640
Worker Experience @ Global Step 44720
Worker Experience @ Global Step 44800
Worker Experience @ Global Step 44880
Worker Experience @ Global Step 44960
Worker Experience @ Global Step 45040
Worker Experience @ Global Step 45120
Worker Experience @ Global Step 45200
Worker Experience @ Global Step 45280
Worker Experience @ Global Step 45360
Worker Experience @ Global Step 45440
Worker Experience @ Global Step 45520
Worker Experience @ Global Step 45600
Worker Experience @ Global Step 45680
Worker Experience @ Global Step 45760
Worker Experience @ Global Step 45840
Worker Experience @ Global Step 45920
Worker Experience @ Global Step 46000
Worker Experience @ Global Step 46080
Worker Exper

Worker Experience @ Global Step 55200
Worker Experience @ Global Step 55280
Worker Experience @ Global Step 55360
Worker Experience @ Global Step 55440
Worker Experience @ Global Step 55520
Worker Experience @ Global Step 55600
Worker Experience @ Global Step 55680
Worker Experience @ Global Step 55760
Worker Experience @ Global Step 55840
Worker Experience @ Global Step 55920
Worker Experience @ Global Step 56000
Worker Experience @ Global Step 56080
Worker Experience @ Global Step 56160
Worker Experience @ Global Step 56240
Worker Experience @ Global Step 56320
Worker Experience @ Global Step 56400
Worker Experience @ Global Step 56480
Worker Experience @ Global Step 56560
Worker Experience @ Global Step 56640
Worker Experience @ Global Step 56720
Worker Experience @ Global Step 56800
Worker Experience @ Global Step 56880
Worker Experience @ Global Step 56960
Worker Experience @ Global Step 57040
Worker Experience @ Global Step 57120
Worker Experience @ Global Step 57200
Worker Exper

Worker Experience @ Global Step 66320
Worker Experience @ Global Step 66400
Worker Experience @ Global Step 66480
Worker Experience @ Global Step 66560
Worker Experience @ Global Step 66640
Worker Experience @ Global Step 66720
Worker Experience @ Global Step 66800
Worker Experience @ Global Step 66880
Worker Experience @ Global Step 66960
Worker Experience @ Global Step 67040
Worker Experience @ Global Step 67120
Worker Experience @ Global Step 67200
Worker Experience @ Global Step 67280
Worker Experience @ Global Step 67360
Worker Experience @ Global Step 67440
Worker Experience @ Global Step 67520
Worker Experience @ Global Step 67600
Worker Experience @ Global Step 67680
Worker Experience @ Global Step 67760
Worker Experience @ Global Step 67840
Worker Experience @ Global Step 67920
Worker Experience @ Global Step 68000
Worker Experience @ Global Step 68080
Worker Experience @ Global Step 68160
Worker Experience @ Global Step 68240
Worker Experience @ Global Step 68320
Worker Exper

Worker Experience @ Global Step 77440
Worker Experience @ Global Step 77520
Worker Experience @ Global Step 77600
Worker Experience @ Global Step 77680
Worker Experience @ Global Step 77760
Worker Experience @ Global Step 77840
Worker Experience @ Global Step 77920
Worker Experience @ Global Step 78000
Worker Experience @ Global Step 78080
Worker Experience @ Global Step 78160
Worker Experience @ Global Step 78240
Worker Experience @ Global Step 78320
Worker Experience @ Global Step 78400
Worker Experience @ Global Step 78480
Worker Experience @ Global Step 78560
Worker Experience @ Global Step 78640
Worker Experience @ Global Step 78720
Worker Experience @ Global Step 78800
Worker Experience @ Global Step 78880
Worker Experience @ Global Step 78960
Worker Experience @ Global Step 79040
Worker Experience @ Global Step 79120
Worker Experience @ Global Step 79200
Worker Experience @ Global Step 79280
Worker Experience @ Global Step 79360
Worker Experience @ Global Step 79440
Worker Exper

Worker Experience @ Global Step 88560
Worker Experience @ Global Step 88640
Worker Experience @ Global Step 88720
Worker Experience @ Global Step 88800
Worker Experience @ Global Step 88880
Worker Experience @ Global Step 88960
Worker Experience @ Global Step 89040
Worker Experience @ Global Step 89120
Worker Experience @ Global Step 89200
Worker Experience @ Global Step 89280
Worker Experience @ Global Step 89360
Worker Experience @ Global Step 89440
Worker Experience @ Global Step 89520
Worker Experience @ Global Step 89600
Worker Experience @ Global Step 89680
Worker Experience @ Global Step 89760
Worker Experience @ Global Step 89840
Worker Experience @ Global Step 89920
Worker Experience @ Global Step 90000
*****Policy_Net Model Saved Checkpoint...
*****Value_Net Model Saved Checkpoint...
***Testing the Policy @ Global Step 90000
Episode:   0	 Final Score:  0.00	 Acc. Reward: -0.84	 Min_Dist:  10.00	 Epi_Len:  14.00
Episode:   1	 Final Score:  0.00	 Acc. Reward: -1.30	 Min_Dist:  

Worker Experience @ Global Step 99680
Worker Experience @ Global Step 99760
Worker Experience @ Global Step 99840
Worker Experience @ Global Step 99920
Worker Experience @ Global Step 100000
*****Policy_Net Model Saved Checkpoint...
*****Value_Net Model Saved Checkpoint...
***Testing the Policy @ Global Step 100000
Episode:   0	 Final Score:  0.00	 Acc. Reward: -1.61	 Min_Dist:  12.00	 Epi_Len:  10.00
Episode:   1	 Final Score:  0.00	 Acc. Reward: -2.13	 Min_Dist:  11.00	 Epi_Len:  10.00
Episode:   2	 Final Score:  0.00	 Acc. Reward: -0.76	 Min_Dist:  10.00	 Epi_Len:  12.00
Episode:   3	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:  10.00	 Epi_Len:  10.00
Episode:   4	 Final Score:  0.00	 Acc. Reward: -1.61	 Min_Dist:  12.00	 Epi_Len:  10.00
Episode:   5	 Final Score:  0.00	 Acc. Reward: -1.07	 Min_Dist:  10.00	 Epi_Len:   3.00
Episode:   6	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:   7	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:  12

Episode:   2	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:   3	 Final Score:  0.00	 Acc. Reward: -1.87	 Min_Dist:  12.00	 Epi_Len:  10.00
Episode:   4	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:   5	 Final Score:  0.00	 Acc. Reward: -1.61	 Min_Dist:  11.00	 Epi_Len:  10.00
Episode:   6	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:  11.00	 Epi_Len:  10.00
Episode:   7	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:  12.00	 Epi_Len:  10.00
Episode:   8	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:  11.00	 Epi_Len:  10.00
Episode:   9	 Final Score:  0.00	 Acc. Reward: -1.61	 Min_Dist:  12.00	 Epi_Len:  10.00
Episode:  10	 Final Score:  0.00	 Acc. Reward: -1.04	 Min_Dist:   9.00	 Epi_Len:  19.00
Episode:  11	 Final Score:  0.00	 Acc. Reward: -1.61	 Min_Dist:  12.00	 Epi_Len:  10.00
Episode:  12	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  13	 Final Score:  0.00

Episode:   7	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:   8	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:   9	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  10	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   7.00	 Epi_Len:  20.00
Episode:  11	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:  12	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  13	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  14	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  15	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  16	 Final Score:  0.00	 Acc. Reward: -0.76	 Min_Dist:   9.00	 Epi_Len:  12.00
Episode:  17	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  18	 Final Score:  0.00

Episode:  12	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  13	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  14	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  15	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  16	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:  17	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:   7.00	 Epi_Len:  20.00
Episode:  18	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  19	 Final Score:  0.00	 Acc. Reward: -1.61	 Min_Dist:  12.00	 Epi_Len:  10.00
Episode:  20	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  21	 Final Score:  0.00	 Acc. Reward: -1.61	 Min_Dist:  12.00	 Epi_Len:  10.00
Episode:  22	 Final Score:  0.00	 Acc. Reward: -0.92	 Min_Dist:   8.00	 Epi_Len:  16.00
Episode:  23	 Final Score:  0.00

Episode:  17	 Final Score:  0.00	 Acc. Reward: -1.06	 Min_Dist:  10.00	 Epi_Len:  13.00
Episode:  18	 Final Score:  0.00	 Acc. Reward: -1.04	 Min_Dist:   7.00	 Epi_Len:  19.00
Episode:  19	 Final Score:  0.00	 Acc. Reward: -0.68	 Min_Dist:  10.00	 Epi_Len:  10.00
Episode:  20	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:  21	 Final Score:  0.00	 Acc. Reward: -2.13	 Min_Dist:  10.00	 Epi_Len:  10.00
Episode:  22	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  23	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:  11.00	 Epi_Len:  10.00
Episode:  24	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:   8.00	 Epi_Len:  10.00
Episode:  25	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  26	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  27	 Final Score:  0.00	 Acc. Reward: -0.84	 Min_Dist:   8.00	 Epi_Len:  14.00
Episode:  28	 Final Score:  0.00

Episode:  23	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  24	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  25	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  26	 Final Score:  0.00	 Acc. Reward: -1.61	 Min_Dist:  12.00	 Epi_Len:  10.00
Episode:  27	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  28	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  29	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Overall Results: Success Rate:  0/30, Ave. Score:  0.00, Ave. Rewards: -1.18, Ave. Min_Dist  9.53, Ave. Epi_Length  17.80
logfile on global steps=150000.0 saved...
Worker Experience @ Global Step 150080
Worker Experience @ Global Step 150160
Worker Experience @ Global Step 150240
Worker Experience @ Global Step 150320
Worker Experience @ Global Step 150400
Worker Experience @ Globa

Episode:  28	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  29	 Final Score:  0.00	 Acc. Reward: -0.92	 Min_Dist:  10.00	 Epi_Len:  16.00
Overall Results: Success Rate:  0/30, Ave. Score:  0.00, Ave. Rewards: -1.15, Ave. Min_Dist  8.80, Ave. Epi_Length  16.93
logfile on global steps=160000.0 saved...
Worker Experience @ Global Step 160080
Worker Experience @ Global Step 160160
Worker Experience @ Global Step 160240
Worker Experience @ Global Step 160320
Worker Experience @ Global Step 160400
Worker Experience @ Global Step 160480
Worker Experience @ Global Step 160560
Worker Experience @ Global Step 160640
Worker Experience @ Global Step 160720
Worker Experience @ Global Step 160800
Worker Experience @ Global Step 160880
Worker Experience @ Global Step 160960
Worker Experience @ Global Step 161040
Worker Experience @ Global Step 161120
Worker Experience @ Global Step 161200
Worker Experience @ Global Step 161280
Worker Experience @ Global Step 1613

Worker Experience @ Global Step 170240
Worker Experience @ Global Step 170320
Worker Experience @ Global Step 170400
Worker Experience @ Global Step 170480
Worker Experience @ Global Step 170560
Worker Experience @ Global Step 170640
Worker Experience @ Global Step 170720
Worker Experience @ Global Step 170800
Worker Experience @ Global Step 170880
Worker Experience @ Global Step 170960
Worker Experience @ Global Step 171040
Worker Experience @ Global Step 171120
Worker Experience @ Global Step 171200
Worker Experience @ Global Step 171280
Worker Experience @ Global Step 171360
Worker Experience @ Global Step 171440
Worker Experience @ Global Step 171520
Worker Experience @ Global Step 171600
Worker Experience @ Global Step 171680
Worker Experience @ Global Step 171760
Worker Experience @ Global Step 171840
Worker Experience @ Global Step 171920
Worker Experience @ Global Step 172000
Worker Experience @ Global Step 172080
Worker Experience @ Global Step 172160
Worker Experience @ Globa

Worker Experience @ Global Step 181040
Worker Experience @ Global Step 181120
Worker Experience @ Global Step 181200
Worker Experience @ Global Step 181280
Worker Experience @ Global Step 181360
Worker Experience @ Global Step 181440
Worker Experience @ Global Step 181520
Worker Experience @ Global Step 181600
Worker Experience @ Global Step 181680
Worker Experience @ Global Step 181760
Worker Experience @ Global Step 181840
Worker Experience @ Global Step 181920
Worker Experience @ Global Step 182000
Worker Experience @ Global Step 182080
Worker Experience @ Global Step 182160
Worker Experience @ Global Step 182240
Worker Experience @ Global Step 182320
Worker Experience @ Global Step 182400
Worker Experience @ Global Step 182480
Worker Experience @ Global Step 182560
Worker Experience @ Global Step 182640
Worker Experience @ Global Step 182720
Worker Experience @ Global Step 182800
Worker Experience @ Global Step 182880
Worker Experience @ Global Step 182960
Worker Experience @ Globa

Worker Experience @ Global Step 191840
Worker Experience @ Global Step 191920
Worker Experience @ Global Step 192000
Worker Experience @ Global Step 192080
Worker Experience @ Global Step 192160
Worker Experience @ Global Step 192240
Worker Experience @ Global Step 192320
Worker Experience @ Global Step 192400
Worker Experience @ Global Step 192480
Worker Experience @ Global Step 192560
Worker Experience @ Global Step 192640
Worker Experience @ Global Step 192720
Worker Experience @ Global Step 192800
Worker Experience @ Global Step 192880
Worker Experience @ Global Step 192960
Worker Experience @ Global Step 193040
Worker Experience @ Global Step 193120
Worker Experience @ Global Step 193200
Worker Experience @ Global Step 193280
Worker Experience @ Global Step 193360
Worker Experience @ Global Step 193440
Worker Experience @ Global Step 193520
Worker Experience @ Global Step 193600
Worker Experience @ Global Step 193680
Worker Experience @ Global Step 193760
Worker Experience @ Globa

Worker Experience @ Global Step 202640
Worker Experience @ Global Step 202720
Worker Experience @ Global Step 202800
Worker Experience @ Global Step 202880
Worker Experience @ Global Step 202960
Worker Experience @ Global Step 203040
Worker Experience @ Global Step 203120
Worker Experience @ Global Step 203200
Worker Experience @ Global Step 203280
Worker Experience @ Global Step 203360
Worker Experience @ Global Step 203440
Worker Experience @ Global Step 203520
Worker Experience @ Global Step 203600
Worker Experience @ Global Step 203680
Worker Experience @ Global Step 203760
Worker Experience @ Global Step 203840
Worker Experience @ Global Step 203920
Worker Experience @ Global Step 204000
Worker Experience @ Global Step 204080
Worker Experience @ Global Step 204160
Worker Experience @ Global Step 204240
Worker Experience @ Global Step 204320
Worker Experience @ Global Step 204400
Worker Experience @ Global Step 204480
Worker Experience @ Global Step 204560
Worker Experience @ Globa

Worker Experience @ Global Step 213440
Worker Experience @ Global Step 213520
Worker Experience @ Global Step 213600
Worker Experience @ Global Step 213680
Worker Experience @ Global Step 213760
Worker Experience @ Global Step 213840
Worker Experience @ Global Step 213920
Worker Experience @ Global Step 214000
Worker Experience @ Global Step 214080
Worker Experience @ Global Step 214160
Worker Experience @ Global Step 214240
Worker Experience @ Global Step 214320
Worker Experience @ Global Step 214400
Worker Experience @ Global Step 214480
Worker Experience @ Global Step 214560
Worker Experience @ Global Step 214640
Worker Experience @ Global Step 214720
Worker Experience @ Global Step 214800
Worker Experience @ Global Step 214880
Worker Experience @ Global Step 214960
Worker Experience @ Global Step 215040
Worker Experience @ Global Step 215120
Worker Experience @ Global Step 215200
Worker Experience @ Global Step 215280
Worker Experience @ Global Step 215360
Worker Experience @ Globa

Worker Experience @ Global Step 224240
Worker Experience @ Global Step 224320
Worker Experience @ Global Step 224400
Worker Experience @ Global Step 224480
Worker Experience @ Global Step 224560
Worker Experience @ Global Step 224640
Worker Experience @ Global Step 224720
Worker Experience @ Global Step 224800
Worker Experience @ Global Step 224880
Worker Experience @ Global Step 224960
Worker Experience @ Global Step 225040
Worker Experience @ Global Step 225120
Worker Experience @ Global Step 225200
Worker Experience @ Global Step 225280
Worker Experience @ Global Step 225360
Worker Experience @ Global Step 225440
Worker Experience @ Global Step 225520
Worker Experience @ Global Step 225600
Worker Experience @ Global Step 225680
Worker Experience @ Global Step 225760
Worker Experience @ Global Step 225840
Worker Experience @ Global Step 225920
Worker Experience @ Global Step 226000
Worker Experience @ Global Step 226080
Worker Experience @ Global Step 226160
Worker Experience @ Globa

Worker Experience @ Global Step 235040
Worker Experience @ Global Step 235120
Worker Experience @ Global Step 235200
Worker Experience @ Global Step 235280
Worker Experience @ Global Step 235360
Worker Experience @ Global Step 235440
Worker Experience @ Global Step 235520
Worker Experience @ Global Step 235600
Worker Experience @ Global Step 235680
Worker Experience @ Global Step 235760
Worker Experience @ Global Step 235840
Worker Experience @ Global Step 235920
Worker Experience @ Global Step 236000
Worker Experience @ Global Step 236080
Worker Experience @ Global Step 236160
Worker Experience @ Global Step 236240
Worker Experience @ Global Step 236320
Worker Experience @ Global Step 236400
Worker Experience @ Global Step 236480
Worker Experience @ Global Step 236560
Worker Experience @ Global Step 236640
Worker Experience @ Global Step 236720
Worker Experience @ Global Step 236800
Worker Experience @ Global Step 236880
Worker Experience @ Global Step 236960
Worker Experience @ Globa

Worker Experience @ Global Step 245840
Worker Experience @ Global Step 245920
Worker Experience @ Global Step 246000
Worker Experience @ Global Step 246080
Worker Experience @ Global Step 246160
Worker Experience @ Global Step 246240
Worker Experience @ Global Step 246320
Worker Experience @ Global Step 246400
Worker Experience @ Global Step 246480
Worker Experience @ Global Step 246560
Worker Experience @ Global Step 246640
Worker Experience @ Global Step 246720
Worker Experience @ Global Step 246800
Worker Experience @ Global Step 246880
Worker Experience @ Global Step 246960
Worker Experience @ Global Step 247040
Worker Experience @ Global Step 247120
Worker Experience @ Global Step 247200
Worker Experience @ Global Step 247280
Worker Experience @ Global Step 247360
Worker Experience @ Global Step 247440
Worker Experience @ Global Step 247520
Worker Experience @ Global Step 247600
Worker Experience @ Global Step 247680
Worker Experience @ Global Step 247760
Worker Experience @ Globa

Worker Experience @ Global Step 256640
Worker Experience @ Global Step 256720
Worker Experience @ Global Step 256800
Worker Experience @ Global Step 256880
Worker Experience @ Global Step 256960
Worker Experience @ Global Step 257040
Worker Experience @ Global Step 257120
Worker Experience @ Global Step 257200
Worker Experience @ Global Step 257280
Worker Experience @ Global Step 257360
Worker Experience @ Global Step 257440
Worker Experience @ Global Step 257520
Worker Experience @ Global Step 257600
Worker Experience @ Global Step 257680
Worker Experience @ Global Step 257760
Worker Experience @ Global Step 257840
Worker Experience @ Global Step 257920
Worker Experience @ Global Step 258000
Worker Experience @ Global Step 258080
Worker Experience @ Global Step 258160
Worker Experience @ Global Step 258240
Worker Experience @ Global Step 258320
Worker Experience @ Global Step 258400
Worker Experience @ Global Step 258480
Worker Experience @ Global Step 258560
Worker Experience @ Globa

Worker Experience @ Global Step 267440
Worker Experience @ Global Step 267520
Worker Experience @ Global Step 267600
Worker Experience @ Global Step 267680
Worker Experience @ Global Step 267760
Worker Experience @ Global Step 267840
Worker Experience @ Global Step 267920
Worker Experience @ Global Step 268000
Worker Experience @ Global Step 268080
Worker Experience @ Global Step 268160
Worker Experience @ Global Step 268240
Worker Experience @ Global Step 268320
Worker Experience @ Global Step 268400
Worker Experience @ Global Step 268480
Worker Experience @ Global Step 268560
Worker Experience @ Global Step 268640
Worker Experience @ Global Step 268720
Worker Experience @ Global Step 268800
Worker Experience @ Global Step 268880
Worker Experience @ Global Step 268960
Worker Experience @ Global Step 269040
Worker Experience @ Global Step 269120
Worker Experience @ Global Step 269200
Worker Experience @ Global Step 269280
Worker Experience @ Global Step 269360
Worker Experience @ Globa

Worker Experience @ Global Step 278240
Worker Experience @ Global Step 278320
Worker Experience @ Global Step 278400
Worker Experience @ Global Step 278480
Worker Experience @ Global Step 278560
Worker Experience @ Global Step 278640
Worker Experience @ Global Step 278720
Worker Experience @ Global Step 278800
Worker Experience @ Global Step 278880
Worker Experience @ Global Step 278960
Worker Experience @ Global Step 279040
Worker Experience @ Global Step 279120
Worker Experience @ Global Step 279200
Worker Experience @ Global Step 279280
Worker Experience @ Global Step 279360
Worker Experience @ Global Step 279440
Worker Experience @ Global Step 279520
Worker Experience @ Global Step 279600
Worker Experience @ Global Step 279680
Worker Experience @ Global Step 279760
Worker Experience @ Global Step 279840
Worker Experience @ Global Step 279920
Worker Experience @ Global Step 280000
*****Policy_Net Model Saved Checkpoint...
*****Value_Net Model Saved Checkpoint...
***Testing the Polic

Worker Experience @ Global Step 289040
Worker Experience @ Global Step 289120
Worker Experience @ Global Step 289200
Worker Experience @ Global Step 289280
Worker Experience @ Global Step 289360
Worker Experience @ Global Step 289440
Worker Experience @ Global Step 289520
Worker Experience @ Global Step 289600
Worker Experience @ Global Step 289680
Worker Experience @ Global Step 289760
Worker Experience @ Global Step 289840
Worker Experience @ Global Step 289920
Worker Experience @ Global Step 290000
*****Policy_Net Model Saved Checkpoint...
*****Value_Net Model Saved Checkpoint...
***Testing the Policy @ Global Step 290000
Episode:   0	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:   7.00	 Epi_Len:  20.00
Episode:   1	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:   2	 Final Score:  0.00	 Acc. Reward: -1.60	 Min_Dist:   6.00	 Epi_Len:  20.00
Episode:   3	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:  11.00	 Epi_Len:  10.00
Episode:   4	 F

Worker Experience @ Global Step 299840
Worker Experience @ Global Step 299920
Worker Experience @ Global Step 300000
*****Policy_Net Model Saved Checkpoint...
*****Value_Net Model Saved Checkpoint...
***Testing the Policy @ Global Step 300000
Episode:   0	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   6.00	 Epi_Len:  20.00
Episode:   1	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:   2	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:   3	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:   4	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   7.00	 Epi_Len:  20.00
Episode:   5	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:   6	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:   7	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:   8	 Final Score:  0.00	 Acc. Reward: -1.08	

Episode:   2	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:   3	 Final Score:  0.00	 Acc. Reward: -1.61	 Min_Dist:  10.00	 Epi_Len:  10.00
Episode:   4	 Final Score:  0.00	 Acc. Reward: -0.68	 Min_Dist:  10.00	 Epi_Len:  10.00
Episode:   5	 Final Score:  0.00	 Acc. Reward: -1.61	 Min_Dist:  10.00	 Epi_Len:  10.00
Episode:   6	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:  11.00	 Epi_Len:  10.00
Episode:   7	 Final Score:  0.00	 Acc. Reward: -1.60	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:   8	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:   9	 Final Score:  0.00	 Acc. Reward: -1.11	 Min_Dist:  10.00	 Epi_Len:   4.00
Episode:  10	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  11	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  12	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  13	 Final Score:  0.00

Episode:   7	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   7.00	 Epi_Len:  20.00
Episode:   8	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:   6.00	 Epi_Len:  20.00
Episode:   9	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  10	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   7.00	 Epi_Len:  20.00
Episode:  11	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  12	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  13	 Final Score:  0.00	 Acc. Reward: -1.11	 Min_Dist:  10.00	 Epi_Len:   4.00
Episode:  14	 Final Score:  0.00	 Acc. Reward: -1.00	 Min_Dist:  10.00	 Epi_Len:  18.00
Episode:  15	 Final Score:  0.00	 Acc. Reward: -1.00	 Min_Dist:  10.00	 Epi_Len:  18.00
Episode:  16	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  17	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   5.00	 Epi_Len:  20.00
Episode:  18	 Final Score:  0.00

Episode:  12	 Final Score:  0.00	 Acc. Reward: -0.96	 Min_Dist:   8.00	 Epi_Len:  17.00
Episode:  13	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  14	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   6.00	 Epi_Len:  20.00
Episode:  15	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  16	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:   6.00	 Epi_Len:  20.00
Episode:  17	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  18	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:  11.00	 Epi_Len:  10.00
Episode:  19	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  20	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  21	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:  22	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  23	 Final Score:  0.00

Episode:  17	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  18	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  19	 Final Score:  0.00	 Acc. Reward: -0.76	 Min_Dist:  10.00	 Epi_Len:  12.00
Episode:  20	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:  10.00	 Epi_Len:  10.00
Episode:  21	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   7.00	 Epi_Len:  20.00
Episode:  22	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  23	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  24	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   7.00	 Epi_Len:  20.00
Episode:  25	 Final Score:  0.00	 Acc. Reward: -1.19	 Min_Dist:  10.00	 Epi_Len:   6.00
Episode:  26	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  27	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:  28	 Final Score:  0.00

Episode:  22	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  23	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  24	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   7.00	 Epi_Len:  20.00
Episode:  25	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  26	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  27	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   5.00	 Epi_Len:  20.00
Episode:  28	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  29	 Final Score:  0.00	 Acc. Reward: -1.04	 Min_Dist:   9.00	 Epi_Len:  19.00
Overall Results: Success Rate:  0/30, Ave. Score:  0.00, Ave. Rewards: -1.14, Ave. Min_Dist  9.20, Ave. Epi_Length  18.33
logfile on global steps=350000.0 saved...
Worker Experience @ Global Step 350080
Worker Experience @ Global Step 350160
Worker Experience @ Global Step 350240
Worker Experien

Episode:  27	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   7.00	 Epi_Len:  20.00
Episode:  28	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  29	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   7.00	 Epi_Len:  20.00
Overall Results: Success Rate:  0/30, Ave. Score:  0.00, Ave. Rewards: -1.10, Ave. Min_Dist  8.27, Ave. Epi_Length  19.13
logfile on global steps=360000.0 saved...
Worker Experience @ Global Step 360080
Worker Experience @ Global Step 360160
Worker Experience @ Global Step 360240
Worker Experience @ Global Step 360320
Worker Experience @ Global Step 360400
Worker Experience @ Global Step 360480
Worker Experience @ Global Step 360560
Worker Experience @ Global Step 360640
Worker Experience @ Global Step 360720
Worker Experience @ Global Step 360800
Worker Experience @ Global Step 360880
Worker Experience @ Global Step 360960
Worker Experience @ Global Step 361040
Worker Experience @ Global Step 361120
Worker Experience @ Global

Worker Experience @ Global Step 370080
Worker Experience @ Global Step 370160
Worker Experience @ Global Step 370240
Worker Experience @ Global Step 370320
Worker Experience @ Global Step 370400
Worker Experience @ Global Step 370480
Worker Experience @ Global Step 370560
Worker Experience @ Global Step 370640
Worker Experience @ Global Step 370720
Worker Experience @ Global Step 370800
Worker Experience @ Global Step 370880
Worker Experience @ Global Step 370960
Worker Experience @ Global Step 371040
Worker Experience @ Global Step 371120
Worker Experience @ Global Step 371200
Worker Experience @ Global Step 371280
Worker Experience @ Global Step 371360
Worker Experience @ Global Step 371440
Worker Experience @ Global Step 371520
Worker Experience @ Global Step 371600
Worker Experience @ Global Step 371680
Worker Experience @ Global Step 371760
Worker Experience @ Global Step 371840
Worker Experience @ Global Step 371920
Worker Experience @ Global Step 372000
Worker Experience @ Globa

Worker Experience @ Global Step 380880
Worker Experience @ Global Step 380960
Worker Experience @ Global Step 381040
Worker Experience @ Global Step 381120
Worker Experience @ Global Step 381200
Worker Experience @ Global Step 381280
Worker Experience @ Global Step 381360
Worker Experience @ Global Step 381440
Worker Experience @ Global Step 381520
Worker Experience @ Global Step 381600
Worker Experience @ Global Step 381680
Worker Experience @ Global Step 381760
Worker Experience @ Global Step 381840
Worker Experience @ Global Step 381920
Worker Experience @ Global Step 382000
Worker Experience @ Global Step 382080
Worker Experience @ Global Step 382160
Worker Experience @ Global Step 382240
Worker Experience @ Global Step 382320
Worker Experience @ Global Step 382400
Worker Experience @ Global Step 382480
Worker Experience @ Global Step 382560
Worker Experience @ Global Step 382640
Worker Experience @ Global Step 382720
Worker Experience @ Global Step 382800
Worker Experience @ Globa

Worker Experience @ Global Step 391680
Worker Experience @ Global Step 391760
Worker Experience @ Global Step 391840
Worker Experience @ Global Step 391920
Worker Experience @ Global Step 392000
Worker Experience @ Global Step 392080
Worker Experience @ Global Step 392160
Worker Experience @ Global Step 392240
Worker Experience @ Global Step 392320
Worker Experience @ Global Step 392400
Worker Experience @ Global Step 392480
Worker Experience @ Global Step 392560
Worker Experience @ Global Step 392640
Worker Experience @ Global Step 392720
Worker Experience @ Global Step 392800
Worker Experience @ Global Step 392880
Worker Experience @ Global Step 392960
Worker Experience @ Global Step 393040
Worker Experience @ Global Step 393120
Worker Experience @ Global Step 393200
Worker Experience @ Global Step 393280
Worker Experience @ Global Step 393360
Worker Experience @ Global Step 393440
Worker Experience @ Global Step 393520
Worker Experience @ Global Step 393600
Worker Experience @ Globa

Worker Experience @ Global Step 402480
Worker Experience @ Global Step 402560
Worker Experience @ Global Step 402640
Worker Experience @ Global Step 402720
Worker Experience @ Global Step 402800
Worker Experience @ Global Step 402880
Worker Experience @ Global Step 402960
Worker Experience @ Global Step 403040
Worker Experience @ Global Step 403120
Worker Experience @ Global Step 403200
Worker Experience @ Global Step 403280
Worker Experience @ Global Step 403360
Worker Experience @ Global Step 403440
Worker Experience @ Global Step 403520
Worker Experience @ Global Step 403600
Worker Experience @ Global Step 403680
Worker Experience @ Global Step 403760
Worker Experience @ Global Step 403840
Worker Experience @ Global Step 403920
Worker Experience @ Global Step 404000
Worker Experience @ Global Step 404080
Worker Experience @ Global Step 404160
Worker Experience @ Global Step 404240
Worker Experience @ Global Step 404320
Worker Experience @ Global Step 404400
Worker Experience @ Globa

Worker Experience @ Global Step 413280
Worker Experience @ Global Step 413360
Worker Experience @ Global Step 413440
Worker Experience @ Global Step 413520
Worker Experience @ Global Step 413600
Worker Experience @ Global Step 413680
Worker Experience @ Global Step 413760
Worker Experience @ Global Step 413840
Worker Experience @ Global Step 413920
Worker Experience @ Global Step 414000
Worker Experience @ Global Step 414080
Worker Experience @ Global Step 414160
Worker Experience @ Global Step 414240
Worker Experience @ Global Step 414320
Worker Experience @ Global Step 414400
Worker Experience @ Global Step 414480
Worker Experience @ Global Step 414560
Worker Experience @ Global Step 414640
Worker Experience @ Global Step 414720
Worker Experience @ Global Step 414800
Worker Experience @ Global Step 414880
Worker Experience @ Global Step 414960
Worker Experience @ Global Step 415040
Worker Experience @ Global Step 415120
Worker Experience @ Global Step 415200
Worker Experience @ Globa

Worker Experience @ Global Step 424080
Worker Experience @ Global Step 424160
Worker Experience @ Global Step 424240
Worker Experience @ Global Step 424320
Worker Experience @ Global Step 424400
Worker Experience @ Global Step 424480
Worker Experience @ Global Step 424560
Worker Experience @ Global Step 424640
Worker Experience @ Global Step 424720
Worker Experience @ Global Step 424800
Worker Experience @ Global Step 424880
Worker Experience @ Global Step 424960
Worker Experience @ Global Step 425040
Worker Experience @ Global Step 425120
Worker Experience @ Global Step 425200
Worker Experience @ Global Step 425280
Worker Experience @ Global Step 425360
Worker Experience @ Global Step 425440
Worker Experience @ Global Step 425520
Worker Experience @ Global Step 425600
Worker Experience @ Global Step 425680
Worker Experience @ Global Step 425760
Worker Experience @ Global Step 425840
Worker Experience @ Global Step 425920
Worker Experience @ Global Step 426000
Worker Experience @ Globa

Worker Experience @ Global Step 434880
Worker Experience @ Global Step 434960
Worker Experience @ Global Step 435040
Worker Experience @ Global Step 435120
Worker Experience @ Global Step 435200
Worker Experience @ Global Step 435280
Worker Experience @ Global Step 435360
Worker Experience @ Global Step 435440
Worker Experience @ Global Step 435520
Worker Experience @ Global Step 435600
Worker Experience @ Global Step 435680
Worker Experience @ Global Step 435760
Worker Experience @ Global Step 435840
Worker Experience @ Global Step 435920
Worker Experience @ Global Step 436000
Worker Experience @ Global Step 436080
Worker Experience @ Global Step 436160
Worker Experience @ Global Step 436240
Worker Experience @ Global Step 436320
Worker Experience @ Global Step 436400
Worker Experience @ Global Step 436480
Worker Experience @ Global Step 436560
Worker Experience @ Global Step 436640
Worker Experience @ Global Step 436720
Worker Experience @ Global Step 436800
Worker Experience @ Globa

Worker Experience @ Global Step 445680
Worker Experience @ Global Step 445760
Worker Experience @ Global Step 445840
Worker Experience @ Global Step 445920
Worker Experience @ Global Step 446000
Worker Experience @ Global Step 446080
Worker Experience @ Global Step 446160
Worker Experience @ Global Step 446240
Worker Experience @ Global Step 446320
Worker Experience @ Global Step 446400
Worker Experience @ Global Step 446480
Worker Experience @ Global Step 446560
Worker Experience @ Global Step 446640
Worker Experience @ Global Step 446720
Worker Experience @ Global Step 446800
Worker Experience @ Global Step 446880
Worker Experience @ Global Step 446960
Worker Experience @ Global Step 447040
Worker Experience @ Global Step 447120
Worker Experience @ Global Step 447200
Worker Experience @ Global Step 447280
Worker Experience @ Global Step 447360
Worker Experience @ Global Step 447440
Worker Experience @ Global Step 447520
Worker Experience @ Global Step 447600
Worker Experience @ Globa

Worker Experience @ Global Step 456480
Worker Experience @ Global Step 456560
Worker Experience @ Global Step 456640
Worker Experience @ Global Step 456720
Worker Experience @ Global Step 456800
Worker Experience @ Global Step 456880
Worker Experience @ Global Step 456960
Worker Experience @ Global Step 457040
Worker Experience @ Global Step 457120
Worker Experience @ Global Step 457200
Worker Experience @ Global Step 457280
Worker Experience @ Global Step 457360
Worker Experience @ Global Step 457440
Worker Experience @ Global Step 457520
Worker Experience @ Global Step 457600
Worker Experience @ Global Step 457680
Worker Experience @ Global Step 457760
Worker Experience @ Global Step 457840
Worker Experience @ Global Step 457920
Worker Experience @ Global Step 458000
Worker Experience @ Global Step 458080
Worker Experience @ Global Step 458160
Worker Experience @ Global Step 458240
Worker Experience @ Global Step 458320
Worker Experience @ Global Step 458400
Worker Experience @ Globa

Worker Experience @ Global Step 467280
Worker Experience @ Global Step 467360
Worker Experience @ Global Step 467440
Worker Experience @ Global Step 467520
Worker Experience @ Global Step 467600
Worker Experience @ Global Step 467680
Worker Experience @ Global Step 467760
Worker Experience @ Global Step 467840
Worker Experience @ Global Step 467920
Worker Experience @ Global Step 468000
Worker Experience @ Global Step 468080
Worker Experience @ Global Step 468160
Worker Experience @ Global Step 468240
Worker Experience @ Global Step 468320
Worker Experience @ Global Step 468400
Worker Experience @ Global Step 468480
Worker Experience @ Global Step 468560
Worker Experience @ Global Step 468640
Worker Experience @ Global Step 468720
Worker Experience @ Global Step 468800
Worker Experience @ Global Step 468880
Worker Experience @ Global Step 468960
Worker Experience @ Global Step 469040
Worker Experience @ Global Step 469120
Worker Experience @ Global Step 469200
Worker Experience @ Globa

Worker Experience @ Global Step 478080
Worker Experience @ Global Step 478160
Worker Experience @ Global Step 478240
Worker Experience @ Global Step 478320
Worker Experience @ Global Step 478400
Worker Experience @ Global Step 478480
Worker Experience @ Global Step 478560
Worker Experience @ Global Step 478640
Worker Experience @ Global Step 478720
Worker Experience @ Global Step 478800
Worker Experience @ Global Step 478880
Worker Experience @ Global Step 478960
Worker Experience @ Global Step 479040
Worker Experience @ Global Step 479120
Worker Experience @ Global Step 479200
Worker Experience @ Global Step 479280
Worker Experience @ Global Step 479360
Worker Experience @ Global Step 479440
Worker Experience @ Global Step 479520
Worker Experience @ Global Step 479600
Worker Experience @ Global Step 479680
Worker Experience @ Global Step 479760
Worker Experience @ Global Step 479840
Worker Experience @ Global Step 479920
Worker Experience @ Global Step 480000
*****Policy_Net Model Sav

Worker Experience @ Global Step 488880
Worker Experience @ Global Step 488960
Worker Experience @ Global Step 489040
Worker Experience @ Global Step 489120
Worker Experience @ Global Step 489200
Worker Experience @ Global Step 489280
Worker Experience @ Global Step 489360
Worker Experience @ Global Step 489440
Worker Experience @ Global Step 489520
Worker Experience @ Global Step 489600
Worker Experience @ Global Step 489680
Worker Experience @ Global Step 489760
Worker Experience @ Global Step 489840
Worker Experience @ Global Step 489920
Worker Experience @ Global Step 490000
*****Policy_Net Model Saved Checkpoint...
*****Value_Net Model Saved Checkpoint...
***Testing the Policy @ Global Step 490000
Episode:   0	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:   1	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:   2	 Final Score:  0.00	 Acc. Reward: -1.00	 Min_Dist:   7.00	 Epi_Len:  18.00
Episode:   3	 Final Score

Worker Experience @ Global Step 499680
Worker Experience @ Global Step 499760
Worker Experience @ Global Step 499840
Worker Experience @ Global Step 499920
Worker Experience @ Global Step 500000
*****Policy_Net Model Saved Checkpoint...
*****Value_Net Model Saved Checkpoint...
***Testing the Policy @ Global Step 500000
Episode:   0	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:   1	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:   2	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:   3	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:   4	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:   5	 Final Score:  0.00	 Acc. Reward: -0.84	 Min_Dist:  10.00	 Epi_Len:  14.00
Episode:   6	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:   7	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:

Episode:   1	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:   2	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:   3	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:   4	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:   5	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:  10.00	 Epi_Len:  10.00
Episode:   6	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   6.00	 Epi_Len:  20.00
Episode:   7	 Final Score:  0.00	 Acc. Reward: -0.68	 Min_Dist:  10.00	 Epi_Len:  10.00
Episode:   8	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:   9	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  10	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  11	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  12	 Final Score:  0.00

Episode:   6	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:   7	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:   8	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:   9	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  10	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   6.00	 Epi_Len:  20.00
Episode:  11	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  12	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  13	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  14	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  15	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  16	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:  17	 Final Score:  0.00

Episode:  12	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  13	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:   9.00	 Epi_Len:  10.00
Episode:  14	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  15	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:  16	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   6.00	 Epi_Len:  20.00
Episode:  17	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   5.00	 Epi_Len:  20.00
Episode:  18	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  19	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  20	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  21	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  22	 Final Score:  0.00	 Acc. Reward: -0.80	 Min_Dist:   9.00	 Epi_Len:  13.00
Episode:  23	 Final Score:  0.00

Episode:  17	 Final Score:  0.00	 Acc. Reward: -1.04	 Min_Dist:   7.00	 Epi_Len:  19.00
Episode:  18	 Final Score:  0.00	 Acc. Reward: -1.07	 Min_Dist:  10.00	 Epi_Len:   3.00
Episode:  19	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  20	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  21	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  22	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  23	 Final Score:  0.00	 Acc. Reward: -0.72	 Min_Dist:   9.00	 Epi_Len:  11.00
Episode:  24	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  25	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  26	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  27	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   8.00	 Epi_Len:  20.00
Episode:  28	 Final Score:  0.00

Episode:  22	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  23	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:  11.00	 Epi_Len:  10.00
Episode:  24	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:   9.00	 Epi_Len:  10.00
Episode:  25	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   5.00	 Epi_Len:  20.00
Episode:  26	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   9.00	 Epi_Len:  20.00
Episode:  27	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   5.00	 Epi_Len:  20.00
Episode:  28	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Episode:  29	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:  10.00	 Epi_Len:  20.00
Overall Results: Success Rate:  4/30, Ave. Score:  0.17, Ave. Rewards: -0.79, Ave. Min_Dist  7.43, Ave. Epi_Length  18.50
logfile on global steps=550000.0 saved...
Worker Experience @ Global Step 550080
Worker Experience @ Global Step 550160
Worker Experience @ Global Step 550240
Worker Experien

Episode:  27	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  28	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  29	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Overall Results: Success Rate: 26/30, Ave. Score:  1.27, Ave. Rewards:  1.20, Ave. Min_Dist  0.73, Ave. Epi_Length  21.77
logfile on global steps=560000.0 saved...
Worker Experience @ Global Step 560080
Worker Experience @ Global Step 560160
Worker Experience @ Global Step 560240
Worker Experience @ Global Step 560320
Worker Experience @ Global Step 560400
Worker Experience @ Global Step 560480
Worker Experience @ Global Step 560560
Worker Experience @ Global Step 560640
Worker Experience @ Global Step 560720
Worker Experience @ Global Step 560800
Worker Experience @ Global Step 560880
Worker Experience @ Global Step 560960
Worker Experience @ Global Step 561040
Worker Experience @ Global Step 561120
Worker Experience @ Global

Worker Experience @ Global Step 570080
Worker Experience @ Global Step 570160
Worker Experience @ Global Step 570240
Worker Experience @ Global Step 570320
Worker Experience @ Global Step 570400
Worker Experience @ Global Step 570480
Worker Experience @ Global Step 570560
Worker Experience @ Global Step 570640
Worker Experience @ Global Step 570720
Worker Experience @ Global Step 570800
Worker Experience @ Global Step 570880
Worker Experience @ Global Step 570960
Worker Experience @ Global Step 571040
Worker Experience @ Global Step 571120
Worker Experience @ Global Step 571200
Worker Experience @ Global Step 571280
Worker Experience @ Global Step 571360
Worker Experience @ Global Step 571440
Worker Experience @ Global Step 571520
Worker Experience @ Global Step 571600
Worker Experience @ Global Step 571680
Worker Experience @ Global Step 571760
Worker Experience @ Global Step 571840
Worker Experience @ Global Step 571920
Worker Experience @ Global Step 572000
Worker Experience @ Globa

Worker Experience @ Global Step 580880
Worker Experience @ Global Step 580960
Worker Experience @ Global Step 581040
Worker Experience @ Global Step 581120
Worker Experience @ Global Step 581200
Worker Experience @ Global Step 581280
Worker Experience @ Global Step 581360
Worker Experience @ Global Step 581440
Worker Experience @ Global Step 581520
Worker Experience @ Global Step 581600
Worker Experience @ Global Step 581680
Worker Experience @ Global Step 581760
Worker Experience @ Global Step 581840
Worker Experience @ Global Step 581920
Worker Experience @ Global Step 582000
Worker Experience @ Global Step 582080
Worker Experience @ Global Step 582160
Worker Experience @ Global Step 582240
Worker Experience @ Global Step 582320
Worker Experience @ Global Step 582400
Worker Experience @ Global Step 582480
Worker Experience @ Global Step 582560
Worker Experience @ Global Step 582640
Worker Experience @ Global Step 582720
Worker Experience @ Global Step 582800
Worker Experience @ Globa

Worker Experience @ Global Step 591680
Worker Experience @ Global Step 591760
Worker Experience @ Global Step 591840
Worker Experience @ Global Step 591920
Worker Experience @ Global Step 592000
Worker Experience @ Global Step 592080
Worker Experience @ Global Step 592160
Worker Experience @ Global Step 592240
Worker Experience @ Global Step 592320
Worker Experience @ Global Step 592400
Worker Experience @ Global Step 592480
Worker Experience @ Global Step 592560
Worker Experience @ Global Step 592640
Worker Experience @ Global Step 592720
Worker Experience @ Global Step 592800
Worker Experience @ Global Step 592880
Worker Experience @ Global Step 592960
Worker Experience @ Global Step 593040
Worker Experience @ Global Step 593120
Worker Experience @ Global Step 593200
Worker Experience @ Global Step 593280
Worker Experience @ Global Step 593360
Worker Experience @ Global Step 593440
Worker Experience @ Global Step 593520
Worker Experience @ Global Step 593600
Worker Experience @ Globa

Worker Experience @ Global Step 602480
Worker Experience @ Global Step 602560
Worker Experience @ Global Step 602640
Worker Experience @ Global Step 602720
Worker Experience @ Global Step 602800
Worker Experience @ Global Step 602880
Worker Experience @ Global Step 602960
Worker Experience @ Global Step 603040
Worker Experience @ Global Step 603120
Worker Experience @ Global Step 603200
Worker Experience @ Global Step 603280
Worker Experience @ Global Step 603360
Worker Experience @ Global Step 603440
Worker Experience @ Global Step 603520
Worker Experience @ Global Step 603600
Worker Experience @ Global Step 603680
Worker Experience @ Global Step 603760
Worker Experience @ Global Step 603840
Worker Experience @ Global Step 603920
Worker Experience @ Global Step 604000
Worker Experience @ Global Step 604080
Worker Experience @ Global Step 604160
Worker Experience @ Global Step 604240
Worker Experience @ Global Step 604320
Worker Experience @ Global Step 604400
Worker Experience @ Globa

Worker Experience @ Global Step 613280
Worker Experience @ Global Step 613360
Worker Experience @ Global Step 613440
Worker Experience @ Global Step 613520
Worker Experience @ Global Step 613600
Worker Experience @ Global Step 613680
Worker Experience @ Global Step 613760
Worker Experience @ Global Step 613840
Worker Experience @ Global Step 613920
Worker Experience @ Global Step 614000
Worker Experience @ Global Step 614080
Worker Experience @ Global Step 614160
Worker Experience @ Global Step 614240
Worker Experience @ Global Step 614320
Worker Experience @ Global Step 614400
Worker Experience @ Global Step 614480
Worker Experience @ Global Step 614560
Worker Experience @ Global Step 614640
Worker Experience @ Global Step 614720
Worker Experience @ Global Step 614800
Worker Experience @ Global Step 614880
Worker Experience @ Global Step 614960
Worker Experience @ Global Step 615040
Worker Experience @ Global Step 615120
Worker Experience @ Global Step 615200
Worker Experience @ Globa

Worker Experience @ Global Step 624080
Worker Experience @ Global Step 624160
Worker Experience @ Global Step 624240
Worker Experience @ Global Step 624320
Worker Experience @ Global Step 624400
Worker Experience @ Global Step 624480
Worker Experience @ Global Step 624560
Worker Experience @ Global Step 624640
Worker Experience @ Global Step 624720
Worker Experience @ Global Step 624800
Worker Experience @ Global Step 624880
Worker Experience @ Global Step 624960
Worker Experience @ Global Step 625040
Worker Experience @ Global Step 625120
Worker Experience @ Global Step 625200
Worker Experience @ Global Step 625280
Worker Experience @ Global Step 625360
Worker Experience @ Global Step 625440
Worker Experience @ Global Step 625520
Worker Experience @ Global Step 625600
Worker Experience @ Global Step 625680
Worker Experience @ Global Step 625760
Worker Experience @ Global Step 625840
Worker Experience @ Global Step 625920
Worker Experience @ Global Step 626000
Worker Experience @ Globa

Worker Experience @ Global Step 634880
Worker Experience @ Global Step 634960
Worker Experience @ Global Step 635040
Worker Experience @ Global Step 635120
Worker Experience @ Global Step 635200
Worker Experience @ Global Step 635280
Worker Experience @ Global Step 635360
Worker Experience @ Global Step 635440
Worker Experience @ Global Step 635520
Worker Experience @ Global Step 635600
Worker Experience @ Global Step 635680
Worker Experience @ Global Step 635760
Worker Experience @ Global Step 635840
Worker Experience @ Global Step 635920
Worker Experience @ Global Step 636000
Worker Experience @ Global Step 636080
Worker Experience @ Global Step 636160
Worker Experience @ Global Step 636240
Worker Experience @ Global Step 636320
Worker Experience @ Global Step 636400
Worker Experience @ Global Step 636480
Worker Experience @ Global Step 636560
Worker Experience @ Global Step 636640
Worker Experience @ Global Step 636720
Worker Experience @ Global Step 636800
Worker Experience @ Globa

Worker Experience @ Global Step 645680
Worker Experience @ Global Step 645760
Worker Experience @ Global Step 645840
Worker Experience @ Global Step 645920
Worker Experience @ Global Step 646000
Worker Experience @ Global Step 646080
Worker Experience @ Global Step 646160
Worker Experience @ Global Step 646240
Worker Experience @ Global Step 646320
Worker Experience @ Global Step 646400
Worker Experience @ Global Step 646480
Worker Experience @ Global Step 646560
Worker Experience @ Global Step 646640
Worker Experience @ Global Step 646720
Worker Experience @ Global Step 646800
Worker Experience @ Global Step 646880
Worker Experience @ Global Step 646960
Worker Experience @ Global Step 647040
Worker Experience @ Global Step 647120
Worker Experience @ Global Step 647200
Worker Experience @ Global Step 647280
Worker Experience @ Global Step 647360
Worker Experience @ Global Step 647440
Worker Experience @ Global Step 647520
Worker Experience @ Global Step 647600
Worker Experience @ Globa

Worker Experience @ Global Step 656480
Worker Experience @ Global Step 656560
Worker Experience @ Global Step 656640
Worker Experience @ Global Step 656720
Worker Experience @ Global Step 656800
Worker Experience @ Global Step 656880
Worker Experience @ Global Step 656960
Worker Experience @ Global Step 657040
Worker Experience @ Global Step 657120
Worker Experience @ Global Step 657200
Worker Experience @ Global Step 657280
Worker Experience @ Global Step 657360
Worker Experience @ Global Step 657440
Worker Experience @ Global Step 657520
Worker Experience @ Global Step 657600
Worker Experience @ Global Step 657680
Worker Experience @ Global Step 657760
Worker Experience @ Global Step 657840
Worker Experience @ Global Step 657920
Worker Experience @ Global Step 658000
Worker Experience @ Global Step 658080
Worker Experience @ Global Step 658160
Worker Experience @ Global Step 658240
Worker Experience @ Global Step 658320
Worker Experience @ Global Step 658400
Worker Experience @ Globa

Worker Experience @ Global Step 667280
Worker Experience @ Global Step 667360
Worker Experience @ Global Step 667440
Worker Experience @ Global Step 667520
Worker Experience @ Global Step 667600
Worker Experience @ Global Step 667680
Worker Experience @ Global Step 667760
Worker Experience @ Global Step 667840
Worker Experience @ Global Step 667920
Worker Experience @ Global Step 668000
Worker Experience @ Global Step 668080
Worker Experience @ Global Step 668160
Worker Experience @ Global Step 668240
Worker Experience @ Global Step 668320
Worker Experience @ Global Step 668400
Worker Experience @ Global Step 668480
Worker Experience @ Global Step 668560
Worker Experience @ Global Step 668640
Worker Experience @ Global Step 668720
Worker Experience @ Global Step 668800
Worker Experience @ Global Step 668880
Worker Experience @ Global Step 668960
Worker Experience @ Global Step 669040
Worker Experience @ Global Step 669120
Worker Experience @ Global Step 669200
Worker Experience @ Globa

Worker Experience @ Global Step 678080
Worker Experience @ Global Step 678160
Worker Experience @ Global Step 678240
Worker Experience @ Global Step 678320
Worker Experience @ Global Step 678400
Worker Experience @ Global Step 678480
Worker Experience @ Global Step 678560
Worker Experience @ Global Step 678640
Worker Experience @ Global Step 678720
Worker Experience @ Global Step 678800
Worker Experience @ Global Step 678880
Worker Experience @ Global Step 678960
Worker Experience @ Global Step 679040
Worker Experience @ Global Step 679120
Worker Experience @ Global Step 679200
Worker Experience @ Global Step 679280
Worker Experience @ Global Step 679360
Worker Experience @ Global Step 679440
Worker Experience @ Global Step 679520
Worker Experience @ Global Step 679600
Worker Experience @ Global Step 679680
Worker Experience @ Global Step 679760
Worker Experience @ Global Step 679840
Worker Experience @ Global Step 679920
Worker Experience @ Global Step 680000
*****Policy_Net Model Sav

Worker Experience @ Global Step 688880
Worker Experience @ Global Step 688960
Worker Experience @ Global Step 689040
Worker Experience @ Global Step 689120
Worker Experience @ Global Step 689200
Worker Experience @ Global Step 689280
Worker Experience @ Global Step 689360
Worker Experience @ Global Step 689440
Worker Experience @ Global Step 689520
Worker Experience @ Global Step 689600
Worker Experience @ Global Step 689680
Worker Experience @ Global Step 689760
Worker Experience @ Global Step 689840
Worker Experience @ Global Step 689920
Worker Experience @ Global Step 690000
*****Policy_Net Model Saved Checkpoint...
*****Value_Net Model Saved Checkpoint...
***Testing the Policy @ Global Step 690000
Episode:   0	 Final Score:  1.00	 Acc. Reward:  1.28	 Min_Dist:   0.00	 Epi_Len:  24.00
Episode:   1	 Final Score:  1.00	 Acc. Reward:  1.42	 Min_Dist:   0.00	 Epi_Len:  27.00
Episode:   2	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:   3	 Final Score

Worker Experience @ Global Step 699680
Worker Experience @ Global Step 699760
Worker Experience @ Global Step 699840
Worker Experience @ Global Step 699920
Worker Experience @ Global Step 700000
*****Policy_Net Model Saved Checkpoint...
*****Value_Net Model Saved Checkpoint...
***Testing the Policy @ Global Step 700000
Episode:   0	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:   1	 Final Score:  1.00	 Acc. Reward:  1.42	 Min_Dist:   0.00	 Epi_Len:  27.00
Episode:   2	 Final Score:  0.00	 Acc. Reward: -1.21	 Min_Dist:   4.00	 Epi_Len:  25.00
Episode:   3	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:   4	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:   5	 Final Score:  1.00	 Acc. Reward:  1.28	 Min_Dist:   0.00	 Epi_Len:  24.00
Episode:   6	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:   7	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:

Episode:   1	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:   2	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:   3	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   6.00	 Epi_Len:  20.00
Episode:   4	 Final Score:  1.00	 Acc. Reward:  1.12	 Min_Dist:   0.00	 Epi_Len:  28.00
Episode:   5	 Final Score:  0.00	 Acc. Reward: -1.19	 Min_Dist:  10.00	 Epi_Len:   6.00
Episode:   6	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:   7	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:  11.00	 Epi_Len:  10.00
Episode:   8	 Final Score:  1.00	 Acc. Reward:  1.42	 Min_Dist:   0.00	 Epi_Len:  27.00
Episode:   9	 Final Score:  1.00	 Acc. Reward:  1.20	 Min_Dist:   0.00	 Epi_Len:  26.00
Episode:  10	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  11	 Final Score:  1.00	 Acc. Reward:  1.36	 Min_Dist:   0.00	 Epi_Len:  22.00
Episode:  12	 Final Score:  1.00

Episode:   6	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:   7	 Final Score:  1.00	 Acc. Reward:  1.12	 Min_Dist:   0.00	 Epi_Len:  28.00
Episode:   8	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:   9	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  10	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:  11.00	 Epi_Len:  10.00
Episode:  11	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  12	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  13	 Final Score:  0.00	 Acc. Reward: -0.36	 Min_Dist:   1.00	 Epi_Len:  22.00
Episode:  14	 Final Score:  1.00	 Acc. Reward:  1.34	 Min_Dist:   0.00	 Epi_Len:  29.00
Episode:  15	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  16	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   6.00	 Epi_Len:  20.00
Episode:  17	 Final Score:  2.00

Episode:  11	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  12	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  13	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  14	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  15	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  16	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  17	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  18	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  19	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  20	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  21	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  22	 Final Score:  2.00

Episode:  16	 Final Score:  1.00	 Acc. Reward:  0.82	 Min_Dist:   0.00	 Epi_Len:  29.00
Episode:  17	 Final Score:  1.00	 Acc. Reward:  1.42	 Min_Dist:   0.00	 Epi_Len:  27.00
Episode:  18	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  19	 Final Score:  1.00	 Acc. Reward:  1.28	 Min_Dist:   0.00	 Epi_Len:  24.00
Episode:  20	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   5.00	 Epi_Len:  20.00
Episode:  21	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  22	 Final Score:  0.00	 Acc. Reward: -1.02	 Min_Dist:   2.00	 Epi_Len:  32.00
Episode:  23	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  24	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  25	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   6.00	 Epi_Len:  20.00
Episode:  26	 Final Score:  1.00	 Acc. Reward:  1.34	 Min_Dist:   0.00	 Epi_Len:  29.00
Episode:  27	 Final Score:  1.00

Episode:  21	 Final Score:  1.00	 Acc. Reward:  1.42	 Min_Dist:   0.00	 Epi_Len:  27.00
Episode:  22	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  23	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  24	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  25	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  26	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  27	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  28	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  29	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Overall Results: Success Rate: 30/30, Ave. Score:  1.27, Ave. Rewards:  1.56, Ave. Min_Dist  0.00, Ave. Epi_Length  23.47
logfile on global steps=750000.0 saved...
Worker Experience @ Global Step 750080
Worke

Episode:  26	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  27	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  28	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  29	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Overall Results: Success Rate: 29/30, Ave. Score:  1.53, Ave. Rewards:  1.51, Ave. Min_Dist  0.33, Ave. Epi_Length  21.53
logfile on global steps=760000.0 saved...
Worker Experience @ Global Step 760080
Worker Experience @ Global Step 760160
Worker Experience @ Global Step 760240
Worker Experience @ Global Step 760320
Worker Experience @ Global Step 760400
Worker Experience @ Global Step 760480
Worker Experience @ Global Step 760560
Worker Experience @ Global Step 760640
Worker Experience @ Global Step 760720
Worker Experience @ Global Step 760800
Worker Experience @ Global Step 760880
Worker Experience @ Global Step 760960
Worker Experienc

Worker Experience @ Global Step 770080
Worker Experience @ Global Step 770160
Worker Experience @ Global Step 770240
Worker Experience @ Global Step 770320
Worker Experience @ Global Step 770400
Worker Experience @ Global Step 770480
Worker Experience @ Global Step 770560
Worker Experience @ Global Step 770640
Worker Experience @ Global Step 770720
Worker Experience @ Global Step 770800
Worker Experience @ Global Step 770880
Worker Experience @ Global Step 770960
Worker Experience @ Global Step 771040
Worker Experience @ Global Step 771120
Worker Experience @ Global Step 771200
Worker Experience @ Global Step 771280
Worker Experience @ Global Step 771360
Worker Experience @ Global Step 771440
Worker Experience @ Global Step 771520
Worker Experience @ Global Step 771600
Worker Experience @ Global Step 771680
Worker Experience @ Global Step 771760
Worker Experience @ Global Step 771840
Worker Experience @ Global Step 771920
Worker Experience @ Global Step 772000
Worker Experience @ Globa

Worker Experience @ Global Step 780880
Worker Experience @ Global Step 780960
Worker Experience @ Global Step 781040
Worker Experience @ Global Step 781120
Worker Experience @ Global Step 781200
Worker Experience @ Global Step 781280
Worker Experience @ Global Step 781360
Worker Experience @ Global Step 781440
Worker Experience @ Global Step 781520
Worker Experience @ Global Step 781600
Worker Experience @ Global Step 781680
Worker Experience @ Global Step 781760
Worker Experience @ Global Step 781840
Worker Experience @ Global Step 781920
Worker Experience @ Global Step 782000
Worker Experience @ Global Step 782080
Worker Experience @ Global Step 782160
Worker Experience @ Global Step 782240
Worker Experience @ Global Step 782320
Worker Experience @ Global Step 782400
Worker Experience @ Global Step 782480
Worker Experience @ Global Step 782560
Worker Experience @ Global Step 782640
Worker Experience @ Global Step 782720
Worker Experience @ Global Step 782800
Worker Experience @ Globa

Worker Experience @ Global Step 791680
Worker Experience @ Global Step 791760
Worker Experience @ Global Step 791840
Worker Experience @ Global Step 791920
Worker Experience @ Global Step 792000
Worker Experience @ Global Step 792080
Worker Experience @ Global Step 792160
Worker Experience @ Global Step 792240
Worker Experience @ Global Step 792320
Worker Experience @ Global Step 792400
Worker Experience @ Global Step 792480
Worker Experience @ Global Step 792560
Worker Experience @ Global Step 792640
Worker Experience @ Global Step 792720
Worker Experience @ Global Step 792800
Worker Experience @ Global Step 792880
Worker Experience @ Global Step 792960
Worker Experience @ Global Step 793040
Worker Experience @ Global Step 793120
Worker Experience @ Global Step 793200
Worker Experience @ Global Step 793280
Worker Experience @ Global Step 793360
Worker Experience @ Global Step 793440
Worker Experience @ Global Step 793520
Worker Experience @ Global Step 793600
Worker Experience @ Globa

Worker Experience @ Global Step 802480
Worker Experience @ Global Step 802560
Worker Experience @ Global Step 802640
Worker Experience @ Global Step 802720
Worker Experience @ Global Step 802800
Worker Experience @ Global Step 802880
Worker Experience @ Global Step 802960
Worker Experience @ Global Step 803040
Worker Experience @ Global Step 803120
Worker Experience @ Global Step 803200
Worker Experience @ Global Step 803280
Worker Experience @ Global Step 803360
Worker Experience @ Global Step 803440
Worker Experience @ Global Step 803520
Worker Experience @ Global Step 803600
Worker Experience @ Global Step 803680
Worker Experience @ Global Step 803760
Worker Experience @ Global Step 803840
Worker Experience @ Global Step 803920
Worker Experience @ Global Step 804000
Worker Experience @ Global Step 804080
Worker Experience @ Global Step 804160
Worker Experience @ Global Step 804240
Worker Experience @ Global Step 804320
Worker Experience @ Global Step 804400
Worker Experience @ Globa

Worker Experience @ Global Step 813280
Worker Experience @ Global Step 813360
Worker Experience @ Global Step 813440
Worker Experience @ Global Step 813520
Worker Experience @ Global Step 813600
Worker Experience @ Global Step 813680
Worker Experience @ Global Step 813760
Worker Experience @ Global Step 813840
Worker Experience @ Global Step 813920
Worker Experience @ Global Step 814000
Worker Experience @ Global Step 814080
Worker Experience @ Global Step 814160
Worker Experience @ Global Step 814240
Worker Experience @ Global Step 814320
Worker Experience @ Global Step 814400
Worker Experience @ Global Step 814480
Worker Experience @ Global Step 814560
Worker Experience @ Global Step 814640
Worker Experience @ Global Step 814720
Worker Experience @ Global Step 814800
Worker Experience @ Global Step 814880
Worker Experience @ Global Step 814960
Worker Experience @ Global Step 815040
Worker Experience @ Global Step 815120
Worker Experience @ Global Step 815200
Worker Experience @ Globa

Worker Experience @ Global Step 824080
Worker Experience @ Global Step 824160
Worker Experience @ Global Step 824240
Worker Experience @ Global Step 824320
Worker Experience @ Global Step 824400
Worker Experience @ Global Step 824480
Worker Experience @ Global Step 824560
Worker Experience @ Global Step 824640
Worker Experience @ Global Step 824720
Worker Experience @ Global Step 824800
Worker Experience @ Global Step 824880
Worker Experience @ Global Step 824960
Worker Experience @ Global Step 825040
Worker Experience @ Global Step 825120
Worker Experience @ Global Step 825200
Worker Experience @ Global Step 825280
Worker Experience @ Global Step 825360
Worker Experience @ Global Step 825440
Worker Experience @ Global Step 825520
Worker Experience @ Global Step 825600
Worker Experience @ Global Step 825680
Worker Experience @ Global Step 825760
Worker Experience @ Global Step 825840
Worker Experience @ Global Step 825920
Worker Experience @ Global Step 826000
Worker Experience @ Globa

Worker Experience @ Global Step 834880
Worker Experience @ Global Step 834960
Worker Experience @ Global Step 835040
Worker Experience @ Global Step 835120
Worker Experience @ Global Step 835200
Worker Experience @ Global Step 835280
Worker Experience @ Global Step 835360
Worker Experience @ Global Step 835440
Worker Experience @ Global Step 835520
Worker Experience @ Global Step 835600
Worker Experience @ Global Step 835680
Worker Experience @ Global Step 835760
Worker Experience @ Global Step 835840
Worker Experience @ Global Step 835920
Worker Experience @ Global Step 836000
Worker Experience @ Global Step 836080
Worker Experience @ Global Step 836160
Worker Experience @ Global Step 836240
Worker Experience @ Global Step 836320
Worker Experience @ Global Step 836400
Worker Experience @ Global Step 836480
Worker Experience @ Global Step 836560
Worker Experience @ Global Step 836640
Worker Experience @ Global Step 836720
Worker Experience @ Global Step 836800
Worker Experience @ Globa

Worker Experience @ Global Step 845680
Worker Experience @ Global Step 845760
Worker Experience @ Global Step 845840
Worker Experience @ Global Step 845920
Worker Experience @ Global Step 846000
Worker Experience @ Global Step 846080
Worker Experience @ Global Step 846160
Worker Experience @ Global Step 846240
Worker Experience @ Global Step 846320
Worker Experience @ Global Step 846400
Worker Experience @ Global Step 846480
Worker Experience @ Global Step 846560
Worker Experience @ Global Step 846640
Worker Experience @ Global Step 846720
Worker Experience @ Global Step 846800
Worker Experience @ Global Step 846880
Worker Experience @ Global Step 846960
Worker Experience @ Global Step 847040
Worker Experience @ Global Step 847120
Worker Experience @ Global Step 847200
Worker Experience @ Global Step 847280
Worker Experience @ Global Step 847360
Worker Experience @ Global Step 847440
Worker Experience @ Global Step 847520
Worker Experience @ Global Step 847600
Worker Experience @ Globa

Worker Experience @ Global Step 856480
Worker Experience @ Global Step 856560
Worker Experience @ Global Step 856640
Worker Experience @ Global Step 856720
Worker Experience @ Global Step 856800
Worker Experience @ Global Step 856880
Worker Experience @ Global Step 856960
Worker Experience @ Global Step 857040
Worker Experience @ Global Step 857120
Worker Experience @ Global Step 857200
Worker Experience @ Global Step 857280
Worker Experience @ Global Step 857360
Worker Experience @ Global Step 857440
Worker Experience @ Global Step 857520
Worker Experience @ Global Step 857600
Worker Experience @ Global Step 857680
Worker Experience @ Global Step 857760
Worker Experience @ Global Step 857840
Worker Experience @ Global Step 857920
Worker Experience @ Global Step 858000
Worker Experience @ Global Step 858080
Worker Experience @ Global Step 858160
Worker Experience @ Global Step 858240
Worker Experience @ Global Step 858320
Worker Experience @ Global Step 858400
Worker Experience @ Globa

Worker Experience @ Global Step 867280
Worker Experience @ Global Step 867360
Worker Experience @ Global Step 867440
Worker Experience @ Global Step 867520
Worker Experience @ Global Step 867600
Worker Experience @ Global Step 867680
Worker Experience @ Global Step 867760
Worker Experience @ Global Step 867840
Worker Experience @ Global Step 867920
Worker Experience @ Global Step 868000
Worker Experience @ Global Step 868080
Worker Experience @ Global Step 868160
Worker Experience @ Global Step 868240
Worker Experience @ Global Step 868320
Worker Experience @ Global Step 868400
Worker Experience @ Global Step 868480
Worker Experience @ Global Step 868560
Worker Experience @ Global Step 868640
Worker Experience @ Global Step 868720
Worker Experience @ Global Step 868800
Worker Experience @ Global Step 868880
Worker Experience @ Global Step 868960
Worker Experience @ Global Step 869040
Worker Experience @ Global Step 869120
Worker Experience @ Global Step 869200
Worker Experience @ Globa

Worker Experience @ Global Step 878080
Worker Experience @ Global Step 878160
Worker Experience @ Global Step 878240
Worker Experience @ Global Step 878320
Worker Experience @ Global Step 878400
Worker Experience @ Global Step 878480
Worker Experience @ Global Step 878560
Worker Experience @ Global Step 878640
Worker Experience @ Global Step 878720
Worker Experience @ Global Step 878800
Worker Experience @ Global Step 878880
Worker Experience @ Global Step 878960
Worker Experience @ Global Step 879040
Worker Experience @ Global Step 879120
Worker Experience @ Global Step 879200
Worker Experience @ Global Step 879280
Worker Experience @ Global Step 879360
Worker Experience @ Global Step 879440
Worker Experience @ Global Step 879520
Worker Experience @ Global Step 879600
Worker Experience @ Global Step 879680
Worker Experience @ Global Step 879760
Worker Experience @ Global Step 879840
Worker Experience @ Global Step 879920
Worker Experience @ Global Step 880000
*****Policy_Net Model Sav

Worker Experience @ Global Step 888880
Worker Experience @ Global Step 888960
Worker Experience @ Global Step 889040
Worker Experience @ Global Step 889120
Worker Experience @ Global Step 889200
Worker Experience @ Global Step 889280
Worker Experience @ Global Step 889360
Worker Experience @ Global Step 889440
Worker Experience @ Global Step 889520
Worker Experience @ Global Step 889600
Worker Experience @ Global Step 889680
Worker Experience @ Global Step 889760
Worker Experience @ Global Step 889840
Worker Experience @ Global Step 889920
Worker Experience @ Global Step 890000
*****Policy_Net Model Saved Checkpoint...
*****Value_Net Model Saved Checkpoint...
***Testing the Policy @ Global Step 890000
Episode:   0	 Final Score:  0.00	 Acc. Reward: -1.35	 Min_Dist:  10.00	 Epi_Len:  10.00
Episode:   1	 Final Score:  1.00	 Acc. Reward:  1.42	 Min_Dist:   0.00	 Epi_Len:  27.00
Episode:   2	 Final Score:  1.00	 Acc. Reward:  1.42	 Min_Dist:   0.00	 Epi_Len:  27.00
Episode:   3	 Final Score

Worker Experience @ Global Step 899680
Worker Experience @ Global Step 899760
Worker Experience @ Global Step 899840
Worker Experience @ Global Step 899920
Worker Experience @ Global Step 900000
*****Policy_Net Model Saved Checkpoint...
*****Value_Net Model Saved Checkpoint...
***Testing the Policy @ Global Step 900000
Episode:   0	 Final Score:  1.00	 Acc. Reward:  1.36	 Min_Dist:   0.00	 Epi_Len:  22.00
Episode:   1	 Final Score:  1.00	 Acc. Reward:  1.20	 Min_Dist:   0.00	 Epi_Len:  26.00
Episode:   2	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:   3	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:   4	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:   5	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:   6	 Final Score:  1.00	 Acc. Reward:  1.04	 Min_Dist:   0.00	 Epi_Len:  30.00
Episode:   7	 Final Score:  1.00	 Acc. Reward:  0.90	 Min_Dist:

Episode:   1	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:   2	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:   3	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:   4	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:   5	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:   6	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:   7	 Final Score:  1.00	 Acc. Reward:  1.42	 Min_Dist:   0.00	 Epi_Len:  27.00
Episode:   8	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:   9	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  10	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  11	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  12	 Final Score:  1.00

Episode:   6	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   6.00	 Epi_Len:  20.00
Episode:   7	 Final Score:  1.00	 Acc. Reward:  1.26	 Min_Dist:   0.00	 Epi_Len:  31.00
Episode:   8	 Final Score:  0.00	 Acc. Reward: -0.76	 Min_Dist:   3.00	 Epi_Len:  32.00
Episode:   9	 Final Score:  1.00	 Acc. Reward:  1.42	 Min_Dist:   0.00	 Epi_Len:  27.00
Episode:  10	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  11	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  12	 Final Score:  0.00	 Acc. Reward: -1.34	 Min_Dist:   5.00	 Epi_Len:  20.00
Episode:  13	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   6.00	 Epi_Len:  20.00
Episode:  14	 Final Score:  1.00	 Acc. Reward:  1.26	 Min_Dist:   0.00	 Epi_Len:  31.00
Episode:  15	 Final Score:  1.00	 Acc. Reward:  1.34	 Min_Dist:   0.00	 Epi_Len:  29.00
Episode:  16	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   5.00	 Epi_Len:  20.00
Episode:  17	 Final Score:  0.00

Episode:  11	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  12	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  13	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  14	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  15	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  16	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  17	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  18	 Final Score:  1.00	 Acc. Reward:  1.36	 Min_Dist:   0.00	 Epi_Len:  22.00
Episode:  19	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  20	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  21	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  22	 Final Score:  1.00

Episode:  16	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  17	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  18	 Final Score:  1.00	 Acc. Reward:  1.34	 Min_Dist:   0.00	 Epi_Len:  29.00
Episode:  19	 Final Score:  1.00	 Acc. Reward:  1.42	 Min_Dist:   0.00	 Epi_Len:  27.00
Episode:  20	 Final Score:  1.00	 Acc. Reward:  1.36	 Min_Dist:   0.00	 Epi_Len:  22.00
Episode:  21	 Final Score:  1.00	 Acc. Reward:  1.42	 Min_Dist:   0.00	 Epi_Len:  27.00
Episode:  22	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  23	 Final Score:  1.00	 Acc. Reward:  1.42	 Min_Dist:   0.00	 Epi_Len:  27.00
Episode:  24	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  25	 Final Score:  1.00	 Acc. Reward:  1.34	 Min_Dist:   0.00	 Epi_Len:  29.00
Episode:  26	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  27	 Final Score:  1.00

Episode:  21	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  22	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  23	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  24	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:  25	 Final Score:  0.00	 Acc. Reward: -1.08	 Min_Dist:   6.00	 Epi_Len:  20.00
Episode:  26	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  27	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  28	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  29	 Final Score:  1.00	 Acc. Reward:  1.42	 Min_Dist:   0.00	 Epi_Len:  27.00
Overall Results: Success Rate: 29/30, Ave. Score:  1.20, Ave. Rewards:  1.46, Ave. Min_Dist  0.20, Ave. Epi_Length  23.57
logfile on global steps=950000.0 saved...
Worker Experience @ Global Step 950080
Worke

Episode:  26	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  27	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:  28	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Episode:  29	 Final Score:  2.00	 Acc. Reward:  1.66	 Min_Dist:   0.00	 Epi_Len:  21.00
Overall Results: Success Rate: 30/30, Ave. Score:  1.33, Ave. Rewards:  1.54, Ave. Min_Dist  0.00, Ave. Epi_Length  23.53
logfile on global steps=960000.0 saved...
Worker Experience @ Global Step 960080
Worker Experience @ Global Step 960160
Worker Experience @ Global Step 960240
Worker Experience @ Global Step 960320
Worker Experience @ Global Step 960400
Worker Experience @ Global Step 960480
Worker Experience @ Global Step 960560
Worker Experience @ Global Step 960640
Worker Experience @ Global Step 960720
Worker Experience @ Global Step 960800
Worker Experience @ Global Step 960880
Worker Experience @ Global Step 960960
Worker Experienc

Worker Experience @ Global Step 970080
Worker Experience @ Global Step 970160
Worker Experience @ Global Step 970240
Worker Experience @ Global Step 970320
Worker Experience @ Global Step 970400
Worker Experience @ Global Step 970480
Worker Experience @ Global Step 970560
Worker Experience @ Global Step 970640
Worker Experience @ Global Step 970720
Worker Experience @ Global Step 970800
Worker Experience @ Global Step 970880
Worker Experience @ Global Step 970960
Worker Experience @ Global Step 971040
Worker Experience @ Global Step 971120
Worker Experience @ Global Step 971200
Worker Experience @ Global Step 971280
Worker Experience @ Global Step 971360
Worker Experience @ Global Step 971440
Worker Experience @ Global Step 971520
Worker Experience @ Global Step 971600
Worker Experience @ Global Step 971680
Worker Experience @ Global Step 971760
Worker Experience @ Global Step 971840
Worker Experience @ Global Step 971920
Worker Experience @ Global Step 972000
Worker Experience @ Globa

Worker Experience @ Global Step 980880
Worker Experience @ Global Step 980960
Worker Experience @ Global Step 981040
Worker Experience @ Global Step 981120
Worker Experience @ Global Step 981200
Worker Experience @ Global Step 981280
Worker Experience @ Global Step 981360
Worker Experience @ Global Step 981440
Worker Experience @ Global Step 981520
Worker Experience @ Global Step 981600
Worker Experience @ Global Step 981680
Worker Experience @ Global Step 981760
Worker Experience @ Global Step 981840
Worker Experience @ Global Step 981920
Worker Experience @ Global Step 982000
Worker Experience @ Global Step 982080
Worker Experience @ Global Step 982160
Worker Experience @ Global Step 982240
Worker Experience @ Global Step 982320
Worker Experience @ Global Step 982400
Worker Experience @ Global Step 982480
Worker Experience @ Global Step 982560
Worker Experience @ Global Step 982640
Worker Experience @ Global Step 982720
Worker Experience @ Global Step 982800
Worker Experience @ Globa

Worker Experience @ Global Step 991680
Worker Experience @ Global Step 991760
Worker Experience @ Global Step 991840
Worker Experience @ Global Step 991920
Worker Experience @ Global Step 992000
Worker Experience @ Global Step 992080
Worker Experience @ Global Step 992160
Worker Experience @ Global Step 992240
Worker Experience @ Global Step 992320
Worker Experience @ Global Step 992400
Worker Experience @ Global Step 992480
Worker Experience @ Global Step 992560
Worker Experience @ Global Step 992640
Worker Experience @ Global Step 992720
Worker Experience @ Global Step 992800
Worker Experience @ Global Step 992880
Worker Experience @ Global Step 992960
Worker Experience @ Global Step 993040
Worker Experience @ Global Step 993120
Worker Experience @ Global Step 993200
Worker Experience @ Global Step 993280
Worker Experience @ Global Step 993360
Worker Experience @ Global Step 993440
Worker Experience @ Global Step 993520
Worker Experience @ Global Step 993600
Worker Experience @ Globa

In [17]:
# Test the policy
# Runs 5 test episodes with rendering switched on and keeps whatever
# play.test() returns in `stats`. The per-episode lines and the
# "Overall Results" summary shown below this cell are printed during the call.
stats = play.test(iterations=5, render=True)

Episode:   0	 Final Score:  1.00	 Acc. Reward:  1.36	 Min_Dist:   0.00	 Epi_Len:  22.00
Episode:   1	 Final Score:  1.00	 Acc. Reward:  1.58	 Min_Dist:   0.00	 Epi_Len:  23.00
Episode:   2	 Final Score:  1.00	 Acc. Reward:  1.42	 Min_Dist:   0.00	 Epi_Len:  27.00
Episode:   3	 Final Score:  1.00	 Acc. Reward:  1.50	 Min_Dist:   0.00	 Epi_Len:  25.00
Episode:   4	 Final Score:  1.00	 Acc. Reward:  1.42	 Min_Dist:   0.00	 Epi_Len:  27.00
Overall Results: Success Rate:  5/5, Ave. Score:  1.00, Ave. Rewards:  1.45, Ave. Min_Dist  0.00, Ave. Epi_Length  24.80


# Plotting Log Results

In [18]:
# Handle to the test log kept under the env_A training directory.
# NOTE(review): presumably the same log the training run reports as
# "logfile on global steps=... saved" - confirm against LogFile.
test_log = LogFile('ppo_models/env_A/training/test_log')

In [19]:
# Load the records saved under the name 'logfile'. The cells below read each
# record by position: 0 = global step, 4 = mean score, 5 = mean reward,
# 6 = mean distance to goal.
data = test_log.get_saved_data('logfile')

In [20]:
# Get global steps
# Column 0 of every log record holds the global step at which it was written.
stp = [data[row][0] for row in range(np.size(data, 0))]

print(stp)
print("Array Length: {}".format(len(stp)))

[10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000, 100000, 110000, 120000, 130000, 140000, 150000, 160000, 170000, 180000, 190000, 200000, 210000, 220000, 230000, 240000, 250000, 260000, 270000, 280000, 290000, 300000, 310000, 320000, 330000, 340000, 350000, 360000, 370000, 380000, 390000, 400000, 410000, 420000, 430000, 440000, 450000, 460000, 470000, 480000, 490000, 500000, 510000, 520000, 530000, 540000, 550000, 560000, 570000, 580000, 590000, 600000, 610000, 620000, 630000, 640000, 650000, 660000, 670000, 680000, 690000, 700000, 710000, 720000, 730000, 740000, 750000, 760000, 770000, 780000, 790000, 800000, 810000, 820000, 830000, 840000, 850000, 860000, 870000, 880000, 890000, 900000, 910000, 920000, 930000, 940000, 950000, 960000, 970000, 980000, 990000]
Array Length: 99


In [21]:
# Split the log records into one series per metric.
record_ids = range(np.size(data, 0))

# Get mean rewards (column 5)
rwd = [data[k][5] for k in record_ids]

# Get mean score (column 4)
scr = [data[k][4] for k in record_ids]

# Get mean distance to goal (column 6)
dst = [data[k][6] for k in record_ids]

In [22]:
import matplotlib.pyplot as plot

In [30]:
# Adjust depending on the results
# Settings shared by the three plotting cells below.
_range = 99 # Length of result to be plotted (number of log entries drawn)
_scale = 10.0 # x-axis distance between consecutive log entries ( max_steps / _range); axis is labelled in units of 1000 steps
x_step = 50 # spacing between x-axis tick marks
x_stop = 1000 # end of the x-axis tick range (exclusive, as passed to np.arange)

In [31]:
# Plot rewards
# Draws the mean test reward of every logged evaluation (`rwd`) against the
# training step, in units of 1000 steps, and saves the figure as a JPEG.

# Define plot size
plot.figure(num=None, figsize=(10,6))

# Log entry i was written after (i + 1) evaluation intervals (the first
# logged global step is 10000, not 0), so the x positions start one `_scale`
# unit in from the origin. The length is capped at what the log actually
# holds so x and y always have the same number of points.
x_axis = [(i + 1) * _scale for i in range(min(_range, len(rwd)))]

plot.plot(x_axis, rwd[0:_range], color='teal')

# Title
plot.title('Total Rewards')

# Labels
plot.xlabel('steps (1x1000)')
plot.ylabel('ave. total rewards')
plot.xticks(np.arange(start=0.0, stop=x_stop, step=x_step))
plot.yticks(np.arange(start=-2.0, stop=2.2, step=0.2))

# Keep the grid switched off
plot.grid(False)

# show plot (uncomment for interactive display)
#plot.show()

# save plot, then close the figure so it is not kept open
plot.savefig('ppo_models/plots/gworldA_c_rewards.jpg')
plot.close()

In [32]:
# Plot distance to goal
# Draws the mean distance to the goal of every logged evaluation (`dst`)
# against the training step, in units of 1000 steps, and saves the figure as
# a JPEG.

# Define plot size
plot.figure(num=None, figsize=(10,6))

# Log entry i was written after (i + 1) evaluation intervals (the first
# logged global step is 10000, not 0), so the x positions start one `_scale`
# unit in from the origin. The length is capped at what the log actually
# holds so x and y always have the same number of points.
x_axis = [(i + 1) * _scale for i in range(min(_range, len(dst)))]

plot.plot(x_axis, dst[0:_range], color='green')

# Title
plot.title('Distance To Goal')

# Labels
plot.xlabel('steps (1x1000)')
plot.ylabel('ave. distance to endpoint')
plot.xticks(np.arange(start=0.0, stop=x_stop, step=x_step))
plot.yticks(np.arange(start=0.0, stop=12.0, step=1.0))

# Keep the grid switched off
plot.grid(False)

# show plot (uncomment for interactive display)
#plot.show()

# save plot, then close the figure so it is not kept open
plot.savefig('ppo_models/plots/gworldA_c_distance.jpg')
plot.close()

In [33]:
# Plot score
# Draws the mean score of every logged evaluation (`scr`) against the
# training step, in units of 1000 steps, and saves the figure as a JPEG.

# Define plot size
plot.figure(num=None, figsize=(10,6))

# Log entry i was written after (i + 1) evaluation intervals (the first
# logged global step is 10000, not 0), so the x positions start one `_scale`
# unit in from the origin. The length is capped at what the log actually
# holds so x and y always have the same number of points.
x_axis = [(i + 1) * _scale for i in range(min(_range, len(scr)))]

plot.plot(x_axis, scr[0:_range], color='orange')

# Title
plot.title('Average Score')

# Labels
plot.xlabel('steps (1x1000)')
plot.ylabel('average score')
plot.xticks(np.arange(start=0.0, stop=x_stop, step=x_step))
plot.yticks(np.arange(start=0.0, stop=2.2, step=0.2))

# Keep the grid switched off
plot.grid(False)

# show plot (uncomment for interactive display)
#plot.show()

# save plot, then close the figure so it is not kept open
plot.savefig('ppo_models/plots/gworldA_c_score.jpg')
plot.close()