In [1]:
#Importing the necessary packages
import gym 
import tensorflow as tf 
import numpy as np 
import random 
import matplotlib.pyplot as plt

  from ._conv import register_converters as _register_converters


In [2]:
#quick exploration of our environment
env = gym.make('Pong-v0')
print(env.action_space)
print(env.observation_space)


Discrete(6)
Box(210, 160, 3)


In [95]:
class ExperienceBuffer: 
    
    def __init__(self): 
        self.buffer = []
        self.buffer_capacity = 1000000
        self.current_length = 0
    def write(self, obs): 
        if self.current_length == self.buffer_capacity: 
            self.buffer.pop()
            self.current_length -= 1
        self.buffer.append(obs)
        self.current_length +=1
    def sample_buffer(self, batch_to_sample): 
        return random.sample(self.buffer, min(batch_to_sample, self.current_length))

In [66]:
class Network:
    
    def __init__(self, session , n_out):
        self.session = session
        self.n_out = n_out
        # data placeholders
        self.x = tf.placeholder(tf.float32, [None, 84,84,4], name='x')
        self.y = tf.placeholder(tf.float32, [None, n_out], name='y')
        self.conv_1 = tf.layers.conv2d(self.x, filters = 32, kernel_size = [8,8], strides = 4, activation = tf.nn.relu)
        self.conv_2 = tf.layers.conv2d(self.conv_1, filters = 64, kernel_size = [4,4], strides = 2, activation = tf.nn.relu)
        #self.max_pool_1 = tf.layers.MaxPooling2D(pool_size = 2, strides = 2)
        self.flat_layer = tf.layers.flatten(self.conv_2)
        self.bottleneck_layer = tf.layers.dense(self.flat_layer, 512)
        self.q = tf.layers.dense(self.bottleneck_layer, n_out)
    
    def compute(self, input_frames):
        # evaluate the network and return the action values [q(s,a=0),q(s,a=1)]
        return self.session.run(self.q, feed_dict={self.x:input_frames})
    

In [67]:
class ReshapeImage: 
    #Source for data processing: this is also the same as the openAI baseline https://github.com/fg91/Deep-Q-Learning/blob/master/DQN.ipynb
    def __init__(self, target_height = 84, target_width = 84):
        self.target_height = target_height
        self.target_width = target_width
        self.input_rgb_image = tf.placeholder(tf.uint8, [210,160,3])
        self.grayscale_img = tf.image.rgb_to_grayscale(self.input_rgb_image)
        self.bounded_img = tf.image.crop_to_bounding_box(self.grayscale_img, 34, 0, 160, 160)
        self.final_processed = tf.image.resize_images(self.bounded_img, 
                                                [self.target_height, self.target_width], 
                                                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    def return_processed_img(self, session, input_img):
        return session.run(self.final_processed, feed_dict = {self.input_rgb_image: input_img})

In [99]:
class Agent: 
    
    def __init__(self, session, policy = 'DQN'): 
        #Want a pretty high gamma at first
        self.gamma = 0.99
        #Want pure exploration at first 
        self.epsilon = 1.0
        #want massive adjustments to Q function at first
        self.alpha = 1.0
        #There are six possible outputs
        self.n_out = 6
        #Tune this
        self.batch_size = 100
        self.total_reward = 0
        self.policy = policy
        self.session = session
        self.experience_buffer = ExperienceBuffer()
        self.q = Network(session, self.n_out)
        self.image_reshaper = ReshapeImage()
    def get_action(self, observation): 
        if self.policy == 'DQN':
            if np.random.randn() > self.epsilon: 
                #reshaped_obs = self.image_reshaper.return_processed_img(self.session, observation)
                #need to fix this to feed in 4 stacked frames
               # pos = np.argmax(self.q.compute(reshaped_obs), axis = 1)
                #return pos[0];
                return random.randint(0,5)
            else: 
                return random.randint(0,5)
    def adjust_epsilon(self, episode_number):
        self.epsilon = 0.01 + (1.0 - 0.01)*np.exp(-.001*episode_number)
    def update_q(): 
        self.batch_to_train_upon = self.experience_buffer.sample_buffer(self.batch_size);
    def gather_buffer_exp(self, last_obs, action, reward, obs): 
        self.experience_buffer.write((last_obs, action, reward, obs))
    def gather_reward(self, reward): 
        self.total_reward += reward
    def get_total_reward(self): 
        return self.total_reward
    def set_reward(self, new_reward): 
        self.total_reward = new_reward

In [100]:
#quick test of the agents functions
agent = Agent('random')
act = agent.get_action('hehehe')
new_rew = agent.gather_reward(15)
new_tot = agent.get_total_reward()
print(new_tot)
print(act)

15
0


In [None]:
#Code to test the actual env:
with tf.Graph().as_default():
    with tf.Session() as session: 
        agent = Agent(session)
        session.run(tf.global_variables_initializer())
        episode_rewards = []
        for i_episode in range(20):
            observation = env.reset()
            agent.set_reward(0)
            for t in range(3000):
                env.render()
                action = agent.get_action(observation)
                new_observation, reward, done, info = env.step(action)
                print("the reward at stage {} was {} and the action was {}".format(t, reward, action))
                agent.gather_reward(reward)
                agent.gather_buffer_exp(observation, action, reward, new_observation)
                observation = new_observation
                if done:
                    the_ep_reward = agent.get_total_reward()
                    print("Episode {} finished after {} timesteps with total reward {}".format(i_episode, t+1, agent.get_total_reward()))
                    episode_rewards.append(the_ep_reward)
                    break
            agent.adjust_epsilon(i_episode)
plt.plot(episode_rewards)
plt.show()

the reward at stage 0 was 0.0 and the action was 2
the reward at stage 1 was 0.0 and the action was 3
the reward at stage 2 was 0.0 and the action was 1
the reward at stage 3 was 0.0 and the action was 3
the reward at stage 4 was 0.0 and the action was 4
the reward at stage 5 was 0.0 and the action was 5
the reward at stage 6 was 0.0 and the action was 5
the reward at stage 7 was 0.0 and the action was 4
the reward at stage 8 was 0.0 and the action was 1
the reward at stage 9 was 0.0 and the action was 0
the reward at stage 10 was 0.0 and the action was 5
the reward at stage 11 was 0.0 and the action was 1
the reward at stage 12 was 0.0 and the action was 4
the reward at stage 13 was 0.0 and the action was 2
the reward at stage 14 was 0.0 and the action was 2
the reward at stage 15 was 0.0 and the action was 0
the reward at stage 16 was 0.0 and the action was 2
the reward at stage 17 was 0.0 and the action was 4
the reward at stage 18 was 0.0 and the action was 4
the reward at stage 19

the reward at stage 180 was 0.0 and the action was 0
the reward at stage 181 was 0.0 and the action was 5
the reward at stage 182 was 0.0 and the action was 4
the reward at stage 183 was 0.0 and the action was 0
the reward at stage 184 was 0.0 and the action was 2
the reward at stage 185 was 0.0 and the action was 3
the reward at stage 186 was 0.0 and the action was 5
the reward at stage 187 was 0.0 and the action was 2
the reward at stage 188 was 0.0 and the action was 3
the reward at stage 189 was 0.0 and the action was 5
the reward at stage 190 was 0.0 and the action was 4
the reward at stage 191 was 0.0 and the action was 2
the reward at stage 192 was 0.0 and the action was 3
the reward at stage 193 was 0.0 and the action was 2
the reward at stage 194 was 0.0 and the action was 5
the reward at stage 195 was 0.0 and the action was 5
the reward at stage 196 was 0.0 and the action was 2
the reward at stage 197 was 0.0 and the action was 3
the reward at stage 198 was 0.0 and the action

the reward at stage 337 was 0.0 and the action was 5
the reward at stage 338 was 0.0 and the action was 2
the reward at stage 339 was 0.0 and the action was 1
the reward at stage 340 was 0.0 and the action was 5
the reward at stage 341 was 0.0 and the action was 0
the reward at stage 342 was 0.0 and the action was 1
the reward at stage 343 was 0.0 and the action was 4
the reward at stage 344 was 0.0 and the action was 4
the reward at stage 345 was 0.0 and the action was 1
the reward at stage 346 was 0.0 and the action was 1
the reward at stage 347 was 0.0 and the action was 4
the reward at stage 348 was 0.0 and the action was 3
the reward at stage 349 was 0.0 and the action was 3
the reward at stage 350 was 0.0 and the action was 1
the reward at stage 351 was 0.0 and the action was 3
the reward at stage 352 was 0.0 and the action was 1
the reward at stage 353 was 0.0 and the action was 2
the reward at stage 354 was 0.0 and the action was 4
the reward at stage 355 was -1.0 and the actio

the reward at stage 493 was 0.0 and the action was 2
the reward at stage 494 was 0.0 and the action was 0
the reward at stage 495 was -1.0 and the action was 2
the reward at stage 496 was 0.0 and the action was 5
the reward at stage 497 was 0.0 and the action was 5
the reward at stage 498 was 0.0 and the action was 2
the reward at stage 499 was 0.0 and the action was 1
the reward at stage 500 was 0.0 and the action was 4
the reward at stage 501 was 0.0 and the action was 4
the reward at stage 502 was 0.0 and the action was 1
the reward at stage 503 was 0.0 and the action was 5
the reward at stage 504 was 0.0 and the action was 2
the reward at stage 505 was 0.0 and the action was 1
the reward at stage 506 was 0.0 and the action was 4
the reward at stage 507 was 0.0 and the action was 1
the reward at stage 508 was 0.0 and the action was 5
the reward at stage 509 was 0.0 and the action was 4
the reward at stage 510 was 0.0 and the action was 4
the reward at stage 511 was 0.0 and the actio

the reward at stage 674 was 0.0 and the action was 1
the reward at stage 675 was 0.0 and the action was 0
the reward at stage 676 was 0.0 and the action was 0
the reward at stage 677 was 0.0 and the action was 2
the reward at stage 678 was 0.0 and the action was 2
the reward at stage 679 was 0.0 and the action was 1
the reward at stage 680 was 0.0 and the action was 2
the reward at stage 681 was 0.0 and the action was 2
the reward at stage 682 was 0.0 and the action was 4
the reward at stage 683 was 0.0 and the action was 2
the reward at stage 684 was 0.0 and the action was 2
the reward at stage 685 was 0.0 and the action was 4
the reward at stage 686 was 0.0 and the action was 2
the reward at stage 687 was 0.0 and the action was 4
the reward at stage 688 was 0.0 and the action was 4
the reward at stage 689 was 0.0 and the action was 2
the reward at stage 690 was 0.0 and the action was 5
the reward at stage 691 was 0.0 and the action was 0
the reward at stage 692 was 0.0 and the action

the reward at stage 830 was 0.0 and the action was 0
the reward at stage 831 was 0.0 and the action was 1
the reward at stage 832 was 0.0 and the action was 4
the reward at stage 833 was 0.0 and the action was 5
the reward at stage 834 was 0.0 and the action was 1
the reward at stage 835 was 0.0 and the action was 0
the reward at stage 836 was 0.0 and the action was 4
the reward at stage 837 was 0.0 and the action was 1
the reward at stage 838 was 0.0 and the action was 0
the reward at stage 839 was 0.0 and the action was 2
the reward at stage 840 was 0.0 and the action was 4
the reward at stage 841 was 0.0 and the action was 1
the reward at stage 842 was 0.0 and the action was 0
the reward at stage 843 was 0.0 and the action was 4
the reward at stage 844 was 0.0 and the action was 5
the reward at stage 845 was 0.0 and the action was 2
the reward at stage 846 was 0.0 and the action was 2
the reward at stage 847 was 0.0 and the action was 2
the reward at stage 848 was 0.0 and the action

the reward at stage 988 was 0.0 and the action was 2
the reward at stage 989 was 0.0 and the action was 5
the reward at stage 990 was 0.0 and the action was 0
the reward at stage 991 was 0.0 and the action was 1
the reward at stage 992 was 0.0 and the action was 4
the reward at stage 993 was 0.0 and the action was 3
the reward at stage 994 was 0.0 and the action was 1
the reward at stage 995 was 0.0 and the action was 2
the reward at stage 996 was 0.0 and the action was 1
the reward at stage 997 was 0.0 and the action was 5
the reward at stage 998 was 0.0 and the action was 3
the reward at stage 999 was 0.0 and the action was 2
the reward at stage 1000 was 0.0 and the action was 0
the reward at stage 1001 was 0.0 and the action was 0
the reward at stage 1002 was 0.0 and the action was 0
the reward at stage 1003 was 0.0 and the action was 3
the reward at stage 1004 was 0.0 and the action was 2
the reward at stage 1005 was 0.0 and the action was 2
the reward at stage 1006 was 0.0 and the

the reward at stage 1147 was 0.0 and the action was 1
the reward at stage 1148 was 0.0 and the action was 2
the reward at stage 1149 was 0.0 and the action was 1
the reward at stage 1150 was 0.0 and the action was 1
the reward at stage 1151 was 0.0 and the action was 5
the reward at stage 1152 was 0.0 and the action was 2
the reward at stage 1153 was 0.0 and the action was 1
the reward at stage 1154 was 0.0 and the action was 2
the reward at stage 1155 was 0.0 and the action was 4
the reward at stage 1156 was 0.0 and the action was 2
the reward at stage 1157 was 0.0 and the action was 1
the reward at stage 1158 was 0.0 and the action was 3
the reward at stage 1159 was 0.0 and the action was 1
the reward at stage 1160 was 0.0 and the action was 5
the reward at stage 1161 was 0.0 and the action was 0
the reward at stage 1162 was 0.0 and the action was 5
the reward at stage 1163 was 0.0 and the action was 1
the reward at stage 1164 was 0.0 and the action was 2
the reward at stage 1165 was

the reward at stage 1305 was 0.0 and the action was 1
the reward at stage 1306 was 0.0 and the action was 1
the reward at stage 1307 was 0.0 and the action was 2
the reward at stage 1308 was 0.0 and the action was 3
the reward at stage 1309 was 0.0 and the action was 4
the reward at stage 1310 was 0.0 and the action was 1
the reward at stage 1311 was 0.0 and the action was 0
the reward at stage 1312 was 0.0 and the action was 5
the reward at stage 1313 was 0.0 and the action was 5
the reward at stage 1314 was 0.0 and the action was 1
the reward at stage 1315 was 0.0 and the action was 5
the reward at stage 1316 was 0.0 and the action was 5
the reward at stage 1317 was 0.0 and the action was 0
the reward at stage 1318 was 0.0 and the action was 1
the reward at stage 1319 was 0.0 and the action was 5
the reward at stage 1320 was 0.0 and the action was 2
the reward at stage 1321 was 0.0 and the action was 0
the reward at stage 1322 was 0.0 and the action was 3
the reward at stage 1323 was

the reward at stage 71 was 0.0 and the action was 1
the reward at stage 72 was 0.0 and the action was 4
the reward at stage 73 was 0.0 and the action was 5
the reward at stage 74 was 0.0 and the action was 4
the reward at stage 75 was 0.0 and the action was 1
the reward at stage 76 was 0.0 and the action was 0
the reward at stage 77 was 0.0 and the action was 2
the reward at stage 78 was 0.0 and the action was 1
the reward at stage 79 was 0.0 and the action was 1
the reward at stage 80 was 0.0 and the action was 3
the reward at stage 81 was 0.0 and the action was 1
the reward at stage 82 was 0.0 and the action was 5
the reward at stage 83 was 0.0 and the action was 3
the reward at stage 84 was 0.0 and the action was 1
the reward at stage 85 was -1.0 and the action was 0
the reward at stage 86 was 0.0 and the action was 3
the reward at stage 87 was 0.0 and the action was 4
the reward at stage 88 was 0.0 and the action was 1
the reward at stage 89 was 0.0 and the action was 0
the reward 

the reward at stage 229 was 0.0 and the action was 2
the reward at stage 230 was 0.0 and the action was 5
the reward at stage 231 was 0.0 and the action was 4
the reward at stage 232 was 0.0 and the action was 0
the reward at stage 233 was 0.0 and the action was 1
the reward at stage 234 was 0.0 and the action was 5
the reward at stage 235 was 0.0 and the action was 0
the reward at stage 236 was 0.0 and the action was 2
the reward at stage 237 was 0.0 and the action was 3
the reward at stage 238 was 0.0 and the action was 1
the reward at stage 239 was 0.0 and the action was 4
the reward at stage 240 was 0.0 and the action was 5
the reward at stage 241 was 0.0 and the action was 4
the reward at stage 242 was 0.0 and the action was 4
the reward at stage 243 was 0.0 and the action was 0
the reward at stage 244 was 0.0 and the action was 3
the reward at stage 245 was 0.0 and the action was 2
the reward at stage 246 was 0.0 and the action was 0
the reward at stage 247 was 0.0 and the action

the reward at stage 385 was 0.0 and the action was 0
the reward at stage 386 was 0.0 and the action was 3
the reward at stage 387 was 0.0 and the action was 2
the reward at stage 388 was 0.0 and the action was 4
the reward at stage 389 was 0.0 and the action was 0
the reward at stage 390 was 0.0 and the action was 3
the reward at stage 391 was 0.0 and the action was 3
the reward at stage 392 was 0.0 and the action was 5
the reward at stage 393 was 0.0 and the action was 2
the reward at stage 394 was 0.0 and the action was 3
the reward at stage 395 was 0.0 and the action was 1
the reward at stage 396 was 0.0 and the action was 2
the reward at stage 397 was 0.0 and the action was 1
the reward at stage 398 was 0.0 and the action was 3
the reward at stage 399 was 0.0 and the action was 3
the reward at stage 400 was 0.0 and the action was 1
the reward at stage 401 was 0.0 and the action was 4
the reward at stage 402 was 0.0 and the action was 2
the reward at stage 403 was 0.0 and the action

the reward at stage 548 was 0.0 and the action was 2
the reward at stage 549 was 0.0 and the action was 3
the reward at stage 550 was 0.0 and the action was 0
the reward at stage 551 was 0.0 and the action was 3
the reward at stage 552 was 0.0 and the action was 2
the reward at stage 553 was 0.0 and the action was 2
the reward at stage 554 was -1.0 and the action was 0
the reward at stage 555 was 0.0 and the action was 5
the reward at stage 556 was 0.0 and the action was 2
the reward at stage 557 was 0.0 and the action was 5
the reward at stage 558 was 0.0 and the action was 0
the reward at stage 559 was 0.0 and the action was 0
the reward at stage 560 was 0.0 and the action was 2
the reward at stage 561 was 0.0 and the action was 2
the reward at stage 562 was 0.0 and the action was 5
the reward at stage 563 was 0.0 and the action was 3
the reward at stage 564 was 0.0 and the action was 5
the reward at stage 565 was 0.0 and the action was 5
the reward at stage 566 was 0.0 and the actio

the reward at stage 713 was 0.0 and the action was 0
the reward at stage 714 was 0.0 and the action was 3
the reward at stage 715 was 0.0 and the action was 4
the reward at stage 716 was 0.0 and the action was 2
the reward at stage 717 was 0.0 and the action was 2
the reward at stage 718 was 0.0 and the action was 4
the reward at stage 719 was 0.0 and the action was 1
the reward at stage 720 was 0.0 and the action was 5
the reward at stage 721 was 0.0 and the action was 4
the reward at stage 722 was 0.0 and the action was 4
the reward at stage 723 was 0.0 and the action was 2
the reward at stage 724 was 0.0 and the action was 4
the reward at stage 725 was 0.0 and the action was 4
the reward at stage 726 was 0.0 and the action was 0
the reward at stage 727 was 0.0 and the action was 0
the reward at stage 728 was 0.0 and the action was 4
the reward at stage 729 was 0.0 and the action was 5
the reward at stage 730 was 0.0 and the action was 5
the reward at stage 731 was 0.0 and the action

the reward at stage 884 was 0.0 and the action was 1
the reward at stage 885 was 0.0 and the action was 3
the reward at stage 886 was 0.0 and the action was 3
the reward at stage 887 was 0.0 and the action was 2
the reward at stage 888 was 0.0 and the action was 2
the reward at stage 889 was 0.0 and the action was 5
the reward at stage 890 was 0.0 and the action was 2
the reward at stage 891 was 0.0 and the action was 2
the reward at stage 892 was 0.0 and the action was 2
the reward at stage 893 was 0.0 and the action was 3
the reward at stage 894 was 0.0 and the action was 1
the reward at stage 895 was 0.0 and the action was 3
the reward at stage 896 was 0.0 and the action was 1
the reward at stage 897 was 0.0 and the action was 3
the reward at stage 898 was 0.0 and the action was 4
the reward at stage 899 was 0.0 and the action was 4
the reward at stage 900 was 0.0 and the action was 5
the reward at stage 901 was 0.0 and the action was 3
the reward at stage 902 was 0.0 and the action

the reward at stage 1044 was 0.0 and the action was 5
the reward at stage 1045 was 0.0 and the action was 2
the reward at stage 1046 was 0.0 and the action was 4
the reward at stage 1047 was 0.0 and the action was 1
the reward at stage 1048 was 0.0 and the action was 2
the reward at stage 1049 was 0.0 and the action was 2
the reward at stage 1050 was 0.0 and the action was 3
the reward at stage 1051 was 0.0 and the action was 2
the reward at stage 1052 was 0.0 and the action was 3
the reward at stage 1053 was 0.0 and the action was 2
the reward at stage 1054 was 0.0 and the action was 3
the reward at stage 1055 was 0.0 and the action was 3
the reward at stage 1056 was 0.0 and the action was 3
the reward at stage 1057 was 0.0 and the action was 2
the reward at stage 1058 was 0.0 and the action was 3
the reward at stage 1059 was 0.0 and the action was 0
the reward at stage 1060 was 0.0 and the action was 4
the reward at stage 1061 was 0.0 and the action was 3
the reward at stage 1062 was

the reward at stage 130 was 0.0 and the action was 5
the reward at stage 131 was 0.0 and the action was 4
the reward at stage 132 was 0.0 and the action was 4
the reward at stage 133 was -1.0 and the action was 1
the reward at stage 134 was 0.0 and the action was 0
the reward at stage 135 was 0.0 and the action was 0
the reward at stage 136 was 0.0 and the action was 0
the reward at stage 137 was 0.0 and the action was 0
the reward at stage 138 was 0.0 and the action was 5
the reward at stage 139 was 0.0 and the action was 5
the reward at stage 140 was 0.0 and the action was 3
the reward at stage 141 was 0.0 and the action was 3
the reward at stage 142 was 0.0 and the action was 1
the reward at stage 143 was 0.0 and the action was 2
the reward at stage 144 was 0.0 and the action was 5
the reward at stage 145 was 0.0 and the action was 5
the reward at stage 146 was 0.0 and the action was 5
the reward at stage 147 was 0.0 and the action was 3
the reward at stage 148 was 0.0 and the actio

the reward at stage 294 was 0.0 and the action was 0
the reward at stage 295 was 0.0 and the action was 4
the reward at stage 296 was 0.0 and the action was 1
the reward at stage 297 was 0.0 and the action was 1
the reward at stage 298 was 0.0 and the action was 5
the reward at stage 299 was 0.0 and the action was 4
the reward at stage 300 was 0.0 and the action was 2
the reward at stage 301 was 0.0 and the action was 2
the reward at stage 302 was 0.0 and the action was 2
the reward at stage 303 was 0.0 and the action was 5
the reward at stage 304 was -1.0 and the action was 5
the reward at stage 305 was 0.0 and the action was 4
the reward at stage 306 was 0.0 and the action was 1
the reward at stage 307 was 0.0 and the action was 3
the reward at stage 308 was 0.0 and the action was 2
the reward at stage 309 was 0.0 and the action was 4
the reward at stage 310 was 0.0 and the action was 2
the reward at stage 311 was 0.0 and the action was 1
the reward at stage 312 was 0.0 and the actio

the reward at stage 461 was 0.0 and the action was 3
the reward at stage 462 was 0.0 and the action was 4
the reward at stage 463 was 0.0 and the action was 2
the reward at stage 464 was 0.0 and the action was 1
the reward at stage 465 was 0.0 and the action was 4
the reward at stage 466 was 0.0 and the action was 3
the reward at stage 467 was 0.0 and the action was 3
the reward at stage 468 was 0.0 and the action was 4
the reward at stage 469 was 0.0 and the action was 0
the reward at stage 470 was 0.0 and the action was 3
the reward at stage 471 was 0.0 and the action was 3
the reward at stage 472 was 0.0 and the action was 1
the reward at stage 473 was 0.0 and the action was 2
the reward at stage 474 was 0.0 and the action was 3
the reward at stage 475 was 0.0 and the action was 4
the reward at stage 476 was 0.0 and the action was 3
the reward at stage 477 was 0.0 and the action was 4
the reward at stage 478 was 0.0 and the action was 2
the reward at stage 479 was 0.0 and the action

the reward at stage 641 was 0.0 and the action was 5
the reward at stage 642 was 0.0 and the action was 2
the reward at stage 643 was 0.0 and the action was 4
the reward at stage 644 was 0.0 and the action was 5
the reward at stage 645 was 0.0 and the action was 4
the reward at stage 646 was 0.0 and the action was 0
the reward at stage 647 was 0.0 and the action was 3
the reward at stage 648 was 0.0 and the action was 5
the reward at stage 649 was 0.0 and the action was 3
the reward at stage 650 was 0.0 and the action was 0
the reward at stage 651 was 0.0 and the action was 2
the reward at stage 652 was 0.0 and the action was 3
the reward at stage 653 was 0.0 and the action was 5
the reward at stage 654 was 0.0 and the action was 3
the reward at stage 655 was 0.0 and the action was 2
the reward at stage 656 was 0.0 and the action was 0
the reward at stage 657 was 0.0 and the action was 3
the reward at stage 658 was 0.0 and the action was 3
the reward at stage 659 was 0.0 and the action

the reward at stage 807 was 0.0 and the action was 1
the reward at stage 808 was 0.0 and the action was 3
the reward at stage 809 was 0.0 and the action was 2
the reward at stage 810 was 0.0 and the action was 0
the reward at stage 811 was 0.0 and the action was 2
the reward at stage 812 was 0.0 and the action was 1
the reward at stage 813 was -1.0 and the action was 2
the reward at stage 814 was 0.0 and the action was 2
the reward at stage 815 was 0.0 and the action was 2
the reward at stage 816 was 0.0 and the action was 4
the reward at stage 817 was 0.0 and the action was 5
the reward at stage 818 was 0.0 and the action was 0
the reward at stage 819 was 0.0 and the action was 5
the reward at stage 820 was 0.0 and the action was 0
the reward at stage 821 was 0.0 and the action was 2
the reward at stage 822 was 0.0 and the action was 3
the reward at stage 823 was 0.0 and the action was 3
the reward at stage 824 was 0.0 and the action was 2
the reward at stage 825 was 0.0 and the actio

the reward at stage 978 was 0.0 and the action was 3
the reward at stage 979 was 0.0 and the action was 3
the reward at stage 980 was 0.0 and the action was 0
the reward at stage 981 was 0.0 and the action was 3
the reward at stage 982 was 0.0 and the action was 2
the reward at stage 983 was 0.0 and the action was 4
the reward at stage 984 was 0.0 and the action was 2
the reward at stage 985 was 0.0 and the action was 5
the reward at stage 986 was 0.0 and the action was 0
the reward at stage 987 was 0.0 and the action was 1
the reward at stage 988 was 0.0 and the action was 1
the reward at stage 989 was 0.0 and the action was 3
the reward at stage 990 was 0.0 and the action was 5
the reward at stage 991 was 0.0 and the action was 3
the reward at stage 992 was 0.0 and the action was 3
the reward at stage 993 was 0.0 and the action was 0
the reward at stage 994 was 0.0 and the action was 1
the reward at stage 995 was 0.0 and the action was 2
the reward at stage 996 was 0.0 and the action

the reward at stage 9 was 0.0 and the action was 3
the reward at stage 10 was 0.0 and the action was 5
the reward at stage 11 was 0.0 and the action was 4
the reward at stage 12 was 0.0 and the action was 4
the reward at stage 13 was 0.0 and the action was 4
the reward at stage 14 was 0.0 and the action was 0
the reward at stage 15 was 0.0 and the action was 1
the reward at stage 16 was 0.0 and the action was 3
the reward at stage 17 was 0.0 and the action was 0
the reward at stage 18 was 0.0 and the action was 1
the reward at stage 19 was 0.0 and the action was 4
the reward at stage 20 was 0.0 and the action was 4
the reward at stage 21 was 0.0 and the action was 4
the reward at stage 22 was 0.0 and the action was 2
the reward at stage 23 was 0.0 and the action was 3
the reward at stage 24 was 0.0 and the action was 4
the reward at stage 25 was 0.0 and the action was 0
the reward at stage 26 was 0.0 and the action was 1
the reward at stage 27 was 0.0 and the action was 2
the reward at

the reward at stage 191 was 0.0 and the action was 2
the reward at stage 192 was 0.0 and the action was 4
the reward at stage 193 was 0.0 and the action was 3
the reward at stage 194 was 0.0 and the action was 1
the reward at stage 195 was 0.0 and the action was 3
the reward at stage 196 was 0.0 and the action was 0
the reward at stage 197 was 0.0 and the action was 1
the reward at stage 198 was 0.0 and the action was 4
the reward at stage 199 was 0.0 and the action was 2
the reward at stage 200 was 0.0 and the action was 5
the reward at stage 201 was 0.0 and the action was 1
the reward at stage 202 was 0.0 and the action was 4
the reward at stage 203 was 0.0 and the action was 5
the reward at stage 204 was 0.0 and the action was 2
the reward at stage 205 was 0.0 and the action was 1
the reward at stage 206 was 0.0 and the action was 0
the reward at stage 207 was 0.0 and the action was 3
the reward at stage 208 was 0.0 and the action was 4
the reward at stage 209 was 0.0 and the action

the reward at stage 357 was 0.0 and the action was 0
the reward at stage 358 was 0.0 and the action was 5
the reward at stage 359 was 0.0 and the action was 0
the reward at stage 360 was 0.0 and the action was 1
the reward at stage 361 was 0.0 and the action was 4
the reward at stage 362 was 0.0 and the action was 3
the reward at stage 363 was 0.0 and the action was 4
the reward at stage 364 was -1.0 and the action was 2
the reward at stage 365 was 0.0 and the action was 2
the reward at stage 366 was 0.0 and the action was 4
the reward at stage 367 was 0.0 and the action was 4
the reward at stage 368 was 0.0 and the action was 5
the reward at stage 369 was 0.0 and the action was 2
the reward at stage 370 was 0.0 and the action was 4
the reward at stage 371 was 0.0 and the action was 3
the reward at stage 372 was 0.0 and the action was 5
the reward at stage 373 was 0.0 and the action was 1
the reward at stage 374 was 0.0 and the action was 5
the reward at stage 375 was 0.0 and the actio

the reward at stage 522 was 0.0 and the action was 5
the reward at stage 523 was 0.0 and the action was 0
the reward at stage 524 was 0.0 and the action was 3
the reward at stage 525 was 0.0 and the action was 2
the reward at stage 526 was 0.0 and the action was 1
the reward at stage 527 was 0.0 and the action was 3
the reward at stage 528 was 0.0 and the action was 3
the reward at stage 529 was 0.0 and the action was 4
the reward at stage 530 was 0.0 and the action was 5
the reward at stage 531 was 0.0 and the action was 5
the reward at stage 532 was 0.0 and the action was 2
the reward at stage 533 was 0.0 and the action was 2
the reward at stage 534 was 0.0 and the action was 5
the reward at stage 535 was 0.0 and the action was 0
the reward at stage 536 was 0.0 and the action was 3
the reward at stage 537 was 0.0 and the action was 0
the reward at stage 538 was 0.0 and the action was 0
the reward at stage 539 was 0.0 and the action was 1
the reward at stage 540 was 0.0 and the action

the reward at stage 678 was 0.0 and the action was 3
the reward at stage 679 was 0.0 and the action was 1
the reward at stage 680 was 0.0 and the action was 4
the reward at stage 681 was 0.0 and the action was 4
the reward at stage 682 was 0.0 and the action was 0
the reward at stage 683 was 0.0 and the action was 1
the reward at stage 684 was 0.0 and the action was 1
the reward at stage 685 was 0.0 and the action was 3
the reward at stage 686 was 0.0 and the action was 4
the reward at stage 687 was 0.0 and the action was 3
the reward at stage 688 was 0.0 and the action was 4
the reward at stage 689 was 0.0 and the action was 0
the reward at stage 690 was 0.0 and the action was 4
the reward at stage 691 was 0.0 and the action was 0
the reward at stage 692 was 0.0 and the action was 2
the reward at stage 693 was 0.0 and the action was 3
the reward at stage 694 was 0.0 and the action was 0
the reward at stage 695 was 0.0 and the action was 5
the reward at stage 696 was 0.0 and the action

the reward at stage 850 was 0.0 and the action was 2
the reward at stage 851 was 0.0 and the action was 3
the reward at stage 852 was 0.0 and the action was 0
the reward at stage 853 was 0.0 and the action was 0
the reward at stage 854 was 0.0 and the action was 2
the reward at stage 855 was 0.0 and the action was 3
the reward at stage 856 was 0.0 and the action was 5
the reward at stage 857 was 0.0 and the action was 1
the reward at stage 858 was 0.0 and the action was 4
the reward at stage 859 was 0.0 and the action was 5
the reward at stage 860 was 0.0 and the action was 1
the reward at stage 861 was 0.0 and the action was 1
the reward at stage 862 was 0.0 and the action was 0
the reward at stage 863 was 0.0 and the action was 0
the reward at stage 864 was 0.0 and the action was 2
the reward at stage 865 was 0.0 and the action was 3
the reward at stage 866 was 0.0 and the action was 3
the reward at stage 867 was 0.0 and the action was 4
the reward at stage 868 was 0.0 and the action

the reward at stage 1023 was 0.0 and the action was 2
the reward at stage 1024 was 0.0 and the action was 4
the reward at stage 1025 was 0.0 and the action was 3
the reward at stage 1026 was 0.0 and the action was 1
the reward at stage 1027 was 0.0 and the action was 1
the reward at stage 1028 was 0.0 and the action was 1
the reward at stage 1029 was 0.0 and the action was 5
the reward at stage 1030 was 0.0 and the action was 1
the reward at stage 1031 was 0.0 and the action was 3
the reward at stage 1032 was 0.0 and the action was 4
the reward at stage 1033 was 0.0 and the action was 0
the reward at stage 1034 was -1.0 and the action was 0
the reward at stage 1035 was 0.0 and the action was 4
the reward at stage 1036 was 0.0 and the action was 3
the reward at stage 1037 was 0.0 and the action was 3
the reward at stage 1038 was 0.0 and the action was 2
the reward at stage 1039 was 0.0 and the action was 4
the reward at stage 1040 was 0.0 and the action was 0
the reward at stage 1041 wa

the reward at stage 53 was 0.0 and the action was 5
the reward at stage 54 was 0.0 and the action was 0
the reward at stage 55 was 0.0 and the action was 3
the reward at stage 56 was 0.0 and the action was 2
the reward at stage 57 was 0.0 and the action was 2
the reward at stage 58 was 0.0 and the action was 0
the reward at stage 59 was 0.0 and the action was 5
the reward at stage 60 was 0.0 and the action was 3
the reward at stage 61 was 0.0 and the action was 0
the reward at stage 62 was 0.0 and the action was 4
the reward at stage 63 was 0.0 and the action was 2
the reward at stage 64 was 0.0 and the action was 5
the reward at stage 65 was 0.0 and the action was 0
the reward at stage 66 was 0.0 and the action was 0
the reward at stage 67 was 0.0 and the action was 0
the reward at stage 68 was 0.0 and the action was 3
the reward at stage 69 was 0.0 and the action was 1
the reward at stage 70 was 0.0 and the action was 2
the reward at stage 71 was 0.0 and the action was 5
the reward a

the reward at stage 210 was 0.0 and the action was 2
the reward at stage 211 was 0.0 and the action was 0
the reward at stage 212 was 0.0 and the action was 4
the reward at stage 213 was 0.0 and the action was 1
the reward at stage 214 was 0.0 and the action was 1
the reward at stage 215 was 0.0 and the action was 1
the reward at stage 216 was 0.0 and the action was 5
the reward at stage 217 was 0.0 and the action was 0
the reward at stage 218 was 0.0 and the action was 3
the reward at stage 219 was 0.0 and the action was 0
the reward at stage 220 was 0.0 and the action was 2
the reward at stage 221 was 0.0 and the action was 0
the reward at stage 222 was 0.0 and the action was 3
the reward at stage 223 was 0.0 and the action was 4
the reward at stage 224 was 0.0 and the action was 1
the reward at stage 225 was 0.0 and the action was 0
the reward at stage 226 was 0.0 and the action was 5
the reward at stage 227 was 0.0 and the action was 3
the reward at stage 228 was -1.0 and the actio

the reward at stage 390 was 0.0 and the action was 3
the reward at stage 391 was 0.0 and the action was 0
the reward at stage 392 was 0.0 and the action was 2
the reward at stage 393 was 0.0 and the action was 3
the reward at stage 394 was 0.0 and the action was 3
the reward at stage 395 was 0.0 and the action was 4
the reward at stage 396 was 0.0 and the action was 3
the reward at stage 397 was 0.0 and the action was 2
the reward at stage 398 was 0.0 and the action was 1
the reward at stage 399 was 0.0 and the action was 1
the reward at stage 400 was 0.0 and the action was 3
the reward at stage 401 was 0.0 and the action was 3
the reward at stage 402 was 0.0 and the action was 4
the reward at stage 403 was -1.0 and the action was 2
the reward at stage 404 was 0.0 and the action was 5
the reward at stage 405 was 0.0 and the action was 4
the reward at stage 406 was 0.0 and the action was 1
the reward at stage 407 was 0.0 and the action was 0
the reward at stage 408 was 0.0 and the actio

the reward at stage 563 was 0.0 and the action was 0
the reward at stage 564 was 0.0 and the action was 4
the reward at stage 565 was 0.0 and the action was 2
the reward at stage 566 was 0.0 and the action was 4
the reward at stage 567 was 0.0 and the action was 1
the reward at stage 568 was 0.0 and the action was 4
the reward at stage 569 was 0.0 and the action was 4
the reward at stage 570 was 0.0 and the action was 4
the reward at stage 571 was 0.0 and the action was 2
the reward at stage 572 was 0.0 and the action was 0
the reward at stage 573 was 0.0 and the action was 1
the reward at stage 574 was 0.0 and the action was 4
the reward at stage 575 was 0.0 and the action was 2
the reward at stage 576 was 0.0 and the action was 4
the reward at stage 577 was 0.0 and the action was 2
the reward at stage 578 was 0.0 and the action was 4
the reward at stage 579 was 0.0 and the action was 4
the reward at stage 580 was 0.0 and the action was 1
the reward at stage 581 was 0.0 and the action

the reward at stage 741 was 0.0 and the action was 4
the reward at stage 742 was 0.0 and the action was 2
the reward at stage 743 was 0.0 and the action was 0
the reward at stage 744 was 0.0 and the action was 5
the reward at stage 745 was 0.0 and the action was 3
the reward at stage 746 was 0.0 and the action was 2
the reward at stage 747 was 0.0 and the action was 2
the reward at stage 748 was 0.0 and the action was 3
the reward at stage 749 was 0.0 and the action was 5
the reward at stage 750 was 0.0 and the action was 5
the reward at stage 751 was 0.0 and the action was 1
the reward at stage 752 was 0.0 and the action was 1
the reward at stage 753 was 0.0 and the action was 4
the reward at stage 754 was 0.0 and the action was 5
the reward at stage 755 was 0.0 and the action was 5
the reward at stage 756 was 0.0 and the action was 2
the reward at stage 757 was 0.0 and the action was 0
the reward at stage 758 was 0.0 and the action was 5
the reward at stage 759 was 0.0 and the action

the reward at stage 901 was -1.0 and the action was 0
the reward at stage 902 was 0.0 and the action was 2
the reward at stage 903 was 0.0 and the action was 5
the reward at stage 904 was 0.0 and the action was 5
the reward at stage 905 was 0.0 and the action was 5
the reward at stage 906 was 0.0 and the action was 2
the reward at stage 907 was 0.0 and the action was 0
the reward at stage 908 was 0.0 and the action was 1
the reward at stage 909 was 0.0 and the action was 2
the reward at stage 910 was 0.0 and the action was 1
the reward at stage 911 was 0.0 and the action was 3
the reward at stage 912 was 0.0 and the action was 0
the reward at stage 913 was 0.0 and the action was 1
the reward at stage 914 was 0.0 and the action was 2
the reward at stage 915 was 0.0 and the action was 3
the reward at stage 916 was 0.0 and the action was 5
the reward at stage 917 was 0.0 and the action was 5
the reward at stage 918 was 0.0 and the action was 3
the reward at stage 919 was 0.0 and the actio

the reward at stage 1057 was 0.0 and the action was 4
the reward at stage 1058 was 0.0 and the action was 2
the reward at stage 1059 was 0.0 and the action was 1
the reward at stage 1060 was 0.0 and the action was 2
the reward at stage 1061 was 0.0 and the action was 5
the reward at stage 1062 was 0.0 and the action was 1
the reward at stage 1063 was 0.0 and the action was 5
the reward at stage 1064 was 0.0 and the action was 3
the reward at stage 1065 was 0.0 and the action was 1
the reward at stage 1066 was 0.0 and the action was 3
the reward at stage 1067 was 0.0 and the action was 1
the reward at stage 1068 was 0.0 and the action was 2
the reward at stage 1069 was 0.0 and the action was 3
the reward at stage 1070 was 0.0 and the action was 1
the reward at stage 1071 was 0.0 and the action was 3
the reward at stage 1072 was 0.0 and the action was 1
the reward at stage 1073 was 0.0 and the action was 4
the reward at stage 1074 was 0.0 and the action was 1
the reward at stage 1075 was

the reward at stage 1217 was 0.0 and the action was 4
the reward at stage 1218 was 0.0 and the action was 2
the reward at stage 1219 was 0.0 and the action was 4
the reward at stage 1220 was 0.0 and the action was 4
the reward at stage 1221 was 0.0 and the action was 5
the reward at stage 1222 was 0.0 and the action was 5
the reward at stage 1223 was 0.0 and the action was 5
the reward at stage 1224 was 0.0 and the action was 1
the reward at stage 1225 was 0.0 and the action was 5
the reward at stage 1226 was 0.0 and the action was 5
the reward at stage 1227 was 0.0 and the action was 2
the reward at stage 1228 was 0.0 and the action was 3
the reward at stage 1229 was 0.0 and the action was 0
the reward at stage 1230 was 0.0 and the action was 2
the reward at stage 1231 was 0.0 and the action was 4
the reward at stage 1232 was 0.0 and the action was 2
the reward at stage 1233 was 0.0 and the action was 3
the reward at stage 1234 was 0.0 and the action was 2
the reward at stage 1235 was

the reward at stage 12 was 0.0 and the action was 4
the reward at stage 13 was 0.0 and the action was 1
the reward at stage 14 was 0.0 and the action was 4
the reward at stage 15 was 0.0 and the action was 4
the reward at stage 16 was 0.0 and the action was 1
the reward at stage 17 was 0.0 and the action was 0
the reward at stage 18 was 0.0 and the action was 4
the reward at stage 19 was 0.0 and the action was 0
the reward at stage 20 was 0.0 and the action was 1
the reward at stage 21 was 0.0 and the action was 3
the reward at stage 22 was 0.0 and the action was 1
the reward at stage 23 was 0.0 and the action was 2
the reward at stage 24 was 0.0 and the action was 3
the reward at stage 25 was 0.0 and the action was 4
the reward at stage 26 was 0.0 and the action was 3
the reward at stage 27 was 0.0 and the action was 2
the reward at stage 28 was 0.0 and the action was 0
the reward at stage 29 was 0.0 and the action was 2
the reward at stage 30 was 0.0 and the action was 2
the reward a

the reward at stage 174 was 0.0 and the action was 3
the reward at stage 175 was 0.0 and the action was 2
the reward at stage 176 was 0.0 and the action was 2
the reward at stage 177 was 0.0 and the action was 4
the reward at stage 178 was 0.0 and the action was 5
the reward at stage 179 was -1.0 and the action was 2
the reward at stage 180 was 0.0 and the action was 3
the reward at stage 181 was 0.0 and the action was 3
the reward at stage 182 was 0.0 and the action was 2
the reward at stage 183 was 0.0 and the action was 3
the reward at stage 184 was 0.0 and the action was 1
the reward at stage 185 was 0.0 and the action was 3
the reward at stage 186 was 0.0 and the action was 4
the reward at stage 187 was 0.0 and the action was 1
the reward at stage 188 was 0.0 and the action was 5
the reward at stage 189 was 0.0 and the action was 1
the reward at stage 190 was 0.0 and the action was 5
the reward at stage 191 was 0.0 and the action was 5
the reward at stage 192 was 0.0 and the actio

the reward at stage 341 was 0.0 and the action was 0
the reward at stage 342 was 0.0 and the action was 2
the reward at stage 343 was 0.0 and the action was 4
the reward at stage 344 was 0.0 and the action was 2
the reward at stage 345 was 0.0 and the action was 5
the reward at stage 346 was 0.0 and the action was 1
the reward at stage 347 was 0.0 and the action was 1
the reward at stage 348 was 0.0 and the action was 3
the reward at stage 349 was 0.0 and the action was 4
the reward at stage 350 was 0.0 and the action was 5
the reward at stage 351 was 0.0 and the action was 3
the reward at stage 352 was 0.0 and the action was 1
the reward at stage 353 was 0.0 and the action was 1
the reward at stage 354 was 0.0 and the action was 4
the reward at stage 355 was 0.0 and the action was 2
the reward at stage 356 was 0.0 and the action was 2
the reward at stage 357 was 0.0 and the action was 4
the reward at stage 358 was 0.0 and the action was 0
the reward at stage 359 was 0.0 and the action

the reward at stage 502 was 0.0 and the action was 1
the reward at stage 503 was 0.0 and the action was 5
the reward at stage 504 was 0.0 and the action was 3
the reward at stage 505 was 0.0 and the action was 1
the reward at stage 506 was 0.0 and the action was 1
the reward at stage 507 was 0.0 and the action was 3
the reward at stage 508 was 0.0 and the action was 4
the reward at stage 509 was 0.0 and the action was 2
the reward at stage 510 was 0.0 and the action was 2
the reward at stage 511 was 0.0 and the action was 5
the reward at stage 512 was 0.0 and the action was 4
the reward at stage 513 was 0.0 and the action was 5
the reward at stage 514 was 0.0 and the action was 1
the reward at stage 515 was 0.0 and the action was 5
the reward at stage 516 was 0.0 and the action was 1
the reward at stage 517 was 0.0 and the action was 2
the reward at stage 518 was 0.0 and the action was 2
the reward at stage 519 was 0.0 and the action was 0
the reward at stage 520 was 0.0 and the action

the reward at stage 659 was 0.0 and the action was 5
the reward at stage 660 was 0.0 and the action was 4
the reward at stage 661 was 0.0 and the action was 1
the reward at stage 662 was 0.0 and the action was 4
the reward at stage 663 was 0.0 and the action was 3
the reward at stage 664 was 0.0 and the action was 5
the reward at stage 665 was 0.0 and the action was 5
the reward at stage 666 was 0.0 and the action was 1
the reward at stage 667 was 0.0 and the action was 5
the reward at stage 668 was 0.0 and the action was 3
the reward at stage 669 was 0.0 and the action was 2
the reward at stage 670 was 0.0 and the action was 0
the reward at stage 671 was 0.0 and the action was 2
the reward at stage 672 was 0.0 and the action was 3
the reward at stage 673 was 0.0 and the action was 4
the reward at stage 674 was 0.0 and the action was 4
the reward at stage 675 was 0.0 and the action was 5
the reward at stage 676 was 0.0 and the action was 5
the reward at stage 677 was 0.0 and the action

the reward at stage 837 was 0.0 and the action was 0
the reward at stage 838 was 0.0 and the action was 2
the reward at stage 839 was 0.0 and the action was 3
the reward at stage 840 was 0.0 and the action was 2
the reward at stage 841 was 0.0 and the action was 3
the reward at stage 842 was 0.0 and the action was 5
the reward at stage 843 was 0.0 and the action was 1
the reward at stage 844 was 0.0 and the action was 0
the reward at stage 845 was 0.0 and the action was 3
the reward at stage 846 was 0.0 and the action was 5
the reward at stage 847 was 0.0 and the action was 3
the reward at stage 848 was 0.0 and the action was 2
the reward at stage 849 was 0.0 and the action was 3
the reward at stage 850 was 0.0 and the action was 5
the reward at stage 851 was 0.0 and the action was 5
the reward at stage 852 was 0.0 and the action was 4
the reward at stage 853 was 0.0 and the action was 3
the reward at stage 854 was 0.0 and the action was 5
the reward at stage 855 was 0.0 and the action

the reward at stage 995 was 0.0 and the action was 0
the reward at stage 996 was 0.0 and the action was 5
the reward at stage 997 was 0.0 and the action was 4
the reward at stage 998 was 0.0 and the action was 4
the reward at stage 999 was 0.0 and the action was 5
the reward at stage 1000 was 0.0 and the action was 3
the reward at stage 1001 was 0.0 and the action was 3
the reward at stage 1002 was 0.0 and the action was 1
the reward at stage 1003 was 0.0 and the action was 3
the reward at stage 1004 was 0.0 and the action was 3
the reward at stage 1005 was 0.0 and the action was 1
the reward at stage 1006 was 0.0 and the action was 5
the reward at stage 1007 was 0.0 and the action was 4
the reward at stage 1008 was 0.0 and the action was 3
the reward at stage 1009 was 0.0 and the action was 1
the reward at stage 1010 was 0.0 and the action was 0
the reward at stage 1011 was 0.0 and the action was 5
the reward at stage 1012 was 0.0 and the action was 4
the reward at stage 1013 was 0.0 

the reward at stage 1168 was 0.0 and the action was 1
the reward at stage 1169 was 0.0 and the action was 4
the reward at stage 1170 was 0.0 and the action was 5
the reward at stage 1171 was 0.0 and the action was 0
the reward at stage 1172 was 0.0 and the action was 0
the reward at stage 1173 was 0.0 and the action was 4
the reward at stage 1174 was 0.0 and the action was 3
the reward at stage 1175 was 0.0 and the action was 3
the reward at stage 1176 was 0.0 and the action was 0
the reward at stage 1177 was 0.0 and the action was 3
the reward at stage 1178 was 0.0 and the action was 5
the reward at stage 1179 was 0.0 and the action was 3
the reward at stage 1180 was 0.0 and the action was 2
the reward at stage 1181 was 0.0 and the action was 4
the reward at stage 1182 was 0.0 and the action was 0
the reward at stage 1183 was 0.0 and the action was 0
the reward at stage 1184 was 0.0 and the action was 0
the reward at stage 1185 was 0.0 and the action was 5
the reward at stage 1186 was

the reward at stage 1337 was 0.0 and the action was 0
the reward at stage 1338 was 0.0 and the action was 0
the reward at stage 1339 was 0.0 and the action was 4
the reward at stage 1340 was 0.0 and the action was 2
the reward at stage 1341 was 0.0 and the action was 3
the reward at stage 1342 was 0.0 and the action was 3
the reward at stage 1343 was 0.0 and the action was 5
the reward at stage 1344 was 0.0 and the action was 4
the reward at stage 1345 was 0.0 and the action was 5
the reward at stage 1346 was 0.0 and the action was 1
the reward at stage 1347 was 0.0 and the action was 5
the reward at stage 1348 was 0.0 and the action was 5
the reward at stage 1349 was 0.0 and the action was 4
the reward at stage 1350 was 0.0 and the action was 2
the reward at stage 1351 was 0.0 and the action was 4
the reward at stage 1352 was 0.0 and the action was 1
the reward at stage 1353 was 0.0 and the action was 2
the reward at stage 1354 was 0.0 and the action was 0
the reward at stage 1355 was

the reward at stage 123 was 0.0 and the action was 3
the reward at stage 124 was 0.0 and the action was 1
the reward at stage 125 was 0.0 and the action was 0
the reward at stage 126 was 0.0 and the action was 4
the reward at stage 127 was 0.0 and the action was 5
the reward at stage 128 was 0.0 and the action was 2
the reward at stage 129 was 0.0 and the action was 5
the reward at stage 130 was 0.0 and the action was 5
the reward at stage 131 was 0.0 and the action was 0
the reward at stage 132 was -1.0 and the action was 4
the reward at stage 133 was 0.0 and the action was 5
the reward at stage 134 was 0.0 and the action was 5
the reward at stage 135 was 0.0 and the action was 1
the reward at stage 136 was 0.0 and the action was 4
the reward at stage 137 was 0.0 and the action was 5
the reward at stage 138 was 0.0 and the action was 1
the reward at stage 139 was 0.0 and the action was 2
the reward at stage 140 was 0.0 and the action was 4
the reward at stage 141 was 0.0 and the actio

the reward at stage 280 was 0.0 and the action was 4
the reward at stage 281 was 0.0 and the action was 2
the reward at stage 282 was 0.0 and the action was 0
the reward at stage 283 was 0.0 and the action was 5
the reward at stage 284 was 0.0 and the action was 2
the reward at stage 285 was 0.0 and the action was 3
the reward at stage 286 was 0.0 and the action was 3
the reward at stage 287 was 0.0 and the action was 1
the reward at stage 288 was 0.0 and the action was 0
the reward at stage 289 was 0.0 and the action was 2
the reward at stage 290 was 0.0 and the action was 3
the reward at stage 291 was 0.0 and the action was 0
the reward at stage 292 was 0.0 and the action was 4
the reward at stage 293 was 0.0 and the action was 1
the reward at stage 294 was 0.0 and the action was 3
the reward at stage 295 was 0.0 and the action was 4
the reward at stage 296 was 0.0 and the action was 3
the reward at stage 297 was 0.0 and the action was 1
the reward at stage 298 was 0.0 and the action

the reward at stage 445 was 0.0 and the action was 5
the reward at stage 446 was 0.0 and the action was 2
the reward at stage 447 was 0.0 and the action was 1
the reward at stage 448 was 0.0 and the action was 5
the reward at stage 449 was 0.0 and the action was 2
the reward at stage 450 was 0.0 and the action was 4
the reward at stage 451 was 0.0 and the action was 5
the reward at stage 452 was 0.0 and the action was 4
the reward at stage 453 was 0.0 and the action was 4
the reward at stage 454 was 0.0 and the action was 5
the reward at stage 455 was 0.0 and the action was 0
the reward at stage 456 was 0.0 and the action was 4
the reward at stage 457 was 0.0 and the action was 3
the reward at stage 458 was 0.0 and the action was 5
the reward at stage 459 was 0.0 and the action was 5
the reward at stage 460 was 0.0 and the action was 1
the reward at stage 461 was 0.0 and the action was 3
the reward at stage 462 was 0.0 and the action was 0
the reward at stage 463 was 0.0 and the action

the reward at stage 608 was 0.0 and the action was 2
the reward at stage 609 was 0.0 and the action was 5
the reward at stage 610 was 0.0 and the action was 3
the reward at stage 611 was 0.0 and the action was 3
the reward at stage 612 was 0.0 and the action was 1
the reward at stage 613 was 0.0 and the action was 5
the reward at stage 614 was 0.0 and the action was 3
the reward at stage 615 was 0.0 and the action was 3
the reward at stage 616 was 0.0 and the action was 5
the reward at stage 617 was 0.0 and the action was 1
the reward at stage 618 was 0.0 and the action was 1
the reward at stage 619 was 0.0 and the action was 4
the reward at stage 620 was 0.0 and the action was 0
the reward at stage 621 was 0.0 and the action was 1
the reward at stage 622 was 0.0 and the action was 1
the reward at stage 623 was 0.0 and the action was 2
the reward at stage 624 was 0.0 and the action was 3
the reward at stage 625 was 0.0 and the action was 5
the reward at stage 626 was 0.0 and the action

the reward at stage 771 was 0.0 and the action was 5
the reward at stage 772 was 0.0 and the action was 1
the reward at stage 773 was 0.0 and the action was 1
the reward at stage 774 was 0.0 and the action was 2
the reward at stage 775 was 0.0 and the action was 5
the reward at stage 776 was 0.0 and the action was 0
the reward at stage 777 was 0.0 and the action was 0
the reward at stage 778 was 0.0 and the action was 3
the reward at stage 779 was 0.0 and the action was 3
the reward at stage 780 was 0.0 and the action was 3
the reward at stage 781 was 0.0 and the action was 0
the reward at stage 782 was 0.0 and the action was 1
the reward at stage 783 was 0.0 and the action was 2
the reward at stage 784 was 0.0 and the action was 5
the reward at stage 785 was 0.0 and the action was 2
the reward at stage 786 was 0.0 and the action was 0
the reward at stage 787 was 0.0 and the action was 5
the reward at stage 788 was 0.0 and the action was 2
the reward at stage 789 was 0.0 and the action

the reward at stage 926 was 0.0 and the action was 0
the reward at stage 927 was 0.0 and the action was 5
the reward at stage 928 was 0.0 and the action was 0
the reward at stage 929 was 0.0 and the action was 4
the reward at stage 930 was 0.0 and the action was 1
the reward at stage 931 was 0.0 and the action was 2
the reward at stage 932 was 0.0 and the action was 0
the reward at stage 933 was -1.0 and the action was 3
the reward at stage 934 was 0.0 and the action was 1
the reward at stage 935 was 0.0 and the action was 4
the reward at stage 936 was 0.0 and the action was 1
the reward at stage 937 was 0.0 and the action was 2
the reward at stage 938 was 0.0 and the action was 0
the reward at stage 939 was 0.0 and the action was 5
the reward at stage 940 was 0.0 and the action was 3
the reward at stage 941 was 0.0 and the action was 5
the reward at stage 942 was 0.0 and the action was 1
the reward at stage 943 was 0.0 and the action was 5
the reward at stage 944 was 0.0 and the actio

the reward at stage 1087 was 0.0 and the action was 0
the reward at stage 1088 was 0.0 and the action was 2
the reward at stage 1089 was 0.0 and the action was 5
the reward at stage 1090 was 0.0 and the action was 5
the reward at stage 1091 was 0.0 and the action was 1
the reward at stage 1092 was 0.0 and the action was 0
the reward at stage 1093 was 0.0 and the action was 1
the reward at stage 1094 was 0.0 and the action was 0
the reward at stage 1095 was 0.0 and the action was 3
the reward at stage 1096 was 0.0 and the action was 4
the reward at stage 1097 was 0.0 and the action was 3
the reward at stage 1098 was 0.0 and the action was 5
the reward at stage 1099 was 0.0 and the action was 0
the reward at stage 1100 was 0.0 and the action was 5
the reward at stage 1101 was 0.0 and the action was 1
the reward at stage 1102 was 0.0 and the action was 5
the reward at stage 1103 was 0.0 and the action was 3
the reward at stage 1104 was 0.0 and the action was 1
the reward at stage 1105 was

the reward at stage 1 was 0.0 and the action was 1
the reward at stage 2 was 0.0 and the action was 1
the reward at stage 3 was 0.0 and the action was 0
the reward at stage 4 was 0.0 and the action was 4
the reward at stage 5 was 0.0 and the action was 0
the reward at stage 6 was 0.0 and the action was 4
the reward at stage 7 was 0.0 and the action was 4
the reward at stage 8 was 0.0 and the action was 3
the reward at stage 9 was 0.0 and the action was 4
the reward at stage 10 was 0.0 and the action was 0
the reward at stage 11 was 0.0 and the action was 0
the reward at stage 12 was 0.0 and the action was 4
the reward at stage 13 was 0.0 and the action was 5
the reward at stage 14 was 0.0 and the action was 4
the reward at stage 15 was 0.0 and the action was 5
the reward at stage 16 was 0.0 and the action was 2
the reward at stage 17 was 0.0 and the action was 3
the reward at stage 18 was 0.0 and the action was 5
the reward at stage 19 was 0.0 and the action was 5
the reward at stage 2

the reward at stage 168 was 0.0 and the action was 2
the reward at stage 169 was 0.0 and the action was 4
the reward at stage 170 was 0.0 and the action was 4
the reward at stage 171 was 0.0 and the action was 3
the reward at stage 172 was 0.0 and the action was 0
the reward at stage 173 was 0.0 and the action was 2
the reward at stage 174 was 0.0 and the action was 3
the reward at stage 175 was 0.0 and the action was 3
the reward at stage 176 was 0.0 and the action was 0
the reward at stage 177 was 0.0 and the action was 5
the reward at stage 178 was 0.0 and the action was 1
the reward at stage 179 was 0.0 and the action was 4
the reward at stage 180 was 0.0 and the action was 2
the reward at stage 181 was 0.0 and the action was 0
the reward at stage 182 was 0.0 and the action was 1
the reward at stage 183 was 0.0 and the action was 1
the reward at stage 184 was 0.0 and the action was 5
the reward at stage 185 was 0.0 and the action was 1
the reward at stage 186 was 0.0 and the action

the reward at stage 339 was -1.0 and the action was 0
the reward at stage 340 was 0.0 and the action was 2
the reward at stage 341 was 0.0 and the action was 5
the reward at stage 342 was 0.0 and the action was 1
the reward at stage 343 was 0.0 and the action was 5
the reward at stage 344 was 0.0 and the action was 0
the reward at stage 345 was 0.0 and the action was 0
the reward at stage 346 was 0.0 and the action was 0
the reward at stage 347 was 0.0 and the action was 0
the reward at stage 348 was 0.0 and the action was 0
the reward at stage 349 was 0.0 and the action was 5
the reward at stage 350 was 0.0 and the action was 2
the reward at stage 351 was 0.0 and the action was 4
the reward at stage 352 was 0.0 and the action was 5
the reward at stage 353 was 0.0 and the action was 2
the reward at stage 354 was 0.0 and the action was 5
the reward at stage 355 was 0.0 and the action was 5
the reward at stage 356 was 0.0 and the action was 1
the reward at stage 357 was 0.0 and the actio

the reward at stage 495 was 0.0 and the action was 1
the reward at stage 496 was 0.0 and the action was 0
the reward at stage 497 was 0.0 and the action was 0
the reward at stage 498 was 0.0 and the action was 1
the reward at stage 499 was 0.0 and the action was 4
the reward at stage 500 was 0.0 and the action was 3
the reward at stage 501 was 0.0 and the action was 2
the reward at stage 502 was 0.0 and the action was 0
the reward at stage 503 was 0.0 and the action was 5
the reward at stage 504 was 0.0 and the action was 3
the reward at stage 505 was 0.0 and the action was 2
the reward at stage 506 was 0.0 and the action was 3
the reward at stage 507 was 0.0 and the action was 1
the reward at stage 508 was 0.0 and the action was 1
the reward at stage 509 was 0.0 and the action was 1
the reward at stage 510 was 0.0 and the action was 0
the reward at stage 511 was 0.0 and the action was 3
the reward at stage 512 was 0.0 and the action was 4
the reward at stage 513 was 0.0 and the action

the reward at stage 661 was 0.0 and the action was 4
the reward at stage 662 was 0.0 and the action was 1
the reward at stage 663 was 0.0 and the action was 2
the reward at stage 664 was 0.0 and the action was 4
the reward at stage 665 was 0.0 and the action was 4
the reward at stage 666 was 0.0 and the action was 5
the reward at stage 667 was 0.0 and the action was 0
the reward at stage 668 was 0.0 and the action was 1
the reward at stage 669 was 0.0 and the action was 4
the reward at stage 670 was 0.0 and the action was 2
the reward at stage 671 was 0.0 and the action was 3
the reward at stage 672 was 0.0 and the action was 1
the reward at stage 673 was 0.0 and the action was 4
the reward at stage 674 was 0.0 and the action was 2
the reward at stage 675 was 0.0 and the action was 4
the reward at stage 676 was 0.0 and the action was 1
the reward at stage 677 was 0.0 and the action was 1
the reward at stage 678 was 0.0 and the action was 1
the reward at stage 679 was 0.0 and the action

the reward at stage 822 was 0.0 and the action was 3
the reward at stage 823 was 0.0 and the action was 1
the reward at stage 824 was 0.0 and the action was 1
the reward at stage 825 was 0.0 and the action was 0
the reward at stage 826 was 0.0 and the action was 4
the reward at stage 827 was 0.0 and the action was 1
the reward at stage 828 was 0.0 and the action was 0
the reward at stage 829 was 0.0 and the action was 1
the reward at stage 830 was 0.0 and the action was 5
the reward at stage 831 was 0.0 and the action was 2
the reward at stage 832 was 0.0 and the action was 1
the reward at stage 833 was 0.0 and the action was 0
the reward at stage 834 was 0.0 and the action was 3
the reward at stage 835 was 0.0 and the action was 2
the reward at stage 836 was 0.0 and the action was 3
the reward at stage 837 was 0.0 and the action was 4
the reward at stage 838 was 0.0 and the action was 2
the reward at stage 839 was 0.0 and the action was 0
the reward at stage 840 was 0.0 and the action

the reward at stage 990 was 0.0 and the action was 0
the reward at stage 991 was 0.0 and the action was 5
the reward at stage 992 was 0.0 and the action was 2
the reward at stage 993 was 0.0 and the action was 1
the reward at stage 994 was 0.0 and the action was 3
the reward at stage 995 was -1.0 and the action was 2
the reward at stage 996 was 0.0 and the action was 3
the reward at stage 997 was 0.0 and the action was 2
the reward at stage 998 was 0.0 and the action was 1
the reward at stage 999 was 0.0 and the action was 2
the reward at stage 1000 was 0.0 and the action was 0
the reward at stage 1001 was 0.0 and the action was 3
the reward at stage 1002 was 0.0 and the action was 2
the reward at stage 1003 was 0.0 and the action was 4
the reward at stage 1004 was 0.0 and the action was 4
the reward at stage 1005 was 0.0 and the action was 1
the reward at stage 1006 was 0.0 and the action was 2
the reward at stage 1007 was 0.0 and the action was 1
the reward at stage 1008 was 0.0 and 

the reward at stage 1153 was 0.0 and the action was 0
the reward at stage 1154 was 0.0 and the action was 5
the reward at stage 1155 was 0.0 and the action was 0
the reward at stage 1156 was 0.0 and the action was 0
the reward at stage 1157 was 0.0 and the action was 5
the reward at stage 1158 was 0.0 and the action was 4
the reward at stage 1159 was 0.0 and the action was 0
the reward at stage 1160 was 0.0 and the action was 1
the reward at stage 1161 was 0.0 and the action was 1
the reward at stage 1162 was 0.0 and the action was 3
the reward at stage 1163 was -1.0 and the action was 0
the reward at stage 1164 was 0.0 and the action was 5
the reward at stage 1165 was 0.0 and the action was 2
the reward at stage 1166 was 0.0 and the action was 0
the reward at stage 1167 was 0.0 and the action was 5
the reward at stage 1168 was 0.0 and the action was 1
the reward at stage 1169 was 0.0 and the action was 2
the reward at stage 1170 was 0.0 and the action was 2
the reward at stage 1171 wa

the reward at stage 1308 was 0.0 and the action was 3
the reward at stage 1309 was 0.0 and the action was 3
the reward at stage 1310 was 0.0 and the action was 1
the reward at stage 1311 was 0.0 and the action was 1
the reward at stage 1312 was 0.0 and the action was 4
the reward at stage 1313 was 0.0 and the action was 4
the reward at stage 1314 was 0.0 and the action was 0
the reward at stage 1315 was 0.0 and the action was 3
the reward at stage 1316 was 0.0 and the action was 0
the reward at stage 1317 was 0.0 and the action was 3
the reward at stage 1318 was 0.0 and the action was 3
the reward at stage 1319 was 0.0 and the action was 5
the reward at stage 1320 was 0.0 and the action was 2
the reward at stage 1321 was 0.0 and the action was 0
the reward at stage 1322 was 0.0 and the action was 0
the reward at stage 1323 was 0.0 and the action was 2
the reward at stage 1324 was 0.0 and the action was 4
the reward at stage 1325 was 0.0 and the action was 0
the reward at stage 1326 was

the reward at stage 129 was 0.0 and the action was 0
the reward at stage 130 was 0.0 and the action was 1
the reward at stage 131 was -1.0 and the action was 4
the reward at stage 132 was 0.0 and the action was 3
the reward at stage 133 was 0.0 and the action was 2
the reward at stage 134 was 0.0 and the action was 5
the reward at stage 135 was 0.0 and the action was 5
the reward at stage 136 was 0.0 and the action was 3
the reward at stage 137 was 0.0 and the action was 5
the reward at stage 138 was 0.0 and the action was 0
the reward at stage 139 was 0.0 and the action was 3
the reward at stage 140 was 0.0 and the action was 1
the reward at stage 141 was 0.0 and the action was 4
the reward at stage 142 was 0.0 and the action was 5
the reward at stage 143 was 0.0 and the action was 3
the reward at stage 144 was 0.0 and the action was 4
the reward at stage 145 was 0.0 and the action was 1
the reward at stage 146 was 0.0 and the action was 2
the reward at stage 147 was 0.0 and the actio

the reward at stage 307 was 0.0 and the action was 2
the reward at stage 308 was 0.0 and the action was 3
the reward at stage 309 was 0.0 and the action was 3
the reward at stage 310 was 0.0 and the action was 3
the reward at stage 311 was 0.0 and the action was 2
the reward at stage 312 was 0.0 and the action was 5
the reward at stage 313 was 0.0 and the action was 3
the reward at stage 314 was 0.0 and the action was 3
the reward at stage 315 was 0.0 and the action was 3
the reward at stage 316 was 0.0 and the action was 2
the reward at stage 317 was 0.0 and the action was 3
the reward at stage 318 was 0.0 and the action was 1
the reward at stage 319 was 0.0 and the action was 4
the reward at stage 320 was 0.0 and the action was 4
the reward at stage 321 was 0.0 and the action was 1
the reward at stage 322 was 0.0 and the action was 2
the reward at stage 323 was 0.0 and the action was 5
the reward at stage 324 was 0.0 and the action was 2
the reward at stage 325 was 0.0 and the action

the reward at stage 485 was 0.0 and the action was 1
the reward at stage 486 was 0.0 and the action was 3
the reward at stage 487 was 0.0 and the action was 0
the reward at stage 488 was 0.0 and the action was 5
the reward at stage 489 was 0.0 and the action was 3
the reward at stage 490 was 0.0 and the action was 4
the reward at stage 491 was 0.0 and the action was 0
the reward at stage 492 was 0.0 and the action was 3
the reward at stage 493 was 0.0 and the action was 0
the reward at stage 494 was 0.0 and the action was 3
the reward at stage 495 was 0.0 and the action was 0
the reward at stage 496 was 0.0 and the action was 5
the reward at stage 497 was 0.0 and the action was 1
the reward at stage 498 was 0.0 and the action was 1
the reward at stage 499 was 0.0 and the action was 3
the reward at stage 500 was 0.0 and the action was 4
the reward at stage 501 was 0.0 and the action was 1
the reward at stage 502 was 0.0 and the action was 3
the reward at stage 503 was 0.0 and the action

the reward at stage 640 was 0.0 and the action was 4
the reward at stage 641 was 0.0 and the action was 2
the reward at stage 642 was 0.0 and the action was 1
the reward at stage 643 was -1.0 and the action was 5
the reward at stage 644 was 0.0 and the action was 5
the reward at stage 645 was 0.0 and the action was 1
the reward at stage 646 was 0.0 and the action was 5
the reward at stage 647 was 0.0 and the action was 3
the reward at stage 648 was 0.0 and the action was 2
the reward at stage 649 was 0.0 and the action was 0
the reward at stage 650 was 0.0 and the action was 1
the reward at stage 651 was 0.0 and the action was 5
the reward at stage 652 was 0.0 and the action was 2
the reward at stage 653 was 0.0 and the action was 3
the reward at stage 654 was 0.0 and the action was 5
the reward at stage 655 was 0.0 and the action was 3
the reward at stage 656 was 0.0 and the action was 4
the reward at stage 657 was 0.0 and the action was 5
the reward at stage 658 was 0.0 and the actio

the reward at stage 818 was 0.0 and the action was 4
the reward at stage 819 was 0.0 and the action was 0
the reward at stage 820 was 0.0 and the action was 1
the reward at stage 821 was 0.0 and the action was 3
the reward at stage 822 was 0.0 and the action was 5
the reward at stage 823 was 0.0 and the action was 1
the reward at stage 824 was 0.0 and the action was 3
the reward at stage 825 was 0.0 and the action was 5
the reward at stage 826 was 0.0 and the action was 2
the reward at stage 827 was 0.0 and the action was 4
the reward at stage 828 was -1.0 and the action was 5
the reward at stage 829 was 0.0 and the action was 1
the reward at stage 830 was 0.0 and the action was 1
the reward at stage 831 was 0.0 and the action was 5
the reward at stage 832 was 0.0 and the action was 0
the reward at stage 833 was 0.0 and the action was 2
the reward at stage 834 was 0.0 and the action was 1
the reward at stage 835 was 0.0 and the action was 5
the reward at stage 836 was 0.0 and the actio

the reward at stage 991 was 0.0 and the action was 3
the reward at stage 992 was 0.0 and the action was 4
the reward at stage 993 was 0.0 and the action was 4
the reward at stage 994 was 0.0 and the action was 1
the reward at stage 995 was 0.0 and the action was 2
the reward at stage 996 was 0.0 and the action was 5
the reward at stage 997 was 0.0 and the action was 5
the reward at stage 998 was 0.0 and the action was 2
the reward at stage 999 was 0.0 and the action was 5
the reward at stage 1000 was 0.0 and the action was 4
the reward at stage 1001 was 0.0 and the action was 1
the reward at stage 1002 was 0.0 and the action was 2
the reward at stage 1003 was 0.0 and the action was 0
the reward at stage 1004 was 0.0 and the action was 5
the reward at stage 1005 was 0.0 and the action was 1
the reward at stage 1006 was 0.0 and the action was 0
the reward at stage 1007 was 0.0 and the action was 2
the reward at stage 1008 was 0.0 and the action was 4
the reward at stage 1009 was 0.0 and 

the reward at stage 1147 was 0.0 and the action was 0
the reward at stage 1148 was 0.0 and the action was 5
the reward at stage 1149 was 0.0 and the action was 3
the reward at stage 1150 was 0.0 and the action was 1
the reward at stage 1151 was 0.0 and the action was 3
the reward at stage 1152 was 0.0 and the action was 5
the reward at stage 1153 was 0.0 and the action was 0
the reward at stage 1154 was 0.0 and the action was 4
the reward at stage 1155 was 0.0 and the action was 1
the reward at stage 1156 was 0.0 and the action was 0
the reward at stage 1157 was 0.0 and the action was 4
the reward at stage 1158 was 0.0 and the action was 5
the reward at stage 1159 was 0.0 and the action was 5
the reward at stage 1160 was 0.0 and the action was 3
the reward at stage 1161 was 0.0 and the action was 1
the reward at stage 1162 was 0.0 and the action was 3
the reward at stage 1163 was 0.0 and the action was 1
the reward at stage 1164 was 0.0 and the action was 3
the reward at stage 1165 was

the reward at stage 1306 was 0.0 and the action was 2
the reward at stage 1307 was 0.0 and the action was 4
the reward at stage 1308 was 0.0 and the action was 4
the reward at stage 1309 was 0.0 and the action was 1
the reward at stage 1310 was 0.0 and the action was 5
the reward at stage 1311 was 0.0 and the action was 2
the reward at stage 1312 was 0.0 and the action was 1
the reward at stage 1313 was 0.0 and the action was 0
the reward at stage 1314 was 0.0 and the action was 5
the reward at stage 1315 was 0.0 and the action was 3
the reward at stage 1316 was 0.0 and the action was 4
the reward at stage 1317 was 0.0 and the action was 5
the reward at stage 1318 was 0.0 and the action was 2
the reward at stage 1319 was 0.0 and the action was 4
the reward at stage 1320 was 0.0 and the action was 2
the reward at stage 1321 was 0.0 and the action was 3
the reward at stage 1322 was 0.0 and the action was 3
the reward at stage 1323 was 0.0 and the action was 5
the reward at stage 1324 was

the reward at stage 151 was 0.0 and the action was 5
the reward at stage 152 was 0.0 and the action was 5
the reward at stage 153 was 0.0 and the action was 5
the reward at stage 154 was 0.0 and the action was 2
the reward at stage 155 was 0.0 and the action was 4
the reward at stage 156 was 0.0 and the action was 3
the reward at stage 157 was 0.0 and the action was 5
the reward at stage 158 was 0.0 and the action was 5
the reward at stage 159 was 0.0 and the action was 1
the reward at stage 160 was 0.0 and the action was 3
the reward at stage 161 was 0.0 and the action was 3
the reward at stage 162 was 0.0 and the action was 5
the reward at stage 163 was 0.0 and the action was 5
the reward at stage 164 was 0.0 and the action was 1
the reward at stage 165 was 0.0 and the action was 1
the reward at stage 166 was 0.0 and the action was 3
the reward at stage 167 was 0.0 and the action was 3
the reward at stage 168 was 0.0 and the action was 3
the reward at stage 169 was 0.0 and the action

the reward at stage 306 was 0.0 and the action was 1
the reward at stage 307 was 0.0 and the action was 1
the reward at stage 308 was 0.0 and the action was 2
the reward at stage 309 was 0.0 and the action was 3
the reward at stage 310 was 0.0 and the action was 1
the reward at stage 311 was 0.0 and the action was 3
the reward at stage 312 was 0.0 and the action was 3
the reward at stage 313 was -1.0 and the action was 4
the reward at stage 314 was 0.0 and the action was 1
the reward at stage 315 was 0.0 and the action was 3
the reward at stage 316 was 0.0 and the action was 0
the reward at stage 317 was 0.0 and the action was 0
the reward at stage 318 was 0.0 and the action was 5
the reward at stage 319 was 0.0 and the action was 0
the reward at stage 320 was 0.0 and the action was 2
the reward at stage 321 was 0.0 and the action was 0
the reward at stage 322 was 0.0 and the action was 3
the reward at stage 323 was 0.0 and the action was 5
the reward at stage 324 was 0.0 and the actio

the reward at stage 483 was 0.0 and the action was 5
the reward at stage 484 was 0.0 and the action was 5
the reward at stage 485 was 0.0 and the action was 4
the reward at stage 486 was 0.0 and the action was 0
the reward at stage 487 was 0.0 and the action was 2
the reward at stage 488 was 0.0 and the action was 1
the reward at stage 489 was 0.0 and the action was 0
the reward at stage 490 was 0.0 and the action was 5
the reward at stage 491 was 0.0 and the action was 3
the reward at stage 492 was 0.0 and the action was 3
the reward at stage 493 was 0.0 and the action was 1
the reward at stage 494 was 0.0 and the action was 0
the reward at stage 495 was 0.0 and the action was 5
the reward at stage 496 was 0.0 and the action was 3
the reward at stage 497 was 0.0 and the action was 3
the reward at stage 498 was 0.0 and the action was 5
the reward at stage 499 was 0.0 and the action was 0
the reward at stage 500 was 0.0 and the action was 0
the reward at stage 501 was 0.0 and the action

the reward at stage 639 was 0.0 and the action was 2
the reward at stage 640 was 0.0 and the action was 0
the reward at stage 641 was 0.0 and the action was 3
the reward at stage 642 was 0.0 and the action was 0
the reward at stage 643 was 0.0 and the action was 4
the reward at stage 644 was 0.0 and the action was 1
the reward at stage 645 was 0.0 and the action was 0
the reward at stage 646 was 0.0 and the action was 4
the reward at stage 647 was 0.0 and the action was 3
the reward at stage 648 was -1.0 and the action was 0
the reward at stage 649 was 0.0 and the action was 3
the reward at stage 650 was 0.0 and the action was 5
the reward at stage 651 was 0.0 and the action was 3
the reward at stage 652 was 0.0 and the action was 1
the reward at stage 653 was 0.0 and the action was 3
the reward at stage 654 was 0.0 and the action was 3
the reward at stage 655 was 0.0 and the action was 3
the reward at stage 656 was 0.0 and the action was 4
the reward at stage 657 was 0.0 and the actio

the reward at stage 796 was 0.0 and the action was 3
the reward at stage 797 was 0.0 and the action was 0
the reward at stage 798 was 0.0 and the action was 2
the reward at stage 799 was 0.0 and the action was 3
the reward at stage 800 was 0.0 and the action was 0
the reward at stage 801 was 0.0 and the action was 3
the reward at stage 802 was 0.0 and the action was 1
the reward at stage 803 was 0.0 and the action was 1
the reward at stage 804 was 0.0 and the action was 0
the reward at stage 805 was 0.0 and the action was 3
the reward at stage 806 was 0.0 and the action was 1
the reward at stage 807 was 0.0 and the action was 2
the reward at stage 808 was 0.0 and the action was 5
the reward at stage 809 was 0.0 and the action was 1
the reward at stage 810 was 0.0 and the action was 1
the reward at stage 811 was 0.0 and the action was 0
the reward at stage 812 was 0.0 and the action was 0
the reward at stage 813 was 0.0 and the action was 4
the reward at stage 814 was 0.0 and the action

the reward at stage 955 was 0.0 and the action was 2
the reward at stage 956 was 0.0 and the action was 3
the reward at stage 957 was 0.0 and the action was 4
the reward at stage 958 was 0.0 and the action was 5
the reward at stage 959 was 0.0 and the action was 4
the reward at stage 960 was 0.0 and the action was 1
the reward at stage 961 was 0.0 and the action was 3
the reward at stage 962 was 0.0 and the action was 0
the reward at stage 963 was 0.0 and the action was 1
the reward at stage 964 was 0.0 and the action was 5
the reward at stage 965 was 0.0 and the action was 5
the reward at stage 966 was 0.0 and the action was 2
the reward at stage 967 was 0.0 and the action was 3
the reward at stage 968 was 0.0 and the action was 3
the reward at stage 969 was 0.0 and the action was 0
the reward at stage 970 was 0.0 and the action was 3
the reward at stage 971 was 0.0 and the action was 5
the reward at stage 972 was 0.0 and the action was 5
the reward at stage 973 was 0.0 and the action

the reward at stage 1121 was 0.0 and the action was 5
the reward at stage 1122 was 0.0 and the action was 2
the reward at stage 1123 was 0.0 and the action was 0
the reward at stage 1124 was 0.0 and the action was 5
the reward at stage 1125 was 0.0 and the action was 3
the reward at stage 1126 was 0.0 and the action was 2
the reward at stage 1127 was 0.0 and the action was 2
the reward at stage 1128 was 0.0 and the action was 2
the reward at stage 1129 was 0.0 and the action was 1
the reward at stage 1130 was 0.0 and the action was 5
the reward at stage 1131 was 0.0 and the action was 1
the reward at stage 1132 was 0.0 and the action was 5
the reward at stage 1133 was 0.0 and the action was 3
the reward at stage 1134 was 0.0 and the action was 1
the reward at stage 1135 was 0.0 and the action was 3
the reward at stage 1136 was 0.0 and the action was 2
the reward at stage 1137 was 0.0 and the action was 5
the reward at stage 1138 was -1.0 and the action was 1
the reward at stage 1139 wa

the reward at stage 2 was 0.0 and the action was 1
the reward at stage 3 was 0.0 and the action was 1
the reward at stage 4 was 0.0 and the action was 1
the reward at stage 5 was 0.0 and the action was 2
the reward at stage 6 was 0.0 and the action was 4
the reward at stage 7 was 0.0 and the action was 3
the reward at stage 8 was 0.0 and the action was 2
the reward at stage 9 was 0.0 and the action was 3
the reward at stage 10 was 0.0 and the action was 5
the reward at stage 11 was 0.0 and the action was 3
the reward at stage 12 was 0.0 and the action was 0
the reward at stage 13 was 0.0 and the action was 4
the reward at stage 14 was 0.0 and the action was 1
the reward at stage 15 was 0.0 and the action was 4
the reward at stage 16 was 0.0 and the action was 0
the reward at stage 17 was 0.0 and the action was 2
the reward at stage 18 was 0.0 and the action was 3
the reward at stage 19 was 0.0 and the action was 5
the reward at stage 20 was 0.0 and the action was 4
the reward at stage 

the reward at stage 177 was 0.0 and the action was 5
the reward at stage 178 was 0.0 and the action was 1
the reward at stage 179 was 0.0 and the action was 4
the reward at stage 180 was 0.0 and the action was 1
the reward at stage 181 was 0.0 and the action was 5
the reward at stage 182 was -1.0 and the action was 2
the reward at stage 183 was 0.0 and the action was 5
the reward at stage 184 was 0.0 and the action was 4
the reward at stage 185 was 0.0 and the action was 1
the reward at stage 186 was 0.0 and the action was 4
the reward at stage 187 was 0.0 and the action was 5
the reward at stage 188 was 0.0 and the action was 0
the reward at stage 189 was 0.0 and the action was 4
the reward at stage 190 was 0.0 and the action was 4
the reward at stage 191 was 0.0 and the action was 3
the reward at stage 192 was 0.0 and the action was 5
the reward at stage 193 was 0.0 and the action was 5
the reward at stage 194 was 0.0 and the action was 0
the reward at stage 195 was 0.0 and the actio

the reward at stage 352 was 0.0 and the action was 2
the reward at stage 353 was 0.0 and the action was 1
the reward at stage 354 was 0.0 and the action was 2
the reward at stage 355 was 0.0 and the action was 0
the reward at stage 356 was 0.0 and the action was 0
the reward at stage 357 was 0.0 and the action was 1
the reward at stage 358 was 0.0 and the action was 2
the reward at stage 359 was 0.0 and the action was 2
the reward at stage 360 was -1.0 and the action was 2
the reward at stage 361 was 0.0 and the action was 1
the reward at stage 362 was 0.0 and the action was 4
the reward at stage 363 was 0.0 and the action was 4
the reward at stage 364 was 0.0 and the action was 4
the reward at stage 365 was 0.0 and the action was 1
the reward at stage 366 was 0.0 and the action was 5
the reward at stage 367 was 0.0 and the action was 2
the reward at stage 368 was 0.0 and the action was 0
the reward at stage 369 was 0.0 and the action was 0
the reward at stage 370 was 0.0 and the actio

the reward at stage 516 was 0.0 and the action was 1
the reward at stage 517 was 0.0 and the action was 2
the reward at stage 518 was 0.0 and the action was 0
the reward at stage 519 was 0.0 and the action was 3
the reward at stage 520 was 0.0 and the action was 5
the reward at stage 521 was 0.0 and the action was 3
the reward at stage 522 was 0.0 and the action was 5
the reward at stage 523 was 0.0 and the action was 2
the reward at stage 524 was 0.0 and the action was 5
the reward at stage 525 was 0.0 and the action was 5
the reward at stage 526 was 0.0 and the action was 2
the reward at stage 527 was 0.0 and the action was 0
the reward at stage 528 was 0.0 and the action was 3
the reward at stage 529 was 0.0 and the action was 2
the reward at stage 530 was 0.0 and the action was 2
the reward at stage 531 was 0.0 and the action was 4
the reward at stage 532 was 0.0 and the action was 5
the reward at stage 533 was 0.0 and the action was 5
the reward at stage 534 was 0.0 and the action

the reward at stage 681 was 0.0 and the action was 4
the reward at stage 682 was 0.0 and the action was 4
the reward at stage 683 was 0.0 and the action was 1
the reward at stage 684 was 0.0 and the action was 2
the reward at stage 685 was 0.0 and the action was 1
the reward at stage 686 was 0.0 and the action was 5
the reward at stage 687 was 0.0 and the action was 0
the reward at stage 688 was 0.0 and the action was 4
the reward at stage 689 was 0.0 and the action was 5
the reward at stage 690 was 0.0 and the action was 2
the reward at stage 691 was 0.0 and the action was 5
the reward at stage 692 was 0.0 and the action was 1
the reward at stage 693 was 0.0 and the action was 5
the reward at stage 694 was 0.0 and the action was 3
the reward at stage 695 was 0.0 and the action was 1
the reward at stage 696 was 0.0 and the action was 0
the reward at stage 697 was 0.0 and the action was 3
the reward at stage 698 was 0.0 and the action was 0
the reward at stage 699 was 0.0 and the action

the reward at stage 846 was 0.0 and the action was 0
the reward at stage 847 was 0.0 and the action was 4
the reward at stage 848 was 0.0 and the action was 5
the reward at stage 849 was 0.0 and the action was 0
the reward at stage 850 was 0.0 and the action was 0
the reward at stage 851 was 0.0 and the action was 4
the reward at stage 852 was 0.0 and the action was 3
the reward at stage 853 was 0.0 and the action was 5
the reward at stage 854 was 0.0 and the action was 5
the reward at stage 855 was 0.0 and the action was 3
the reward at stage 856 was 0.0 and the action was 4
the reward at stage 857 was 0.0 and the action was 2
the reward at stage 858 was 0.0 and the action was 0
the reward at stage 859 was 0.0 and the action was 3
the reward at stage 860 was 0.0 and the action was 3
the reward at stage 861 was 0.0 and the action was 5
the reward at stage 862 was -1.0 and the action was 0
the reward at stage 863 was 0.0 and the action was 4
the reward at stage 864 was 0.0 and the actio

the reward at stage 1008 was 0.0 and the action was 4
the reward at stage 1009 was 0.0 and the action was 1
the reward at stage 1010 was 0.0 and the action was 4
the reward at stage 1011 was 0.0 and the action was 2
the reward at stage 1012 was 0.0 and the action was 2
the reward at stage 1013 was 0.0 and the action was 4
the reward at stage 1014 was 0.0 and the action was 3
the reward at stage 1015 was 0.0 and the action was 1
the reward at stage 1016 was 0.0 and the action was 2
the reward at stage 1017 was 0.0 and the action was 0
the reward at stage 1018 was 0.0 and the action was 3
the reward at stage 1019 was 0.0 and the action was 4
the reward at stage 1020 was 0.0 and the action was 2
the reward at stage 1021 was 0.0 and the action was 0
the reward at stage 1022 was 0.0 and the action was 4
the reward at stage 1023 was 0.0 and the action was 3
the reward at stage 1024 was 0.0 and the action was 4
the reward at stage 1025 was 0.0 and the action was 3
the reward at stage 1026 was

the reward at stage 1166 was 0.0 and the action was 0
the reward at stage 1167 was 0.0 and the action was 0
the reward at stage 1168 was 0.0 and the action was 4
the reward at stage 1169 was 0.0 and the action was 2
the reward at stage 1170 was 0.0 and the action was 0
the reward at stage 1171 was 0.0 and the action was 2
the reward at stage 1172 was 0.0 and the action was 5
the reward at stage 1173 was 0.0 and the action was 0
the reward at stage 1174 was 0.0 and the action was 2
the reward at stage 1175 was 0.0 and the action was 0
the reward at stage 1176 was 0.0 and the action was 4
the reward at stage 1177 was 0.0 and the action was 4
the reward at stage 1178 was 0.0 and the action was 5
the reward at stage 1179 was 0.0 and the action was 1
the reward at stage 1180 was 0.0 and the action was 3
the reward at stage 1181 was 0.0 and the action was 2
the reward at stage 1182 was 0.0 and the action was 1
the reward at stage 1183 was 0.0 and the action was 5
the reward at stage 1184 was

the reward at stage 55 was 0.0 and the action was 2
the reward at stage 56 was 0.0 and the action was 5
the reward at stage 57 was 0.0 and the action was 2
the reward at stage 58 was 0.0 and the action was 2
the reward at stage 59 was 0.0 and the action was 3
the reward at stage 60 was 0.0 and the action was 5
the reward at stage 61 was 0.0 and the action was 0
the reward at stage 62 was 0.0 and the action was 2
the reward at stage 63 was 0.0 and the action was 5
the reward at stage 64 was 0.0 and the action was 1
the reward at stage 65 was 0.0 and the action was 4
the reward at stage 66 was 0.0 and the action was 1
the reward at stage 67 was 0.0 and the action was 1
the reward at stage 68 was 0.0 and the action was 4
the reward at stage 69 was 0.0 and the action was 4
the reward at stage 70 was 0.0 and the action was 2
the reward at stage 71 was 0.0 and the action was 3
the reward at stage 72 was 0.0 and the action was 5
the reward at stage 73 was 0.0 and the action was 0
the reward a

the reward at stage 237 was 0.0 and the action was 1
the reward at stage 238 was 0.0 and the action was 3
the reward at stage 239 was 0.0 and the action was 5
the reward at stage 240 was 0.0 and the action was 3
the reward at stage 241 was 0.0 and the action was 2
the reward at stage 242 was 0.0 and the action was 0
the reward at stage 243 was 0.0 and the action was 5
the reward at stage 244 was 0.0 and the action was 4
the reward at stage 245 was 0.0 and the action was 0
the reward at stage 246 was 0.0 and the action was 2
the reward at stage 247 was 0.0 and the action was 3
the reward at stage 248 was 0.0 and the action was 1
the reward at stage 249 was 0.0 and the action was 2
the reward at stage 250 was 0.0 and the action was 5
the reward at stage 251 was 0.0 and the action was 2
the reward at stage 252 was 0.0 and the action was 2
the reward at stage 253 was 0.0 and the action was 4
the reward at stage 254 was 0.0 and the action was 4
the reward at stage 255 was 0.0 and the action

the reward at stage 404 was 0.0 and the action was 4
the reward at stage 405 was 0.0 and the action was 4
the reward at stage 406 was 0.0 and the action was 4
the reward at stage 407 was 0.0 and the action was 2
the reward at stage 408 was 0.0 and the action was 1
the reward at stage 409 was 0.0 and the action was 2
the reward at stage 410 was 0.0 and the action was 0
the reward at stage 411 was 0.0 and the action was 3
the reward at stage 412 was 0.0 and the action was 3
the reward at stage 413 was 0.0 and the action was 3
the reward at stage 414 was -1.0 and the action was 4
the reward at stage 415 was 0.0 and the action was 2
the reward at stage 416 was 0.0 and the action was 3
the reward at stage 417 was 0.0 and the action was 4
the reward at stage 418 was 0.0 and the action was 3
the reward at stage 419 was 0.0 and the action was 0
the reward at stage 420 was 0.0 and the action was 4
the reward at stage 421 was 0.0 and the action was 2
the reward at stage 422 was 0.0 and the actio

the reward at stage 571 was 0.0 and the action was 0
the reward at stage 572 was 0.0 and the action was 1
the reward at stage 573 was 0.0 and the action was 1
the reward at stage 574 was 0.0 and the action was 4
the reward at stage 575 was 0.0 and the action was 5
the reward at stage 576 was 0.0 and the action was 4
the reward at stage 577 was 0.0 and the action was 3
the reward at stage 578 was 0.0 and the action was 0
the reward at stage 579 was 0.0 and the action was 0
the reward at stage 580 was 0.0 and the action was 0
the reward at stage 581 was 0.0 and the action was 1
the reward at stage 582 was 0.0 and the action was 0
the reward at stage 583 was 0.0 and the action was 1
the reward at stage 584 was 0.0 and the action was 3
the reward at stage 585 was 0.0 and the action was 2
the reward at stage 586 was -1.0 and the action was 0
the reward at stage 587 was 0.0 and the action was 5
the reward at stage 588 was 0.0 and the action was 2
the reward at stage 589 was 0.0 and the actio

the reward at stage 727 was 0.0 and the action was 0
the reward at stage 728 was 0.0 and the action was 1
the reward at stage 729 was 0.0 and the action was 4
the reward at stage 730 was 0.0 and the action was 1
the reward at stage 731 was 0.0 and the action was 3
the reward at stage 732 was 0.0 and the action was 4
the reward at stage 733 was 0.0 and the action was 2
the reward at stage 734 was 0.0 and the action was 5
the reward at stage 735 was 0.0 and the action was 3
the reward at stage 736 was 0.0 and the action was 0
the reward at stage 737 was 0.0 and the action was 5
the reward at stage 738 was 0.0 and the action was 0
the reward at stage 739 was 0.0 and the action was 0
the reward at stage 740 was 0.0 and the action was 2
the reward at stage 741 was 0.0 and the action was 2
the reward at stage 742 was 0.0 and the action was 0
the reward at stage 743 was 0.0 and the action was 2
the reward at stage 744 was 0.0 and the action was 5
the reward at stage 745 was 0.0 and the action

the reward at stage 884 was 0.0 and the action was 1
the reward at stage 885 was 0.0 and the action was 5
the reward at stage 886 was 0.0 and the action was 1
the reward at stage 887 was 0.0 and the action was 4
the reward at stage 888 was 0.0 and the action was 1
the reward at stage 889 was 0.0 and the action was 2
the reward at stage 890 was 0.0 and the action was 3
the reward at stage 891 was 0.0 and the action was 1
the reward at stage 892 was 0.0 and the action was 1
the reward at stage 893 was 0.0 and the action was 4
the reward at stage 894 was 0.0 and the action was 2
the reward at stage 895 was 0.0 and the action was 0
the reward at stage 896 was 0.0 and the action was 1
the reward at stage 897 was 0.0 and the action was 3
the reward at stage 898 was 0.0 and the action was 0
the reward at stage 899 was 1.0 and the action was 0
the reward at stage 900 was 0.0 and the action was 5
the reward at stage 901 was 0.0 and the action was 1
the reward at stage 902 was 0.0 and the action

the reward at stage 1043 was 0.0 and the action was 0
the reward at stage 1044 was 0.0 and the action was 4
the reward at stage 1045 was 0.0 and the action was 0
the reward at stage 1046 was 0.0 and the action was 5
the reward at stage 1047 was 0.0 and the action was 4
the reward at stage 1048 was 0.0 and the action was 5
the reward at stage 1049 was 0.0 and the action was 2
the reward at stage 1050 was 0.0 and the action was 2
the reward at stage 1051 was 0.0 and the action was 3
the reward at stage 1052 was 0.0 and the action was 2
the reward at stage 1053 was 0.0 and the action was 2
the reward at stage 1054 was 0.0 and the action was 1
the reward at stage 1055 was 0.0 and the action was 3
the reward at stage 1056 was 0.0 and the action was 5
the reward at stage 1057 was 0.0 and the action was 4
the reward at stage 1058 was 0.0 and the action was 4
the reward at stage 1059 was 0.0 and the action was 4
the reward at stage 1060 was 0.0 and the action was 4
the reward at stage 1061 was

the reward at stage 1199 was 0.0 and the action was 3
the reward at stage 1200 was 0.0 and the action was 0
the reward at stage 1201 was 0.0 and the action was 1
the reward at stage 1202 was 0.0 and the action was 3
the reward at stage 1203 was 0.0 and the action was 1
the reward at stage 1204 was 0.0 and the action was 3
the reward at stage 1205 was 0.0 and the action was 4
the reward at stage 1206 was -1.0 and the action was 2
the reward at stage 1207 was 0.0 and the action was 3
the reward at stage 1208 was 0.0 and the action was 0
the reward at stage 1209 was 0.0 and the action was 0
the reward at stage 1210 was 0.0 and the action was 4
the reward at stage 1211 was 0.0 and the action was 5
the reward at stage 1212 was 0.0 and the action was 4
the reward at stage 1213 was 0.0 and the action was 4
the reward at stage 1214 was 0.0 and the action was 5
the reward at stage 1215 was 0.0 and the action was 4
the reward at stage 1216 was 0.0 and the action was 0
the reward at stage 1217 wa

the reward at stage 56 was 0.0 and the action was 5
the reward at stage 57 was 0.0 and the action was 0
the reward at stage 58 was 0.0 and the action was 1
the reward at stage 59 was 0.0 and the action was 1
the reward at stage 60 was 0.0 and the action was 5
the reward at stage 61 was 0.0 and the action was 4
the reward at stage 62 was 0.0 and the action was 1
the reward at stage 63 was 0.0 and the action was 3
the reward at stage 64 was 0.0 and the action was 2
the reward at stage 65 was 0.0 and the action was 0
the reward at stage 66 was 0.0 and the action was 1
the reward at stage 67 was 0.0 and the action was 0
the reward at stage 68 was 0.0 and the action was 4
the reward at stage 69 was 0.0 and the action was 3
the reward at stage 70 was 0.0 and the action was 4
the reward at stage 71 was 0.0 and the action was 1
the reward at stage 72 was 0.0 and the action was 4
the reward at stage 73 was 0.0 and the action was 3
the reward at stage 74 was 0.0 and the action was 1
the reward a

the reward at stage 213 was 0.0 and the action was 3
the reward at stage 214 was 0.0 and the action was 0
the reward at stage 215 was 0.0 and the action was 2
the reward at stage 216 was 0.0 and the action was 1
the reward at stage 217 was 0.0 and the action was 4
the reward at stage 218 was 0.0 and the action was 4
the reward at stage 219 was 0.0 and the action was 1
the reward at stage 220 was 0.0 and the action was 3
the reward at stage 221 was 0.0 and the action was 1
the reward at stage 222 was 0.0 and the action was 4
the reward at stage 223 was 0.0 and the action was 4
the reward at stage 224 was 0.0 and the action was 5
the reward at stage 225 was 0.0 and the action was 3
the reward at stage 226 was 0.0 and the action was 4
the reward at stage 227 was 0.0 and the action was 0
the reward at stage 228 was 0.0 and the action was 3
the reward at stage 229 was 0.0 and the action was 4
the reward at stage 230 was 0.0 and the action was 5
the reward at stage 231 was 0.0 and the action

the reward at stage 370 was 0.0 and the action was 4
the reward at stage 371 was 0.0 and the action was 3
the reward at stage 372 was 0.0 and the action was 0
the reward at stage 373 was 0.0 and the action was 4
the reward at stage 374 was 0.0 and the action was 4
the reward at stage 375 was 0.0 and the action was 5
the reward at stage 376 was 0.0 and the action was 5
the reward at stage 377 was 0.0 and the action was 0
the reward at stage 378 was 0.0 and the action was 0
the reward at stage 379 was 0.0 and the action was 1
the reward at stage 380 was 0.0 and the action was 1
the reward at stage 381 was -1.0 and the action was 2
the reward at stage 382 was 0.0 and the action was 0
the reward at stage 383 was 0.0 and the action was 0
the reward at stage 384 was 0.0 and the action was 1
the reward at stage 385 was 0.0 and the action was 3
the reward at stage 386 was 0.0 and the action was 0
the reward at stage 387 was 0.0 and the action was 1
the reward at stage 388 was 0.0 and the actio

the reward at stage 526 was 0.0 and the action was 3
the reward at stage 527 was 0.0 and the action was 0
the reward at stage 528 was 0.0 and the action was 5
the reward at stage 529 was 0.0 and the action was 1
the reward at stage 530 was 0.0 and the action was 1
the reward at stage 531 was 0.0 and the action was 1
the reward at stage 532 was 0.0 and the action was 5
the reward at stage 533 was 0.0 and the action was 0
the reward at stage 534 was 0.0 and the action was 2
the reward at stage 535 was 0.0 and the action was 5
the reward at stage 536 was 0.0 and the action was 5
the reward at stage 537 was 0.0 and the action was 5
the reward at stage 538 was 0.0 and the action was 0
the reward at stage 539 was 0.0 and the action was 0
the reward at stage 540 was 0.0 and the action was 3
the reward at stage 541 was 0.0 and the action was 0
the reward at stage 542 was 0.0 and the action was 3
the reward at stage 543 was 0.0 and the action was 2
the reward at stage 544 was 0.0 and the action

the reward at stage 684 was 0.0 and the action was 4
the reward at stage 685 was 0.0 and the action was 1
the reward at stage 686 was 0.0 and the action was 5
the reward at stage 687 was 0.0 and the action was 1
the reward at stage 688 was 0.0 and the action was 5
the reward at stage 689 was 0.0 and the action was 5
the reward at stage 690 was 0.0 and the action was 0
the reward at stage 691 was 0.0 and the action was 4
the reward at stage 692 was 0.0 and the action was 4
the reward at stage 693 was 0.0 and the action was 2
the reward at stage 694 was 0.0 and the action was 3
the reward at stage 695 was 0.0 and the action was 5
the reward at stage 696 was 0.0 and the action was 3
the reward at stage 697 was 0.0 and the action was 2
the reward at stage 698 was 0.0 and the action was 5
the reward at stage 699 was 0.0 and the action was 4
the reward at stage 700 was 0.0 and the action was 0
the reward at stage 701 was 0.0 and the action was 5
the reward at stage 702 was 0.0 and the action

the reward at stage 842 was 0.0 and the action was 2
the reward at stage 843 was 0.0 and the action was 3
the reward at stage 844 was 0.0 and the action was 3
the reward at stage 845 was 0.0 and the action was 1
the reward at stage 846 was 0.0 and the action was 4
the reward at stage 847 was 0.0 and the action was 4
the reward at stage 848 was 0.0 and the action was 2
the reward at stage 849 was 0.0 and the action was 0
the reward at stage 850 was 0.0 and the action was 1
the reward at stage 851 was 0.0 and the action was 5
the reward at stage 852 was 0.0 and the action was 2
the reward at stage 853 was 0.0 and the action was 3
the reward at stage 854 was 0.0 and the action was 2
the reward at stage 855 was 0.0 and the action was 2
the reward at stage 856 was 0.0 and the action was 4
the reward at stage 857 was 0.0 and the action was 2
the reward at stage 858 was 0.0 and the action was 0
the reward at stage 859 was 0.0 and the action was 4
the reward at stage 860 was 0.0 and the action

the reward at stage 1001 was 0.0 and the action was 1
the reward at stage 1002 was 0.0 and the action was 1
the reward at stage 1003 was 0.0 and the action was 4
the reward at stage 1004 was 0.0 and the action was 4
the reward at stage 1005 was 0.0 and the action was 0
the reward at stage 1006 was 0.0 and the action was 2
the reward at stage 1007 was 0.0 and the action was 4
the reward at stage 1008 was 0.0 and the action was 0
the reward at stage 1009 was 0.0 and the action was 1
the reward at stage 1010 was 0.0 and the action was 3
the reward at stage 1011 was 0.0 and the action was 0
the reward at stage 1012 was 0.0 and the action was 0
the reward at stage 1013 was 0.0 and the action was 3
the reward at stage 1014 was 0.0 and the action was 3
the reward at stage 1015 was 0.0 and the action was 4
the reward at stage 1016 was 0.0 and the action was 2
the reward at stage 1017 was 0.0 and the action was 3
the reward at stage 1018 was 0.0 and the action was 3
the reward at stage 1019 was

the reward at stage 1157 was 0.0 and the action was 5
the reward at stage 1158 was 0.0 and the action was 3
the reward at stage 1159 was -1.0 and the action was 5
the reward at stage 1160 was 0.0 and the action was 1
the reward at stage 1161 was 0.0 and the action was 4
the reward at stage 1162 was 0.0 and the action was 0
the reward at stage 1163 was 0.0 and the action was 3
the reward at stage 1164 was 0.0 and the action was 3
the reward at stage 1165 was 0.0 and the action was 0
the reward at stage 1166 was 0.0 and the action was 4
the reward at stage 1167 was 0.0 and the action was 1
the reward at stage 1168 was 0.0 and the action was 4
the reward at stage 1169 was 0.0 and the action was 0
the reward at stage 1170 was 0.0 and the action was 2
the reward at stage 1171 was 0.0 and the action was 5
the reward at stage 1172 was 0.0 and the action was 3
the reward at stage 1173 was 0.0 and the action was 1
the reward at stage 1174 was 0.0 and the action was 2
the reward at stage 1175 wa

the reward at stage 76 was 0.0 and the action was 3
the reward at stage 77 was 0.0 and the action was 1
the reward at stage 78 was 0.0 and the action was 3
the reward at stage 79 was 0.0 and the action was 2
the reward at stage 80 was 0.0 and the action was 5
the reward at stage 81 was 0.0 and the action was 4
the reward at stage 82 was 0.0 and the action was 2
the reward at stage 83 was 0.0 and the action was 0
the reward at stage 84 was 0.0 and the action was 2
the reward at stage 85 was 0.0 and the action was 0
the reward at stage 86 was 0.0 and the action was 3
the reward at stage 87 was -1.0 and the action was 3
the reward at stage 88 was 0.0 and the action was 2
the reward at stage 89 was 0.0 and the action was 3
the reward at stage 90 was 0.0 and the action was 5
the reward at stage 91 was 0.0 and the action was 3
the reward at stage 92 was 0.0 and the action was 3
the reward at stage 93 was 0.0 and the action was 4
the reward at stage 94 was 0.0 and the action was 4
the reward 

the reward at stage 237 was 0.0 and the action was 2
the reward at stage 238 was 0.0 and the action was 0
the reward at stage 239 was 0.0 and the action was 1
the reward at stage 240 was 0.0 and the action was 1
the reward at stage 241 was 0.0 and the action was 4
the reward at stage 242 was 0.0 and the action was 1
the reward at stage 243 was 0.0 and the action was 0
the reward at stage 244 was 0.0 and the action was 3
the reward at stage 245 was 0.0 and the action was 5
the reward at stage 246 was 0.0 and the action was 4
the reward at stage 247 was 0.0 and the action was 1
the reward at stage 248 was 0.0 and the action was 5
the reward at stage 249 was 0.0 and the action was 1
the reward at stage 250 was 0.0 and the action was 5
the reward at stage 251 was 0.0 and the action was 5
the reward at stage 252 was 0.0 and the action was 2
the reward at stage 253 was 0.0 and the action was 2
the reward at stage 254 was 0.0 and the action was 1
the reward at stage 255 was 0.0 and the action

the reward at stage 412 was 0.0 and the action was 1
the reward at stage 413 was 0.0 and the action was 5
the reward at stage 414 was 0.0 and the action was 5
the reward at stage 415 was 0.0 and the action was 0
the reward at stage 416 was 0.0 and the action was 2
the reward at stage 417 was 0.0 and the action was 1
the reward at stage 418 was 0.0 and the action was 0
the reward at stage 419 was 0.0 and the action was 1
the reward at stage 420 was 0.0 and the action was 1
the reward at stage 421 was 0.0 and the action was 1
the reward at stage 422 was 0.0 and the action was 2
the reward at stage 423 was 0.0 and the action was 4
the reward at stage 424 was 0.0 and the action was 2
the reward at stage 425 was 0.0 and the action was 2
the reward at stage 426 was 0.0 and the action was 0
the reward at stage 427 was 0.0 and the action was 3
the reward at stage 428 was 0.0 and the action was 4
the reward at stage 429 was 0.0 and the action was 0
the reward at stage 430 was 0.0 and the action

the reward at stage 585 was 0.0 and the action was 3
the reward at stage 586 was 0.0 and the action was 1
the reward at stage 587 was 0.0 and the action was 5
the reward at stage 588 was 0.0 and the action was 1
the reward at stage 589 was 0.0 and the action was 3
the reward at stage 590 was 0.0 and the action was 1
the reward at stage 591 was 0.0 and the action was 0
the reward at stage 592 was 0.0 and the action was 0
the reward at stage 593 was 0.0 and the action was 3
the reward at stage 594 was 0.0 and the action was 5
the reward at stage 595 was 0.0 and the action was 0
the reward at stage 596 was 0.0 and the action was 4
the reward at stage 597 was 0.0 and the action was 4
the reward at stage 598 was 0.0 and the action was 1
the reward at stage 599 was 0.0 and the action was 2
the reward at stage 600 was 0.0 and the action was 1
the reward at stage 601 was 0.0 and the action was 4
the reward at stage 602 was 0.0 and the action was 4
the reward at stage 603 was 0.0 and the action

the reward at stage 746 was 0.0 and the action was 4
the reward at stage 747 was 0.0 and the action was 5
the reward at stage 748 was 0.0 and the action was 3
the reward at stage 749 was -1.0 and the action was 4
the reward at stage 750 was 0.0 and the action was 0
the reward at stage 751 was 0.0 and the action was 2
the reward at stage 752 was 0.0 and the action was 5
the reward at stage 753 was 0.0 and the action was 5
the reward at stage 754 was 0.0 and the action was 5
the reward at stage 755 was 0.0 and the action was 2
the reward at stage 756 was 0.0 and the action was 5
the reward at stage 757 was 0.0 and the action was 4
the reward at stage 758 was 0.0 and the action was 0
the reward at stage 759 was 0.0 and the action was 1
the reward at stage 760 was 0.0 and the action was 0
the reward at stage 761 was 0.0 and the action was 1
the reward at stage 762 was 0.0 and the action was 1
the reward at stage 763 was 0.0 and the action was 1
the reward at stage 764 was 0.0 and the actio

the reward at stage 906 was 0.0 and the action was 3
the reward at stage 907 was 0.0 and the action was 1
the reward at stage 908 was 0.0 and the action was 5
the reward at stage 909 was 0.0 and the action was 4
the reward at stage 910 was 0.0 and the action was 5
the reward at stage 911 was 0.0 and the action was 4
the reward at stage 912 was 0.0 and the action was 5
the reward at stage 913 was 0.0 and the action was 3
the reward at stage 914 was 0.0 and the action was 2
the reward at stage 915 was 0.0 and the action was 0
the reward at stage 916 was 0.0 and the action was 5
the reward at stage 917 was 0.0 and the action was 1
the reward at stage 918 was 0.0 and the action was 3
the reward at stage 919 was 0.0 and the action was 4
the reward at stage 920 was 0.0 and the action was 5
the reward at stage 921 was 0.0 and the action was 1
the reward at stage 922 was 0.0 and the action was 5
the reward at stage 923 was 0.0 and the action was 5
the reward at stage 924 was 0.0 and the action

the reward at stage 1069 was 0.0 and the action was 4
the reward at stage 1070 was 0.0 and the action was 3
the reward at stage 1071 was 0.0 and the action was 1
the reward at stage 1072 was 0.0 and the action was 2
the reward at stage 1073 was 0.0 and the action was 2
the reward at stage 1074 was 0.0 and the action was 4
the reward at stage 1075 was 0.0 and the action was 3
the reward at stage 1076 was 0.0 and the action was 4
the reward at stage 1077 was 0.0 and the action was 0
the reward at stage 1078 was 0.0 and the action was 0
the reward at stage 1079 was 0.0 and the action was 5
the reward at stage 1080 was 0.0 and the action was 2
the reward at stage 1081 was 0.0 and the action was 2
the reward at stage 1082 was 0.0 and the action was 2
the reward at stage 1083 was 0.0 and the action was 2
the reward at stage 1084 was 0.0 and the action was 5
the reward at stage 1085 was 0.0 and the action was 2
the reward at stage 1086 was 0.0 and the action was 3
the reward at stage 1087 was

the reward at stage 21 was 0.0 and the action was 2
the reward at stage 22 was 0.0 and the action was 4
the reward at stage 23 was 0.0 and the action was 5
the reward at stage 24 was 0.0 and the action was 4
the reward at stage 25 was 0.0 and the action was 0
the reward at stage 26 was 0.0 and the action was 3
the reward at stage 27 was 0.0 and the action was 1
the reward at stage 28 was 0.0 and the action was 1
the reward at stage 29 was 0.0 and the action was 5
the reward at stage 30 was 0.0 and the action was 3
the reward at stage 31 was 0.0 and the action was 0
the reward at stage 32 was 0.0 and the action was 0
the reward at stage 33 was 0.0 and the action was 4
the reward at stage 34 was 0.0 and the action was 0
the reward at stage 35 was 0.0 and the action was 1
the reward at stage 36 was 0.0 and the action was 2
the reward at stage 37 was 0.0 and the action was 5
the reward at stage 38 was 0.0 and the action was 2
the reward at stage 39 was 0.0 and the action was 0
the reward a

the reward at stage 178 was 0.0 and the action was 5
the reward at stage 179 was 0.0 and the action was 5
the reward at stage 180 was 0.0 and the action was 5
the reward at stage 181 was 0.0 and the action was 0
the reward at stage 182 was 0.0 and the action was 0
the reward at stage 183 was 0.0 and the action was 0
the reward at stage 184 was 0.0 and the action was 4
the reward at stage 185 was 0.0 and the action was 5
the reward at stage 186 was 0.0 and the action was 0
the reward at stage 187 was 0.0 and the action was 2
the reward at stage 188 was 0.0 and the action was 1
the reward at stage 189 was 0.0 and the action was 5
the reward at stage 190 was 0.0 and the action was 3
the reward at stage 191 was 0.0 and the action was 1
the reward at stage 192 was 0.0 and the action was 3
the reward at stage 193 was 0.0 and the action was 5
the reward at stage 194 was 0.0 and the action was 2
the reward at stage 195 was 0.0 and the action was 4
the reward at stage 196 was 0.0 and the action

In [68]:
#To test the image reshaper
with tf.Graph().as_default():
    with tf.Session() as sess: 
        reshaper = ReshapeImage()
        x = np.random.rand(210,160,3)
        fin_img = reshaper.return_processed_img(sess, x)


In [69]:
#to test network 
with tf.Graph().as_default(): 
    with tf.Session() as session:
        input_frames = np.random.rand(1,84,84,4)
        the_net = Network(session, 6)
        session.run(tf.global_variables_initializer())
        q_func = the_net.compute(input_frames)
        print(q_func.shape)

(1, 6)


In [80]:
x = np.random.rand(1,6)
y = np.argmax(x, axis = 1)
print(y[0])

2
