# Building an Agent to Play Atari games using Deep Q Network

First, we import all the necessary libraries.


In [39]:
import os
import pickle
import random
import time
from collections import deque, Counter
from datetime import datetime

import gym
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import flatten, conv2d, fully_connected

Now we define a function called preprocess_observation for preprocessing the input game screen. We halve the image
resolution, convert the image to greyscale, and scale the pixel values to the range [0, 1].

In [49]:
# Greyscale value (channel mean) of the RGB colour (210, 164, 74); pixels that
# average to exactly this value are blanked out by preprocess_observation.
color = np.array([210, 164, 74]).mean()

def preprocess_observation(obs):
    """Turn a raw colour frame into a single-channel network input.

    Steps, in order:
      1. keep every second row and column (halves both spatial dimensions);
      2. average over the channel axis (axis 2) to get a greyscale image;
      3. set every pixel whose greyscale value equals `color` to 0;
      4. divide by 255, so 8-bit input ends up in the range [0, 1];
      5. reshape to (105, 80, 1).

    The final reshape needs the subsampled frame to contain 105 * 80 values
    (e.g. a 210 x 160 input frame). `obs` itself is not modified.
    """
    # Subsample first, then collapse the colour channels; the mean creates a
    # new float array, so the masking below never touches the caller's frame.
    grey = obs[::2, ::2].mean(axis=2)

    # Blank out the pixels matching the reference colour to improve contrast.
    grey[grey == color] = 0

    # Scale to [0, 1] and add the trailing channel axis.
    return (grey / 255).reshape(105, 80, 1)

 Let us initialize our gym environment

In [50]:
# Create the gym environment registered under the id "BreakoutDeterministic-v4"
env = gym.make("BreakoutDeterministic-v4")
# Size of the discrete action space; used as the number of Q-network outputs
n_outputs = env.action_space.n
# Bare expression so the notebook displays the value as the cell output
n_outputs

4

Now we define a function called q_network for building our Q network. We input the game state
to the Q network and get the Q values for all the actions in that state. <br><br>
We build the Q network with three convolutional layers with same padding, followed by a fully connected hidden layer and a linear output layer that produces one Q value per action.

In [51]:
tf.reset_default_graph()

def q_network(X, name_scope):
    """Build a convolutional Q-network inside the variable scope `name_scope`.

    Architecture: three 'SAME'-padded convolutions (32 filters 8x8 stride 4,
    64 filters 4x4 stride 2, 64 filters 3x3 stride 1), a flatten, a 128-unit
    fully connected layer, and a linear output layer with `n_outputs` units.
    A histogram summary is registered for every layer's activations, and the
    name of the output tensor is printed.

    Args:
        X: input tensor fed to the first convolution.
        name_scope: name of the variable scope that owns the network's
            variables.

    Returns:
        (params, output): `params` maps each trainable variable's name, with
        the scope name stripped from the front, to the variable itself;
        `output` is the tensor produced by the final linear layer.
    """
    weight_init = tf.contrib.layers.variance_scaling_initializer()

    # (summary tag, number of filters, kernel size, stride) per conv layer
    conv_stack = (
        ('layer_1', 32, (8, 8), 4),
        ('layer_2', 64, (4, 4), 2),
        ('layer_3', 64, (3, 3), 1),
    )

    with tf.variable_scope(name_scope) as scope:

        # Chain the convolutions, logging each layer's activations as we go
        net = X
        for tag, filters, kernel, stride in conv_stack:
            net = conv2d(net, num_outputs=filters, kernel_size=kernel, stride=stride,
                         padding='SAME', weights_initializer=weight_init)
            tf.summary.histogram(tag, net)

        # Flatten the last feature map before the dense layers
        hidden = fully_connected(flatten(net), num_outputs=128, weights_initializer=weight_init)
        tf.summary.histogram('fc', hidden)

        # Linear head: one unit per action, no activation function
        output = fully_connected(hidden, num_outputs=n_outputs, activation_fn=None,
                                 weights_initializer=weight_init)
        print(output.name)
        tf.summary.histogram('output', output)

        # Index the scope's trainable variables by their scope-relative name so
        # that two networks built by this function share the same keys
        prefix_len = len(scope.name)
        params = {}
        for variable in tf.get_collection(key=tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name):
            params[variable.name[prefix_len:]] = variable
        return params, output

Next we define a function called epsilon_greedy for performing epsilon greedy policy. In epsilon greedy policy we either select the best action with probability 1 - epsilon or a random action with
probability epsilon.

We use decaying epsilon greedy policy where value of epsilon will be decaying over time as we don't want to explore
forever. So over time our policy will be exploiting only good actions.

In [52]:
# NOTE: `epsilon` is not read by epsilon_greedy (which derives its own value
# from the schedule below); it is kept in case another cell references it.
epsilon = 0.5
# Floor and starting value of the exploration rate, and the number of steps
# over which it decays linearly from eps_max down to eps_min.
eps_min = 0.05
eps_max = 1.0
eps_decay_steps = 1000000

def epsilon_greedy(action, step):
    """Pick an action with a linearly decaying epsilon-greedy policy.

    The exploration rate starts at `eps_max` for step 0, decreases linearly
    with `step`, and is clamped at `eps_min` once `eps_decay_steps` steps
    have elapsed. The current rate is printed whenever `step` is a multiple
    of 1000.

    Args:
        action: the greedy action proposed by the caller.
        step: global step counter that drives the decay schedule.

    Returns:
        A uniformly random integer in [0, n_outputs) with probability equal
        to the current exploration rate, otherwise `action` unchanged.
    """
    # A distinct local name avoids shadowing the module-level `epsilon`.
    current_eps = max(eps_min, eps_max - (eps_max - eps_min) * step / eps_decay_steps)
    if step % 1000 == 0:
        print(current_eps)

    # Exactly one random draw decides between exploring and exploiting.
    if np.random.rand() < current_eps:
        return np.random.randint(n_outputs)
    return action

Now, we initialize our experience replay buffer of length 20000, which holds the agent's experience.

We store each transition, i.e. (state, action, next state, reward, done), in the experience replay buffer
and we sample minibatches of these experiences for training the network.

In [53]:
# Maximum number of transitions kept in the replay buffer
buffer_len = 20000
# Bounded deque: once it holds buffer_len items, appending a new transition
# discards the oldest one
exp_buffer = deque(maxlen=buffer_len)

Next, we define a function called sample_memories for sampling experiences from the memory. The batch size is the number of experiences sampled
from the memory.


In [54]:
def sample_memories(batch_size):
    """Draw up to `batch_size` distinct transitions from `exp_buffer`.

    Transitions are sampled uniformly without replacement. Only the sampled
    transitions are copied out of the buffer; the buffer is never converted
    to an array as a whole, which keeps the cost proportional to the batch
    and avoids building an array from ragged nested sequences (rejected by
    recent NumPy releases unless dtype=object is requested).

    Args:
        batch_size: maximum number of transitions to return; fewer are
            returned when the buffer holds fewer than `batch_size` items.

    Returns:
        A 5-tuple of 1-D object-dtype arrays, one per transition field, in
        the order the fields were stored in each buffer entry
        (index 0, 1, 2, 3, 4).
    """
    chosen = np.random.permutation(len(exp_buffer))[:batch_size]
    batch = [exp_buffer[int(idx)] for idx in chosen]

    columns = []
    for field in range(5):
        # Object dtype keeps every element exactly as stored (arrays stay
        # arrays, scalars stay scalars), matching what callers already expect.
        column = np.empty(len(batch), dtype=object)
        for row, transition in enumerate(batch):
            column[row] = transition[field]
        columns.append(column)
    return tuple(columns)

Now we define our network hyperparameters:

In [55]:
# Number of episodes the training loop plays
num_episodes = 4000
# Number of transitions sampled from the replay buffer per training step
batch_size = 32
# Same value as X_shape below; not referenced by the other cells shown here
input_shape = (None, 105, 80, 1)
# Step size passed to the Adam optimizer
learning_rate = 0.00025
# Shape of the input placeholder: (batch, 105, 80, 1) preprocessed frames
X_shape = (None, 105, 80, 1)
# Discount applied to the next state's maximum Q value when building targets
discount_factor = 0.99

# Counter of environment steps taken across all episodes
global_step = 0
# The weight-copy op is run every copy_steps environment steps
copy_steps = 100
# A training step is run every steps_train environment steps
steps_train = 4
# Neither training nor weight copying happens until global_step exceeds this
start_steps = 2000

 Now let us build our primary and target Q network

In [56]:
# Directory the TensorBoard summaries are written to
logdir = 'logs'

# Placeholder for a batch of preprocessed game states
X = tf.placeholder(tf.float32, shape=X_shape, name="X")

# Boolean placeholder that the training loop feeds on every run; nothing built
# in this cell consumes it
in_training_mode = tf.placeholder(tf.bool, name="in_training_mode")

# Network under scope 'mainQ': takes X and outputs Q values for all actions.
# mainQ is the dict of its trainable variables, mainQ_outputs its output tensor.
mainQ, mainQ_outputs = q_network(X, 'mainQ')

# Second network with the same architecture and input, under scope 'targetQ'
targetQ, targetQ_outputs = q_network(X, 'targetQ')

# Placeholder for the index of the action taken in each sampled state
X_action = tf.placeholder(tf.int32, shape=(None,))
# Q value of the taken action, selected with a one-hot mask; shape (batch, 1).
# NOTE(review): this is computed from targetQ_outputs, so the loss below is a
# function of the 'targetQ' variables and those are the ones the optimizer
# updates -- confirm this naming is intended.
Q_action = tf.reduce_sum(targetQ_outputs * tf.one_hot(X_action, n_outputs), axis=-1, keep_dims=True)

# One assign op per variable pair, matched by scope-relative name. Each op
# writes the value of the 'targetQ' variable INTO the 'mainQ' variable, i.e.
# the copy direction is targetQ -> mainQ (as the group's name says).
copy_op = [tf.assign(main_name, targetQ[var_name]) for var_name, main_name in mainQ.items()]
copy_target_to_main = tf.group(*copy_op)

# Placeholder for the regression target of each sampled transition, shape
# (batch, 1); the training loop feeds the bootstrapped value y_batch here
y = tf.placeholder(tf.float32, shape=(None,1))

# Mean squared error between the target and the Q value of the taken action
loss = tf.reduce_mean(tf.square(y - Q_action))

# Adam optimizer minimizing the loss
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)

# Initializer for all variables created so far (both networks and Adam's slots)
init = tf.global_variables_initializer()

# Scalar summary for the loss, merged with the histogram summaries registered
# inside q_network, plus a writer that also records the graph
loss_summary = tf.summary.scalar('LOSS', loss)
merge_summary = tf.summary.merge_all()
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

mainQ/fully_connected_1/BiasAdd:0
targetQ/fully_connected_1/BiasAdd:0


 Now we start the tensorflow session and run the model,

In [57]:
# Checkpoints and reward logs are written below; create their directories up
# front so the first save does not fail when they are missing.
os.makedirs('./models', exist_ok=True)
os.makedirs('./rewards', exist_ok=True)

with tf.Session() as sess:
    saver = tf.train.Saver()
    init.run()
    rewards = []

    # for each episode
    for i in range(num_episodes):
        done = False
        # Preprocess every frame exactly once; the preprocessed next frame is
        # reused as the state of the following step.
        obs = preprocess_observation(env.reset())
        epoch = 0
        episodic_reward = 0
        actions_counter = Counter()
        episodic_loss = []

        # while the state is not the terminal state
        while not done:

            # feed the game screen and get the Q values for each action
            actions = mainQ_outputs.eval(feed_dict={X: [obs], in_training_mode: False})

            # argmax over a batch of one yields a length-1 array; unwrap it so
            # that a plain int is counted, stepped with and stored
            greedy_action = int(np.argmax(actions, axis=-1)[0])
            actions_counter[str(greedy_action)] += 1

            # select the action using the epsilon greedy policy
            action = epsilon_greedy(greedy_action, global_step)

            # perform the action: receive the next frame, the reward and the done flag
            next_frame, reward, done, _ = env.step(action)
            next_obs = preprocess_observation(next_frame)

            # store this transition as an experience in the replay buffer
            exp_buffer.append([obs, action, next_obs, reward, done])

            # every steps_train steps (once past start_steps) train on a
            # minibatch sampled from the replay buffer
            if global_step % steps_train == 0 and global_step > start_steps:

                # sample experience
                o_obs, o_act, o_next_obs, o_rew, o_done = sample_memories(batch_size)

                # states and next states as lists of frames, so they feed as one batch
                o_obs = list(o_obs)
                o_next_obs = list(o_next_obs)

                # Q values of the next states
                next_act = mainQ_outputs.eval(feed_dict={X: o_next_obs, in_training_mode: False})

                # bootstrapped target: reward plus discounted best next Q value,
                # with the bootstrap term zeroed for terminal transitions
                y_batch = o_rew + discount_factor * np.max(next_act, axis=-1) * (1 - o_done)
                y_feed = np.expand_dims(y_batch, axis=-1)

                # merge all summaries and write them to the log
                mrg_summary = merge_summary.eval(feed_dict={X: o_obs, y: y_feed, X_action: o_act, in_training_mode: False})
                file_writer.add_summary(mrg_summary, global_step)

                # now we train the network and calculate the loss
                train_loss, _ = sess.run([loss, training_op], feed_dict={X: o_obs, y: y_feed, X_action: o_act, in_training_mode: True})
                episodic_loss.append(train_loss)

            # every copy_steps steps (once past start_steps) run the weight-copy op
            if (global_step + 1) % copy_steps == 0 and global_step > start_steps:
                copy_target_to_main.run()

            obs = next_obs
            epoch += 1
            global_step += 1
            episodic_reward += reward

        print('Episode', i,'Epoch', epoch, 'Reward', episodic_reward, 'Global Step', global_step)
        rewards.append(episodic_reward)

        # periodic checkpoint of the model and of the reward history
        if i % 50 == 0:
            saver.save(sess, './models/trained-model-'+ str(i))
            # the context manager closes the file even if pickling fails
            with open('./rewards/trained-model-'+str(i)+'.pck', 'wb+') as reward_file:
                pickle.dump(rewards, reward_file)

    # final checkpoint and reward history
    saver.save(sess, './trained-model')
    with open('./rewards.pck', 'wb+') as reward_file:
        pickle.dump(rewards, reward_file)

    # make sure buffered summaries reach disk before the session ends
    file_writer.flush()
    env.close()
    
    

1.0
Episode 0 Epoch 129 Reward 0.0 Global Step 129
Episode 1 Epoch 138 Reward 0.0 Global Step 267
Episode 2 Epoch 134 Reward 0.0 Global Step 401
Episode 3 Epoch 179 Reward 1.0 Global Step 580
Episode 4 Epoch 142 Reward 0.0 Global Step 722
Episode 5 Epoch 129 Reward 0.0 Global Step 851
0.99905
Episode 6 Epoch 207 Reward 2.0 Global Step 1058
Episode 7 Epoch 216 Reward 2.0 Global Step 1274
Episode 8 Epoch 148 Reward 0.0 Global Step 1422
Episode 9 Epoch 128 Reward 0.0 Global Step 1550
Episode 10 Epoch 205 Reward 2.0 Global Step 1755
Episode 11 Epoch 134 Reward 0.0 Global Step 1889
0.9981
Episode 12 Epoch 190 Reward 2.0 Global Step 2079
Episode 13 Epoch 155 Reward 1.0 Global Step 2234
Episode 14 Epoch 161 Reward 1.0 Global Step 2395
Episode 15 Epoch 290 Reward 4.0 Global Step 2685
Episode 16 Epoch 244 Reward 3.0 Global Step 2929
0.99715
Episode 17 Epoch 187 Reward 1.0 Global Step 3116
Episode 18 Epoch 133 Reward 0.0 Global Step 3249
Episode 19 Epoch 204 Reward 2.0 Global Step 3453
Episode 2

Episode 160 Epoch 127 Reward 0.0 Global Step 27907
0.9734
Episode 161 Epoch 142 Reward 0.0 Global Step 28049
Episode 162 Epoch 215 Reward 2.0 Global Step 28264
Episode 163 Epoch 155 Reward 1.0 Global Step 28419
Episode 164 Epoch 168 Reward 1.0 Global Step 28587
Episode 165 Epoch 160 Reward 1.0 Global Step 28747
Episode 166 Epoch 192 Reward 1.0 Global Step 28939
0.97245
Episode 167 Epoch 275 Reward 4.0 Global Step 29214
Episode 168 Epoch 203 Reward 2.0 Global Step 29417
Episode 169 Epoch 138 Reward 0.0 Global Step 29555
Episode 170 Epoch 275 Reward 5.0 Global Step 29830
Episode 171 Epoch 134 Reward 0.0 Global Step 29964
0.9715
Episode 172 Epoch 126 Reward 0.0 Global Step 30090
Episode 173 Epoch 133 Reward 0.0 Global Step 30223
Episode 174 Epoch 198 Reward 2.0 Global Step 30421
Episode 175 Epoch 170 Reward 1.0 Global Step 30591
Episode 176 Epoch 212 Reward 2.0 Global Step 30803
Episode 177 Epoch 152 Reward 1.0 Global Step 30955
0.97055
Episode 178 Epoch 213 Reward 2.0 Global Step 31168
E

Episode 317 Epoch 143 Reward 0.0 Global Step 56249
Episode 318 Epoch 201 Reward 2.0 Global Step 56450
Episode 319 Epoch 179 Reward 1.0 Global Step 56629
Episode 320 Epoch 230 Reward 2.0 Global Step 56859
0.94585
Episode 321 Epoch 177 Reward 1.0 Global Step 57036
Episode 322 Epoch 237 Reward 2.0 Global Step 57273
Episode 323 Epoch 186 Reward 2.0 Global Step 57459
Episode 324 Epoch 229 Reward 3.0 Global Step 57688
Episode 325 Epoch 128 Reward 0.0 Global Step 57816
0.9449
Episode 326 Epoch 215 Reward 2.0 Global Step 58031
Episode 327 Epoch 158 Reward 1.0 Global Step 58189
Episode 328 Epoch 141 Reward 0.0 Global Step 58330
Episode 329 Epoch 190 Reward 1.0 Global Step 58520
Episode 330 Epoch 174 Reward 1.0 Global Step 58694
Episode 331 Epoch 130 Reward 0.0 Global Step 58824
0.94395
Episode 332 Epoch 204 Reward 2.0 Global Step 59028
Episode 333 Epoch 129 Reward 0.0 Global Step 59157
Episode 334 Epoch 131 Reward 0.0 Global Step 59288
Episode 335 Epoch 132 Reward 0.0 Global Step 59420
Episode 

Episode 474 Epoch 159 Reward 1.0 Global Step 85331
Episode 475 Epoch 123 Reward 0.0 Global Step 85454
Episode 476 Epoch 226 Reward 2.0 Global Step 85680
Episode 477 Epoch 127 Reward 0.0 Global Step 85807
Episode 478 Epoch 142 Reward 0.0 Global Step 85949
0.9183
Episode 479 Epoch 155 Reward 1.0 Global Step 86104
Episode 480 Epoch 226 Reward 2.0 Global Step 86330
Episode 481 Epoch 271 Reward 3.0 Global Step 86601
Episode 482 Epoch 204 Reward 2.0 Global Step 86805
Episode 483 Epoch 191 Reward 2.0 Global Step 86996
0.91735
Episode 484 Epoch 327 Reward 5.0 Global Step 87323
Episode 485 Epoch 127 Reward 0.0 Global Step 87450
Episode 486 Epoch 212 Reward 2.0 Global Step 87662
Episode 487 Epoch 135 Reward 0.0 Global Step 87797
Episode 488 Epoch 176 Reward 1.0 Global Step 87973
0.9164
Episode 489 Epoch 205 Reward 2.0 Global Step 88178
Episode 490 Epoch 165 Reward 1.0 Global Step 88343
Episode 491 Epoch 162 Reward 1.0 Global Step 88505
Episode 492 Epoch 162 Reward 1.0 Global Step 88667
Episode 4

Episode 629 Epoch 202 Reward 2.0 Global Step 115307
Episode 630 Epoch 177 Reward 1.0 Global Step 115484
Episode 631 Epoch 223 Reward 3.0 Global Step 115707
Episode 632 Epoch 184 Reward 2.0 Global Step 115891
0.8898
Episode 633 Epoch 274 Reward 3.0 Global Step 116165
Episode 634 Epoch 164 Reward 1.0 Global Step 116329
Episode 635 Epoch 131 Reward 0.0 Global Step 116460
Episode 636 Epoch 166 Reward 1.0 Global Step 116626
Episode 637 Epoch 131 Reward 0.0 Global Step 116757
0.88885
Episode 638 Epoch 255 Reward 3.0 Global Step 117012
Episode 639 Epoch 180 Reward 1.0 Global Step 117192
Episode 640 Epoch 187 Reward 1.0 Global Step 117379
Episode 641 Epoch 260 Reward 4.0 Global Step 117639
Episode 642 Epoch 244 Reward 3.0 Global Step 117883
0.8879
Episode 643 Epoch 301 Reward 4.0 Global Step 118184
Episode 644 Epoch 373 Reward 5.0 Global Step 118557
Episode 645 Epoch 134 Reward 0.0 Global Step 118691
Episode 646 Epoch 224 Reward 2.0 Global Step 118915
0.88695
Episode 647 Epoch 182 Reward 2.0 G

Episode 782 Epoch 200 Reward 2.0 Global Step 146519
Episode 783 Epoch 177 Reward 1.0 Global Step 146696
0.86035
Episode 784 Epoch 361 Reward 5.0 Global Step 147057
Episode 785 Epoch 212 Reward 2.0 Global Step 147269
Episode 786 Epoch 237 Reward 3.0 Global Step 147506
Episode 787 Epoch 177 Reward 1.0 Global Step 147683
Episode 788 Epoch 292 Reward 4.0 Global Step 147975
0.8593999999999999
Episode 789 Epoch 136 Reward 0.0 Global Step 148111
Episode 790 Epoch 232 Reward 3.0 Global Step 148343
Episode 791 Epoch 229 Reward 2.0 Global Step 148572
Episode 792 Epoch 127 Reward 0.0 Global Step 148699
Episode 793 Epoch 130 Reward 0.0 Global Step 148829
0.8584499999999999
Episode 794 Epoch 232 Reward 3.0 Global Step 149061
Episode 795 Epoch 155 Reward 1.0 Global Step 149216
Episode 796 Epoch 165 Reward 1.0 Global Step 149381
Episode 797 Epoch 243 Reward 3.0 Global Step 149624
Episode 798 Epoch 179 Reward 1.0 Global Step 149803
Episode 799 Epoch 193 Reward 2.0 Global Step 149996
0.8575
Episode 800

0.8309
Episode 935 Epoch 186 Reward 2.0 Global Step 178072
Episode 936 Epoch 193 Reward 2.0 Global Step 178265
Episode 937 Epoch 164 Reward 1.0 Global Step 178429
Episode 938 Epoch 190 Reward 2.0 Global Step 178619
Episode 939 Epoch 178 Reward 1.0 Global Step 178797
Episode 940 Epoch 171 Reward 1.0 Global Step 178968
0.82995
Episode 941 Epoch 330 Reward 5.0 Global Step 179298
Episode 942 Epoch 208 Reward 2.0 Global Step 179506
Episode 943 Epoch 127 Reward 0.0 Global Step 179633
Episode 944 Epoch 268 Reward 4.0 Global Step 179901
0.829
Episode 945 Epoch 232 Reward 3.0 Global Step 180133
Episode 946 Epoch 155 Reward 1.0 Global Step 180288
Episode 947 Epoch 205 Reward 2.0 Global Step 180493
Episode 948 Epoch 187 Reward 2.0 Global Step 180680
Episode 949 Epoch 154 Reward 1.0 Global Step 180834
Episode 950 Epoch 131 Reward 0.0 Global Step 180965
0.82805
Episode 951 Epoch 187 Reward 2.0 Global Step 181152
Episode 952 Epoch 157 Reward 1.0 Global Step 181309
Episode 953 Epoch 341 Reward 9.0 Gl

0.8005
Episode 1086 Epoch 202 Reward 2.0 Global Step 210161
Episode 1087 Epoch 127 Reward 0.0 Global Step 210288
Episode 1088 Epoch 125 Reward 0.0 Global Step 210413
Episode 1089 Epoch 189 Reward 2.0 Global Step 210602
Episode 1090 Epoch 189 Reward 2.0 Global Step 210791
0.79955
Episode 1091 Epoch 231 Reward 3.0 Global Step 211022
Episode 1092 Epoch 250 Reward 3.0 Global Step 211272
Episode 1093 Epoch 157 Reward 1.0 Global Step 211429
Episode 1094 Epoch 171 Reward 1.0 Global Step 211600
Episode 1095 Epoch 244 Reward 3.0 Global Step 211844
0.7986
Episode 1096 Epoch 293 Reward 4.0 Global Step 212137
Episode 1097 Epoch 261 Reward 3.0 Global Step 212398
Episode 1098 Epoch 332 Reward 5.0 Global Step 212730
Episode 1099 Epoch 240 Reward 3.0 Global Step 212970
0.79765
Episode 1100 Epoch 232 Reward 3.0 Global Step 213202
Episode 1101 Epoch 235 Reward 3.0 Global Step 213437
Episode 1102 Epoch 188 Reward 2.0 Global Step 213625
Episode 1103 Epoch 295 Reward 5.0 Global Step 213920
0.7967
Episode 1

Episode 1236 Epoch 214 Reward 3.0 Global Step 242585
Episode 1237 Epoch 188 Reward 2.0 Global Step 242773
0.76915
Episode 1238 Epoch 228 Reward 2.0 Global Step 243001
Episode 1239 Epoch 126 Reward 0.0 Global Step 243127
Episode 1240 Epoch 351 Reward 6.0 Global Step 243478
Episode 1241 Epoch 157 Reward 1.0 Global Step 243635
Episode 1242 Epoch 203 Reward 2.0 Global Step 243838
0.7682
Episode 1243 Epoch 254 Reward 3.0 Global Step 244092
Episode 1244 Epoch 160 Reward 1.0 Global Step 244252
Episode 1245 Epoch 246 Reward 3.0 Global Step 244498
Episode 1246 Epoch 313 Reward 5.0 Global Step 244811
0.76725
Episode 1247 Epoch 191 Reward 2.0 Global Step 245002
Episode 1248 Epoch 242 Reward 3.0 Global Step 245244
Episode 1249 Epoch 276 Reward 3.0 Global Step 245520
Episode 1250 Epoch 204 Reward 2.0 Global Step 245724
Episode 1251 Epoch 164 Reward 1.0 Global Step 245888
0.7663
Episode 1252 Epoch 157 Reward 1.0 Global Step 246045
Episode 1253 Epoch 192 Reward 2.0 Global Step 246237
Episode 1254 Epo

0.7378
Episode 1385 Epoch 182 Reward 2.0 Global Step 276090
Episode 1386 Epoch 262 Reward 4.0 Global Step 276352
Episode 1387 Epoch 249 Reward 3.0 Global Step 276601
Episode 1388 Epoch 171 Reward 1.0 Global Step 276772
Episode 1389 Epoch 160 Reward 1.0 Global Step 276932
0.73685
Episode 1390 Epoch 230 Reward 3.0 Global Step 277162
Episode 1391 Epoch 295 Reward 5.0 Global Step 277457
Episode 1392 Epoch 260 Reward 4.0 Global Step 277717
Episode 1393 Epoch 201 Reward 2.0 Global Step 277918
0.7359
Episode 1394 Epoch 213 Reward 3.0 Global Step 278131
Episode 1395 Epoch 274 Reward 3.0 Global Step 278405
Episode 1396 Epoch 489 Reward 8.0 Global Step 278894
0.73495
Episode 1397 Epoch 220 Reward 3.0 Global Step 279114
Episode 1398 Epoch 137 Reward 0.0 Global Step 279251
Episode 1399 Epoch 157 Reward 1.0 Global Step 279408
Episode 1400 Epoch 154 Reward 1.0 Global Step 279562
Episode 1401 Epoch 213 Reward 3.0 Global Step 279775
0.734
Episode 1402 Epoch 232 Reward 3.0 Global Step 280007
Episode 14

Episode 1533 Epoch 135 Reward 0.0 Global Step 310620
Episode 1534 Epoch 126 Reward 0.0 Global Step 310746
0.70455
Episode 1535 Epoch 287 Reward 4.0 Global Step 311033
Episode 1536 Epoch 181 Reward 1.0 Global Step 311214
Episode 1537 Epoch 231 Reward 3.0 Global Step 311445
Episode 1538 Epoch 206 Reward 2.0 Global Step 311651
Episode 1539 Epoch 240 Reward 3.0 Global Step 311891
0.7036
Episode 1540 Epoch 206 Reward 2.0 Global Step 312097
Episode 1541 Epoch 249 Reward 3.0 Global Step 312346
Episode 1542 Epoch 320 Reward 5.0 Global Step 312666
Episode 1543 Epoch 156 Reward 1.0 Global Step 312822
0.70265
Episode 1544 Epoch 326 Reward 5.0 Global Step 313148
Episode 1545 Epoch 217 Reward 3.0 Global Step 313365
Episode 1546 Epoch 158 Reward 1.0 Global Step 313523
Episode 1547 Epoch 201 Reward 2.0 Global Step 313724
Episode 1548 Epoch 231 Reward 3.0 Global Step 313955
0.7017
Episode 1549 Epoch 334 Reward 6.0 Global Step 314289
Episode 1550 Epoch 352 Reward 6.0 Global Step 314641
Episode 1551 Epo

Episode 1681 Epoch 237 Reward 3.0 Global Step 348506
Episode 1682 Epoch 211 Reward 3.0 Global Step 348717
Episode 1683 Epoch 128 Reward 0.0 Global Step 348845
0.66845
Episode 1684 Epoch 214 Reward 3.0 Global Step 349059
Episode 1685 Epoch 284 Reward 4.0 Global Step 349343
Episode 1686 Epoch 276 Reward 4.0 Global Step 349619
Episode 1687 Epoch 199 Reward 2.0 Global Step 349818
Episode 1688 Epoch 182 Reward 2.0 Global Step 350000
0.6675
Episode 1689 Epoch 199 Reward 2.0 Global Step 350199
Episode 1690 Epoch 209 Reward 2.0 Global Step 350408
Episode 1691 Epoch 280 Reward 4.0 Global Step 350688
0.66655
Episode 1692 Epoch 345 Reward 6.0 Global Step 351033
Episode 1693 Epoch 292 Reward 4.0 Global Step 351325
Episode 1694 Epoch 231 Reward 3.0 Global Step 351556
Episode 1695 Epoch 185 Reward 2.0 Global Step 351741
Episode 1696 Epoch 157 Reward 1.0 Global Step 351898
0.6656
Episode 1697 Epoch 262 Reward 4.0 Global Step 352160
Episode 1698 Epoch 183 Reward 2.0 Global Step 352343
Episode 1699 Epo

Episode 1829 Epoch 201 Reward 2.0 Global Step 383903
0.6352
Episode 1830 Epoch 299 Reward 5.0 Global Step 384202
Episode 1831 Epoch 230 Reward 3.0 Global Step 384432
Episode 1832 Epoch 253 Reward 3.0 Global Step 384685
Episode 1833 Epoch 251 Reward 3.0 Global Step 384936
0.63425
Episode 1834 Epoch 345 Reward 9.0 Global Step 385281
Episode 1835 Epoch 413 Reward 7.0 Global Step 385694
Episode 1836 Epoch 182 Reward 2.0 Global Step 385876
0.6333
Episode 1837 Epoch 184 Reward 2.0 Global Step 386060
Episode 1838 Epoch 289 Reward 5.0 Global Step 386349
Episode 1839 Epoch 260 Reward 4.0 Global Step 386609
Episode 1840 Epoch 322 Reward 4.0 Global Step 386931
0.63235
Episode 1841 Epoch 263 Reward 4.0 Global Step 387194
Episode 1842 Epoch 188 Reward 2.0 Global Step 387382
Episode 1843 Epoch 172 Reward 1.0 Global Step 387554
Episode 1844 Epoch 261 Reward 4.0 Global Step 387815
0.6314
Episode 1845 Epoch 339 Reward 4.0 Global Step 388154
Episode 1846 Epoch 352 Reward 6.0 Global Step 388506
Episode 1

0.59815
Episode 1977 Epoch 347 Reward 5.0 Global Step 423281
Episode 1978 Epoch 258 Reward 7.0 Global Step 423539
Episode 1979 Epoch 333 Reward 5.0 Global Step 423872
0.5972
Episode 1980 Epoch 297 Reward 4.0 Global Step 424169
Episode 1981 Epoch 281 Reward 4.0 Global Step 424450
Episode 1982 Epoch 323 Reward 5.0 Global Step 424773
Episode 1983 Epoch 172 Reward 1.0 Global Step 424945
0.59625
Episode 1984 Epoch 255 Reward 3.0 Global Step 425200
Episode 1985 Epoch 249 Reward 4.0 Global Step 425449
Episode 1986 Epoch 190 Reward 2.0 Global Step 425639
Episode 1987 Epoch 132 Reward 0.0 Global Step 425771
Episode 1988 Epoch 155 Reward 1.0 Global Step 425926
0.5952999999999999
Episode 1989 Epoch 303 Reward 4.0 Global Step 426229
Episode 1990 Epoch 230 Reward 2.0 Global Step 426459
Episode 1991 Epoch 216 Reward 3.0 Global Step 426675
Episode 1992 Epoch 257 Reward 4.0 Global Step 426932
0.5943499999999999
Episode 1993 Epoch 184 Reward 2.0 Global Step 427116
Episode 1994 Epoch 232 Reward 3.0 Glob

Episode 2124 Epoch 229 Reward 3.0 Global Step 462145
Episode 2125 Epoch 183 Reward 2.0 Global Step 462328
Episode 2126 Epoch 258 Reward 3.0 Global Step 462586
Episode 2127 Epoch 228 Reward 3.0 Global Step 462814
0.5601499999999999
Episode 2128 Epoch 208 Reward 2.0 Global Step 463022
Episode 2129 Epoch 298 Reward 4.0 Global Step 463320
Episode 2130 Epoch 397 Reward 10.0 Global Step 463717
Episode 2131 Epoch 245 Reward 4.0 Global Step 463962
0.5591999999999999
Episode 2132 Epoch 231 Reward 3.0 Global Step 464193
Episode 2133 Epoch 257 Reward 4.0 Global Step 464450
Episode 2134 Epoch 266 Reward 4.0 Global Step 464716
0.55825
Episode 2135 Epoch 331 Reward 5.0 Global Step 465047
Episode 2136 Epoch 265 Reward 4.0 Global Step 465312
Episode 2137 Epoch 310 Reward 5.0 Global Step 465622
Episode 2138 Epoch 301 Reward 4.0 Global Step 465923
0.5573
Episode 2139 Epoch 236 Reward 3.0 Global Step 466159
Episode 2140 Epoch 203 Reward 2.0 Global Step 466362
Episode 2141 Epoch 263 Reward 4.0 Global Step

Episode 2271 Epoch 315 Reward 8.0 Global Step 503580
Episode 2272 Epoch 375 Reward 6.0 Global Step 503955
0.5212
Episode 2273 Epoch 217 Reward 2.0 Global Step 504172
Episode 2274 Epoch 291 Reward 4.0 Global Step 504463
Episode 2275 Epoch 251 Reward 3.0 Global Step 504714
Episode 2276 Epoch 236 Reward 3.0 Global Step 504950
0.52025
Episode 2277 Epoch 425 Reward 7.0 Global Step 505375
Episode 2278 Epoch 184 Reward 2.0 Global Step 505559
Episode 2279 Epoch 329 Reward 6.0 Global Step 505888
0.5193
Episode 2280 Epoch 260 Reward 4.0 Global Step 506148
Episode 2281 Epoch 224 Reward 3.0 Global Step 506372
Episode 2282 Epoch 343 Reward 6.0 Global Step 506715
0.51835
Episode 2283 Epoch 360 Reward 9.0 Global Step 507075
Episode 2284 Epoch 359 Reward 6.0 Global Step 507434
Episode 2285 Epoch 348 Reward 6.0 Global Step 507782
0.5174000000000001
Episode 2286 Epoch 361 Reward 6.0 Global Step 508143
Episode 2287 Epoch 242 Reward 3.0 Global Step 508385
Episode 2288 Epoch 273 Reward 5.0 Global Step 5086

Episode 2416 Epoch 276 Reward 4.0 Global Step 546952
0.48035000000000005
Episode 2417 Epoch 350 Reward 6.0 Global Step 547302
Episode 2418 Epoch 337 Reward 6.0 Global Step 547639
0.47940000000000005
Episode 2419 Epoch 464 Reward 9.0 Global Step 548103
Episode 2420 Epoch 399 Reward 6.0 Global Step 548502
Episode 2421 Epoch 387 Reward 7.0 Global Step 548889
0.47845000000000004
Episode 2422 Epoch 380 Reward 6.0 Global Step 549269
Episode 2423 Epoch 188 Reward 2.0 Global Step 549457
Episode 2424 Epoch 309 Reward 5.0 Global Step 549766
0.47750000000000004
Episode 2425 Epoch 258 Reward 4.0 Global Step 550024
Episode 2426 Epoch 249 Reward 3.0 Global Step 550273
Episode 2427 Epoch 360 Reward 10.0 Global Step 550633
Episode 2428 Epoch 293 Reward 4.0 Global Step 550926
0.47655000000000003
Episode 2429 Epoch 275 Reward 3.0 Global Step 551201
Episode 2430 Epoch 234 Reward 3.0 Global Step 551435
Episode 2431 Epoch 328 Reward 9.0 Global Step 551763
0.4756
Episode 2432 Epoch 275 Reward 3.0 Global Ste

Episode 2560 Epoch 262 Reward 4.0 Global Step 590346
Episode 2561 Epoch 285 Reward 4.0 Global Step 590631
Episode 2562 Epoch 338 Reward 6.0 Global Step 590969
0.43855
Episode 2563 Epoch 353 Reward 6.0 Global Step 591322
Episode 2564 Epoch 325 Reward 6.0 Global Step 591647
0.4376
Episode 2565 Epoch 384 Reward 7.0 Global Step 592031
Episode 2566 Epoch 319 Reward 6.0 Global Step 592350
Episode 2567 Epoch 357 Reward 6.0 Global Step 592707
0.43665
Episode 2568 Epoch 297 Reward 5.0 Global Step 593004
Episode 2569 Epoch 306 Reward 5.0 Global Step 593310
Episode 2570 Epoch 356 Reward 7.0 Global Step 593666
Episode 2571 Epoch 230 Reward 3.0 Global Step 593896
0.4357
Episode 2572 Epoch 354 Reward 6.0 Global Step 594250
Episode 2573 Epoch 278 Reward 4.0 Global Step 594528
Episode 2574 Epoch 418 Reward 8.0 Global Step 594946
0.43474999999999997
Episode 2575 Epoch 497 Reward 13.0 Global Step 595443
Episode 2576 Epoch 259 Reward 4.0 Global Step 595702
0.43379999999999996
Episode 2577 Epoch 329 Rewar

Episode 2703 Epoch 211 Reward 3.0 Global Step 635525
Episode 2704 Epoch 274 Reward 4.0 Global Step 635799
0.39580000000000004
Episode 2705 Epoch 419 Reward 8.0 Global Step 636218
Episode 2706 Epoch 212 Reward 3.0 Global Step 636430
Episode 2707 Epoch 407 Reward 6.0 Global Step 636837
0.39485000000000003
Episode 2708 Epoch 309 Reward 5.0 Global Step 637146
Episode 2709 Epoch 291 Reward 4.0 Global Step 637437
Episode 2710 Epoch 332 Reward 6.0 Global Step 637769
Episode 2711 Epoch 215 Reward 3.0 Global Step 637984
0.39390000000000003
Episode 2712 Epoch 328 Reward 9.0 Global Step 638312
Episode 2713 Epoch 309 Reward 5.0 Global Step 638621
0.39295
Episode 2714 Epoch 483 Reward 16.0 Global Step 639104
Episode 2715 Epoch 439 Reward 14.0 Global Step 639543
Episode 2716 Epoch 312 Reward 6.0 Global Step 639855
0.392
Episode 2717 Epoch 323 Reward 9.0 Global Step 640178
Episode 2718 Epoch 298 Reward 5.0 Global Step 640476
Episode 2719 Epoch 286 Reward 4.0 Global Step 640762
Episode 2720 Epoch 194 

Episode 2847 Epoch 284 Reward 4.0 Global Step 681876
0.35209999999999997
Episode 2848 Epoch 218 Reward 3.0 Global Step 682094
Episode 2849 Epoch 309 Reward 5.0 Global Step 682403
Episode 2850 Epoch 473 Reward 12.0 Global Step 682876
0.35114999999999996
Episode 2851 Epoch 292 Reward 5.0 Global Step 683168
Episode 2852 Epoch 285 Reward 8.0 Global Step 683453
Episode 2853 Epoch 363 Reward 6.0 Global Step 683816
0.35019999999999996
Episode 2854 Epoch 412 Reward 8.0 Global Step 684228
Episode 2855 Epoch 319 Reward 5.0 Global Step 684547
Episode 2856 Epoch 243 Reward 4.0 Global Step 684790
0.34924999999999995
Episode 2857 Epoch 267 Reward 4.0 Global Step 685057
Episode 2858 Epoch 261 Reward 4.0 Global Step 685318
Episode 2859 Epoch 402 Reward 7.0 Global Step 685720
0.34830000000000005
Episode 2860 Epoch 312 Reward 5.0 Global Step 686032
Episode 2861 Epoch 180 Reward 2.0 Global Step 686212
Episode 2862 Epoch 201 Reward 2.0 Global Step 686413
Episode 2863 Epoch 441 Reward 8.0 Global Step 68685

0.30935
Episode 2989 Epoch 229 Reward 3.0 Global Step 727149
Episode 2990 Epoch 325 Reward 9.0 Global Step 727474
Episode 2991 Epoch 260 Reward 4.0 Global Step 727734
0.3084
Episode 2992 Epoch 322 Reward 4.0 Global Step 728056
Episode 2993 Epoch 214 Reward 3.0 Global Step 728270
Episode 2994 Epoch 217 Reward 3.0 Global Step 728487
Episode 2995 Epoch 255 Reward 7.0 Global Step 728742
0.30745
Episode 2996 Epoch 300 Reward 5.0 Global Step 729042
Episode 2997 Epoch 392 Reward 7.0 Global Step 729434
Episode 2998 Epoch 245 Reward 4.0 Global Step 729679
Episode 2999 Epoch 182 Reward 2.0 Global Step 729861
0.3065
Episode 3000 Epoch 225 Reward 3.0 Global Step 730086
Episode 3001 Epoch 356 Reward 6.0 Global Step 730442
Episode 3002 Epoch 182 Reward 2.0 Global Step 730624
Episode 3003 Epoch 181 Reward 2.0 Global Step 730805
0.30555
Episode 3004 Epoch 244 Reward 3.0 Global Step 731049
Episode 3005 Epoch 341 Reward 5.0 Global Step 731390
Episode 3006 Epoch 214 Reward 3.0 Global Step 731604
Episode 

Episode 3133 Epoch 356 Reward 6.0 Global Step 770820
0.26754999999999995
Episode 3134 Epoch 403 Reward 6.0 Global Step 771223
Episode 3135 Epoch 304 Reward 5.0 Global Step 771527
Episode 3136 Epoch 278 Reward 4.0 Global Step 771805
0.26659999999999995
Episode 3137 Epoch 261 Reward 4.0 Global Step 772066
Episode 3138 Epoch 366 Reward 10.0 Global Step 772432
Episode 3139 Epoch 446 Reward 9.0 Global Step 772878
0.26565000000000005
Episode 3140 Epoch 441 Reward 13.0 Global Step 773319
Episode 3141 Epoch 358 Reward 12.0 Global Step 773677
0.26470000000000005
Episode 3142 Epoch 399 Reward 13.0 Global Step 774076
Episode 3143 Epoch 325 Reward 5.0 Global Step 774401
Episode 3144 Epoch 320 Reward 6.0 Global Step 774721
0.26375000000000004
Episode 3145 Epoch 399 Reward 7.0 Global Step 775120
Episode 3146 Epoch 231 Reward 3.0 Global Step 775351
Episode 3147 Epoch 403 Reward 7.0 Global Step 775754
0.26280000000000003
Episode 3148 Epoch 302 Reward 5.0 Global Step 776056
Episode 3149 Epoch 405 Rewar

Episode 3274 Epoch 471 Reward 8.0 Global Step 817762
Episode 3275 Epoch 151 Reward 1.0 Global Step 817913
0.2229
Episode 3276 Epoch 519 Reward 12.0 Global Step 818432
Episode 3277 Epoch 215 Reward 3.0 Global Step 818647
Episode 3278 Epoch 282 Reward 4.0 Global Step 818929
0.22194999999999998
Episode 3279 Epoch 230 Reward 3.0 Global Step 819159
Episode 3280 Epoch 414 Reward 7.0 Global Step 819573
Episode 3281 Epoch 254 Reward 3.0 Global Step 819827
Episode 3282 Epoch 150 Reward 1.0 Global Step 819977
0.22099999999999997
Episode 3283 Epoch 271 Reward 7.0 Global Step 820248
Episode 3284 Epoch 348 Reward 4.0 Global Step 820596
Episode 3285 Epoch 229 Reward 3.0 Global Step 820825
0.22004999999999997
Episode 3286 Epoch 408 Reward 8.0 Global Step 821233
Episode 3287 Epoch 328 Reward 5.0 Global Step 821561
Episode 3288 Epoch 282 Reward 3.0 Global Step 821843
0.21909999999999996
Episode 3289 Epoch 310 Reward 8.0 Global Step 822153
Episode 3290 Epoch 304 Reward 4.0 Global Step 822457
Episode 329

0.18110000000000004
Episode 3414 Epoch 462 Reward 8.0 Global Step 862064
Episode 3415 Epoch 360 Reward 7.0 Global Step 862424
Episode 3416 Epoch 450 Reward 9.0 Global Step 862874
0.18015000000000003
Episode 3417 Epoch 378 Reward 7.0 Global Step 863252
Episode 3418 Epoch 431 Reward 8.0 Global Step 863683
0.17920000000000003
Episode 3419 Epoch 368 Reward 8.0 Global Step 864051
Episode 3420 Epoch 290 Reward 5.0 Global Step 864341
Episode 3421 Epoch 282 Reward 5.0 Global Step 864623
Episode 3422 Epoch 330 Reward 8.0 Global Step 864953
0.17825000000000002
Episode 3423 Epoch 307 Reward 5.0 Global Step 865260
Episode 3424 Epoch 308 Reward 5.0 Global Step 865568
Episode 3425 Epoch 251 Reward 7.0 Global Step 865819
0.1773
Episode 3426 Epoch 373 Reward 6.0 Global Step 866192
Episode 3427 Epoch 466 Reward 9.0 Global Step 866658
0.17635
Episode 3428 Epoch 437 Reward 11.0 Global Step 867095
Episode 3429 Epoch 323 Reward 5.0 Global Step 867418
Episode 3430 Epoch 315 Reward 5.0 Global Step 867733
0.1

Episode 3553 Epoch 353 Reward 9.0 Global Step 911814
0.13360000000000005
Episode 3554 Epoch 361 Reward 6.0 Global Step 912175
Episode 3555 Epoch 473 Reward 13.0 Global Step 912648
Episode 3556 Epoch 324 Reward 5.0 Global Step 912972
0.13265000000000005
Episode 3557 Epoch 364 Reward 12.0 Global Step 913336
Episode 3558 Epoch 377 Reward 8.0 Global Step 913713
0.13170000000000004
Episode 3559 Epoch 451 Reward 15.0 Global Step 914164
Episode 3560 Epoch 601 Reward 9.0 Global Step 914765
0.13075000000000003
Episode 3561 Epoch 353 Reward 12.0 Global Step 915118
Episode 3562 Epoch 350 Reward 10.0 Global Step 915468
Episode 3563 Epoch 296 Reward 5.0 Global Step 915764
0.12980000000000003
Episode 3564 Epoch 272 Reward 4.0 Global Step 916036
Episode 3565 Epoch 361 Reward 5.0 Global Step 916397
Episode 3566 Epoch 522 Reward 13.0 Global Step 916919
0.12885000000000002
Episode 3567 Epoch 209 Reward 3.0 Global Step 917128
Episode 3568 Epoch 248 Reward 4.0 Global Step 917376
Episode 3569 Epoch 282 Rew

0.08799999999999997
Episode 3691 Epoch 306 Reward 8.0 Global Step 960144
Episode 3692 Epoch 308 Reward 3.0 Global Step 960452
Episode 3693 Epoch 309 Reward 5.0 Global Step 960761
0.08704999999999996
Episode 3694 Epoch 300 Reward 4.0 Global Step 961061
Episode 3695 Epoch 395 Reward 7.0 Global Step 961456
Episode 3696 Epoch 341 Reward 6.0 Global Step 961797
0.08609999999999995
Episode 3697 Epoch 368 Reward 4.0 Global Step 962165
Episode 3698 Epoch 292 Reward 7.0 Global Step 962457
Episode 3699 Epoch 368 Reward 4.0 Global Step 962825
0.08514999999999995
Episode 3700 Epoch 291 Reward 5.0 Global Step 963116
Episode 3701 Epoch 227 Reward 3.0 Global Step 963343
Episode 3702 Epoch 358 Reward 5.0 Global Step 963701
0.08420000000000005
Episode 3703 Epoch 321 Reward 3.0 Global Step 964022
Episode 3704 Epoch 274 Reward 3.0 Global Step 964296
Episode 3705 Epoch 404 Reward 4.0 Global Step 964700
0.08325000000000005
Episode 3706 Epoch 367 Reward 9.0 Global Step 965067
Episode 3707 Epoch 336 Reward 6.

0.05
Episode 3829 Epoch 207 Reward 3.0 Global Step 1011082
Episode 3830 Epoch 388 Reward 10.0 Global Step 1011470
Episode 3831 Epoch 401 Reward 11.0 Global Step 1011871
0.05
Episode 3832 Epoch 509 Reward 6.0 Global Step 1012380
Episode 3833 Epoch 279 Reward 5.0 Global Step 1012659
0.05
Episode 3834 Epoch 388 Reward 7.0 Global Step 1013047
Episode 3835 Epoch 612 Reward 8.0 Global Step 1013659
0.05
Episode 3836 Epoch 493 Reward 3.0 Global Step 1014152
Episode 3837 Epoch 583 Reward 7.0 Global Step 1014735
0.05
Episode 3838 Epoch 633 Reward 2.0 Global Step 1015368
Episode 3839 Epoch 289 Reward 5.0 Global Step 1015657
0.05
Episode 3840 Epoch 351 Reward 2.0 Global Step 1016008
Episode 3841 Epoch 365 Reward 6.0 Global Step 1016373
Episode 3842 Epoch 348 Reward 6.0 Global Step 1016721
0.05
Episode 3843 Epoch 345 Reward 10.0 Global Step 1017066
Episode 3844 Epoch 366 Reward 2.0 Global Step 1017432
Episode 3845 Epoch 392 Reward 11.0 Global Step 1017824
0.05
Episode 3846 Epoch 354 Reward 10.0 Glo

0.05
Episode 3975 Epoch 511 Reward 5.0 Global Step 1074015
Episode 3976 Epoch 333 Reward 7.0 Global Step 1074348
0.05
Episode 3977 Epoch 654 Reward 11.0 Global Step 1075002
Episode 3978 Epoch 443 Reward 9.0 Global Step 1075445
Episode 3979 Epoch 501 Reward 12.0 Global Step 1075946
0.05
Episode 3980 Epoch 653 Reward 8.0 Global Step 1076599
Episode 3981 Epoch 364 Reward 8.0 Global Step 1076963
0.05
Episode 3982 Epoch 305 Reward 7.0 Global Step 1077268
Episode 3983 Epoch 453 Reward 8.0 Global Step 1077721
0.05
Episode 3984 Epoch 293 Reward 4.0 Global Step 1078014
Episode 3985 Epoch 577 Reward 7.0 Global Step 1078591
0.05
Episode 3986 Epoch 645 Reward 7.0 Global Step 1079236
Episode 3987 Epoch 375 Reward 8.0 Global Step 1079611
0.05
Episode 3988 Epoch 707 Reward 18.0 Global Step 1080318
Episode 3989 Epoch 418 Reward 7.0 Global Step 1080736
0.05
Episode 3990 Epoch 327 Reward 5.0 Global Step 1081063
Episode 3991 Epoch 304 Reward 5.0 Global Step 1081367
Episode 3992 Epoch 283 Reward 8.0 Globa

In [58]:
# Sanity check: compare the graph that X is attached to (X.graph) with the
# graph TensorFlow currently reports as the default. The cell's value is the
# result of that comparison, so True means both refer to the same graph.
# NOTE(review): X is defined in an earlier cell that is not visible here;
# presumably it is the network's input placeholder -- confirm. A False result
# would likely mean tf.reset_default_graph() was re-run after X was created.
X.graph == tf.get_default_graph()

True