In [1]:
from keras import layers
from keras import models
from keras.optimizers import adam
import tensorflow as tf
import cv2

from DQN import Agent

import numpy as np
import gym

Using TensorFlow backend.


In [2]:
# Create the Breakout environment and seed both NumPy's global RNG and the
# environment from a single constant.
SEED = 123
env_name = 'Breakout-v0'
env = gym.make(env_name)
np.random.seed(SEED)
env.seed(SEED)
# Size of the environment's discrete action space.
nb_actions = env.action_space.n

In [3]:
# Dimensions of one preprocessed frame (height, width, channels); the list
# form is what gets handed to the agent as its state shape.
img_height, img_width, img_channels = 84, 84, 1

state_dim = [img_height, img_width, img_channels]

In [4]:
def preprocess(observation):
    """Turn a raw colour frame into an 84x84x1 binary image.

    The frame is resized to 84 pixels wide by 110 tall, converted to
    grayscale, cropped to rows 26..109 (discarding the top 26 rows), and
    thresholded so that every pixel with value greater than 1 becomes 255
    and every other pixel becomes 0.

    Args:
        observation: 3-channel image array as returned by ``env.reset()`` /
            ``env.step()``. Assumed to be in RGB channel order, which is
            what Gym's Atari environments document -- confirm for the
            installed gym version.

    Returns:
        numpy.ndarray of shape ``(84, 84, 1)`` whose values are 0 or 255.
    """
    # cv2.resize takes the target size as (width, height).
    resized = cv2.resize(observation, (84, 110))
    # Use the RGB conversion code: COLOR_BGR2GRAY would apply the red and
    # blue luminance weights to the wrong channels of an RGB frame.
    gray = cv2.cvtColor(resized, cv2.COLOR_RGB2GRAY)
    cropped = gray[26:110, :]
    # The first return value is the threshold that was applied; only the
    # thresholded image is needed.
    _, binary = cv2.threshold(cropped, 1, 255, cv2.THRESH_BINARY)
    return np.reshape(binary, (84, 84, 1))


In [5]:
# Sanity check: reset the environment, take a single random action, and
# confirm the shape of the preprocessed frame.
env.reset()
action = np.random.randint(nb_actions)  # uniform draw from [0, nb_actions)
img, reward, end_episode, info = env.step(action=action)

# Keep an independent copy of the preprocessed frame.
a = preprocess(img).copy()
print(a.shape)

(84, 84, 1)


In [6]:
# Take two more random steps and stack the three preprocessed frames
# (a, b, c) along the last axis.
# NOTE(review): the result has 3 channels while state_dim declares 1, so this
# stacked array looks exploratory only -- confirm before feeding it to the agent.
extra_frames = []
for _ in range(2):
    action = np.random.randint(nb_actions)  # uniform draw from [0, nb_actions)
    img, reward, end_episode, info = env.step(action=action)
    extra_frames.append(preprocess(img).copy())
b, c = extra_frames

state = np.dstack([a, b, c])
print(state.shape)


(84, 84, 3)


In [7]:
# Build the DQN agent from the single-frame state shape and the number of
# actions the environment exposes.
DQN_Agent = Agent(state_dim,nb_actions)

In [8]:
# Print a layer-by-layer summary of the agent's model.
DQN_Agent.model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 84, 84, 1)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 42, 42, 64)   3200        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 42, 42, 64)   256         conv2d_1[0][0]                   
__________________________________________________________________________________________________
re_lu_1 (ReLU)                  (None, 42, 42, 64)   0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
max_poolin

In [9]:
batch_size = 32
n_episodes = 1000
max_frames = 5000        # upper bound on frames played in a single episode
terminal_penalty = -10   # reward recorded for the transition that ends an episode
done = False
for e in range(n_episodes):  # each iteration plays one episode of Breakout
    # Reset the game and shape the first preprocessed frame as a batch of one.
    # reshape is used instead of the in-place ndarray.resize because it raises
    # if the element count ever stops matching, rather than truncating or
    # zero-padding the frame.
    state = preprocess(env.reset()).reshape((1, 84, 84, 1))

    for frame in range(max_frames):
        env.render()  # draws the game on every step
        action = DQN_Agent.act(state)  # action index chosen by the agent for this state
        next_state, reward, done, _ = env.step(action)  # advance the environment one step
        # On the transition that ends the episode, replace the environment
        # reward with the fixed penalty.
        reward = reward if not done else terminal_penalty
        next_state = preprocess(next_state).reshape((1, 84, 84, 1))
        # Record the transition in the agent's memory for later replay.
        DQN_Agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            # NOTE: the value printed as "score" is the frame index at which
            # the episode ended, not the in-game points.
            print("episode: {}/{}, score: {}, e: {:.2}"
                  .format(e, n_episodes, frame, DQN_Agent.epsilon))
            break
    # Train once per episode, and only once the memory holds more than
    # batch_size transitions.
    if len(DQN_Agent.memory) > batch_size:
        DQN_Agent.replay(batch_size)

episode: 0/1000, score: 236, e: 1.0
episode: 1/1000, score: 179, e: 0.99
episode: 2/1000, score: 305, e: 0.99
episode: 3/1000, score: 208, e: 0.99
episode: 4/1000, score: 178, e: 0.98
episode: 5/1000, score: 209, e: 0.98
episode: 6/1000, score: 177, e: 0.97
episode: 7/1000, score: 308, e: 0.97
episode: 8/1000, score: 252, e: 0.96
episode: 9/1000, score: 173, e: 0.96
episode: 10/1000, score: 217, e: 0.95
episode: 11/1000, score: 170, e: 0.95
episode: 12/1000, score: 220, e: 0.94
episode: 13/1000, score: 229, e: 0.94
episode: 14/1000, score: 277, e: 0.93
episode: 15/1000, score: 178, e: 0.93
episode: 16/1000, score: 200, e: 0.92
episode: 17/1000, score: 287, e: 0.92
episode: 18/1000, score: 208, e: 0.91
episode: 19/1000, score: 374, e: 0.91
episode: 20/1000, score: 260, e: 0.9
episode: 21/1000, score: 180, e: 0.9
episode: 22/1000, score: 211, e: 0.9
episode: 23/1000, score: 292, e: 0.89
episode: 24/1000, score: 215, e: 0.89
episode: 25/1000, score: 185, e: 0.88
episode: 26/1000, score: 4

episode: 214/1000, score: 294, e: 0.34
episode: 215/1000, score: 449, e: 0.34
episode: 216/1000, score: 334, e: 0.34
episode: 217/1000, score: 380, e: 0.34
episode: 218/1000, score: 225, e: 0.34
episode: 219/1000, score: 163, e: 0.33
episode: 220/1000, score: 364, e: 0.33
episode: 221/1000, score: 268, e: 0.33
episode: 222/1000, score: 287, e: 0.33
episode: 223/1000, score: 162, e: 0.33
episode: 224/1000, score: 165, e: 0.33
episode: 225/1000, score: 197, e: 0.32
episode: 226/1000, score: 302, e: 0.32
episode: 227/1000, score: 159, e: 0.32
episode: 228/1000, score: 255, e: 0.32
episode: 229/1000, score: 425, e: 0.32
episode: 230/1000, score: 517, e: 0.32
episode: 231/1000, score: 254, e: 0.31
episode: 232/1000, score: 269, e: 0.31
episode: 233/1000, score: 210, e: 0.31
episode: 234/1000, score: 307, e: 0.31
episode: 235/1000, score: 213, e: 0.31
episode: 236/1000, score: 261, e: 0.31
episode: 237/1000, score: 312, e: 0.3
episode: 238/1000, score: 156, e: 0.3
episode: 239/1000, score: 1

episode: 425/1000, score: 167, e: 0.12
episode: 426/1000, score: 221, e: 0.12
episode: 427/1000, score: 159, e: 0.12
episode: 428/1000, score: 323, e: 0.12
episode: 429/1000, score: 372, e: 0.12
episode: 430/1000, score: 587, e: 0.12
episode: 431/1000, score: 460, e: 0.12
episode: 432/1000, score: 236, e: 0.11
episode: 433/1000, score: 164, e: 0.11
episode: 434/1000, score: 379, e: 0.11
episode: 435/1000, score: 467, e: 0.11
episode: 436/1000, score: 159, e: 0.11
episode: 437/1000, score: 161, e: 0.11
episode: 438/1000, score: 159, e: 0.11
episode: 439/1000, score: 156, e: 0.11
episode: 440/1000, score: 471, e: 0.11
episode: 441/1000, score: 375, e: 0.11
episode: 442/1000, score: 274, e: 0.11
episode: 443/1000, score: 626, e: 0.11
episode: 444/1000, score: 538, e: 0.11
episode: 445/1000, score: 153, e: 0.11
episode: 446/1000, score: 266, e: 0.11
episode: 447/1000, score: 263, e: 0.11
episode: 448/1000, score: 309, e: 0.11
episode: 449/1000, score: 505, e: 0.11
episode: 450/1000, score:

episode: 632/1000, score: 163, e: 0.042
episode: 633/1000, score: 161, e: 0.042
episode: 634/1000, score: 532, e: 0.042
episode: 635/1000, score: 823, e: 0.041
episode: 636/1000, score: 287, e: 0.041
episode: 637/1000, score: 655, e: 0.041
episode: 638/1000, score: 754, e: 0.041
episode: 639/1000, score: 165, e: 0.041
episode: 640/1000, score: 260, e: 0.04
episode: 641/1000, score: 159, e: 0.04
episode: 642/1000, score: 241, e: 0.04
episode: 643/1000, score: 899, e: 0.04
episode: 644/1000, score: 741, e: 0.04
episode: 645/1000, score: 697, e: 0.039
episode: 646/1000, score: 973, e: 0.039
episode: 647/1000, score: 150, e: 0.039
episode: 648/1000, score: 262, e: 0.039
episode: 649/1000, score: 1117, e: 0.039
episode: 650/1000, score: 163, e: 0.038
episode: 651/1000, score: 159, e: 0.038
episode: 652/1000, score: 163, e: 0.038
episode: 653/1000, score: 938, e: 0.038
episode: 654/1000, score: 334, e: 0.038
episode: 655/1000, score: 410, e: 0.038


KeyboardInterrupt: 

In [None]:
# Run the model on the current state as a batch of one 84x84x1 frame.
# reshape checks the element count, whereas the in-place ndarray.resize would
# truncate or zero-pad a state of a different size (for example the stacked
# 84x84x3 array built earlier) instead of reporting the mismatch.
state = state.reshape((1, 84, 84, 1))
DQN_Agent.model.predict(state)


In [None]:
# Step the environment once with the current action and inspect the shape of
# the preprocessed frame.
raw_frame, reward, done, _ = env.step(action)
next_state = preprocess(raw_frame)
next_state.shape