# Deep Reinforcement Learning for Breakout
A model that learns to play a video game using a deep neural network and reinforcement learning (Q-learning).

In [1]:
#Importing libraries
import numpy as np
from   gym import envs
import gym

#Keras
from keras.models     import Sequential
from keras.layers     import Dense, Convolution2D, MaxPooling2D, Dropout, Flatten
from keras.optimizers import SGD, RMSprop
from keras.utils      import np_utils

Using TensorFlow backend.


## Creating Game Environment

In [2]:
#Instantiate the Breakout environment and report its observation space
env = gym.make('Breakout-v3')
print('Observation space : ' + str(env.observation_space))

#Network input shape: first two dimensions of a frame, third dimension set to 1
imSize = list(env.reset().shape[:2]) + [1]

[2017-03-23 18:16:31,204] Making new env: Breakout-v3


Observation space : Box(210, 160, 3)


## Neural Network

In [3]:
#Convolutional neural network

def cnnModel(layerDim, poolSize, imSize, actFct, initW, dropProb):
    ''' Will build the convolutional part of a deep NN model taking the following arguments:
        
        layerDim : List of [number of filters, kernel rows, kernel columns], one entry
                   per convolutional layer (its length specifies the number of layers)
        poolSize : Pooling window used by every max-pooling layer
        imSize   : Input shape of the network, only given to the first layer
        actFct   : Activation function for the convolutional layers
        initW    : How to initialize the weights of each convolutional layer
        dropProb : Dropout probability
        
        Returns the Sequential model (not compiled), ending with a flattened and
        dropped-out output so that fully connected layers can be stacked on top.
        '''
    
    #Initializing our architecture as a feedforward network
    m = Sequential()
    nLayers = len(layerDim)

    for L, spec in enumerate(layerDim):
        
        convArgs = dict(border_mode='same', init=initW, activation=actFct)
        
        #Only the first layer needs an explicit input shape; following layers
        #infer theirs from the output of the layer they are stacked on
        if L == 0:
            convArgs['input_shape'] = imSize
        
        #CNN layer
        m.add(Convolution2D(spec[0], spec[1], spec[2], **convArgs))
        
        #Pooling layer
        m.add(MaxPooling2D(pool_size=poolSize, strides=(2,2), border_mode='same'))
        
        #Adding dropout if not input or last layer
        if 0 < L < nLayers - 1:
            m.add(Dropout(dropProb))

    #Flattening for fully connected NN
    m.add(Flatten())
    m.add(Dropout(dropProb))
    
    return m

In [4]:
### CNN hyper-parameters

#One [number of filters, kernel rows, kernel columns] entry per convolutional layer
layerDim_cnn = [
    [256, 16, 16],
    [64, 4, 4],
    [256, 3, 3],
]
poolSize     = [2, 2]    #Max-pooling window
acft_cnn     = 'relu'    #Activation of the convolutional layers
initW        = 'normal'  #Weight initialization
dropProb_cnn = 0.5       #Dropout probability


#Assembling the CNN and displaying its architecture
cnn = cnnModel(
    layerDim = layerDim_cnn,
    poolSize = poolSize,
    imSize   = imSize,
    actFct   = acft_cnn,
    initW    = initW,
    dropProb = dropProb_cnn,
)
cnn.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_1 (Convolution2D)  (None, 210, 160, 256) 65792       convolution2d_input_1[0][0]      
____________________________________________________________________________________________________
maxpooling2d_1 (MaxPooling2D)    (None, 105, 80, 256)  0           convolution2d_1[0][0]            
____________________________________________________________________________________________________
convolution2d_2 (Convolution2D)  (None, 105, 80, 64)   262208      maxpooling2d_1[0][0]             
____________________________________________________________________________________________________
maxpooling2d_2 (MaxPooling2D)    (None, 53, 40, 64)    0           convolution2d_2[0][0]            
___________________________________________________________________________________________