### Gridworld game

In [1]:
import numpy as np

def randPair(s,e):
    """Draw two independent random integers, each from the half-open range [s, e).

    The pair is returned as a 2-tuple, which the grid initialisers use
    directly as a (row, col) index.
    """
    first = np.random.randint(s, e)
    second = np.random.randint(s, e)
    return first, second

#find the grid cell whose depth vector equals a given array
def findLoc(state, obj):
    """Return the (row, col) of the first cell whose depth vector equals ``obj``.

    Cells are visited in row-major order over the first two axes of
    ``state``. The scan bounds come from ``state.shape``, so the grid does
    not have to be 4x4.

    Parameters
    ----------
    state : numpy.ndarray
        Grid indexed as ``state[row, col]``; each cell is compared
        element-wise with ``obj``.
    obj : numpy.ndarray
        Vector to look for (e.g. ``np.array([0,0,0,1])``).

    Returns
    -------
    tuple of (int, int) or None
        Location of the first exact match, or ``None`` when no cell matches
        (for instance when two objects share a cell and the combined vector
        no longer equals ``obj``).
    """
    n_rows, n_cols = state.shape[:2]
    for i, j in np.ndindex(n_rows, n_cols):
        if (state[i, j] == obj).all():
            return i, j
    return None
            
#build the fixed starting grid; every object sits at a hard-coded cell
def initGrid():
    """Create the deterministic 4x4 board.

    Each cell holds a length-4 vector; a 1 at index 3 marks the player,
    index 2 the wall, index 1 the pit and index 0 the goal.

    Returns
    -------
    numpy.ndarray
        Array of shape (4, 4, 4) with the four objects placed.
    """
    layout = (
        ((0, 1), [0, 0, 0, 1]),  #player
        ((2, 2), [0, 0, 1, 0]),  #wall
        ((1, 1), [0, 1, 0, 0]),  #pit
        ((3, 3), [1, 0, 0, 0]),  #goal
    )
    state = np.zeros((4, 4, 4))
    for cell, one_hot in layout:
        state[cell] = np.array(one_hot)
    return state

#random player start; wall, pit and goal stay at their fixed cells
def initGridPlayer():
    """Create a 4x4 board with a randomly placed player and fixed obstacles.

    The player is written first, so a draw that lands on the wall, pit or
    goal cell is overwritten by that object. Such a board has no player
    cell and is discarded; drawing repeats until all four objects can be
    located with findLoc.

    Returns
    -------
    numpy.ndarray
        Array of shape (4, 4, 4) containing player, wall, pit and goal.
    """
    player_vec = np.array([0, 0, 0, 1])
    wall_vec = np.array([0, 0, 1, 0])
    pit_vec = np.array([0, 1, 0, 0])
    goal_vec = np.array([1, 0, 0, 0])

    while True:
        state = np.zeros((4, 4, 4))
        state[randPair(0, 4)] = player_vec
        state[2, 2] = wall_vec
        state[1, 1] = pit_vec
        state[3, 3] = goal_vec

        found = [findLoc(state, vec)
                 for vec in (player_vec, wall_vec, pit_vec, goal_vec)]
        #findLoc gives None for an object that was overwritten
        if all(found):
            return state

#random placement of player, wall, pit and goal
def initGridRand():
    """Create a 4x4 board with all four objects at random, distinct cells.

    Objects are drawn in the order player, wall, pit, goal. When two draws
    hit the same cell the later object overwrites the earlier one, which
    findLoc then reports as missing; the whole board is redrawn until all
    four objects can be located.

    Returns
    -------
    numpy.ndarray
        Array of shape (4, 4, 4) containing player, wall, pit and goal.
    """
    objects = (
        np.array([0, 0, 0, 1]),  #player
        np.array([0, 0, 1, 0]),  #wall
        np.array([0, 1, 0, 0]),  #pit
        np.array([1, 0, 0, 0]),  #goal
    )

    while True:
        state = np.zeros((4, 4, 4))
        for vec in objects:
            state[randPair(0, 4)] = vec

        found = [findLoc(state, vec) for vec in objects]
        #a None entry means that object was overwritten by a later one
        if all(found):
            return state


### Movement function

In [2]:
def makeMove(state, action):
    """Move the player one cell and return the resulting grid as a new array.

    The input ``state`` is not modified. The move is ignored (player stays
    put) when the target cell is the wall or lies outside the grid; the grid
    bounds are taken from ``state.shape``.

    Parameters
    ----------
    state : numpy.ndarray
        Current grid; player, wall, pit and goal are located with findLoc,
        so each must occupy its own cell.
    action : int
        Index into the move table: 0 = up (row-1), 1 = down (row+1),
        2 = left (col-1), 3 = right (col+1).

    Returns
    -------
    numpy.ndarray
        New grid of the same shape. When the player steps onto the pit or
        the goal, that cell carries both markers.
    """
    #locate every object in the current grid
    player_loc = findLoc(state, np.array([0,0,0,1]))
    wall = findLoc(state, np.array([0,0,1,0]))
    pit = findLoc(state, np.array([0,1,0,0]))
    goal = findLoc(state, np.array([1,0,0,0]))

    n_rows, n_cols = state.shape[:2]

    #(row delta, column delta) per action: up, down, left, right
    actions = [[-1,0], [1,0], [0,-1], [0,1]]
    d_row, d_col = actions[action]
    new_loc = (player_loc[0] + d_row, player_loc[1] + d_col)

    #blocked moves leave the player where it was
    in_bounds = 0 <= new_loc[0] < n_rows and 0 <= new_loc[1] < n_cols
    if new_loc == wall or not in_bounds:
        new_loc = player_loc

    new_state = np.zeros(state.shape)
    new_state[new_loc][3] = 1
    #replace pit
    new_state[pit][1] = 1
    #replace wall
    new_state[wall][2] = 1
    #replace goal
    new_state[goal][0] = 1

    return new_state

In [3]:
def getLoc(state, level):
    """Return the (row, col) of the first cell whose ``level`` channel equals 1.

    Unlike an exact vector match, this looks at a single channel only, so it
    still finds an object that shares its cell with another one. The whole
    grid is searched in row-major order, whatever its height and width.

    Parameters
    ----------
    state : numpy.ndarray
        Grid indexed as ``state[row, col, channel]``.
    level : int
        Channel index to inspect.

    Returns
    -------
    tuple of (int, int) or None
        First matching location, or ``None`` when the channel is never 1.
    """
    #argwhere lists matches in row-major order, same as a nested row/col loop
    hits = np.argwhere(state[:, :, level] == 1)
    if len(hits) == 0:
        return None
    row, col = hits[0]
    return int(row), int(col)
            
def getReward(state):
    """Score the current grid from the player's position.

    Positions are read per channel with getLoc (3 = player, 1 = pit,
    0 = goal), so they are found even when the player shares a cell.

    Returns
    -------
    int
        -10 when the player is on the pit, 10 when on the goal, -1 otherwise.
    """
    player_loc = getLoc(state, 3)
    pit_loc = getLoc(state, 1)
    goal_loc = getLoc(state, 0)

    if player_loc == pit_loc:
        return -10
    if player_loc == goal_loc:
        return 10
    return -1
    
def dispGrid(state):
    """Render the grid as a 4x4 array of single characters.

    'P' = player, 'W' = wall, '+' = goal, '-' = pit, ' ' = empty. Objects
    are located with findLoc (exact vector match), so an object that
    findLoc cannot locate is simply not drawn.

    Returns
    -------
    numpy.ndarray
        4x4 array of one-character strings.
    """
    board = np.zeros((4,4), dtype=str)
    board[:] = ' '

    #drawing order matters only if two symbols target the same cell
    symbols = (
        (np.array([0,0,0,1]), 'P'), #player
        (np.array([0,0,1,0]), 'W'),
        (np.array([1,0,0,0]), '+'),
        (np.array([0,1,0,0]), '-'),
    )
    for one_hot, symbol in symbols:
        loc = findLoc(state, one_hot)
        if loc:
            board[loc] = symbol

    return board
                    

In [4]:
#draw a random board and show it; the layout differs on every run
state = initGridRand()
dispGrid(state)

array([['+', ' ', 'W', ' '],
       [' ', 'P', ' ', ' '],
       [' ', '-', ' ', ' '],
       [' ', ' ', ' ', ' ']], dtype='<U1')

In [5]:
#manual walk-through using makeMove's action table (0 -> row-1, 2 -> col-1)
#NOTE: the board above comes from initGridRand, so these hard-coded moves
#only reach the goal for the layout shown in the saved output
state = makeMove(state, 0)
#state = makeMove(state, 0)
#state = makeMove(state, 0)
state = makeMove(state, 2)
#state = makeMove(state, 3)
#state = makeMove(state, 3)
print('Reward: %s' %(getReward(state),))
dispGrid(state)

Reward: 10


array([[' ', ' ', 'W', ' '],
       [' ', ' ', ' ', ' '],
       [' ', '-', ' ', ' '],
       [' ', ' ', ' ', ' ']], dtype='<U1')

### Neural network as our Q function
For the fun part, let's build a neural network that works as our Q function.

In [68]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import RMSprop

In [69]:
#Q-network: 64 inputs (the flattened 4x4x4 grid) -> 164 -> 150 -> 4 linear outputs
model = Sequential([
    Dense(164, init = 'lecun_uniform', input_shape=(64,)),
    Activation('relu'),
    #Dropout(0.2),

    Dense(150, init='lecun_uniform'),
    Activation('relu'),
    #Dropout(0.2),

    Dense(4, init='lecun_uniform'),
    Activation('linear'),
])

#mean squared error against the target Q values, optimised with RMSprop defaults
rms = RMSprop()
model.compile(loss='mse', optimizer=rms)

  
  
  # This is added back by InteractiveShellApp.init_path()


In [70]:
#sanity check: run the current grid, flattened to shape (1, 64), through the network
model.predict(state.reshape(1,64), batch_size=1)

array([[ 0.10289411, -0.03325115,  0.01939793, -0.07264078]],
      dtype=float32)

In [71]:
from IPython.display import clear_output
import random
epochs = 1000 #number of games to play
gamma = 0.9 #discount applied to the best Q value of the next state
epsilon = 1 #probability of taking a random action; decays towards 0.1
for i in range(epochs):
    #every game starts from the same deterministic board
    state = initGrid()
    status = 1
    #while game is still in progress
    while(status == 1):
        #we are in state S
        #let's run our Q function on s and get Q values for all possible actions
        qval = model.predict(state.reshape(1,64), batch_size=1)
        if (random.random() < epsilon): #choose random action
            action = np.random.randint(0,4)
        else: #choose best action from q value
            action = (np.argmax(qval))

        #take action and observe new state S'
        new_state = makeMove(state, action)
        #observe reward
        reward = getReward(new_state)
        #getReward returns -1 for every ordinary step; anything else ends the game
        terminal = (reward != -1)
        #get max_Q(S',a)
        newQ = model.predict(new_state.reshape(1,64), batch_size=1)
        maxQ = np.max(newQ)

        #target equals the current prediction except for the action just taken
        y = np.zeros((1,4))
        y[:] = qval[:]

        if terminal:
            update = reward
        else: #non terminal state: bootstrap from the next state's best Q value
            update = (reward + (gamma*maxQ))

        y[0][action] = update #target output
        print('Game #: %s' %(i,))
        model.fit(state.reshape(1,64), y, batch_size=1, nb_epoch=1, verbose=1)
        state = new_state
        if terminal:
            status = 0

        clear_output(wait=True)

    #decay exploration once per game, so epsilon falls linearly from 1
    #towards 0.1 over the run instead of collapsing within the first few games
    if epsilon > 0.1:
        epsilon -= (1/epochs)
        
        
        
        

Game #: 999
Epoch 1/1


In [72]:
def testAlgo(init=0):
    """Play one greedy game with the trained model, printing every step.

    Parameters
    ----------
    init : int
        Which board to start from: 0 = initGrid (fixed layout),
        1 = initGridPlayer (random player), 2 = initGridRand (all random).

    Raises
    ------
    ValueError
        If ``init`` is not 0, 1 or 2.
    """
    initializers = {0: initGrid, 1: initGridPlayer, 2: initGridRand}
    if init not in initializers:
        raise ValueError('init must be 0, 1 or 2, got %r' % (init,))
    state = initializers[init]()

    print('Initial state:')
    print(dispGrid(state))

    i = 0
    status = 1
    #while game is still in progress
    while(status == 1):
        qval = model.predict(state.reshape(1,64), batch_size=1)
        action = (np.argmax(qval)) #take action with highest Q value
        print('Move #: %s; taking action: %s' %(i, action))
        state = makeMove(state, action)
        print(dispGrid(state))
        reward = getReward(state)

        #getReward returns -1 for ordinary steps; anything else ends the game
        if reward != -1:
            status = 0
            print('Reward: %s' %(reward,))
        i += 1
        #give up after more than 10 moves, but only if the game is still running
        if (status == 1 and i > 10):
            print("Game lost too many move")
            break
        

In [73]:
#evaluate on the fixed layout produced by initGrid (init=0)
testAlgo(init=0)

Initial state:
[[' ' 'P' ' ' ' ']
 [' ' '-' ' ' ' ']
 [' ' ' ' 'W' ' ']
 [' ' ' ' ' ' '+']]
Move #: 0; taking action: 3
[[' ' ' ' 'P' ' ']
 [' ' '-' ' ' ' ']
 [' ' ' ' 'W' ' ']
 [' ' ' ' ' ' '+']]
Move #: 1; taking action: 3
[[' ' ' ' ' ' 'P']
 [' ' '-' ' ' ' ']
 [' ' ' ' 'W' ' ']
 [' ' ' ' ' ' '+']]
Move #: 2; taking action: 1
[[' ' ' ' ' ' ' ']
 [' ' '-' ' ' 'P']
 [' ' ' ' 'W' ' ']
 [' ' ' ' ' ' '+']]
Move #: 3; taking action: 1
[[' ' ' ' ' ' ' ']
 [' ' '-' ' ' ' ']
 [' ' ' ' 'W' 'P']
 [' ' ' ' ' ' '+']]
Move #: 4; taking action: 1
[[' ' ' ' ' ' ' ']
 [' ' '-' ' ' ' ']
 [' ' ' ' 'W' ' ']
 [' ' ' ' ' ' ' ']]
Reward: 10
