In [0]:
import numpy as np

def randPair(s,e):
    """Return two independently drawn random integers.

    Both values come from ``np.random.randint(s, e)``, i.e. the lower bound
    ``s`` is inclusive and the upper bound ``e`` is exclusive.

    Args:
        s: lowest value that may be drawn.
        e: one past the highest value that may be drawn.

    Returns:
        A 2-tuple ``(first, second)`` of integers.
    """
    # The second draw previously referenced an undefined name `p`, which
    # raised NameError on every call.
    return np.random.randint(s,e), np.random.randint(s,e)

#finds an array in the "depth" dimension of the grid
def findLoc(state, obj):
    """Locate the first grid cell whose depth vector equals ``obj``.

    The 5x5 grid is scanned row by row. A cell matches only when every
    element of ``state[row, col]`` compares equal to ``obj``.

    Returns:
        ``(row, col)`` of the first match, or ``None`` (implicitly) when no
        cell matches.
    """
    for cell in np.ndindex(5, 5):
        matches = state[cell] == obj
        if matches.all():
            return cell
def initGrid():
    """Build the fixed starting grid.

    The grid is a 5x5 board with a 5-element vector per cell. The player
    (last vector element set) starts at the top-left cell and the goal
    (first vector element set) sits at the bottom-right cell.

    Returns:
        A ``(5, 5, 5)`` float array of zeros with those two entries set to 1.
    """
    grid = np.zeros((5, 5, 5))
    grid[0, 0, 4] = 1  # player marker
    grid[4, 4, 0] = 1  # goal marker
    return grid


In [0]:
def makeMove(state, action):
    """Return the grid that results from moving the player one step.

    The player and goal are located in ``state`` with ``findLoc``; a fresh
    zeroed grid is then built with the player at its new cell (or at its old
    cell when the step would leave the 5x5 board) and the goal re-marked.
    The input array is not modified.

    Args:
        state: current ``(5, 5, 5)`` grid.
        action: index into the step table below (0 is "up": row - 1).

    Returns:
        A new ``(5, 5, 5)`` grid.
    """
    player_vec = np.array([0,0,0,0,1])
    old_pos = findLoc(state, player_vec)
    goal_pos = findLoc(state, np.array([1,0,0,0,0]))

    # (row delta, column delta) for each action index
    deltas = [[-1,0],[1,0],[0,-1],[0,1]]
    target = (old_pos[0] + deltas[action][0], old_pos[1] + deltas[action][1])
    on_board = 0 <= target[0] <= 4 and 0 <= target[1] <= 4

    next_state = np.zeros((5,5,5))
    if on_board:
        next_state[target][4] = 1
    else:
        # stepping off the board leaves the player where it was
        next_state[old_pos] = player_vec
    # the goal is written last so it survives the player landing on it
    next_state[goal_pos][0] = 1

    return next_state

In [0]:
def getLoc(state, level):
    """Find the first cell whose ``level``-th depth entry equals 1.

    Cells of the 5x5 grid are visited row by row.

    Returns:
        ``(row, col)`` of the first hit, or ``None`` when no cell has a 1 at
        that depth index.
    """
    hits = (cell for cell in np.ndindex(5, 5) if state[cell][level] == 1)
    return next(hits, None)

def getReward(state):
    """Score a grid state.

    Locations are looked up with ``getLoc`` on depth index 4 (player),
    1 (pit) and 0 (goal) and compared as returned, so two lookups that both
    come back ``None`` compare equal.

    Returns:
        -10 when the player location equals the pit location, 10 when it
        equals the goal location, otherwise -1.
    """
    player = getLoc(state, 4)
    pit_pos = getLoc(state, 1)
    goal_pos = getLoc(state, 0)
    if player == pit_pos:
        return -10
    if player == goal_pos:
        return 10
    return -1
    
def dispGrid(state):
    """Render the grid as a 5x5 array of one-character strings.

    The player is drawn as 'S' and the goal as 'E'; every other cell is a
    single space. Each object is located through its own depth channel
    (index 4 for the player, index 0 for the goal) rather than by matching
    the whole cell vector. An exact-vector match finds neither object once
    the player is standing on the goal, because that cell holds both
    markers, and the winning state would be rendered as an empty board.

    When both objects share a cell the goal is drawn last, so the cell
    shows 'E'.

    Args:
        state: ``(5, 5, 5)`` grid.

    Returns:
        A ``(5, 5)`` array of single-character strings.
    """
    grid = np.full((5,5), ' ', dtype='<U1')
    # draw order matters: later symbols overwrite earlier ones
    for level, symbol in ((4, 'S'), (0, 'E')):
        hits = np.argwhere(state[:5, :5, level] == 1)
        if len(hits):
            # first hit in row-major order
            grid[tuple(hits[0])] = symbol

    return grid

In [0]:
%tensorflow_version 1.x
import tensorflow
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import RMSprop

Using TensorFlow backend.


In [0]:
# Q-network: takes the flattened 125-element grid and outputs one value per
# action (4 outputs). Hidden layers are Dense + ReLU; the output stays linear
# so it can take any real value.
model = Sequential([
    Dense(164, kernel_initializer='lecun_uniform', input_shape=(125,)),
    Activation('relu'),

    Dense(32, kernel_initializer='lecun_uniform'),
    Activation('relu'),

    Dense(8, kernel_initializer='lecun_uniform'),
    Activation('relu'),

    Dense(16, kernel_initializer='lecun_uniform'),
    Activation('relu'),

    Dense(4, kernel_initializer='lecun_uniform'),
    Activation('linear'),
])

# mean-squared error against the target Q-values, optimised with RMSprop
rms = RMSprop()
model.compile(optimizer=rms, loss='mse')

In [0]:
from IPython.display import clear_output
import random

# Online Q-learning: play `epochs` games from the fixed start grid and fit the
# network on a single (state, target) pair after every move.
epochs = 300
gamma = 0.9 #since it may take several moves to goal, making gamma high
epsilon = 1 #probability of taking a random action; decayed once per game
for i in range(epochs):
    
    state = initGrid()
    status = 1
    #while game still in progress
    while(status == 1):
        #We are in state S
        #Let's run our Q function on S to get Q values for all possible actions
        qval = model.predict(state.reshape(1,125), batch_size=1)
        if (random.random() < epsilon): #choose random action
            action = np.random.randint(0,4)
        else: #choose best action from Q(s,a) values
            action = (np.argmax(qval))
        #Take the action and observe the new state S' and its reward
        new_state = makeMove(state, action)
        reward = getReward(new_state)
        #Best Q value reachable from S'
        newQ = model.predict(new_state.reshape(1,125), batch_size=1)
        maxQ = np.max(newQ)
        #Target starts as a copy of the current prediction so that only the
        #entry for the action actually taken contributes to the loss
        y = np.zeros((1,4))
        y[:] = qval[:]
        if reward == -1: #non-terminal state
            update = (reward + (gamma * maxQ))
        else: #terminal state
            update = reward
        y[0][action] = update #target output
        print("Game #: %s" % (i,))
        #`epochs` is the Keras 2 name of this argument; the Keras 1 spelling
        #`nb_epoch` used before is deprecated
        model.fit(state.reshape(1,125), y, batch_size=1, epochs=1, verbose=1)
        state = new_state
        if reward != -1:
            status = 0
        clear_output(wait=True)
    #linearly reduce exploration until it reaches roughly 0.1
    if epsilon > 0.1:
        epsilon -= (1/epochs)

Game #: 299
Epoch 1/1


In [0]:
def testAlgo(init=0):
    """Play one game greedily with the trained model and print every step.

    Args:
        init: which starting grid to use. 0 calls ``initGrid()``, 1 calls
            ``initGridPlayer()`` and 2 calls ``initGridRand()``.
            NOTE(review): ``initGridPlayer`` and ``initGridRand`` are not
            defined in the cells shown here; confirm they exist before
            using init=1 or init=2.

    Raises:
        ValueError: if ``init`` is not 0, 1 or 2 (previously this surfaced
            later as an unbound-variable error on ``state``).
    """
    i = 0
    if init==0:
        state = initGrid()
    elif init==1:
        state = initGridPlayer()
    elif init==2:
        state = initGridRand()
    else:
        raise ValueError("init must be 0, 1 or 2, got %r" % (init,))

    print("Initial State:")
    print(dispGrid(state))
    status = 1
    #while game still in progress
    while(status == 1):
        qval = model.predict(state.reshape(1,125), batch_size=1)
        action = (np.argmax(qval)) #take action with highest Q-value
        print('Move #: %s; Taking action: %s' % (i, action))
        state = makeMove(state, action)
        print(dispGrid(state))
        reward = getReward(state)
        if reward!=-1 :
            status = 0
            print("Reward: %s" % (reward,))
        i += 1
        #If we've taken more than 20 actions, just stop, we probably can't win this game.
        #Only applies while the game is still running, so a game that just
        #ended on this move is not also reported as lost.
        if (status == 1 and i > 20):
            print("Game lost; too many moves.")
            break

In [0]:
# Play one greedy game from the fixed start grid built by initGrid().
testAlgo(init=0)

Initial State:
[['S' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' 'E']]
Move #: 0; Taking action: 3
[[' ' 'S' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' 'E']]
Move #: 1; Taking action: 3
[[' ' ' ' 'S' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' 'E']]
Move #: 2; Taking action: 1
[[' ' ' ' ' ' ' ' ' ']
 [' ' ' ' 'S' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' 'E']]
Move #: 3; Taking action: 3
[[' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' 'S' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' 'E']]
Move #: 4; Taking action: 3
[[' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' 'S']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' 'E']]
Move #: 5; Taking action: 1
[[' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' 'S']
 [' ' ' ' ' ' ' ' ' ']
 [' ' ' ' ' ' ' ' 'E']]
Move 