In [1]:
import os, sys, time, datetime, json, random
import numpy as np
import matplotlib.pyplot as plt
import tkinter
from PIL import Image
import operator
%matplotlib qt

In [2]:
# Seed every random number generator the notebook draws from.
# The maze walks sample their steps with np.random.choice, which is driven by
# NumPy's global generator and is NOT affected by random.seed, so both
# generators are seeded to make a Restart & Run All reproducible.
random.seed(12)
np.random.seed(12)

In [40]:
## Maze Array
# The walkable 4x4 room (2 = traveler start, 1 = floor, 9 = gold/exit),
# wrapped in a one-cell border of walls (0) to give the full 6x6 maze.
maze = np.pad(
    np.array([
        [2, 1, 1, 9],
        [1, 1, 1, 1],
        [1, 1, 9, 1],
        [1, 1, 1, 1],
    ]),
    pad_width=1,
    mode='constant',
    constant_values=0,
)

In [4]:
# Function that draws the current maze state

# Cell codes used in the maze array:
#     0 is a "wall"
#     1 is the "floor"
#     2 is the "traveler"
#     3 is any "visited space"
#     9 is the dungeon "exit"

def drawCurMazeState(maze, cell_px=100):
    """Render a maze array as an RGB PIL image.

    Each maze cell becomes a solid ``cell_px`` x ``cell_px`` square whose
    colour depends on the cell code (see the legend above). Any code that is
    not 0, 2, 3 or 9 is drawn in the floor colour.

    Parameters
    ----------
    maze : 2-D array-like of numbers
        The maze grid, indexed ``[row, column]``.
    cell_px : int, optional
        Edge length in pixels of one maze cell (default 100).

    Returns
    -------
    PIL.Image.Image
        An RGB image of size ``(columns * cell_px, rows * cell_px)``.

    Raises
    ------
    ValueError
        If ``maze`` is not two-dimensional.
    """
    grid = np.asarray(maze)
    if grid.ndim != 2:
        raise ValueError(
            'maze must be a 2-D array, got shape {}'.format(grid.shape))

    floor_colour = (148, 176, 221)
    palette = {
        0: (0, 0, 0),         # walls
        2: (219, 17, 6),      # traveler
        3: (244, 110, 66),    # visited state
        9: (219, 194, 5),     # exit
    }

    # Colour the small grid first (one RGB triple per maze cell); this is far
    # cheaper than classifying every output pixel in a Python loop.
    rgb = np.empty(grid.shape + (3,), dtype=np.uint8)
    rgb[...] = floor_colour
    for code, colour in palette.items():
        rgb[grid == code] = colour

    # Blow every cell up to cell_px x cell_px pixels (rows, then columns).
    rgb = np.repeat(np.repeat(rgb, cell_px, axis=0), cell_px, axis=1)

    # A (height, width, 3) uint8 array is interpreted by PIL as an RGB image.
    return Image.fromarray(rgb)

In [5]:
## Drawing the Initial Maze State
# Quick visual check of the starting layout: render the untouched maze with
# drawCurMazeState and hand the resulting PIL image to Image.show().
test = drawCurMazeState(maze)
test.show()

In [29]:
## States and actions
# A state is a position in the flattened (row-major) maze, so with rows of
# 6 cells a horizontal move shifts the position by 1 and a vertical move
# shifts it by 6.

# Numerical index for an action (its place in `actions`):
# 0->left
# 1->right
# 2->up
# 3->down
actions = ['left', 'right', 'up', 'down']

# Position offset applied by each action
actionDelta = dict(zip(actions, (-1, 1, -6, 6)))

# Default probability for each action: uniform over the four moves
actionProb = dict.fromkeys(actions, .25)

# action function
# def act(currpos, action):
#     tuple(map(operator.add, currpos, action))
          
# Define possible positions
# One index per cell of the flattened (row-major) maze.
positions = np.arange(0, maze.size)
linmaze = np.reshape(maze, (maze.size, 1)).copy()

# Define Initial Uniform Probability Dictionary at every position
# Every position gets its OWN copy of the default distribution.
# dict.fromkeys(keys, value) would store the very same dict object under
# every key, so changing one position's probabilities would change them all.
ProbDict = {pos: actionProb.copy() for pos in positions}

In [8]:
#Define Reward Dictionary
# Keyed by the maze code of the tile the traveler tries to step onto:
#     0 = wall, 1 = floor, 9 = gold
#
# The values are base-10 exponents rather than raw points: the Q-learning
# cells turn utilities into sampling weights with np.power(10, utility), so
# a stored reward r contributes a weight of 10**r:
#     wall  -> 10**-1 = 0.1   (strongly discouraged)
#     floor -> 10**1  = 10    (plain exploration)
#     gold  -> 10**2  = 100   (10x the weight of plain exploration)
RewardDict = {0:-1, 1:1, 9:2}

In [9]:
#Define Utility Dictionary
# Starting utilities: every action is worth 1 at every position, so no
# direction is preferred before any reward has been recorded.
actionUtil = dict.fromkeys(('left', 'right', 'up', 'down'), 1)

# One independent {action: utility} table per position, so that updating a
# position never touches the others.
UtilityDict = {}
for key in positions:
    UtilityDict[key] = actionUtil.copy()

In [42]:
## Q Learning, untrained
# Walk the maze once. Each step is sampled from a base-10 softmax over the
# utilities stored in UtilityDict for the current position, and the reward
# observed for the attempted (position, action) pair is written back into
# UtilityDict so later steps and later runs can use it.
linmaze = np.reshape(maze, (maze.size, 1)).copy() #Flattened working copy of the maze
ImageStream = [] #For making the gif
im_maze = linmaze.copy() #Separate copy that is only used for drawing

Initpos = 7 #This is the (0,0) of the inner array
visited = [] #Array of all positions visited 
GoldFlag = 0 #Number of Gold Coins Collected

# The start tile is coded 2 in the maze. For movement it is ordinary floor;
# left as 2 it matches none of the wall/floor/gold cases (and has no entry in
# RewardDict), so the traveler could never step back onto it.
linmaze[Initpos] = 1

#Take a Step
currpos = Initpos
# NOTE(review): 26 iterations, while the failure message below says 25 turns - confirm.
for loopvar in range(26):

    #Utility of every action at the current position
    Probspace = [UtilityDict[currpos][action] for action in actions]

    #Normalizing Probspace
    # Float base: integer 10 raised to a negative integer utility raises in NumPy.
    weights = np.power(10.0, Probspace)
    Probspace = weights/np.sum(weights)

    step = actions[np.random.choice(len(actions), p=Probspace)] #Step direction
    prevpos = currpos
    nextpos = currpos+actionDelta[step] #next possible position
    tile = int(linmaze[nextpos, 0]) #maze code of the tile we are trying to enter

    #collect reward: remember what `step` earned from the tile we acted on
    UtilityDict[prevpos][step] = RewardDict[tile]

    if tile != 0: #Anything but a wall can be entered; on a wall we stay put
        if tile == 9: #We've found treasure
            linmaze[nextpos] = 1 #Removing Gold
            print('Collected Gold With Hunger Level {}'.format(loopvar))
            GoldFlag = GoldFlag+1

        visited.append(prevpos) #Add to visited
        currpos = nextpos #move

        #Create Image
        im_maze[prevpos] = 3
        im_maze[currpos] = 2
        im_matmaze = np.reshape(im_maze, maze.shape)
        img = drawCurMazeState(im_matmaze)

        ImageStream.append(img)

    if GoldFlag == 2:
        # NOTE(review): reports loopvar-1 although the gold message above reports loopvar - confirm.
        print('The Player Won With Hunger Level {}'.format(loopvar-1))
        break

if not GoldFlag == 2:
    print('The Player Died of Hunger (25 turns) :(')

#Saving Gif
# os.path.join avoids the invalid '\Q' escape sequence and the Windows-only
# separator of a hand-written 'Output\...' path.
os.makedirs('Output', exist_ok=True)
if ImageStream:
    ImageStream[0].save(os.path.join('Output', 'QLearning.gif'),
               save_all=True,
               append_images=ImageStream[1:],
               duration=52*10,
               loop=0)
else:
    # Every sampled step hit a wall, so there is nothing to animate.
    print('No frames were recorded; skipping gif export')

Collected Gold With Hunger Level 7
Collected Gold With Hunger Level 15
The Player Won With Hunger Level 14


In [47]:
## Q Learning, TRAINED
# Same walk as the untrained run, but starting from whatever utilities
# UtilityDict holds at this point. Each step is sampled from a base-10
# softmax over those utilities, and the reward observed for the attempted
# (position, action) pair is written back into UtilityDict.
linmaze = np.reshape(maze, (maze.size, 1)).copy() #Flattened working copy of the maze
ImageStream = [] #For making the gif
im_maze = linmaze.copy() #Separate copy that is only used for drawing

Initpos = 7 #This is the (0,0) of the inner array
visited = [] #Array of all positions visited 
GoldFlag = 0 #Number of Gold Coins Collected

# The start tile is coded 2 in the maze. For movement it is ordinary floor;
# left as 2 it matches none of the wall/floor/gold cases (and has no entry in
# RewardDict), so the traveler could never step back onto it.
linmaze[Initpos] = 1

#Take a Step
currpos = Initpos
# NOTE(review): 26 iterations, while the failure message below says 25 turns - confirm.
for loopvar in range(26):

    #Utility of every action at the current position
    Probspace = [UtilityDict[currpos][action] for action in actions]

    #Normalizing Probspace
    # Float base: integer 10 raised to a negative integer utility raises in NumPy.
    weights = np.power(10.0, Probspace)
    Probspace = weights/np.sum(weights)

    step = actions[np.random.choice(len(actions), p=Probspace)] #Step direction
    prevpos = currpos
    nextpos = currpos+actionDelta[step] #next possible position
    tile = int(linmaze[nextpos, 0]) #maze code of the tile we are trying to enter

    #collect reward: remember what `step` earned from the tile we acted on
    UtilityDict[prevpos][step] = RewardDict[tile]

    if tile != 0: #Anything but a wall can be entered; on a wall we stay put
        if tile == 9: #We've found treasure
            linmaze[nextpos] = 1 #Removing Gold
            print('Collected Gold With Hunger Level {}'.format(loopvar))
            GoldFlag = GoldFlag+1

        visited.append(prevpos) #Add to visited
        currpos = nextpos #move

        #Create Image
        im_maze[prevpos] = 3
        im_maze[currpos] = 2
        im_matmaze = np.reshape(im_maze, maze.shape)
        img = drawCurMazeState(im_matmaze)

        ImageStream.append(img)

    if GoldFlag == 2:
        # NOTE(review): reports loopvar-1 although the gold message above reports loopvar - confirm.
        print('The Player Won With Hunger Level {}'.format(loopvar-1))
        break

if not GoldFlag == 2:
    print('The Player Died of Hunger (25 turns) :(')

#Saving Gif
# os.path.join avoids the invalid '\Q' escape sequence and the Windows-only
# separator of a hand-written 'Output\...' path.
os.makedirs('Output', exist_ok=True)
if ImageStream:
    ImageStream[0].save(os.path.join('Output', 'QLearningTrained.gif'),
               save_all=True,
               append_images=ImageStream[1:],
               duration=52*10,
               loop=0)
else:
    # Every sampled step hit a wall, so there is nothing to animate.
    print('No frames were recorded; skipping gif export')

Collected Gold With Hunger Level 10
Collected Gold With Hunger Level 15
The Player Won With Hunger Level 14


In [32]:
## Monte Carlo Stage
# We will allow our actor to wander its world at random (steps are drawn
# from the per-position probabilities in ProbDict), recording in UtilityDict
# the reward of every move it attempts from every position it encounters.
linmaze = np.reshape(maze, (maze.size, 1)).copy() #Flattened working copy of the maze
ImageStream = [] #For making the gif
im_maze = linmaze.copy() #Separate copy that is only used for drawing

Initpos = 7 #This is the (0,0) of the inner array
visited = [] #Array of all positions visited 
GoldFlag = 0 #Number of Gold Coins Collected

# The start tile is coded 2 in the maze. For movement it is ordinary floor;
# left as 2 it matches none of the wall/floor/gold cases (and has no entry in
# RewardDict), so the traveler could never step back onto it.
linmaze[Initpos] = 1

#Take a Step
currpos = Initpos
# NOTE(review): 26 iterations, while the failure message below says 25 turns - confirm.
for loopvar in range(26):

    #Probability of every action at the current position
    Probspace = [ProbDict[currpos][action] for action in actions]

    step = actions[np.random.choice(len(actions), p=Probspace)] #Step direction
    prevpos = currpos
    nextpos = currpos+actionDelta[step] #next possible position
    tile = int(linmaze[nextpos, 0]) #maze code of the tile we are trying to enter

    #collect reward: remember what `step` earned from the tile we acted on
    UtilityDict[prevpos][step] = RewardDict[tile]

    if tile != 0: #Anything but a wall can be entered; on a wall we stay put
        if tile == 9: #We've found treasure
            linmaze[nextpos] = 1 #Removing Gold
            print('Collected Gold With Hunger Level {}'.format(loopvar))
            GoldFlag = GoldFlag+1

        visited.append(prevpos) #Add to visited
        currpos = nextpos #move

        #Create Image
        im_maze[prevpos] = 3
        im_maze[currpos] = 2
        im_matmaze = np.reshape(im_maze, maze.shape)
        img = drawCurMazeState(im_matmaze)

        ImageStream.append(img)

    if GoldFlag == 2:
        # NOTE(review): reports loopvar-1 although the gold message above reports loopvar - confirm.
        print('The Player Won With Hunger Level {}'.format(loopvar-1))
        break

if not GoldFlag == 2:
    print('The Player Died of Hunger (25 turns) :(')

#Saving Gif
# os.path.join avoids the invalid '\R' escape sequence and the Windows-only
# separator of a hand-written 'Output\...' path.
os.makedirs('Output', exist_ok=True)
if ImageStream:
    ImageStream[0].save(os.path.join('Output', 'RandomWalk.gif'),
               save_all=True,
               append_images=ImageStream[1:],
               duration=52*10,
               loop=0)
else:
    # Every sampled step hit a wall, so there is nothing to animate.
    print('No frames were recorded; skipping gif export')

Collected Gold With Hunger Level 11
The Player Died of Hunger (25 turns) :(
