### Setup for the maze environment

In [30]:
import numpy as np
from random import random
import math

In [31]:
# Two-way lookup between action names and the integer codes used by the
# environment; the reverse table is derived from the forward one.
action_to_num = dict(zip(("left", "right", "up", "down"), range(4)))
num_to_action = {code: name for name, code in action_to_num.items()}
    
def select_move_from_num(r):
    """Map a number to an action code by splitting [0, 1) into four equal bands.

    Values below 0.25 give "left", below 0.5 "right", below 0.75 "up";
    anything else (including values >= 1) gives "down".
    """
    bands = ((0.25, "left"), (0.5, "right"), (0.75, "up"))
    for upper_bound, action_name in bands:
        if r < upper_bound:
            return action_to_num[action_name]
    # no band matched: fall through to the last action
    return action_to_num["down"]

In [32]:
class Environment:
    """Rectangular grid maze in which a single agent walks towards a goal cell.

    Positions are (x, y) pairs: x is the column (0 .. maze_width - 1) and
    y is the row (0 .. maze_height - 1). The grid itself is indexed as
    ``maze[y, x]``.
    """

    def __init__(self, maze_width=10, maze_height=10):
        """Create an empty maze with the agent at (1, 1) and the goal in the far corner.

        Args:
            maze_width: number of columns (range of x); defaults to 10.
            maze_height: number of rows (range of y); defaults to 10.

        Raises:
            ValueError: if either dimension is smaller than 2, because the
                start cell (1, 1) would then lie outside the grid.
        """
        if maze_width < 2 or maze_height < 2:
            raise ValueError(
                "maze_width and maze_height must both be at least 2, got "
                f"{maze_width} x {maze_height}"
            )

        # dimensions of the maze
        self.maze_width = maze_width
        self.maze_height = maze_height

        # create a maze; rows correspond to y and columns to x
        self.maze = np.zeros((self.maze_height, self.maze_width))

        # add the current (at the start at beginning) and end points
        self.current_point_x = 1
        self.current_point_y = 1
        # x is limited by the width and y by the height (see move()), so the
        # goal corner has to be derived the same way
        self.end_point_x = self.maze_width - 1
        self.end_point_y = self.maze_height - 1

        self.add_walls()

    def add_walls(self):
        """Add walls the agent cannot pass through (currently a no-op placeholder)."""
        pass

    def get_reward(self):
        """Return the reward for the agent's current position.

        Returns:
            10 when the agent stands on the goal cell, otherwise the
            reciprocal of the Manhattan distance to the goal.
        """
        # just manhattan distance for now
        dist = (abs(self.current_point_x - self.end_point_x)
                + abs(self.current_point_y - self.end_point_y))
        if dist == 0:
            return 10
        return 1 / dist

    def is_done(self):
        """Return True once the agent has reached the goal cell."""
        return (self.current_point_x == self.end_point_x
                and self.current_point_y == self.end_point_y)

    def move(self, direction):
        """Move the agent one cell in the given direction.

        Args:
            direction: an action code from ``action_to_num``.

        A move that would leave the grid is ignored, and so is a code that
        does not match any known action.
        """
        # agent cannot move if already at an edge
        if direction == action_to_num["left"]:
            if self.current_point_x > 0:
                self.current_point_x -= 1
        elif direction == action_to_num["right"]:
            if self.current_point_x < self.maze_width - 1:
                self.current_point_x += 1
        elif direction == action_to_num["up"]:
            if self.current_point_y > 0:
                self.current_point_y -= 1
        elif direction == action_to_num["down"]:
            if self.current_point_y < self.maze_height - 1:
                self.current_point_y += 1

    def do_moves(self):
        """Smoke test: take 10 random moves, marking visited cells and printing rewards.

        After the loop the list of action codes taken and the grid (visited
        cells set to 1) are printed.
        """
        moves = []
        for _ in range(10):
            r = random()
            moves.append(select_move_from_num(r))
            self.move(moves[-1])
            self.maze[self.current_point_y, self.current_point_x] = 1
            print(self.get_reward())

        print(moves)
        print(self.maze)
                

In [33]:
# Instantiate the maze environment.
env = Environment()
# Uncomment the next line to run the random-move smoke test (Environment.do_moves).
# env.do_moves()