In [49]:
from enum import Enum
import random
import numpy as np

GRID_HEIGHT = 5
GRID_WIDTH = 6

class ACTION(int, Enum):
  UP = 0
  DOWN = 1
  LEFT = 2
  RIGHT = 3
  
ACTION_MIN = 0
ACTION_MAX = ACTION.RIGHT

class Agent:
  def __init__(self):
    pass
  
  def get_next_action(self, observation):
    return ACTION(random.randint(ACTION_MIN, ACTION_MAX))


class GridWorld:
  def __init__(self, height, width):
    self.height = height
    self.width = width
    self.init_board()
    self.agent_y = self.agent_x = 1
    
    self.set_agent_position(y=random.randint(1, height-2), x=random.randint(1, width-2))
    self.agent = Agent()

  def init_board(self):
    self.board = np.zeros((self.height, self.width), int)
    self.board[0,:] = 1
    self.board[-1, :] = 1
    self.board[:, 0] = 1
    self.board[:, -1] = 1
    
  def set_agent_position(self, y, x):
    self.board[self.agent_y, self.agent_x] = 0
    self.agent_y = y
    self.agent_x = x
    self.board[self.agent_y, self.agent_x] = 2

  def __repr__(self):
    s = []
    s.append(f"GridWorld(height={self.height}, width={self.width})")
    s.append(f"    Agent x={self.agent_x}, y={self.agent_y}")
    s.append("\n".join(["".join([str(self.board[row, col]) for col in range(self.width)]) for row in range(self.height)]))
    return "\n".join(s)
    
  def act(self, action):
    agent_y = self.agent_y
    agent_x = self.agent_x
    
    if action == ACTION.UP:
      agent_y -= 1
    elif action == ACTION.DOWN:
      agent_y += 1
    elif action == ACTION.LEFT:
      agent_x -= 1
    elif action == ACTION.RIGHT:
      agent_x += 1
    else:
      raise ValueError(f"Invalid ACTION {action}")
      
    reward = 0
    
    if self.board[agent_y, agent_x] != 0:
      reward = -1
    else:      
      self.set_agent_position(y=agent_y, x=agent_x)
    
    return reward
  
  def run(self):
    for _ in range(10):
      observation = np.zeros((ACTION_MAX + 1, ))
      observation[ACTION.UP]     = self.board[self.agent_y + 1, self.agent_x    ]
      observation[ACTION.DOWN]   = self.board[self.agent_y - 1, self.agent_x    ]
      observation[ACTION.LEFT]   = self.board[self.agent_y    , self.agent_x - 1]
      observation[ACTION.RIGHT]  = self.board[self.agent_y    , self.agent_x + 1]
      print("Observation ", observation)
      
      action = self.agent.get_next_action(observation)
      print(action)
      reward = self.act(action)
      print(f"    Reward {reward}")
      print(self)
    
  
grid_world = GridWorld(GRID_HEIGHT, GRID_WIDTH)
print(grid_world)

grid_world.run()

GridWorld(height=5, width=6)
    Agent x=4, y=2
111111
100001
100021
100001
111111
Observation  [0. 0. 0. 1.]
ACTION.UP
    Reward 0
GridWorld(height=5, width=6)
    Agent x=4, y=1
111111
100021
100001
100001
111111
Observation  [0. 1. 0. 1.]
ACTION.UP
    Reward -1
GridWorld(height=5, width=6)
    Agent x=4, y=1
111111
100021
100001
100001
111111
Observation  [0. 1. 0. 1.]
ACTION.LEFT
    Reward 0
GridWorld(height=5, width=6)
    Agent x=3, y=1
111111
100201
100001
100001
111111
Observation  [0. 1. 0. 0.]
ACTION.RIGHT
    Reward 0
GridWorld(height=5, width=6)
    Agent x=4, y=1
111111
100021
100001
100001
111111
Observation  [0. 1. 0. 1.]
ACTION.DOWN
    Reward 0
GridWorld(height=5, width=6)
    Agent x=4, y=2
111111
100001
100021
100001
111111
Observation  [0. 0. 0. 1.]
ACTION.DOWN
    Reward 0
GridWorld(height=5, width=6)
    Agent x=4, y=3
111111
100001
100001
100021
111111
Observation  [1. 0. 0. 1.]
ACTION.UP
    Reward 0
GridWorld(height=5, width=6)
    Agent x=4, y=2
111111
1000