In [12]:
import gym
from gym import spaces
import numpy as np

class FrozenLakeEnv(gym.Env):
    """Grid-world "frozen lake" environment with slippery movement.

    The agent starts in the top-left cell ``(0, 0)`` and moves on a rectangular
    grid of single-character cells. Stepping onto ``'H'`` ends the episode with
    reward -5, stepping onto ``'G'`` ends it with reward 10, and every other
    step yields reward -0.1.

    Actions are ``0`` = left, ``1`` = down, ``2`` = right, ``3`` = up. The
    executed direction is sampled uniformly from a position-dependent slip
    table (see ``_slip_table``), so it may differ from the requested one.

    Args:
        grid: Non-empty sequence of equally long rows; ``grid[r][c]`` is the
            marker of the cell in row ``r``, column ``c``. The grid does not
            have to be square.
        verbose: When true (the default), ``step`` prints the direction that
            was actually executed.

    Raises:
        ValueError: If ``grid`` has no cells or its rows differ in length.
    """

    # Display names for the four actions, indexed by action id.
    _ACTION_NAMES = ("left", "down", "right", "up")

    def __init__(self, grid, verbose=True):
        super(FrozenLakeEnv, self).__init__()
        if len(grid) == 0 or len(grid[0]) == 0:
            raise ValueError("grid must contain at least one cell")
        if any(len(row) != len(grid[0]) for row in grid):
            raise ValueError("all grid rows must have the same length")

        self.grid = grid
        self.n_rows = len(grid)
        self.n_cols = len(grid[0])
        # Row count under its historical name, kept for existing callers.
        # Internally rows and columns are tracked separately so that
        # non-square grids index correctly.
        self.grid_size = self.n_rows
        self.verbose = verbose
        self.action_space = spaces.Discrete(4)  # Left, Down, Right, Up
        self.observation_space = spaces.Discrete(self.n_rows * self.n_cols)
        # Start position; set here as well so step()/render() work even if
        # reset() has not been called yet.
        self.state = (0, 0)

    def reset(self):
        """Move the agent back to the top-left cell and return its observation."""
        self.state = (0, 0)
        return self.to_observation(self.state)

    def _slip_table(self, r, c):
        """Return ``{requested action: [candidate executed actions]}`` for cell (r, c).

        Interior cells allow the requested direction plus its two
        perpendicular directions. Border and corner cells use hand-written
        tables.

        NOTE(review): several border/corner entries do not contain the
        requested direction even when that move is in bounds (e.g. "down" in
        the top-left corner always becomes "right"). The tables are kept
        exactly as originally written -- confirm this is intended.
        """
        last_row = self.n_rows - 1
        last_col = self.n_cols - 1

        if r == 0:  # Top edge
            if c == 0:  # Top-left corner
                return {0: [1], 1: [2], 2: [1], 3: [2]}
            if c == last_col:  # Top-right corner
                return {0: [1], 1: [0], 2: [1], 3: [0]}
            return {0: [1, 2], 1: [0, 2], 2: [1, 0], 3: [0, 2]}
        if r == last_row:  # Bottom edge
            if c == 0:  # Bottom-left corner
                return {0: [3], 1: [2], 2: [3], 3: [2]}
            if c == last_col:  # Bottom-right corner
                return {0: [3], 1: [0], 2: [3], 3: [0]}
            return {0: [3, 2], 1: [0, 2], 2: [3, 0], 3: [0, 2]}
        if c == 0:  # Left edge
            return {0: [1, 3], 1: [2, 3], 2: [1, 3], 3: [1, 2]}
        if c == last_col:  # Right edge
            return {0: [1, 3], 1: [0, 3], 2: [1, 3], 3: [1, 0]}
        return {
            0: [0, 3, 1],  # Left
            1: [1, 0, 2],  # Down
            2: [2, 1, 3],  # Right
            3: [3, 2, 0]   # Up
        }

    def step(self, action):
        """Apply ``action`` (subject to slipping) and advance one time step.

        Args:
            action: Requested direction, an integer in ``0..3``.

        Returns:
            A tuple ``(observation, reward, done, info)`` where ``observation``
            is the flattened index of the new cell, ``reward`` is -0.1 for an
            ordinary step, -5 for a hole and 10 for the goal, ``done`` is true
            when a hole or the goal was reached, and ``info`` is an empty dict.

        Raises:
            KeyError: If ``action`` is not one of the four valid actions.
        """
        r, c = self.state

        # Choose the executed action uniformly among the candidates.
        action = np.random.choice(self._slip_table(r, c)[action])
        if self.verbose:
            print("Chosen action", self._ACTION_NAMES[action])

        # Move in the chosen direction if within grid bounds; otherwise stay.
        if action == 0 and c > 0:
            c -= 1
        elif action == 1 and r < self.n_rows - 1:
            r += 1
        elif action == 2 and c < self.n_cols - 1:
            c += 1
        elif action == 3 and r > 0:
            r -= 1

        self.state = (r, c)
        reward = -0.1
        done = False

        # Holes and the goal are terminal.
        cell = self.grid[r][c]
        if cell == 'H':
            reward = -5
            done = True
        elif cell == 'G':
            reward = 10
            done = True

        return self.to_observation(self.state), reward, done, {}

    def to_observation(self, state):
        """Flatten a ``(row, col)`` pair into a row-major cell index."""
        return state[0] * self.n_cols + state[1]

    def render(self):
        """Print the grid with the agent's current cell shown as ``'A'``."""
        grid = np.full((self.n_rows, self.n_cols), ' ')
        for r in range(self.n_rows):
            for c in range(self.n_cols):
                grid[r, c] = self.grid[r][c]
        r, c = self.state
        grid[r, c] = 'A'
        print('\n'.join(' '.join(row) for row in grid))
        print()

def make_frozenlake(grid):
    """Create a ``FrozenLakeEnv`` for the given grid layout.

    Args:
        grid: Grid layout passed unchanged to ``FrozenLakeEnv``.

    Returns:
        The newly constructed environment.
    """
    environment = FrozenLakeEnv(grid)
    return environment


In [13]:
# Grid layouts. The agent always starts in the top-left cell ('S'); reaching
# 'H' or 'G' ends the episode, every other cell is an ordinary cell.

# 3x3 grid without any holes.
no_aleatoric_uncertainty_3x3 = [
    ['S', 'F', 'F'],
    ['F', 'F', 'F'],
    ['F', 'F', 'G']
]

# 3x3 grid with a single hole right of the start.
safe_3x3 = [
    ['S', 'H', 'F'],
    ['F', 'F', 'F'],
    ['F', 'F', 'G']
]

# 4 rows x 3 columns: a column of holes separates start and goal, leaving
# only the path through the bottom row.
long_safe_4x3 = [
    ['S', 'H', 'G'],
    ['F', 'H', 'F'],
    ['F', 'H', 'F'],
    ['F', 'F', 'F']
]

# 4 rows x 3 columns: same as above but with a gap in the hole column in the
# second row, which gives a shorter route that passes between two holes.
short_unsafe_long_safe_4x3 = [
    ['S', 'H', 'G'],
    ['F', 'F', 'F'],
    ['F', 'H', 'F'],
    ['F', 'F', 'F']
]

# 3 rows x 4 columns: holes in the third column of the top and bottom rows.
unsafe_path_safe_area_3x4 = [
    ['S', 'F', 'H', 'F'],
    ['F', 'F', 'F', 'F'],
    ['F', 'F', 'H', 'G']
]

env = make_frozenlake(no_aleatoric_uncertainty_3x3)

# Fix both sources of randomness so this rollout is reproducible on re-run:
# the global NumPy RNG drives the slip sampling inside env.step(), and the
# action space has its own RNG used by env.action_space.sample().
SEED = 0
np.random.seed(SEED)
env.action_space.seed(SEED)

env.reset()
env.render()

# Random-policy rollout until a terminal cell (hole or goal) is reached.
done = False
while not done:
    action = env.action_space.sample()
    print("Trying to go", ["left", "down", "right", "up"][action])
    _, _, done, _ = env.step(action)
    env.render()


A F F
F F F
F F G

Trying to go up
Chosen action right
S A F
F F F
F F G

Trying to go left
Chosen action down
S F F
F A F
F F G

Trying to go left
Chosen action left
S F F
A F F
F F G

Trying to go right
Chosen action up
A F F
F F F
F F G

Trying to go right
Chosen action down
S F F
A F F
F F G

Trying to go up
Chosen action down
S F F
F F F
A F G

Trying to go down
Chosen action right
S F F
F F F
F A G

Trying to go down
Chosen action right
S F F
F F F
F F A

