In [52]:
import gym
from gym import spaces
import numpy as np

class FrozenLakeEnv(gym.Env):
    """Slippery grid world in the style of FrozenLake.

    The grid is a sequence of equally long rows of single-character cells:

    * ``'S'`` -- the start cell (exactly one is required),
    * ``'X'`` -- ends the episode with reward -5,
    * ``'G'`` -- ends the episode with reward +10,
    * anything else -- a plain cell; stepping onto it costs -0.1.

    Coordinates are ``(x, y)`` with the top-left cell at ``(0, 0)``.
    Observations are the row-major cell index ``y * grid_width + x``.

    Args:
        grid: Sequence of rows describing the map.
        slip_probability: Probability of slipping to *one* side, so the
            total chance of not moving in the requested direction is twice
            this value. Must lie in ``[0, 1/2]``.
        verbose: When true (the default), ``step`` prints the action that
            was actually executed after slipping.

    Raises:
        ValueError: If the grid is empty or ragged, does not contain
            exactly one ``'S'`` cell, or ``slip_probability`` is out of
            range.
    """

    # Human-readable names, indexed by action id.
    ACTION_NAMES = ("left", "down", "right", "up")

    # For each requested action: [the action itself, slip to one side,
    # slip to the other side]. The first entry must stay the no-slip case
    # because it is paired with probability 1 - 2 * slip_probability.
    _SLIP_OUTCOMES = {
        0: [0, 3, 1],  # Left
        1: [1, 0, 2],  # Down
        2: [2, 1, 3],  # Right
        3: [3, 2, 0],  # Up
    }

    # (dx, dy) displacement of every action; y grows downwards.
    _MOVES = {0: (-1, 0), 1: (0, 1), 2: (1, 0), 3: (0, -1)}

    def __init__(self, grid, slip_probability=1/3, *, verbose=True):
        super().__init__()

        # len() instead of truthiness so that numpy arrays are accepted too.
        if len(grid) == 0 or len(grid[0]) == 0:
            raise ValueError("grid must contain at least one cell")
        self.grid = grid
        self.grid_height = len(grid)
        self.grid_width = len(grid[0])
        if any(len(row) != self.grid_width for row in grid):
            raise ValueError("all grid rows must have the same length")

        # Locate the start cell with plain Python ints so that no numpy
        # scalars leak into the state, the observations or the count keys.
        starts = [
            (x, y)
            for y, row in enumerate(grid)
            for x, cell in enumerate(row)
            if cell == 'S'
        ]
        if len(starts) != 1:
            raise ValueError(
                "grid must contain exactly one 'S' cell, found %d" % len(starts)
            )
        self.start_x, self.start_y = starts[0]

        # Explicit check instead of `assert`, which is stripped under -O.
        if not 0 <= slip_probability <= 1/2:
            raise ValueError("slip_probability must be in the range [0, 1/2]")
        self.slip_probability = slip_probability
        self.verbose = verbose

        self.action_space = spaces.Discrete(4)  # Left, Down, Right, Up
        self.observation_space = spaces.Discrete(self.grid_height * self.grid_width)

        # Number of times each action was requested in each cell, keyed by
        # (x, y) and then by action id. Counts accumulate across episodes;
        # reset() does not clear them.
        self.state_action_count = {
            (x, y): {action: 0 for action in range(4)}
            for x in range(self.grid_width)
            for y in range(self.grid_height)
        }

        # Start on the 'S' cell so step()/render() work even before the
        # first reset().
        self.state = (self.start_x, self.start_y)

    def reset(self):
        """Move the agent back to the start cell and return its observation."""
        # Top left corner is 0, 0
        self.state = (self.start_x, self.start_y)
        return self.to_observation(self.state)

    def step(self, action):
        """Apply ``action`` (subject to slipping) and advance one time step.

        The requested action is counted in ``state_action_count``. The
        executed action is then drawn from the requested one and its two
        slip alternatives. A move that would leave the grid keeps the agent
        in place.

        Args:
            action: Requested action id (0 left, 1 down, 2 right, 3 up).

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``info`` is
            always an empty dict.
        """
        self.state_action_count[self.state][action] += 1
        x, y = self.state

        # Choose the executed action according to self.slip_probability
        p = self.slip_probability
        action = int(np.random.choice(self._SLIP_OUTCOMES[action], p=[1-2*p, p, p]))
        if self.verbose:
            print("Actual action", self.ACTION_NAMES[action])

        # Move in the chosen direction if the target is within bounds
        dx, dy = self._MOVES[action]
        new_x, new_y = x + dx, y + dy
        if 0 <= new_x < self.grid_width and 0 <= new_y < self.grid_height:
            x, y = new_x, new_y
        self.state = (x, y)

        # Reward and termination depend only on the cell that was reached
        cell = self.grid[y][x]
        if cell == 'X':
            reward, done = -5, True
        elif cell == 'G':
            reward, done = 10, True
        else:
            reward, done = -0.1, False

        return self.to_observation(self.state), reward, done, {}

    def to_observation(self, state):
        """Return the row-major cell index of an ``(x, y)`` state as an int."""
        x, y = state
        return int(y * self.grid_width + x)

    def render(self):
        """Print the grid with the agent's cell shown as ``'A'``."""
        rows = [[str(cell) for cell in row] for row in self.grid]
        x, y = self.state
        rows[y][x] = 'A'
        print('\n'.join(' '.join(row) for row in rows))
        print()

def make_frozenlake(grid):
    """Build and return a ``FrozenLakeEnv`` for the given grid layout."""
    environment = FrozenLakeEnv(grid)
    return environment


In [53]:
# Grid layouts for FrozenLakeEnv. Every grid is a list of rows and every row
# a list of one-character cells:
#   'S' start cell, 'G' terminal cell with reward +10,
#   'X' terminal cell with reward -5, '.' plain cell.
# Each layout is written as one string per row and expanded into cells.

# 3x3 without any 'X' cell.
no_aleatoric_uncertainty_3x3 = [list(row) for row in (
    ".S.",
    "...",
    "..G",
)]

# 3x3 with a single 'X' directly to the right of the start.
safe_3x3 = [list(row) for row in (
    "SX.",
    "...",
    "..G",
)]

# 4 rows x 3 columns: the middle column is 'X' except in the bottom row,
# so start and goal are only connected through the bottom row.
long_safe_4x3 = [list(row) for row in (
    "SXG",
    ".X.",
    ".X.",
    "...",
)]

# Same as above, but the middle column also has a gap in the second row,
# which lies between two 'X' cells.
short_unsafe_long_safe_4x3 = [list(row) for row in (
    "SXG",
    "...",
    ".X.",
    "...",
)]

# 3 rows x 4 columns: the third column is open only in the middle row,
# between two 'X' cells.
unsafe_path_safe_area_3x4 = [list(row) for row in (
    "S.X.",
    "....",
    "..XG",
)]

# Demo: run a single episode on the 3x3 grid without 'X' cells, choosing
# every action by sampling the action space, and print the board after
# each step.
env = make_frozenlake(no_aleatoric_uncertainty_3x3)

# Show the initial board with the agent on the start cell.
env.reset()
env.render()

direction_names = ["left", "down", "right", "up"]
while True:
    action = env.action_space.sample()
    print("Trying to go", direction_names[action])
    # step() returns (observation, reward, done, info); only `done` is needed.
    done = env.step(action)[2]
    env.render()
    if done:
        break

# How often each action was requested in each (x, y) cell during the run.
print("State action counts:", env.state_action_count)


. A .
. . .
. . G

Trying to go up
Actual action right
. S A
. . .
. . G

Trying to go up
Actual action right
. S A
. . .
. . G

Trying to go right
Actual action down
. S .
. . A
. . G

Trying to go left
Actual action down
. S .
. . .
. . A

State action counts: {(0, 0): {0: 0, 1: 0, 2: 0, 3: 0}, (0, 1): {0: 0, 1: 0, 2: 0, 3: 0}, (0, 2): {0: 0, 1: 0, 2: 0, 3: 0}, (1, 0): {0: 0, 1: 0, 2: 0, 3: 1}, (1, 1): {0: 0, 1: 0, 2: 0, 3: 0}, (1, 2): {0: 0, 1: 0, 2: 0, 3: 0}, (2, 0): {0: 0, 1: 0, 2: 1, 3: 1}, (2, 1): {0: 1, 1: 0, 2: 0, 3: 0}, (2, 2): {0: 0, 1: 0, 2: 0, 3: 0}}
