In [11]:
import gym
from gym import spaces
import numpy as np

class FrozenLakeEnv(gym.Env):
    """Deterministic grid-world environment in the style of FrozenLake.

    The agent walks one cell at a time over a grid of single-character cells.
    Moving onto an 'H' cell ends the episode with reward -1, moving onto a
    'G' cell ends it with reward +1, and every other step yields -0.1.

    Observations are the agent's cell index in row-major order
    (``row * grid_size + col``). Actions are 0=left, 1=down, 2=right, 3=up.
    """

    # Cell (row, col) where every episode begins.
    _START_STATE = (0, 0)

    def __init__(self, grid):
        """Store the map and declare the action and observation spaces.

        Args:
            grid: Sequence of rows, indexable as ``grid[row][col]``. The grid
                is treated as square: ``len(grid)`` is used as both the row
                count and the column count.
        """
        super().__init__()
        self.grid = grid
        self.grid_size = len(grid)
        self.action_space = spaces.Discrete(4)  # Left, Down, Right, Up
        self.observation_space = spaces.Discrete(self.grid_size ** 2)
        # Place the agent on the start cell immediately so that step() and
        # render() do not fail with AttributeError when reset() was not
        # called first.
        self.state = self._START_STATE

    def reset(self):
        """Move the agent back to the start cell and return its observation.

        Returns:
            int: Row-major index of the start cell.
        """
        self.state = self._START_STATE
        return self.to_observation(self.state)

    def step(self, action):
        """Apply one action and report the outcome.

        A move that would leave the grid, or an action value other than
        0-3, leaves the agent where it is; the cell it ends up on is still
        evaluated for reward and termination.

        Args:
            action: 0 = left, 1 = down, 2 = right, 3 = up.

        Returns:
            tuple: ``(observation, reward, done, info)`` where ``observation``
            is the row-major cell index, ``reward`` is -1 on 'H', 1 on 'G'
            and -0.1 otherwise, ``done`` is True on 'H' or 'G', and ``info``
            is an empty dict.
        """
        r, c = self.state
        last = self.grid_size - 1  # largest valid row / column index

        # Each branch moves only when the target cell stays inside the grid.
        if action == 0 and c > 0:
            c -= 1
        elif action == 1 and r < last:
            r += 1
        elif action == 2 and c < last:
            c += 1
        elif action == 3 and r > 0:
            r -= 1

        self.state = (r, c)

        # Default outcome: small per-step penalty, episode continues.
        reward = -0.1
        done = False

        # Terminal cells override the default outcome.
        cell = self.grid[r][c]
        if cell == 'H':
            reward = -1
            done = True
        elif cell == 'G':
            reward = 1
            done = True

        return self.to_observation(self.state), reward, done, {}

    def to_observation(self, state):
        """Convert a ``(row, col)`` pair into a row-major cell index.

        Args:
            state: Pair ``(row, col)``.

        Returns:
            ``row * grid_size + col``.
        """
        return state[0] * self.grid_size + state[1]

    def render(self):
        """Print the grid to stdout with the agent's cell shown as 'A'.

        Rows are printed one per line with cells separated by a single
        space, followed by one blank line.
        """
        # Work on a copy so the stored map is never modified.
        grid = np.full((self.grid_size, self.grid_size), ' ')
        for r in range(self.grid_size):
            for c in range(self.grid_size):
                grid[r, c] = self.grid[r][c]
        r, c = self.state
        grid[r, c] = 'A'
        print('\n'.join(' '.join(row) for row in grid))
        print()

def make_frozenlake(grid):
    """Factory helper: build a ``FrozenLakeEnv`` for the given grid map.

    Args:
        grid: Map passed unchanged to the ``FrozenLakeEnv`` constructor.

    Returns:
        The newly constructed ``FrozenLakeEnv`` instance.
    """
    env = FrozenLakeEnv(grid)
    return env


In [18]:
# ---------------------------------------------------------------------------
# Example maps. The environment ends an episode on 'H' (reward -1) and on
# 'G' (reward +1); any other cell just costs the per-step penalty.
# ---------------------------------------------------------------------------

# 3x3 map with a single 'H' cell in the centre.
safe_3x3_grid = [
    ['S', 'F', 'F'],
    ['F', 'H', 'F'],
    ['F', 'F', 'G'],
]

# 3x3 map with three 'H' cells.
unsafe_3x3_grid = [
    ['S', 'H', 'F'],
    ['F', 'F', 'H'],
    ['F', 'H', 'G'],
]

# 5x5 map with four 'H' cells.
safe_5x5_grid = [
    ['S', 'F', 'F', 'F', 'F'],
    ['F', 'F', 'F', 'H', 'F'],
    ['F', 'H', 'F', 'F', 'F'],
    ['F', 'F', 'H', 'F', 'H'],
    ['F', 'F', 'F', 'F', 'G'],
]

# 5x5 map densely populated with 'H' cells.
unsafe_5x5_grid = [
    ['S', 'F', 'H', 'F', 'H'],
    ['H', 'H', 'F', 'H', 'F'],
    ['F', 'H', 'H', 'F', 'H'],
    ['H', 'F', 'H', 'H', 'F'],
    ['F', 'H', 'F', 'H', 'G'],
]

# ---------------------------------------------------------------------------
# Random-policy rollout on the small, mostly safe map. The board is printed
# once after reset and again after every step.
# ---------------------------------------------------------------------------
env = make_frozenlake(safe_3x3_grid)

env.reset()
env.render()

done = False
while True:
    # Draw a uniformly random action from the environment's action space.
    action = env.action_space.sample()
    _, _, done, _ = env.step(action)
    env.render()
    # Stop as soon as the environment reports a terminal cell.
    if done:
        break


A F F
F H F
F F G

A F F
F H F
F F G

A F F
F H F
F F G

A F F
F H F
F F G

S A F
F H F
F F G

S F A
F H F
F F G

S A F
F H F
F F G

S F F
F A F
F F G

