# Solving the Frozen Lake Problem with Value Iteration

The Frozen Lake problem is a classic grid world problem where an agent
must navigate from a starting point to a goal while avoiding holes
(dangerous states) on a frozen lake. The goal is to find the optimal
policy that maximizes the agent’s chance of reaching the goal safely.


In [None]:
from gym import Env, spaces
import numpy as np

class SimpleGridWorld3x3Env(Env):
    """Deterministic 3x3 grid world with a single agent and a single goal.

    Cells of ``self.grid`` are encoded as 0 (empty), 1 (agent) and 2 (goal).
    The agent starts at row 0, column 1; the goal sits at row 2, column 2.
    Observations are the agent's flattened cell index
    ``row * grid_size + col``.

    Actions: 0 = left, 1 = down, 2 = right, 3 = up.

    Rewards: -1 when the move is blocked by the grid edge (the agent stays
    where it is), 0 for every other move, including the one that reaches
    the goal.
    """

    START_POSITION = (0, 1)
    GOAL_POSITION = (2, 2)

    def __init__(self):
        """Set up the action/observation spaces and the initial grid."""
        super().__init__()
        self.grid_size = 3
        self.action_space = spaces.Discrete(4)  # left, down, right, up
        # One observation per cell: 9 states in a 3x3 grid.
        self.observation_space = spaces.Discrete(self.grid_size * self.grid_size)

        self.grid = self._initial_grid()  # 0: empty, 1: agent, 2: goal
        self.agent_position = self.START_POSITION

    def _initial_grid(self):
        """Return a fresh grid with the agent and goal on their start cells."""
        grid = np.zeros((self.grid_size, self.grid_size), dtype=int)
        grid[self.START_POSITION] = 1
        grid[self.GOAL_POSITION] = 2
        return grid

    def _state_index(self):
        """Return the agent's position as a flattened cell index."""
        row, col = self.agent_position
        return row * self.grid_size + col

    def step(self, action):
        """Apply ``action`` and return ``(state, reward, done, info)``.

        Args:
            action: 0 (left), 1 (down), 2 (right) or 3 (up).

        Returns:
            A 4-tuple of the agent's flattened cell index, the reward
            (-1 if the grid edge blocked the move, otherwise 0), whether
            the agent is on the goal cell, and an empty info dict.

        Raises:
            ValueError: If ``action`` is not one of the four known actions.
        """
        row, col = self.agent_position
        last = self.grid_size - 1

        # Targets are clamped to the grid, so a move into an edge maps back
        # onto the agent's current cell.
        if action == 0:  # left
            new_position = (row, max(col - 1, 0))
        elif action == 1:  # down
            new_position = (min(row + 1, last), col)
        elif action == 2:  # right
            new_position = (row, min(col + 1, last))
        elif action == 3:  # up
            new_position = (max(row - 1, 0), col)
        else:
            raise ValueError(f"Invalid action {action!r}; expected 0, 1, 2 or 3")

        if new_position == self.agent_position:
            # Bumped into the edge: nothing moves and the step is penalised.
            reward = -1
        else:
            reward = 0
            previous = self.agent_position
            # Only an edge bump blocks a move. The goal cell must be
            # enterable, otherwise ``done`` could never become True.
            # Restore the goal marker if the agent steps off the goal.
            self.grid[previous] = 2 if previous == self.GOAL_POSITION else 0
            self.grid[new_position] = 1
            self.agent_position = new_position

        done = self.agent_position == self.GOAL_POSITION
        return self._state_index(), reward, done, {}

    def reset(self):
        """Put the agent back on its start cell and return the start state."""
        self.grid = self._initial_grid()
        self.agent_position = self.START_POSITION
        return self._state_index()

    def render(self, mode='human'):
        """Print the grid: ``A`` = agent, ``G`` = goal, ``_`` = empty.

        ``mode`` is accepted for API compatibility but is not used.
        """
        for i in range(self.grid_size):
            for j in range(self.grid_size):
                if (i, j) == self.agent_position:
                    print("A", end=" ")  # Agent
                elif self.grid[i, j] == 2:
                    print("G", end=" ")  # Goal
                else:
                    print("_", end=" ")  # Empty
            print()  # New line for next row

# Build the environment and drive it with a handful of random moves,
# printing the board and the step result after each one.
env = SimpleGridWorld3x3Env()

for _ in range(5):
    action = env.action_space.sample()  # Randomly sampled action
    state, reward, done, _ = env.step(action)
    env.render()
    step_report = (
        ("Action:", action),
        ("State:", state),
        ("Reward:", reward),
        ("Done:", done),
    )
    for label, value in step_report:
        print(label, value)
    print()  # Blank line between steps


In [None]:
# Instantiate a fresh environment, rebinding the global `env`.
env = SimpleGridWorld3x3Env()

In [None]:
# Put the agent back on its start cell (0, 1); returns the start state index (0 * 3 + 1 = 1).
env.reset()

In [None]:
# Print the 3x3 grid: "A" marks the agent, "G" the goal, "_" an empty cell.
env.render()

In [None]:
# Take action 1 (down, clamped at the bottom row); returns (state, reward, done, info).
env.step(1)

In [None]:
# Same output as env.render(): 'human' is the default mode and render() does not read it.
env.render(mode='human')

In [1]:
# Roll out a random policy (actions come from action_space.sample(), not from
# a learned policy) and accumulate the reward over all episodes.
total_reward = 0
num_episodes = 100
# Cap the episode length so an episode that never reaches the goal still ends.
max_steps_per_episode = 100

for episode in range(num_episodes):
    state = env.reset()
    terminated = False
    truncated = False
    steps_taken = 0
    env.render('human')
    while not (terminated or truncated):
        action = env.action_space.sample()
        new_state, reward, done, info = env.step(action)
        total_reward += reward
        steps_taken += 1
        # Report the reward of this step, not the running total.
        print(f"Episode {episode+1} We are in State: {new_state} getting reward: {reward} after doing action:{action}")

        state = new_state
        terminated = done
        truncated = steps_taken >= max_steps_per_episode

print(f"Total reward over {num_episodes} episodes: {total_reward}")

NameError: name 'env' is not defined