<a href="https://colab.research.google.com/github/alex-smith-uwec/AI_Spring2025/blob/main/FrozenLake.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import gym

# Load the FrozenLake environment

env = gym.make(
    "FrozenLake-v1",
    desc=None,
    map_name="4x4",
    is_slippery=True,
    # NOTE(review): `new_step_api` is a version-specific gym keyword —
    # confirm it is accepted by the gym release this notebook runs on.
    new_step_api=True,
)

# Sizes of the discrete state and action spaces.
n_states = env.observation_space.n
n_actions = env.action_space.n

gamma = 0.95  # Discount factor
theta = 1e-6  # Convergence threshold

# One value estimate and one action index per state, all starting at zero.
V = np.zeros(n_states, dtype=float)
policy = np.zeros(n_states, dtype=int)

# Value Iteration Algorithm
def value_iteration():
    """Run synchronous value-iteration sweeps until the value table settles.

    Every sweep reads the transition table ``env.unwrapped.P`` and the value
    table from the previous sweep, computes the expected return of each
    action in each state, and writes the best return into a fresh table.
    The matching action index is stored in the global ``policy`` and the
    global ``V`` is rebound to the fresh table at the end of the sweep.
    Sweeping stops once the largest per-state change is below ``theta``.
    """
    global V, policy
    transitions = env.unwrapped.P
    converged = False
    while not converged:
        largest_change = 0
        updated = V.copy()
        for state in range(n_states):
            action_values = np.zeros(n_actions)
            for action in range(n_actions):
                for p, successor, r, terminal in transitions[state][action]:
                    # Outcomes flagged as done contribute only their reward;
                    # all others also bootstrap from the previous sweep.
                    target = r if terminal else r + gamma * V[successor]
                    action_values[action] += p * target
            greedy = np.argmax(action_values)
            updated[state] = action_values[greedy]
            largest_change = max(largest_change, abs(V[state] - updated[state]))
            policy[state] = greedy
        V = updated
        converged = largest_change < theta

# Extract optimal policy using the converged value function
def extract_policy():
    """Store in ``policy`` the greedy action for every state under ``V``.

    For each state the expected return of every action is computed from the
    transition table ``env.unwrapped.P`` and the current global value table,
    and the index of the highest return is written into the global
    ``policy`` array in place.
    """
    transitions = env.unwrapped.P

    def expected_return(outcomes):
        # Probability-weighted return over (prob, next_state, reward, done)
        # tuples; outcomes flagged as done add only their reward.
        total = 0.0
        for p, successor, r, terminal in outcomes:
            total += (p * r) if terminal else (p * (r + gamma * V[successor]))
        return total

    for state in range(n_states):
        action_values = [
            expected_return(transitions[state][action])
            for action in range(n_actions)
        ]
        policy[state] = np.argmax(action_values)

# Display the policy with start, goal, and holes
def display_policy():
    """Print the current policy as a grid laid over the FrozenLake map.

    Start ('S'), goal ('G') and hole ('H') tiles are printed as their map
    letter; every other tile shows the arrow for the action stored in the
    global ``policy`` for that state.

    The number of rows and columns is read from the map array itself rather
    than derived from the square root of the state count, so rectangular
    layouts are rendered with the right shape and state indices.  For square
    maps the printed output is unchanged.
    """
    # Arrow shown for each action index, in list order (per the Gym
    # FrozenLake docs: 0=left, 1=down, 2=right, 3=up).
    arrows = ['←', '↓', '→', '↑']
    special_tiles = {'S', 'G', 'H'}

    # 2-D array of single-byte tile codes describing the lake layout.
    lake_map = env.unwrapped.desc
    n_rows, n_cols = lake_map.shape
    separator = "-" * (n_cols * 4 + 1)

    print("\nOptimal Policy:\n")
    for i in range(n_rows):
        cells = []
        for j in range(n_cols):
            tile = lake_map[i, j].decode("utf-8")
            if tile in special_tiles:
                cells.append(tile)
            else:
                # States are numbered row by row across the grid.
                cells.append(arrows[policy[i * n_cols + j]])
        print("".join(f"| {cell} " for cell in cells) + "|")
        print(separator)

# Run the value iteration algorithm
# Sweep the value table until it converges (this also fills `policy` as it goes).
value_iteration()
# Recompute the greedy policy from the final value table.
extract_policy()
# Print the policy as a grid over the lake map.
display_policy()

# Reset the environment
# NOTE(review): as the last expression of the notebook cell, the value returned
# by reset() is what the cell displays — presumably the start state; confirm.
env.reset()



Optimal Policy:

| S | ↑ | ← | ↑ |
-----------------
| ← | H | ← | H |
-----------------
| ↑ | ↓ | ← | H |
-----------------
| H | → | ↓ | G |
-----------------


0