# **FrozenLake Reinforcement Learning Agent**

In [1]:
# Step 1: Import Libraries
import gymnasium as gym
import numpy as np

In [2]:
# Step 2: Build two identically configured FrozenLake environments --
# `env` is stepped during training, `test_env` during evaluation.
env, test_env = (
    gym.make("FrozenLake-v1", is_slippery=False) for _ in range(2)
)

In [3]:
# Step 3: Allocate the Q-table -- one row per state, one column per action,
# every entry starting at zero.
state_size, action_size = env.observation_space.n, env.action_space.n
q_table = np.zeros(shape=(state_size, action_size))

In [4]:
# Step 4: Hyperparameters, grouped by role

# -- run length --
num_episodes = 3_000    # number of training episodes
max_steps = 100         # cap on steps taken within one training episode

# -- Q-learning update --
alpha = 0.8             # learning rate applied to the TD error
gamma = 0.95            # discount applied to the next state's best Q-value

# -- ε-greedy exploration schedule --
epsilon = 1.0           # initial probability of sampling a random action
min_epsilon = 0.05      # floor that epsilon never decays below
epsilon_decay = 0.995   # multiplicative decay applied once per episode

In [5]:
# Step 5: Training loop -- tabular Q-learning driven by an ε-greedy policy.
# Mutates `q_table` and `epsilon` in place, so re-running this cell continues
# from the current table and the already-decayed epsilon.
for episode in range(num_episodes):
    state, _ = env.reset()
    done = False
    total_reward = 0

    for step in range(max_steps):
        # ε-greedy policy: random action with probability epsilon,
        # otherwise the action with the highest current Q-value.
        if np.random.rand() < epsilon:
            action = env.action_space.sample()
        else:
            action = np.argmax(q_table[state])

        next_state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated

        # Q-learning update rule.
        # A terminated transition has no successor to bootstrap from, so its
        # future value is 0. A merely truncated episode still bootstraps,
        # because the cut-off is an artefact of the step limit rather than a
        # property of the state.
        future_value = 0.0 if terminated else np.max(q_table[next_state])
        td_target = reward + gamma * future_value
        q_table[state, action] = q_table[state, action] + alpha * (
            td_target - q_table[state, action]
        )

        state = next_state
        total_reward += reward

        if done:
            break

    # Decay exploration once per episode, never dropping below min_epsilon.
    epsilon = max(min_epsilon, epsilon * epsilon_decay)

    # Progress report: reward of this single (still exploratory) episode.
    if (episode + 1) % 500 == 0:
        print(f"Episode {episode + 1}: Total Reward = {total_reward}")

print("\n Training complete!")

Episode 500: Total Reward = 1
Episode 1000: Total Reward = 1
Episode 1500: Total Reward = 1
Episode 2000: Total Reward = 0
Episode 2500: Total Reward = 0
Episode 3000: Total Reward = 1

 Training complete!


In [6]:
# Step 6: Test the trained agent with a purely greedy rollout (no exploration)
state, _ = test_env.reset()
done = False
total_reward = 0

print("\n Step-by-step agent movement:")
while not done:
    # Always take the action with the highest learned Q-value.
    action = np.argmax(q_table[state])
    next_state, reward, terminated, truncated, _ = test_env.step(action)
    done = terminated or truncated
    print(f"Step: Agent moved to state {next_state}")
    total_reward += reward
    state = next_state

# Report what actually happened instead of always claiming success.
# NOTE(review): assumes a positive reward is only ever paid on the goal tile,
# as described in the Gymnasium FrozenLake documentation -- confirm if the
# environment or its reward schedule is changed.
if total_reward > 0:
    print("\n Agent reached the goal with total reward:", total_reward)
else:
    outcome = "episode terminated early" if terminated else "episode was truncated"
    print(f"\n Agent did not reach the goal ({outcome}); total reward: {total_reward}")


 Step-by-step agent movement:
Step: Agent moved to state 1
Step: Agent moved to state 2
Step: Agent moved to state 6
Step: Agent moved to state 10
Step: Agent moved to state 14
Step: Agent moved to state 15

 Agent reached the goal with total reward: 1
