In [1]:
import numpy as np
import gym

# 1️⃣ Build the FrozenLake environment.
# `is_slippery=False` is passed so that learning is simpler.
env = gym.make("FrozenLake-v1", is_slippery=False)

# 2️⃣ Allocate the Q-table: one row per state, one column per action,
# every entry starting at zero.
state_size = env.observation_space.n  # number of discrete states (16 on the 4x4 grid)
action_size = env.action_space.n  # number of discrete actions (Left, Down, Right, Up)
Q_table = np.zeros(shape=(state_size, action_size))

# 3️⃣ Hyperparameters for training
# Q-learning update: weight given to the new estimate, and discount applied
# to the value of the next state.
learning_rate = 0.8
discount_factor = 0.95

# Number of training episodes (1000 keeps the run fast).
episodes = 1_000

# Exploration schedule: start fully random, multiply by the decay factor
# after each episode, and never go below the minimum.
exploration_prob, exploration_decay, min_exploration = 1.0, 0.995, 0.01

# 4️⃣ Training loop: tabular Q-learning with an epsilon-greedy policy
for episode in range(episodes):
    state, _ = env.reset()
    done = False

    while not done:
        # Choose action: explore with probability `exploration_prob`,
        # otherwise exploit the best action learned so far.
        if np.random.rand() < exploration_prob:
            action = env.action_space.sample()  # Random action
        else:
            action = np.argmax(Q_table[state, :])  # Best learned action

        # Perform action. step() reports two separate end-of-episode flags:
        # `terminated` (the task itself ended) and `truncated` (the episode
        # was cut off, e.g. by a step limit). Either one must end the
        # episode; reading only `terminated` lets the loop keep stepping
        # an environment that has already been cut off.
        new_state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated

        # Update Q-Table. Only bootstrap from the next state when the task
        # has not terminated: a terminal state has no future return. After
        # a truncation the next state is still a normal state, so its
        # estimated value is kept in the target.
        future_value = 0.0 if terminated else np.max(Q_table[new_state, :])
        Q_table[state, action] = (1 - learning_rate) * Q_table[state, action] + \
                                 learning_rate * (reward + discount_factor * future_value)

        state = new_state  # Move to next state

    # Reduce exploration probability once per episode, never below the floor
    exploration_prob = max(min_exploration, exploration_prob * exploration_decay)

print("✅ AI Training Complete!")

# 5️⃣ AI Testing: Play 5 Games
# Rendering only produces output when a render mode is chosen at creation
# time, so evaluation uses its own environment created with
# render_mode="ansi" (render() then returns the grid as text). The map
# settings must stay the same as the ones used for the training `env`.
eval_env = gym.make("FrozenLake-v1", is_slippery=False, render_mode="ansi")
games = 5
wins = 0

for game in range(games):
    state, _ = eval_env.reset()
    done = False
    print(f"\n🎮 **Game {game+1}**")

    while not done:
        action = np.argmax(Q_table[state, :])  # Choose best action
        # Stop on either flag: a greedy policy that never reaches a terminal
        # state would otherwise loop forever once the episode is truncated.
        state, reward, terminated, truncated, _ = eval_env.step(action)
        done = terminated or truncated
        print(eval_env.render())  # Show movement on the grid

    # Count a win only when the episode really terminated with a positive
    # reward (FrozenLake is documented to reward reaching the goal only).
    if terminated and reward > 0:
        wins += 1

# Report what actually happened instead of always claiming success.
if wins == games:
    print("🚀 AI Successfully Crosses the Frozen Lake!")
else:
    print(f"⚠️ AI reached the goal in {wins}/{games} games.")

eval_env.close()
env.close()


ModuleNotFoundError: No module named 'gym'