In [1]:
import gym
import numpy as np

class QLearningAgent:
    def __init__(self, env, learning_rate=0.1, discount_factor=0.99, epsilon=0.1):
        self.env = env
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.q_table = np.zeros((env.observation_space.n, env.action_space.n))

    def choose_action(self, state):
        if np.random.uniform(0, 1) < self.epsilon:
            return self.env.action_space.sample()  # Explore action space
        else:
            return np.argmax(self.q_table[state, :])  # Exploit learned values

    def update_q_table(self, state, action, reward, next_state):
        old_value = self.q_table[state, action]
        next_max = np.max(self.q_table[next_state, :])
        new_value = (1 - self.learning_rate) * old_value + self.learning_rate * (reward + self.discount_factor * next_max)
        self.q_table[state, action] = new_value

    def train(self, num_episodes=1000):
        rewards = []
        for episode in range(num_episodes):
            state = self.env.reset()
            total_reward = 0
            done = False
            while not done:
                action = self.choose_action(state)
                next_state, reward, done, _ = self.env.step(action)
                self.update_q_table(state, action, reward, next_state)
                total_reward += reward
                state = next_state
            rewards.append(total_reward)
            if (episode + 1) % 100 == 0:
                print(f"Episode {episode + 1}/{num_episodes}, Total Reward: {total_reward}")
        return rewards

# Setup the FrozenLake environment
env = gym.make('FrozenLake-v0')

# Initialize Q-Learning Agent
ql_agent = QLearningAgent(env)

# Train the agent
episode_rewards = ql_agent.train(num_episodes=1000)

# Evaluate the effectiveness of Q-Learning
avg_reward = np.mean(episode_rewards)
print(f"Average Reward: {avg_reward}")

# Close the environment
env.close()


  logger.warn(


DeprecatedEnv: Environment version v0 for `FrozenLake` is deprecated. Please use `FrozenLake-v1` instead.