In [5]:
import gymnasium as gym
import numpy as np


In [3]:
# Smoke test: drive the lander with uniformly random actions while rendering.
env = gym.make("LunarLander-v2", render_mode="human")
try:
    # Seed only the first reset so the initial state is repeatable; later
    # resets continue from the environment's own RNG stream.
    observation, info = env.reset(seed=2024)

    for _ in range(1000):
        action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)

        # Start a fresh episode as soon as the current one ends.
        if terminated or truncated:
            observation, info = env.reset()
finally:
    # Release the render window even if the cell is interrupted or a step raises.
    env.close()

2024-09-17 00:39:49.575 python[31071:859018] +[IMKClient subclass]: chose IMKClient_Legacy
2024-09-17 00:39:49.575 python[31071:859018] +[IMKInputSession subclass]: chose IMKInputSession_Legacy


In [4]:
# Environment used by the Q-learning cells below (rendered in a window).
env = gym.make("LunarLander-v2", render_mode="human")

# --- Q-learning update ---
alpha = 0.1   # learning rate: weight of the new TD target in the Q update
gamma = 0.99  # discount factor applied to the best next-state value

# --- Epsilon-greedy exploration ---
epsilon = 1.0          # initial probability of taking a random action
epsilon_decay = 0.995  # multiplicative decay applied once per episode
epsilon_min = 0.01     # decay stops once epsilon is no longer above this

# --- Training budget ---
episodes = 1_000   # number of training episodes
max_steps = 1_000  # cap on steps within a single episode

In [6]:
# One set of 10 evenly spaced bin edges over [-1, 1] per observation dimension.
state_bins = [
    np.linspace(-1.0, 1.0, 10)
    for _ in range(env.observation_space.shape[0])
]

# np.digitize can return 0..len(edges), hence len(edges) + 1 slots per dimension.
n_bins = tuple(len(edges) + 1 for edges in state_bins)

# Q-table: one axis per discretized observation dimension plus a trailing action axis.
# NOTE(review): the table is dense; with 11 slots per dimension its size grows as
# 11 ** n_dims * n_actions -- confirm it fits in memory for this environment.
q_table = np.zeros((*n_bins, env.action_space.n))

def discretize_state(state):
    """Map a continuous observation to a tuple of per-dimension bin indices.

    Each component ``state[i]`` is located among the edges in the module-level
    ``state_bins[i]`` with ``np.digitize``, so values below the first edge map
    to 0 and values at or above the last edge map to ``len(state_bins[i])``.

    Args:
        state: Sequence of numeric observation components.

    Returns:
        Tuple with one bin index per component, usable directly as an index
        into ``q_table``.
    """
    return tuple(
        np.digitize(value, state_bins[dim]) for dim, value in enumerate(state)
    )

In [7]:
# Tabular Q-learning with an epsilon-greedy behaviour policy.
try:
    for episode in range(episodes):
        # Seed only the very first reset. Re-seeding on every episode would
        # restart the environment's RNG and replay the same initial conditions
        # each time, so the agent would never see varied starts.
        state, _ = env.reset(seed=2024 if episode == 0 else None)
        state = discretize_state(state)
        total_reward = 0

        for step in range(max_steps):
            # Explore with probability epsilon, otherwise act greedily on Q.
            if np.random.uniform(0, 1) < epsilon:
                action = env.action_space.sample()
            else:
                action = np.argmax(q_table[state])

            next_state, reward, terminated, truncated, _ = env.step(action)
            next_state = discretize_state(next_state)

            old_value = q_table[state][action]
            # A terminal state has no future return, so do not bootstrap from
            # it. Truncation (time limit) is not a true terminal state, so the
            # bootstrap term is kept in that case.
            next_max = 0.0 if terminated else np.max(q_table[next_state])

            # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
            new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
            q_table[state][action] = new_value

            state = next_state
            total_reward += reward

            if terminated or truncated:
                break

        # Decay exploration once per episode, clamped so it never drops
        # below the configured floor.
        epsilon = max(epsilon_min, epsilon * epsilon_decay)

        print(f"Episode {episode + 1 }, Total Reward: {total_reward}")
finally:
    # Always release the environment (and its render window), even when
    # training is interrupted part-way through.
    env.close()

Episode 1, Total Reward: -304.0727702611291
Episode 2, Total Reward: -90.66231707966797
Episode 3, Total Reward: -302.8986187053682
Episode 4, Total Reward: -246.71466402567629
Episode 5, Total Reward: -279.96059594209214
Episode 6, Total Reward: -233.62314772093882
Episode 7, Total Reward: -140.11704381040033
Episode 8, Total Reward: -334.5609340096206
Episode 9, Total Reward: -143.5386580618478
Episode 10, Total Reward: -350.21896736385867
Episode 11, Total Reward: -170.2070301187738
Episode 12, Total Reward: -148.13195535269708
Episode 13, Total Reward: -109.01201488864034
Episode 14, Total Reward: -196.1825015179188
Episode 15, Total Reward: -300.5628584251218
Episode 16, Total Reward: -246.5707414645205
Episode 17, Total Reward: -228.3795193505042
Episode 18, Total Reward: -193.62586605106742
Episode 19, Total Reward: -363.82086648905613
Episode 20, Total Reward: -147.3674850946677
Episode 21, Total Reward: -324.88043136398994
Episode 22, Total Reward: -158.24160541331884
Episode 