In [None]:
# Install versi stabil untuk numpy dan gym
!pip install numpy==1.23.5 gym==0.26.2 --quiet


In [None]:
import gym
import numpy as np
import random
import matplotlib.pyplot as plt

# Seed the random sources used for action selection during training so a
# fresh "Restart & Run All" reproduces the same run.
SEED = 42
random.seed(SEED)  # drives the epsilon check (random.uniform)

# Create the environment
env = gym.make("FrozenLake-v1", is_slippery=False)
env.action_space.seed(SEED)  # drives env.action_space.sample()

# Q-table: one row per state, one column per action, initialised to zero
q_table = np.zeros((env.observation_space.n, env.action_space.n))

# Hyperparameters
alpha = 0.8      # Learning rate
gamma = 0.95     # Discount factor
epsilon = 0.1    # Exploration rate (probability of taking a random action)
episodes = 1000  # Number of training episodes

print(f"Jumlah State: {env.observation_space.n}")
print(f"Jumlah Aksi: {env.action_space.n}")


In [None]:
# Train the agent with tabular Q-learning and an epsilon-greedy policy.
# `rewards` collects the summed reward of every episode.
rewards = []

for episode in range(episodes):
    # Depending on the gym release, reset() yields either (observation, info)
    # or the bare observation. Inspect the result instead of catching an
    # unpacking error, so genuine failures are not silently swallowed.
    # (FrozenLake observations are plain integers, never tuples.)
    reset_result = env.reset()
    state = reset_result[0] if isinstance(reset_result, tuple) else reset_result

    total_rewards = 0
    done = False

    while not done:
        # Choose an action: explore with probability epsilon, otherwise exploit
        if random.uniform(0, 1) < epsilon:
            action = env.action_space.sample()
        else:
            action = np.argmax(q_table[state])

        # Call step() exactly once per iteration. Retrying the call inside an
        # exception handler would advance the environment a second time and
        # learn from the wrong transition.
        step_result = env.step(action)
        if len(step_result) == 5:
            next_state, reward, terminated, truncated, _ = step_result
            done = terminated or truncated
        else:
            # Legacy 4-tuple API: (observation, reward, done, info)
            next_state, reward, done, _ = step_result

        # Q-learning update:
        #   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        old_value = q_table[state, action]
        next_max = np.max(q_table[next_state])
        q_table[state, action] = old_value + alpha * (reward + gamma * next_max - old_value)

        state = next_state
        total_rewards += reward

    rewards.append(total_rewards)


In [None]:
# Report the learned action values and the mean reward per episode.
print("Q-table setelah training:")
print(q_table)

print(f"Rata-rata reward setelah training: {sum(rewards)/episodes}")

# Split the per-episode rewards into 10 consecutive groups and total each one.
chunks = np.array_split(rewards, 10)
avg_rewards = [chunk.sum() for chunk in chunks]

# Plot the per-group totals to show how performance evolves during training.
fig, ax = plt.subplots()
ax.plot(avg_rewards)
ax.set(
    xlabel="Episode Group (x100)",
    ylabel="Total Reward",
    title="Performa Agent per 100 Episode",
)
ax.grid(True)
plt.show()


In [None]:
# Roll out the greedy policy from the learned Q-table and print every frame.
#
# A render mode has to be chosen when the environment is created (gym >= 0.26);
# the training `env` was made without one, so its render() draws nothing.
# Use a dedicated text-rendering environment for the demonstration instead.
eval_env = gym.make("FrozenLake-v1", is_slippery=False, render_mode="ansi")

# reset() yields (observation, info) on current gym, a bare observation on
# older releases; inspect the result rather than catching an unpacking error.
reset_result = eval_env.reset()
state = reset_result[0] if isinstance(reset_result, tuple) else reset_result

print(eval_env.render())

done = False
while not done:
    action = np.argmax(q_table[state])

    # Step exactly once per iteration; retrying inside an exception handler
    # would apply the action to the environment twice.
    step_result = eval_env.step(action)
    if len(step_result) == 5:
        state, reward, terminated, truncated, _ = step_result
        done = terminated or truncated
    else:
        # Legacy 4-tuple API: (observation, reward, done, info)
        state, reward, done, _ = step_result

    print(eval_env.render())

eval_env.close()
