# Q-Learning pada FrozenLake-v1 (8x8, is_slippery=False)

In [None]:

!pip install gym==0.25.2 numpy==1.23.5 --quiet


In [None]:
import gym
import numpy as np
import random
from tqdm import tqdm

In [None]:
# Create the environment: the 8x8 FrozenLake map with slipping disabled.
env = gym.make(
    "FrozenLake-v1",
    map_name="8x8",
    is_slippery=False,
)

In [None]:
# Initialize the Q-table with zeros: one row per state, one column per action.
state_space_size = env.observation_space.n
action_space_size = env.action_space.n
q_table = np.zeros(shape=(state_space_size, action_space_size))

In [None]:
# Q-learning hyperparameters
alpha: float = 0.8      # learning rate
gamma: float = 0.95     # discount factor
epsilon: float = 0.2    # exploration probability
episodes: int = 2_000   # number of training episodes
rewards: list = []      # total reward collected in each episode

In [None]:
# Training loop: tabular Q-learning with an epsilon-greedy policy.
# `success_count` counts steps that yield a reward of 1.0; `rewards`
# collects the total reward of every episode.
success_count = 0
for episode in tqdm(range(episodes)):
    # reset() returns either the bare observation or an (observation, info)
    # tuple depending on the gym version; accept both.
    state = env.reset()
    if isinstance(state, tuple):
        state = state[0]

    total_rewards = 0
    done = False

    while not done:
        # Epsilon-greedy: explore with probability epsilon, otherwise take
        # the action with the highest current Q-value.
        if random.uniform(0, 1) < epsilon:
            action = env.action_space.sample()
        else:
            action = np.argmax(q_table[state])

        # step() returns (obs, reward, done, info) under the old gym step API
        # (the default for the gym 0.25.2 pinned by this notebook) and
        # (obs, reward, terminated, truncated, info) under the new one.
        # Unpacking five values unconditionally raises ValueError on the old
        # API, so handle both shapes.
        step_result = env.step(action)
        if len(step_result) == 5:
            next_state, reward, terminated, truncated, info = step_result
            # The episode is over on a terminal state OR on a time-limit cut.
            done = terminated or truncated
        else:
            next_state, reward, done, info = step_result
        # Defensive: unwrap the observation if it arrives as a tuple.
        if isinstance(next_state, tuple):
            next_state = next_state[0]

        # Q-learning update:
        # Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        old_value = q_table[state, action]
        next_max = np.max(q_table[next_state])
        q_table[state, action] = old_value + alpha * (reward + gamma * next_max - old_value)

        state = next_state
        total_rewards += reward

        if reward == 1.0:
            success_count += 1

    rewards.append(total_rewards)

In [None]:
# Report the learned Q-table followed by summary statistics of the run.
print("Q-table setelah training:", q_table, sep="\n")
mean_reward = np.mean(rewards)
print("\nRata-rata reward setelah training:", mean_reward)
print("Jumlah episode yang berhasil mencapai goal:", success_count)

**Tips:** Coba ubah `is_slippery=False` menjadi `is_slippery=True` untuk simulasi yang lebih sulit.