# RL for FrozenLake @ Gymnasium (formerly OpenAI Gym)

importing requisite modules

In [None]:
import numpy as np
import gymnasium as gym

initialising gym environment

In [None]:
# Build the 4x4 FrozenLake environment with slipping disabled, then report
# the action and observation spaces it exposes.
env = gym.make(
    "FrozenLake-v1",
    map_name="4x4",
    is_slippery=False,
)
for label, space in (
    ('Action Space:', env.action_space),
    ('Observation Space:', env.observation_space),
):
    print(label, space)

### TABULAR Q-LEARNING

initialising Q-table

In [None]:
# One row per observation (state) and one column per action, all starting at zero.
n_states = env.observation_space.n
n_actions = env.action_space.n
Qtable = np.zeros((n_states, n_actions))
Qtable.shape

Q-Learning hyperparameters

In [None]:
# Number of training episodes, and a per-episode log of the summed reward.
EPISODES  = 20000
rewardLog = np.zeros(EPISODES)

# ALPHA: step size of the Q-learning update.
# GAMMA: discount applied to the bootstrapped next-state value.
#   Kept strictly below 1: the training loop only ever adds the environment
#   reward, so with GAMMA = 1 every action that does not end the episode
#   (including ones that leave the agent where it is) converges to the same
#   value, and the greedy argmax, which breaks ties by lowest index, can get
#   stuck looping. A discount slightly below 1 makes shorter routes strictly
#   more valuable.
ALPHA   = 0.1
GAMMA   = 0.99

# Exploration rate bounds: epsilon is decayed linearly from EPS_MAX towards
# EPS_MIN over the course of training.
EPS_MAX = 0.42
EPS_MIN = 0.05

training

In [None]:
# Tabular Q-learning with a linearly decaying epsilon-greedy behaviour policy.
#
# Every source of randomness is seeded exactly once, before training starts.
# Re-seeding the environment on each reset would restart its random stream
# every episode, and would still leave the exploration draws unseeded.
TRAIN_SEED = 42
rng = np.random.default_rng(TRAIN_SEED)   # drives the explore/exploit coin flip
env.action_space.seed(TRAIN_SEED)         # drives the random exploratory actions

for episode in range(EPISODES):
    done = False
    # seed the environment on the very first reset only
    state, info = env.reset(seed=TRAIN_SEED if episode == 0 else None)

    # start this episode's total from zero so that re-running the cell does
    # not accumulate onto the totals of a previous run
    rewardLog[episode] = 0.0

    # exploration rate decay (linear from EPS_MAX towards EPS_MIN)
    EPSILON = EPS_MAX - (EPS_MAX - EPS_MIN)*(episode/EPISODES)

    while not done:
        # epsilon-greedy agent
        if rng.uniform() < EPSILON:
            action = env.action_space.sample()
        else:
            action = np.argmax(Qtable[state, :])

        # take action
        new_state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated

        # Q-learning: a terminal state has no future return, so do not
        # bootstrap from it; a truncated (time-limited) episode still does.
        maxQnext = 0.0 if terminated else np.max(Qtable[new_state, :])
        Qtable[state, action] += ALPHA*(reward + GAMMA*maxQnext - Qtable[state, action])

        # updates
        state = new_state
        rewardLog[episode] += reward

env.close()

batch mean reward over training

In [None]:
# Report the mean episode reward over consecutive batches of training episodes.
# Each batch is a tenth of the run, but never smaller than one episode, so a
# short run (EPISODES < 10) does not produce a zero batch size.
N = max(1, EPISODES // 10)

# Step through the log batch by batch; the final batch is clipped to EPISODES
# so trailing episodes are still reported when EPISODES is not a multiple of N.
for start in range(0, EPISODES, N):
    stop = min(start + N, EPISODES)
    print(stop, np.mean(rewardLog[start:stop]))

test accuracy of final agent

In [None]:
# Evaluate the greedy policy read off the learned Q-table.
TEST = 100

# Use the same map and dynamics as the training environment; leaving
# is_slippery at its default would evaluate the agent on different dynamics
# than the ones it was trained on.
test = gym.make("FrozenLake-v1", map_name="4x4", is_slippery=False, render_mode='human')

successes = 0
try:
    for k in range(TEST):
        done = False
        # seed once, on the first reset only
        state, info = test.reset(seed=69 if k == 0 else None)
        episode_return = 0.0

        while not done:
            # greedy agent
            action = np.argmax(Qtable[state, :])

            # take action
            state, reward, terminated, truncated, info = test.step(action)
            done = terminated or truncated
            episode_return += reward

        # an episode counts as a success when it collected a positive return
        successes += int(episode_return > 0)
finally:
    # always release the render window, even if the loop is interrupted
    test.close()

print('Test accuracy:', successes / max(TEST, 1))