# Reinforcement Learning For FROZEN LAKE and Taxi

<img src="https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit3/envs.gif" alt="Environments"/>

In [3]:
import gymnasium as gym
import numpy as np
import tqdm
import os
import random
import pickle


In [6]:
# Environment
env = gym.make('FrozenLake-v1', render_mode='rgb_array', map_name='4x4', is_slippery=False)

In [9]:
print(f'Observation space: {env.observation_space.n}')
print(f'Sample Observation : {env.observation_space.sample()}')

Observation space: 16
Sample Observation : 0


In [8]:
print(f'Action Space: {env.action_space.n}')
print(f'Action Sample : {env.action_space.sample()}')

Action Space: 4
Action Sample : 0


In [10]:
# Define the variables
state_space = env.observation_space.n
action_space = env.action_space.n
Qtable = np.zeros((state_space, action_space))

In [12]:
# Action using Epsilon greedy
epsilon_greedy = lambda state, epsilon: np.argmax(Qtable[state][:]) if np.random.uniform(0,1) > epsilon else env.action_space.sample()

In [13]:
n_episodes = 10000
learning_rate = 0.7
n_eval_episodes = 100
max_steps = 99
gamma = 0.95
eval_seed = []
max_epsilon = 1.0
min_epsilon = 0.05
decay_rate = 0.0005

In [14]:
def train(n_episodes, learning_rate, max_steps, gamma, epsilon_greedy, Qtable):
    for episode in tqdm.tqdm(range(n_episodes)):
        epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode)
        state, info = env.reset()
        step = 0
        terminated = False
        truncated = False
        for step in range(max_steps):
            action = epsilon_greedy(state, epsilon)
            new_state, reward, terminated, truncated, info = env.step(action)
            Qtable[state][action]  = Qtable[state][action] + learning_rate * reward + gamma * np.max(Qtable[new_state]) - Qtable[state][action]
            if terminated or truncated: break
            state = new_state
    return Qtable

In [15]:
Qtable_frozenlake = train(n_episodes, learning_rate, max_steps, gamma, epsilon_greedy, Qtable)

100%|██████████| 10000/10000 [00:00<00:00, 21339.65it/s]


In [16]:
Qtable_frozenlake

array([[0.51456432, 0.54164666, 0.54164666, 0.51456432],
       [0.51456432, 0.        , 0.57015437, 0.54164666],
       [0.54164666, 0.6001625 , 0.54164666, 0.57015437],
       [0.57015437, 0.        , 0.54164666, 0.54164666],
       [0.54164666, 0.57015437, 0.        , 0.51456432],
       [0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.63175   , 0.        , 0.57015437],
       [0.        , 0.        , 0.        , 0.        ],
       [0.57015437, 0.        , 0.6001625 , 0.54164666],
       [0.57015437, 0.63175   , 0.63175   , 0.        ],
       [0.6001625 , 0.665     , 0.        , 0.6001625 ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.63175   , 0.665     , 0.6001625 ],
       [0.63175   , 0.665     , 0.7       , 0.63175   ],
       [0.        , 0.        , 0.        , 0.        ]])

In [24]:
import imageio
def record_video(env, Qtable, out_directory, fps=1):
    images = []
    terminated = False
    truncated = False
    state, info = env.reset(seed=random.randint(0,500))
    img = env.render()
    images.append(img)
    while not terminated or truncated:
        action = np.argmax(Qtable[state][:])
        state, reward, terminated, trunctated, info = env.step(action)
        img = env.render()
        images.append(img)
    imageio.mimsave(out_directory, [np.array(img) for i, img in enumerate(images)], fps=fps)

In [25]:
video_path = 'replay.mp4'
record_video(env, Qtable_frozenlake, video_path,fps=1)