<a href="https://colab.research.google.com/github/gerryfrank10/RL/blob/main/repeat_frozen_lake.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import gymnasium as gym
import tqdm

In [3]:
# render_mode="rgb_array" makes env.render() hand back each frame as an image
# array, which the video-recording cell collects and writes to disk.
# is_slippery=False turns off the random slipping, so moves are deterministic.
env = gym.make(
  "FrozenLake-v1",
  map_name="8x8",
  is_slippery=False,
  render_mode="rgb_array",
)

Sizes of the observation (state) space and the action space:

In [4]:
# Number of discrete states and number of discrete actions, shown as a tuple.
env.observation_space.n, env.action_space.n

(np.int64(64), np.int64(4))

A random sample from the observation space and from the action space:

In [5]:
# Draw one random state index and one random action index (observation first, then action).
env.observation_space.sample(), env.action_space.sample()

(np.int64(36), np.int64(0))

In [6]:
# Tabular Q-learning: keep one value per (state, action) pair, starting at zero.
state_space, action_space = env.observation_space.n, env.action_space.n
Q_table = np.zeros(shape=(state_space, action_space))

In [7]:
def epsilon_greedy(state, epsilon):
  """Pick an action for `state` with an epsilon-greedy rule.

  Reads the module-level `Q_table` and `env` (they are not parameters).

  Args:
    state: Index of the current state (row of `Q_table`).
    epsilon: Exploration threshold; a uniform draw in [0, 1) above it exploits.

  Returns:
    The arg-max action of `Q_table[state]` when exploiting, otherwise a random
    action drawn from `env.action_space`.
  """
  # Draw first so the random stream is consumed in the same order on every call.
  if np.random.uniform(0, 1) > epsilon:
    return np.argmax(Q_table[state][:])
  return env.action_space.sample()

In [8]:
# --- Training ---
# NOTE: the training call in this notebook passes n_episodes=20000 and
# learning_rate=0.01 explicitly, so those two values override the ones below.
n_episodes = 10_000
learning_rate = 0.7
max_steps = 99          # cap on environment steps per episode
gamma = 0.95            # discount factor

# --- Evaluation (not referenced by the visible cells) ---
n_eval_episodes = 100
eval_seed = []

# --- Exploration schedule: epsilon decays exponentially with the episode index ---
max_epsilon = 1.0
min_epsilon = 0.05
decay_rate = 5e-4

In [9]:
def train(n_episodes, learning_rate, max_steps, gamma, epsilon_greedy, Qtable):
  """Run tabular Q-learning and return the updated Q-table.

  Reads the module-level `env`, `min_epsilon`, `max_epsilon` and `decay_rate`
  (they are not parameters).

  Args:
    n_episodes: Number of training episodes.
    learning_rate: Step size applied to each temporal-difference error.
    max_steps: Upper bound on environment steps per episode.
    gamma: Discount factor applied to the next state's best value.
    epsilon_greedy: Callable `(state, epsilon) -> action` used to pick actions.
    Qtable: Table indexed as `Qtable[state][action]`; it is updated in place.

  Returns:
    The same `Qtable` object that was passed in.
  """
  for episode in tqdm.tqdm(range(n_episodes)):
    # Exploration decays exponentially from max_epsilon towards min_epsilon.
    epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode)
    state, info = env.reset()
    for _ in range(max_steps):
      action = epsilon_greedy(state, epsilon)
      new_state, reward, terminated, truncated, info = env.step(action)
      # A terminated episode has no future return, so its target must not
      # bootstrap from new_state; a merely truncated episode still does.
      next_value = 0.0 if terminated else np.max(Qtable[new_state])
      td_target = reward + gamma * next_value
      Qtable[state][action] += learning_rate * (td_target - Qtable[state][action])
      if terminated or truncated:
        break
      state = new_state
  return Qtable

In [10]:
# train() updates the table it is given in place and returns that same object,
# so Qtable_frozenlake and Q_table refer to one table after this cell.
Qtable_frozenlake = train(
  Qtable=Q_table,
  epsilon_greedy=epsilon_greedy,
  n_episodes=20000,
  learning_rate=0.01,
  max_steps=99,
  gamma=0.95,
)

100%|██████████| 20000/20000 [01:10<00:00, 282.98it/s]


## Record Video

In [11]:
import imageio
def record_video(env, Qtable, out_directory, fps=1):
  """Roll out one greedy episode and save the rendered frames as a video.

  Args:
    env: Environment whose `render()` returns an image frame.
    Qtable: Table indexed as `Qtable[state]`; the arg-max action is taken in
      every state.
    out_directory: Output path handed to `imageio.mimsave` (callers in this
      notebook pass a file path such as 'repeat-frozen2.mp4').
    fps: Frames per second passed to `imageio.mimsave`.
  """
  images = []
  terminated = False
  truncated = False
  state, info = env.reset(seed=np.random.randint(0, 500))
  images.append(env.render())
  # Stop on EITHER signal. The previous `not terminated or truncated` parsed as
  # `(not terminated) or truncated`, which kept looping once truncated was True.
  while not (terminated or truncated):
    action = np.argmax(Qtable[state])
    state, reward, terminated, truncated, info = env.step(action)
    images.append(env.render())
  imageio.mimsave(out_directory, [np.array(img) for img in images], fps=fps)

In [None]:
# Record one greedy rollout of the trained table to a video file.
video_path = "repeat-frozen2.mp4"
record_video(env=env, Qtable=Qtable_frozenlake, out_directory=video_path, fps=1)