In [1]:
!pip install wheel setuptools pip --upgrade

Collecting wheel
  Downloading wheel-0.44.0-py3-none-any.whl.metadata (2.3 kB)
Collecting setuptools
  Using cached setuptools-72.1.0-py3-none-any.whl.metadata (6.6 kB)
Collecting pip
  Using cached pip-24.2-py3-none-any.whl.metadata (3.6 kB)
Downloading wheel-0.44.0-py3-none-any.whl (67 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.1/67.1 kB[0m [31m744.1 kB/s[0m eta [36m0:00:00[0m
[?25hUsing cached setuptools-72.1.0-py3-none-any.whl (2.3 MB)
Using cached pip-24.2-py3-none-any.whl (1.8 MB)
Installing collected packages: wheel, setuptools, pip
  Attempting uninstall: wheel
    Found existing installation: wheel 0.43.0
    Uninstalling wheel-0.43.0:
      Successfully uninstalled wheel-0.43.0
  Attempting uninstall: setuptools
    Found existing installation: setuptools 71.0.4
    Uninstalling setuptools-71.0.4:
      Successfully uninstalled setuptools-71.0.4
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:

In [2]:
!pip install swig
!pip install gymnasium[box2d]

Collecting swig
  Using cached swig-4.2.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (3.6 kB)
Using cached swig-4.2.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.9 MB)
Installing collected packages: swig
Successfully installed swig-4.2.1
Collecting gymnasium[box2d]
  Using cached gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium[box2d])
  Using cached Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Collecting box2d-py==2.3.5 (from gymnasium[box2d])
  Using cached box2d-py-2.3.5.tar.gz (374 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Using cached Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Using cached gymnasium-0.29.1-py3-none-any.whl (953 kB)
Building wheels for collected packages: box2d-py
  Building wheel for box2d-py (setup.py) ... [?25l[?25hdone
  Created wheel for box2d-py: filename=box2d_py-2.3.5-cp310-cp310-linux_x86_64.whl size=2376105 sha256=27

In [12]:
import gymnasium as gym
import random

env = gym.make("LunarLander-v2")

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

In [14]:
class mlp(nn.Module):
  def __init__(self, obs, actions):
    super(mlp, self).__init__()
    self.network = nn.Sequential(
        nn.Linear(obs, 64),
        nn.ReLU(),
        nn.Linear(64, 64),
        nn.ReLU(),
        nn.Linear(64, actions)
    )

  def forward(self, x):
    return self.network(x)

In [13]:
def update_target(dqn, target, tau):
   for target_param, local_param in zip(target.parameters(), dqn.parameters()):
    target_param.data.copy_(tau*local_param.data + (1.0-tau)*target_param.data)

In [29]:
eps = 0.99
decay = 0.995
gamma = 0.99
min_eps = 0.1
lr = 5e-4
batch_size = 64
tau = 1e-3
qn = mlp(env.observation_space.shape[0],env.action_space.n)
target = mlp(env.observation_space.shape[0], env.action_space.n)


def get_action(eps, obs):
  n = random.random()
  if n < eps:
    return random.randint(0,env.action_space.n-1)
  with torch.no_grad():
    return torch.argmax(qn(obs)).item()

criterion = nn.MSELoss()
optimizer = optim.Adam(qn.parameters(), lr = lr)


states = []
next_states = []
actions = []
rewards = []
dones = []
lengs = []


tot = 0
for ep in range(1000):
  done = False
  obs, _ = env.reset()
  lens = 0
  final_rew = 0
  learned = 0
  avg_loss = 0
  while not done:
    action = get_action(eps, torch.tensor(obs))
    next_obs, reward, terminated, truncated, info = env.step(action)
    final_rew+=reward
    ds = terminated or truncated

    states.append(obs)
    next_states.append(next_obs)
    rewards.append(reward)
    actions.append(action)
    dones.append(int(ds))

    if len(states) > 1e5:
      states = states[1:]
      next_states = next_states[1:]
      rewards = rewards[1:]
      actions = actions[1:]
      dones = dones[1:]

    if batch_size < len(states) and tot % 4 == 0:
      inds = random.sample(range(len(states)),batch_size)
      b_states = torch.tensor([states[i] for i in inds]).float()
      b_n = torch.tensor([next_states[i] for i in inds]).float()
      b_actions = torch.tensor([actions[i] for i in inds]).reshape(-1,1)
      b_rewards = torch.tensor([rewards[i] for i in inds]).reshape(-1,1).float()
      b_dones = torch.tensor([dones[i] for i in inds]).reshape(-1,1)
      qpred = qn(b_states)
      qpred = qpred.gather(1, b_actions)
      with torch.no_grad():
        max_n,_ = target(b_n).max(dim=1)
        max_n = max_n.reshape(-1,1)

      tgt = b_rewards + gamma*max_n*(1-b_dones)
      optimizer.zero_grad()
      loss = criterion(qpred, tgt)
      avg_loss+=loss.item()
      loss.backward()
      optimizer.step()
      learned+=1
    if tot % 4 == 0:
      update_target(qn, target, tau)
    lens+=1
    tot+=1
    obs = next_obs
    done = terminated or truncated
  if ep > 5:
    eps = max(min_eps, eps * decay)
  lengs.append(lens)
  if learned > 0:
    print(f"Ep {ep}, avg_loss: {avg_loss/learned}, len: {lens}, eps: {eps}, reward:{final_rew}")
  else:
    print(f"Ep {ep}, len: {lens}, eps: {eps}, reward: {final_rew}")

Ep 0, avg_loss: 10.30353205544608, len: 92, eps: 0.99, reward:-385.6786321444884
Ep 1, avg_loss: 71.12707806922295, len: 148, eps: 0.99, reward:-250.1832127041262
Ep 2, avg_loss: 107.34732449849447, len: 57, eps: 0.99, reward:-112.59983062835974
Ep 3, avg_loss: 133.64620112745385, len: 79, eps: 0.99, reward:-110.74185756436248
Ep 4, avg_loss: 62.912769381816574, len: 104, eps: 0.99, reward:-289.13818554408294
Ep 5, avg_loss: 63.36906465240147, len: 90, eps: 0.99, reward:-364.74840899847203
Ep 6, avg_loss: 119.02765717225917, len: 70, eps: 0.98505, reward:-96.3873262431579
Ep 7, avg_loss: 113.11260259778877, len: 74, eps: 0.98012475, reward:-111.29663144083104
Ep 8, avg_loss: 75.23578062810395, len: 75, eps: 0.97522412625, reward:-73.65645751892849
Ep 9, avg_loss: 121.40046603783317, len: 92, eps: 0.97034800561875, reward:-341.8710265841736
Ep 10, avg_loss: 113.37330224778917, len: 74, eps: 0.9654962655906563, reward:-177.87145201014766
Ep 11, avg_loss: 115.03210774174443, len: 109, eps

KeyboardInterrupt: 

In [30]:

# https://stackoverflow.com/questions/77042526/how-to-record-and-save-video-of-gym-environment
import gymnasium as gym

###
# create a temporary variable with our env, which will use rgb_array as render mode. This mode is supported by the RecordVideo-Wrapper
tmp_env = gym.make("LunarLander-v2", render_mode="rgb_array")

# wrap the env in the record video
tmenv = gym.wrappers.RecordVideo(env=tmp_env,video_folder="/content/sample_data",name_prefix="test-video", episode_trigger=lambda x: x % 1 == 0)

# env reset for a fresh start
obs, info = tmenv.reset()

###
# Start the recorder
tmenv.start_video_recorder()


# AI logic
for _ in range(1000):
    action = get_action(0, torch.tensor(obs))

    obs, reward, terminated, truncated, _ = tmenv.step(action)

    tmenv.render()

    if terminated or truncated:
        observation, info = env.reset()

####
# Don't forget to close the video recorder before the env!
tmenv.close_video_recorder()

# Close the environment
tmenv.close()


  logger.warn(


Moviepy - Building video /content/sample_data/test-video-episode-0.mp4.
Moviepy - Writing video /content/sample_data/test-video-episode-0.mp4





Moviepy - Done !
Moviepy - video ready /content/sample_data/test-video-episode-0.mp4
Moviepy - Building video /content/sample_data/test-video-episode-0.mp4.
Moviepy - Writing video /content/sample_data/test-video-episode-0.mp4





Moviepy - Done !
Moviepy - video ready /content/sample_data/test-video-episode-0.mp4


In [31]:
from IPython.display import HTML
from base64 import b64encode
import os

# Input video path
save_path = "/content/sample_data/test-video-episode-0.mp4"

# Compressed video path
compressed_path = "/content/videos/result_compressed.mp4"

os.system(f"ffmpeg -i {save_path} -vcodec libx264 {compressed_path}")

# Show video
mp4 = open(compressed_path,'rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)