In [3]:
import gymnasium as gym
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecMonitor
from stable_baselines3.common.callbacks import (
    EvalCallback,
    StopTrainingOnRewardThreshold,
    BaseCallback,
)
from stable_baselines3.common.callbacks import ProgressBarCallback
from stable_baselines3.common.evaluation import evaluate_policy

In [6]:
# Create the LunarLander-v3 environment
env = gym.make(
    "LunarLander-v3",
    # continuous=False,
    # gravity=-10.0,
    # enable_wind=False,
    # wind_power=15.0,
    # turbulence_power=1.5,
    render_mode="rgb_array"
)

In [3]:
stop_callback = StopTrainingOnRewardThreshold(reward_threshold=200, verbose=1)

eval_callback = EvalCallback(
    env,
    best_model_save_path="../logs/exercise_2/dqn/",
    log_path="../logs/exercise_2/dqn/",
    eval_freq=5_000,
    deterministic=True,
    render=False,
    n_eval_episodes=50,
    callback_on_new_best=stop_callback,
)

In [4]:
# Create DQN model
model = DQN(
    "MlpPolicy",
    env,
    learning_rate=0.00021924459122944128,
    buffer_size=10_000,
    batch_size=128,
    gamma=0.9594449582014025,
    train_freq=1,
    exploration_fraction=0.24785493712044818,
    exploration_final_eps=0.04073005273312161,
    policy_kwargs = dict(net_arch=[64, 64]),
    seed=42,
    tensorboard_log="../logs/exercise_2/dqn/dqn_tensorboard/",
    device="cpu",
    verbose=0,
)

In [5]:
# Train model on the environment
TRAINING_TIMESTEPS = 500_000
model.learn(total_timesteps=TRAINING_TIMESTEPS, callback=[eval_callback, ProgressBarCallback()])

Output()

<stable_baselines3.dqn.dqn.DQN at 0x731f337746e0>

In [4]:
model = DQN.load("../logs/exercise_2/dqn/best_model.zip")

In [7]:
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=50)
print(f"Mean reward: {mean_reward} +/- {std_reward}")



Mean reward: 52.321416877684285 +/- 113.03638259129633


In [8]:
MAX_STEPS = 1_000

observation, info = env.reset(seed=42)
frames = []
step_count = 0

# for step in range(MAX_STEPS):
while True:
    frame = env.render()
    frames.append(frame)

    action, _ = model.predict(observation, deterministic=True)
    observation, reward, terminated, truncated, info = env.step(action)
    step_count += 1

    if terminated or truncated:
        print(f"Episode finished after {step_count} steps ({"truncated" if truncated else "terminated"})")
        break

env.close()

print(f"Final Step: {step_count}")
print(f"Number of Frames: {len(frames)}")

Episode finished after 1000 steps (truncated)
Final Step: 1000
Number of Frames: 1000


In [9]:
from IPython.display import HTML
from base64 import b64encode
import os

# Create a video from the frames
video_filename = "../videos/lunarlander_dqn.mp4"
compressed_path = "../videos/lunarlander_dqn_compressed.mp4"
height, width, _ = frames[0].shape

fourcc = cv2.VideoWriter_fourcc(*"mp4v")
video = cv2.VideoWriter(video_filename, fourcc, 30.0, (width, height))

for frame in frames:
    video.write(
        cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    )
video.release()

print(f"Video guardado como {video_filename}")

os.system(f"rm {compressed_path}")
# Compressed video path
os.system(f"ffmpeg -i {video_filename} -vcodec libx264 {compressed_path}")
os.system(f"rm {video_filename}")
os.system(f"mv {compressed_path} {video_filename}")

# Show video
mp4 = open(video_filename, "rb").read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML(
    """
<video width=800 controls>
      <source src="%s" type="video/mp4">
</video>"""
    % data_url
)

Video guardado como ../videos/lunarlander_dqn.mp4


rm: cannot remove '../videos/lunarlander_dqn_compressed.mp4': No such file or directory
ffmpeg version n7.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 14.2.1 (GCC) 20250207
  configuration: --prefix=/usr --disable-debug --disable-static --disable-stripping --enable-amf --enable-avisynth --enable-cuda-llvm --enable-lto --enable-fontconfig --enable-frei0r --enable-gmp --enable-gnutls --enable-gpl --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libdav1d --enable-libdrm --enable-libdvdnav --enable-libdvdread --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgsm --enable-libharfbuzz --enable-libiec61883 --enable-libjack --enable-libjxl --enable-libmodplug --enable-libmp3lame --enable-libopencore_amrnb --enable-libopencore_amrwb --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libplacebo --enable-libpulse --enable-librav1e --enable-librsvg --enable-librubberband --enable-libsnappy --enab