In [1]:
import gymnasium as gym
import numpy as np
import torch as th
import matplotlib.pyplot as plt

from stable_baselines3 import DQN 
from stable_baselines3.common.evaluation import evaluate_policy

In [2]:
env = gym.make("LunarLander-v2", render_mode="human") 

In [3]:
tensorboard_log = "data/tb/"

In [4]:
dqn_model = DQN(
    "MlpPolicy", 
    env, 
    verbose=1,
    train_freq=4,
    gradient_steps=-1,
    gamma=0.99, 
    exploration_fraction=0.12,
    exploration_final_eps=0.1,
    target_update_interval=250,
    learning_starts=0,
    buffer_size=50000, 
    batch_size=128,
    learning_rate=6.4e-4, 
    policy_kwargs=dict(net_arch=[256, 256]),
    tensorboard_log=tensorboard_log, 
    seed=2, 
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [5]:
mean_reward, std_reward = evaluate_policy(
    dqn_model,
    dqn_model.get_env(),
    deterministic=True, 
    n_eval_episodes=10,  # Adjust the number of evaluation episodes
)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}") 


mean_reward:-575.40 +/- 147.49


In [6]:
dqn_model.learn(int(1.2e4), log_interval=10) #learning for model

Logging to data/tb/DQN_5
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 211      |
|    ep_rew_mean      | -142     |
|    exploration_rate | 0.1      |
| time/               |          |
|    episodes         | 10       |
|    fps              | 39       |
|    time_elapsed     | 53       |
|    total_timesteps  | 2112     |
| train/              |          |
|    learning_rate    | 0.00064  |
|    loss             | 1.03     |
|    n_updates        | 2108     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 477      |
|    ep_rew_mean      | -84.2    |
|    exploration_rate | 0.1      |
| time/               |          |
|    episodes         | 20       |
|    fps              | 39       |
|    time_elapsed     | 242      |
|    total_timesteps  | 9547     |
| train/              |          |
|    learning_rate    | 0.00064  |
|    loss             | 0.378 

<stable_baselines3.dqn.dqn.DQN at 0x1d52b1514d0>

In [23]:
mean_reward, std_reward = evaluate_policy(dqn_model, dqn_model.get_env(), deterministic=True, n_eval_episodes=20)

print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

mean_reward:-57.57 +/- 16.10


In [24]:
import os
os.system("Xvfb :1 -screen 0 1024x768x24 &") 
os.environ['DISPLAY'] = ':1' 

In [25]:
import base64
from pathlib import Path

from IPython import display as ipythondisplay


def show_videos(video_path="", prefix=""):
    html = []
    for mp4 in Path(video_path).glob("{}*.mp4".format(prefix)):
        video_b64 = base64.b64encode(mp4.read_bytes())
        html.append(
            """<video alt="{}" autoplay 
                    loop controls style="height: 400px;">
                    <source src="data:video/mp4;base64,{}" type="video/mp4" />
                </video>""".format(
                mp4, video_b64.decode("ascii")
            )
        )
    ipythondisplay.display(ipythondisplay.HTML(data="<br>".join(html)))

In [26]:
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv


def record_video(
    env_id,
    model,
    video_length=500,
    prefix="",
    video_folder="videos/",
):
    """
    :param env_id: (str)
    :param model: (RL model)
    :param video_length: (int)
    :param prefix: (str)
    :param video_folder: (str)
    """
    eval_env = DummyVecEnv([lambda: gym.make(env_id, render_mode="rgb_array")])
    # Start the video at step=0 and record 500 steps
    eval_env = VecVideoRecorder(
        eval_env,
        video_folder=video_folder,
        record_video_trigger=lambda step: step == 0,
        video_length=video_length,
        name_prefix=prefix,
    )

    obs = eval_env.reset()
    for _ in range(video_length):
        action, _ = model.predict(obs, deterministic=False)
        obs, _, _, _ = eval_env.step(action)

    # Close the video recorder
    eval_env.close()

In [27]:
record_video("LunarLander-v2", dqn_model, video_length=500, prefix="dqn-lunarlander")

Saving video to c:\Users\KIIT0001\Desktop\videos\dqn-lunarlander-step-0-to-step-500.mp4
Moviepy - Building video c:\Users\KIIT0001\Desktop\videos\dqn-lunarlander-step-0-to-step-500.mp4.
Moviepy - Writing video c:\Users\KIIT0001\Desktop\videos\dqn-lunarlander-step-0-to-step-500.mp4



                                                               

Moviepy - Done !
Moviepy - video ready c:\Users\KIIT0001\Desktop\videos\dqn-lunarlander-step-0-to-step-500.mp4


In [28]:
show_videos("videos", prefix="dqn-lunarlander")