In [1]:
# Install the notebook's dependencies into the kernel's environment.
# -q keeps pip quiet; -r reads the package list from the requirements file at the URL.
# NOTE(review): the URL points at the `main` branch, so the installed set can
# change over time — consider referencing a fixed commit or tag instead.
%pip install -qr https://raw.githubusercontent.com/anton-dergunov/hugging-face-deep-rl/main/requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [2]:
import gymnasium as gym
from gymnasium.wrappers import RecordVideo
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
import torch
import cv2
from IPython.display import Video
import imageio
import os
import shutil

In [3]:
import warnings

# Ignore the specific UserWarning from Pygame about pkg_resources
warnings.filterwarnings("ignore", category=UserWarning, module='pygame')
# Ignore the specific DeprecationWarning from jupyter_client
warnings.filterwarnings("ignore", category=DeprecationWarning, module='jupyter_client')

In [4]:
# Location the trained PPO model is saved to and later loaded back from.
MODEL_PATH = "models/ppo_cartpole"
# Output file for the recorded demo video of the trained agent.
VIDEO_PATH = "videos/cartpole_demo.mp4"

### Create Environment

In [5]:
# Training environment: a vectorized wrapper holding several independent
# copies of the classic-control task "CartPole-v1", so each rollout gathers
# experience from all copies at once.
vec_env = make_vec_env(
    env_id="CartPole-v1",
    n_envs=4,  # number of environment copies
)

### Define & Train PPO Agent

In [6]:
# Fixed seed so that re-running the notebook from a fresh kernel repeats the
# same training run as closely as possible.
SEED = 42

# Initialize PPO agent with Multi-Layer Perceptron policy
# verbose=1 shows training logs
# Using CPU, because otherwise I would get this warning:
# UserWarning: You are trying to run PPO on the GPU, but it is
# primarily intended to run on the CPU when not using a CNN policy...
model = PPO("MlpPolicy", vec_env, verbose=1, device="cpu", seed=SEED)

# Train the agent with a budget of 25,000 timesteps.
# NOTE: the training log reports progress in 8192-step increments, so the
# run presumably finishes on a whole increment slightly past this budget.
model.learn(total_timesteps=25000)

# Save the trained model.
# `or "."` keeps makedirs from raising if MODEL_PATH ever has no directory
# component (os.path.dirname would then return an empty string).
os.makedirs(os.path.dirname(MODEL_PATH) or ".", exist_ok=True)
model.save(MODEL_PATH)

Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 22.7     |
|    ep_rew_mean     | 22.7     |
| time/              |          |
|    fps             | 21425    |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 8192     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 33.2        |
|    ep_rew_mean          | 33.2        |
| time/                   |             |
|    fps                  | 7808        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.013723278 |
|    clip_fraction        | 0.187       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.682      |
|    explained_variance   | -0.00163    |
|    learning

### Load Trained Model

In [7]:
# Load the saved model back from disk.
# device="cpu" mirrors the training cell, so the reloaded policy stays on the
# CPU instead of following the loader's default device selection.
model = PPO.load(MODEL_PATH, device="cpu")

### Run Trained Agent

In [8]:
def record_cartpole_video(model, video_path, steps=1000):
    """Roll out ``model`` in CartPole-v1 and save the rendered frames as a video.

    The environment is stepped ``steps`` times in total. Whenever an episode
    ends (terminated or truncated) the environment is reset and the rollout
    continues, so one video can contain several episodes. Every frame is
    stamped with the current episode index and the step index within it.

    Args:
        model: Object whose ``predict(obs)`` returns an ``(action, state)``
            pair; in this notebook, the loaded PPO model.
        video_path: Destination file for the video. Its parent directory is
            created if it does not exist; a bare filename (no directory
            part) is written to the current working directory.
        steps: Total number of environment steps, i.e. frames, to record.

    Side effects:
        Sets ``SDL_VIDEODRIVER=dummy`` in ``os.environ``; the setting stays
        in place after the function returns.
    """
    # Set the video driver to a dummy one to prevent window creation
    os.environ["SDL_VIDEODRIVER"] = "dummy"

    # render_mode="rgb_array" makes env.render() hand back each frame as an
    # image array instead of drawing to a window.
    env = gym.make("CartPole-v1", render_mode="rgb_array")

    try:
        frames = []
        obs, info = env.reset()
        episode, step = 0, 0

        for _ in range(steps):
            action, _state = model.predict(obs)
            obs, reward, terminated, truncated, info = env.step(action)

            # Render the frame from the environment
            frame = env.render()

            # --- Overlay text (small, anti-aliased), drawn onto the frame ---
            for label, y_pos in ((f"Ep: {episode}", 20), (f"Step: {step}", 40)):
                cv2.putText(frame, label, (10, y_pos),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)

            frames.append(frame)
            step += 1

            if terminated or truncated:
                # Episode over: bump the episode counter and start a new one.
                episode += 1
                step = 0
                obs, info = env.reset()

        # Save the collected frames to a single video file.
        # Only create a directory when the path actually has one:
        # os.makedirs("") raises for a bare filename.
        out_dir = os.path.dirname(video_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        imageio.mimsave(video_path, frames, fps=30, macro_block_size=None)
        print(f"Saved video to {video_path}")

    finally:
        # Ensure the environment is closed to release all resources
        env.close()

In [9]:
# Record a 1000-step rollout of the loaded agent and save it to VIDEO_PATH.
# record_cartpole_video creates the output directory itself, so no separate
# os.makedirs call is needed here.
record_cartpole_video(model, video_path=VIDEO_PATH, steps=1000)

Saved video to videos/cartpole_demo.mp4


In [11]:
# Display the recorded video inline. embed=True embeds the video data in the
# notebook output itself rather than only referencing the file on disk.
Video(VIDEO_PATH, embed=True)