# Introduction to RL Agents

This notebook introduces the basics of reinforcement learning agents.

## Setup

Install required dependencies for the RL environment.

In [None]:
!sudo apt install swig cmake
!sudo apt-get update
!sudo apt-get install -y python3-opengl
!sudo apt install ffmpeg
!sudo apt install xvfb

In [1]:
!pip install -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt
!pip install pyvirtualdisplay

Collecting stable-baselines3==2.0.0a5 (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1))
  Using cached stable_baselines3-2.0.0a5-py3-none-any.whl.metadata (5.3 kB)
Collecting swig (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 2))
  Using cached swig-4.3.1.post0-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (3.5 kB)
Collecting huggingface_sb3 (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 4))
  Using cached huggingface_sb3-3.0-py3-none-any.whl.metadata (6.3 kB)
Collecting gymnasium==0.28.1 (from stable-baselines3==2.0.0a5->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1))
  Using cached gymnasium-0.28.1-py3-none-any.whl.metadata (9.2 kB)
Collecting torch>=1.11 (from stable-baselines3==2.0

In [5]:
# RL starts here 

## Import Libraries

In [None]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import numpy as np

## Create Environment

In [None]:
# Instantiate the LunarLander-v2 task and report its observation and action spaces
env = gym.make("LunarLander-v2")
for label, space in (
    ("Observation space:", env.observation_space),
    ("Action space:", env.action_space),
):
    print(label, space)

## Define the PPO Model

In [None]:
# Create the PPO model.
# A fixed seed is passed so that a fresh "Restart & Run All" starts training from
# the same random state (exact repeatability may still vary across platforms or
# library versions).
SEED = 42

model = PPO(
    "MlpPolicy",                 # multi-layer-perceptron policy/value networks
    env,
    verbose=1,                   # print training progress
    learning_rate=3e-4,          # optimizer step size
    n_steps=2048,                # environment steps collected per policy update
    batch_size=64,               # minibatch size used during each update
    n_epochs=10,                 # optimisation passes over each collected rollout
    gamma=0.99,                  # discount factor
    gae_lambda=0.95,             # bias/variance trade-off for advantage estimation
    clip_range=0.2,              # PPO clipping parameter
    tensorboard_log="./ppo_lunarlander_tensorboard/",
    seed=SEED,
)

print("Model created successfully!")

## Train the Model

In [None]:
# Training budget: one million environment steps
total_timesteps = 10**6
model.learn(total_timesteps)

# Write the learned policy to disk, then confirm
checkpoint_name = "ppo_lunarlander"
model.save(checkpoint_name)
print("Training complete! Model saved.")

## Evaluate the Trained Model

In [None]:
# Evaluate the trained agent on a fresh environment rather than the one used for
# training. The Monitor wrapper records true episode returns and lengths;
# evaluate_policy emits a warning when the environment is not wrapped with it.
from stable_baselines3.common.monitor import Monitor

eval_env = Monitor(gym.make("LunarLander-v2"))
try:
    mean_reward, std_reward = evaluate_policy(
        model, eval_env, n_eval_episodes=10, deterministic=True
    )
finally:
    # Release the evaluation environment even if evaluation fails.
    eval_env.close()

print(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")

## Visualize the Results

In [None]:
# Setup virtual display for rendering
from pyvirtualdisplay import Display
import matplotlib.pyplot as plt
from IPython import display as ipythondisplay

# Off-screen display (not visible) so frames can be rendered without a monitor
display_size = (1400, 900)
virtual_display = Display(visible=0, size=display_size)
virtual_display.start()

In [None]:
# Record a video of the trained agent
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv

def record_video(env_id, model, video_length=500, prefix='ppo-lunarlander'):
    """
    Record a video of the agent acting in a freshly created environment.

    The recording is written under ``videos/<prefix>/`` and its file name starts
    with ``prefix``.

    :param env_id: Gymnasium environment id passed to ``gym.make``.
    :param model: trained model; only ``model.predict(obs, deterministic=True)`` is used.
    :param video_length: number of environment steps to run and record.
    :param prefix: sub-folder of ``videos/`` and file-name prefix for the recording.
    """
    # rgb_array rendering is what lets the recorder grab frames.
    eval_env = DummyVecEnv([lambda: gym.make(env_id, render_mode="rgb_array")])

    try:
        # Start recording at step 0 and keep going for `video_length` steps.
        eval_env = VecVideoRecorder(
            eval_env,
            f"videos/{prefix}",
            record_video_trigger=lambda step: step == 0,
            video_length=video_length,
            name_prefix=prefix,
        )

        obs = eval_env.reset()
        for _ in range(video_length):
            action, _state = model.predict(obs, deterministic=True)
            obs, _, _, _ = eval_env.step(action)
    finally:
        # Always close the environment (and the recorder, once it wraps the
        # environment), even if predict/step raises part-way through.
        eval_env.close()

# Roll the trained agent out for 1000 steps and capture it on video
record_video(env_id='LunarLander-v2', model=model, video_length=1000)
print("Video recorded successfully!")

In [None]:
# Display the video
from base64 import b64encode
from IPython.display import HTML
import os
import glob

def show_videos(video_path='', prefix=''):
    """
    Display recorded MP4 videos inline in the Jupyter notebook.

    Searches ``video_path`` and all of its sub-directories for ``.mp4`` files whose
    name starts with ``prefix`` and embeds each one as a base64 ``<video>`` element.

    :param video_path: directory to search; '' searches from the current directory.
    :param prefix: file-name prefix to filter on; '' matches every ``.mp4`` file.
    """
    # Imported here so the function does not depend on an alias created in another cell.
    from IPython.display import HTML, display

    # Search recursively: record_video writes into videos/<prefix>/, so a flat
    # glob of `video_path/*.mp4` would miss those recordings.
    pattern = os.path.join(video_path, "**", f"{glob.escape(prefix)}*.mp4")
    video_files = sorted(glob.glob(pattern, recursive=True))

    if not video_files:
        search_root = video_path or "."
        print(f"No .mp4 files with prefix {prefix!r} found under {search_root!r}")
        return

    html = []
    for mp4 in video_files:
        # Context manager closes the file handle as soon as the bytes are read.
        with open(mp4, "rb") as video_file:
            video_b64 = b64encode(video_file.read())
        html.append('''<video alt="{}" autoplay 
                      loop controls style="height: 400px;">
                      <source src="data:video/mp4;base64,{}" type="video/mp4" />
                 </video>'''.format(mp4, video_b64.decode('ascii')))
    display(HTML(data="<br>".join(html)))

# Embed the recorded MP4 files from the videos directory in the notebook output
show_videos(video_path='videos', prefix='ppo-lunarlander')

ModuleNotFoundError: No module named 'huggingface_sb3'