# Task 5



In [1]:
# Install SWIG, a tool used to connect C/C++ code with Python. It's often used in RL environments to enable efficient communication between Python and low-level implementations of algorithms
!pip install -q swig

# Install the gym library with the box2d environment, used for 2D physics-based simulation tasks
!pip install -q gym[box2d]

# Install the gym library with the Atari environment, used for training agents on Atari 2600 games
!pip install -q gym[atari]

# Install stable-baselines3 with extra dependencies (needed for various environments and features in the library), a set of RL algorithms implemented in PyTorch
!pip install stable-baselines3[extra]

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.4/374.4 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for box2d-py (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting stable-baselines3[extra]
  Downloading stable_baselines3-2.3.2-py3-none-any.whl (182 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.3/182.3 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gymnasium<0.30,>=0.28.1 (from stable-baselines3[extra])
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━

In [2]:
# Import the gymnasium library as gym, which provides various environments for developing and testing RL algorithms
import gymnasium as gym
import numpy as np

In [3]:
# Import PPO (Proximal Policy Optimization) algorithm from stable-baselines3. PPO is a policy-gradient method
from stable_baselines3 import PPO

  and should_run_async(code)


In [4]:
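# Import the CnnPolicy class from the DQN module. CnnPolicy uses a CNN for function approximation. Note: PPO("CnnPolicy", ...) below is given the policy by name and resolves it to PPO's own CNN policy, so this DQN class is not what the PPO model uses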
from stable_baselines3.dqn import CnnPolicy

# Import the make_vec_env utility function to create vectorized environments, i.e. several instances of the same environment stepped together. This can speed up training by letting a single agent collect experience from multiple environment copies at once
from stable_baselines3.common.env_util import make_vec_env

  and should_run_async(code)


In [5]:
from stable_baselines3.common.env_util import make_atari_env

# Tunable settings for this training run, gathered in one place instead of being scattered as magic values
ENV_ID = "ALE/MsPacman-v5"  # Atari environment to train on
N_ENVS = 4  # Number of environment copies in the vectorized environment
TOTAL_TIMESTEPS = 100_000  # Training budget passed to model.learn
SEED = 42  # Fixed seed so that reruns are comparable (exact reproducibility on GPU is not guaranteed)

# Create the vectorized Atari environment; the seed is forwarded to the environments
vec_env = make_atari_env(ENV_ID, n_envs=N_ENVS, seed=SEED)

# Build a PPO agent with a CNN policy (selected by name), train it, and save it under the name "ppo_Pacman"
model = PPO("CnnPolicy", vec_env, verbose=1, seed=SEED)
model.learn(total_timesteps=TOTAL_TIMESTEPS)
model.save("ppo_Pacman")

Using cuda device
Wrapping the env in a VecTransposeImage.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 547      |
|    ep_rew_mean     | 518      |
| time/              |          |
|    fps             | 263      |
|    iterations      | 1        |
|    time_elapsed    | 31       |
|    total_timesteps | 8192     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 548          |
|    ep_rew_mean          | 500          |
| time/                   |              |
|    fps                  | 238          |
|    iterations           | 2            |
|    time_elapsed         | 68           |
|    total_timesteps      | 16384        |
| train/                  |              |
|    approx_kl            | 0.0083929915 |
|    clip_fraction        | 0.0688       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.19        |

In [6]:
# For visualization
from gym.wrappers.monitoring import video_recorder  # Import video recording utility from gym
from IPython.display import HTML  # Import HTML display utility from IPython
from IPython import display  # Import display utility from IPython
import glob  # Import glob for file pattern matching
import base64, io, os, shutil  # Import base64 for encoding, io for file handling, os for operating system interactions, and shutil for file operations
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv  # Import vectorized video recorder and dummy vectorized environment from stable-baselines3

# Select SDL's 'dummy' video driver so that rendering does not fail on headless servers (machines without a graphical interface)
os.environ.update(SDL_VIDEODRIVER='dummy')

  and should_run_async(code)


In [11]:
# Start from a clean slate: delete any previous 'video' directory (a missing one is not an error), then recreate it empty
shutil.rmtree(path='video', ignore_errors=True)
os.makedirs(name="video", exist_ok=True)

# Function to display a recorded video. It searches a folder for mp4 files, reads the first one (in sorted order), encodes it in base64, and displays it using an HTML video tag
def show_video(video_folder='video'):
    """
    Embed a recorded MP4 in the notebook output.

    :param video_folder: (str) folder searched for ``*.mp4`` files (defaults to 'video')
    :return: None. The video is rendered through ``display.display``; when the folder
        holds no mp4 file, "Could not find video" is printed instead.
    """
    # glob returns matches in arbitrary order, so sort to make the selected file deterministic
    mp4list = sorted(glob.glob(os.path.join(video_folder, '*.mp4')))
    if not mp4list:
        print("Could not find video")  # Print error message if no video found
        return

    # Open read-only ('rb') inside a context manager so the file handle is always closed
    with open(mp4list[0], 'rb') as video_file:
        encoded = base64.b64encode(video_file.read()).decode('ascii')  # Base64 text for the data URI

    # Display the video in an HTML video tag
    display.display(HTML(data='''<video alt="test" autoplay loop controls style="height: 400px;">
              <source src="data:video/mp4;base64,{0}" type="video/mp4" />
            </video>'''.format(encoded)))


# Function to record a video of an RL model's performance in an Atari environment (ALE/MsPacman-v5 by default). It sets up the environment and video recorder, runs the model for a specified number of steps, and records the video
def show_video_of_model(env_id="ALE/MsPacman-v5", model=None, video_length=500, prefix="", video_folder="video/"):
    """
    Roll out a model in a single Atari environment while recording it to a video file.

    Saving the video needs moviepy; gymnasium raises DependencyNotInstalled without it.

    :param env_id: (str) environment ID
    :param model: (RL model) reinforcement learning model; when None, the notebook-level
        variable ``model`` is used so that a bare ``show_video_of_model()`` call keeps working
    :param video_length: (int) length of the video in frames (also the number of steps taken)
    :param prefix: (str) prefix for the video file name
    :param video_folder: (str) folder to save the video
    :raises NameError: if ``model`` is None and no global ``model`` exists
    """
    if model is None:
        # Backward compatibility: fall back to the globally defined model
        try:
            model = globals()["model"]
        except KeyError:
            raise NameError("name 'model' is not defined") from None

    eval_env = make_atari_env(env_id, n_envs=1)  # Create a single Atari environment

    try:
        # Start the video at step=0 and record `video_length` steps
        eval_env = VecVideoRecorder(
            eval_env,
            video_folder=video_folder,  # Folder to save the video
            record_video_trigger=lambda step: step == 0,  # Trigger video recording at the first step
            video_length=video_length,  # Length of the video
            name_prefix=prefix,  # Prefix for the video file name
        )

        obs = eval_env.reset()  # Reset the environment to start
        for _ in range(video_length):
            action, _ = model.predict(obs)  # Predict the action using the model
            obs, _, _, _ = eval_env.step(action)  # Take the action in the environment and get the new observation
    finally:
        # Always close the environment/recorder, even if the rollout fails part-way
        eval_env.close()

In [12]:
# Run the trained agent in the evaluation environment and record the footage as an MP4 in the 'video' folder
show_video_of_model()

Exception ignored in: <function VecVideoRecorder.__del__ at 0x7f9a873f8430>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/stable_baselines3/common/vec_env/vec_video_recorder.py", line 113, in __del__
    self.close_video_recorder()
  File "/usr/local/lib/python3.10/dist-packages/stable_baselines3/common/vec_env/vec_video_recorder.py", line 104, in close_video_recorder
    self.video_recorder.close()
  File "/usr/local/lib/python3.10/dist-packages/gymnasium/wrappers/monitoring/video_recorder.py", line 151, in close
    raise error.DependencyNotInstalled(
gymnasium.error.DependencyNotInstalled: moviepy is not installed, run `pip install moviepy`


Saving video to /content/video/-step-0-to-step-500.mp4


  logger.warn("Unable to save last video! Did you call close()?")


Moviepy - Building video /content/video/-step-0-to-step-500.mp4.
Moviepy - Writing video /content/video/-step-0-to-step-500.mp4





Moviepy - Done !
Moviepy - video ready /content/video/-step-0-to-step-500.mp4


In [13]:
# Embed an MP4 from the 'video' folder inline in the notebook output (prints a message if none exists)
show_video()