## Update and install packages

In [2]:
# Update and install display packages and stable baseline 3
# uncomment if running notebook for the first time
!apt-get update && apt-get install swig cmake -y
!apt-get update && apt-get install ffmpeg freeglut3-dev xvfb -y
!pip install box2d-py
!pip install moviepy
!pip install "stable-baselines3[extra]>=2.0.0a4"
!pip install tensorboard


Get:1 http://security.ubuntu.com/ubuntu focal-security InRelease [128 kB]   
Get:2 http://archive.ubuntu.com/ubuntu focal InRelease [265 kB]             
Get:3 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [30.9 kB]
Get:4 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1275 kB]
Get:5 http://archive.ubuntu.com/ubuntu focal-updates InRelease [128 kB]
Get:6 http://archive.ubuntu.com/ubuntu focal-backports InRelease [128 kB]
Get:7 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [4090 kB]
Get:8 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB] 
Get:9 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [4069 kB]
Get:10 http://archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [33.4 kB]
Get:11 http://archive.ubuntu.com/ubuntu focal/multiverse amd64 Packages [177 kB]
Get:12 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]
Get:13 http://archive.ubu

## Import Libraries

In [1]:
# Import rquired libraries and modules
import os
import signal
import subprocess
import gymnasium as gym
import stable_baselines3
from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
import torch
from pathlib import Path
import base64
from IPython import display as ipythondisplay
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.logger import configure
import tensorboard


In [2]:
# Check gym and stable baseline 3 versions
print(f"{gym.__version__=}")
print(f"{stable_baselines3.__version__=}")


gym.__version__='0.29.1'
stable_baselines3.__version__='2.4.0a10'


## Settings

### Tunning parameters

In [4]:
# parameters for tunning the model
param_learning_rate = 0.0005 # too small led to overfitting
param_learning_starts = 100000
param_exploration_fraction = 0.15
param_buffer_size = 1000000
param_batch_size = 128
param_gradient_steps = 1
param_total_timesteps = 1000000
save_interval = param_total_timesteps/10


### Other settings

In [5]:
# name of the environment to run
env_name = 'CartPole-v1'
env_prefix = 'cartpole'

# output directoies
base_dir = '.'
output_dir = os.path.join(base_dir, 'output')
env_dir =  os.path.join(output_dir, env_prefix)
logs_dir = os.path.join(env_dir, 'logs')
models_dir = os.path.join(env_dir, 'models')
videos_dir = os.path.join(env_dir, 'videos')

os.makedirs(logs_dir, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)
os.makedirs(videos_dir, exist_ok=True)

# tensorboard name for algorithm logs
tb_log_name = 'DQN'
model_prefix = 'dqn'
model_name_final = f"{model_prefix}_model_final"

print(logs_dir)
print(models_dir)
print(videos_dir)

./output/cartpole/logs
./output/cartpole/models
./output/cartpole/videos


## Callbacks and Directory Setup

In [6]:
# class to manage display
class Display:
    def __init__(self, command: str):
        self.command = command
        
    def start(self):
        self.process = subprocess.Popen(self.command.split())  
        os.environ['DISPLAY'] = ':1'
        
    def terminate(self):
        self.process.terminate()
        
# callback for saving model at regular intervals
class SaveOnIntervalCallback(BaseCallback):
    def __init__(self, save_interval: int, save_path: str, verbose=1):
        super().__init__(verbose)
        self.save_interval = save_interval
        self.save_path = save_path

    def _on_step(self) -> bool:
        # Save the model every 'save_interval' steps
        if self.num_timesteps % self.save_interval == 0:
            save_file = os.path.join(self.save_path, f'{model_prefix}_model_{self.num_timesteps}')
            self.model.save(save_file)
            if self.verbose > 0:
                print(f'Saving model to {save_file}.zip')
        return True


## Custom Feature Extractor

In [7]:
# Feature extraction from frames as observations / states
class CustomANN(BaseFeaturesExtractor):
    def __init__(self, observation_space, features_dim=128):
        super(CustomANN, self).__init__(observation_space, features_dim)
        
        # Define your neural network layers
        self.net = nn.Sequential(
            nn.Linear(observation_space.shape[0], 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, features_dim)  # Output dimension should match features_dim
        )
    
    def forward(self, observations):
        return self.net(observations)
    

## Initialise model with atari environemnt

In [8]:
# Initialize the Atari environment with the specified game and configurations
env = gym.make(env_name)
# Stack 4 consecutive frames together to provide temporal information
# env = VecFrameStack(env, n_stack=param_n_stack) # can fine tunenumber of frame images to learn from. Always more than 1. 4 frames in each state

In [9]:
# Initialize the DQN agent with specified parameters
model = DQN(
    env=env,
    policy='MlpPolicy',
    verbose=1,
    learning_rate=param_learning_rate,
    buffer_size=param_buffer_size,
    learning_starts=param_learning_starts,
    gradient_steps=param_gradient_steps,
    exploration_fraction=param_exploration_fraction,
    exploration_final_eps=0.01,
    train_freq=4,
    batch_size=param_batch_size,
    policy_kwargs={'features_extractor_class': CustomANN},
    tensorboard_log=logs_dir
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


## Model training

In [11]:
display = Display("Xvfb :1 -screen 0 1024x768x24")
display.start()

In [None]:
# Define the interval at which models are saved during training
save_callback = SaveOnIntervalCallback(save_interval, models_dir)
model.learn(total_timesteps=param_total_timesteps, progress_bar=True, callback=save_callback)

# Save the final model after training completes
final_model_path = os.path.join(models_dir, model_name_final)
model.save(final_model_path)

Logging to ./output/cartpole/logs/DQN_14


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.8     |
|    ep_rew_mean      | 20.8     |
|    exploration_rate | 1        |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1185     |
|    time_elapsed     | 0        |
|    total_timesteps  | 83       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.2     |
|    ep_rew_mean      | 29.2     |
|    exploration_rate | 1        |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1846     |
|    time_elapsed     | 0        |
|    total_timesteps  | 234      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25       |
|    ep_rew_mean      | 25       |
|    exploration_rate | 1        |
| time/               |          |
|    episodes       

The XKEYBOARD keymap compiler (xkbcomp) reports:
> Internal error:   Could not resolve keysym XF86AudioPreset
> Internal error:   Could not resolve keysym XF86MonBrightnessCycle
> Internal error:   Could not resolve keysym XF86WWAN
> Internal error:   Could not resolve keysym XF86RFKill
> Internal error:   Could not resolve keysym XF86Keyboard
> Internal error:   Could not resolve keysym XF86RotationLockToggle
> Internal error:   Could not resolve keysym XF86FullScreen
Errors from xkbcomp are not fatal to the X server


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.3     |
|    ep_rew_mean      | 23.3     |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes         | 36       |
|    fps              | 2110     |
|    time_elapsed     | 0        |
|    total_timesteps  | 839      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.4     |
|    ep_rew_mean      | 23.4     |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes         | 40       |
|    fps              | 2139     |
|    time_elapsed     | 0        |
|    total_timesteps  | 937      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23       |
|    ep_rew_mean      | 23       |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes       

## Results rendering

### Video recording functions

In [10]:
# Functions to record videos of the agent playing and display the videos

def show_videos(video_path="", prefix=""):
    """Displays videos from a specified directory."""
    html = []
    for mp4 in Path(video_path).glob("{}*.mp4".format(prefix)):
        video_b64 = base64.b64encode(mp4.read_bytes())
        html.append(
            '''<video alt="{0}" autoplay
                      loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{1}" type="video/mp4" />
            </video>'''.format(mp4, video_b64.decode('ascii'))
        )
    ipythondisplay.display(ipythondisplay.HTML(data="<br>".join(html)))


def record_video(env_id, model, video_length=3000, prefix="", video_folder=videos_dir):
    eval_env = DummyVecEnv([lambda: gym.make(env_id, render_mode='rgb_array')])
    eval_env = VecVideoRecorder(
        eval_env,
        video_folder=video_folder,
        record_video_trigger=lambda step: step == 0,
        video_length=video_length,
        name_prefix=prefix,
    )
    obs = eval_env.reset()
    for _ in range(video_length):
        action, _ = model.predict(obs)
        obs, _, _, _ = eval_env.step(action)
    eval_env.close()


In [11]:
# Display the video
import os

def get_model_identifiers(models_dir):
    files = os.listdir(models_dir)
    model_files = [f for f in files if f.startswith(f'{model_prefix}_model_')]
    identifiers = [f.split('_')[2] for f in model_files]
    return identifiers

def find_key_identifiers(identifiers):
    identifiers.sort()  # Ensure identifiers are sorted
    earliest = identifiers[0]
    final = identifiers[-1]
    middle = identifiers[len(identifiers) // 2]
    return earliest, middle, final

def view(models_dir, video_length=2000):
    print("MODEL DIR", models_dir)
    identifiers = get_model_identifiers(models_dir)
    earliest, middle, final = find_key_identifiers(identifiers)

    # Record videos at the beginning, middle, and end of training
    for stage, identifier in zip(["beginning", "middle", "end"], [earliest, middle, final]):
        model_path = os.path.join(models_dir, f'{model_prefix}_model_{identifier}')
        print("MODEL PATH", model_path)
        model = DQN.load(model_path)
        record_video(env_name, model, video_length=video_length, prefix=f'{model_prefix}-{env_prefix}-{stage}')

    # Display the videos
    for stage in ["beginning", "middle", "end"]:
        show_videos("videos", prefix=f'{model_prefix}-{env_prefix}-{stage}')



### Recording videos

In [12]:
# record video using background display
display = Display("Xvfb :1 -screen 0 1024x768x24")
display.start()

The XKEYBOARD keymap compiler (xkbcomp) reports:
> Internal error:   Could not resolve keysym XF86AudioPreset
> Internal error:   Could not resolve keysym XF86MonBrightnessCycle
> Internal error:   Could not resolve keysym XF86WWAN
> Internal error:   Could not resolve keysym XF86RFKill
> Internal error:   Could not resolve keysym XF86Keyboard
> Internal error:   Could not resolve keysym XF86RotationLockToggle
> Internal error:   Could not resolve keysym XF86FullScreen
Errors from xkbcomp are not fatal to the X server


In [13]:

view(models_dir, video_length=3000)

MODEL DIR ./output/cartpole/models
MODEL PATH ./output/cartpole/models/dqn_model_10500000.zip
Saving video to /root/RL/A2/output/cartpole/videos/dqn-cartpole-beginning-step-0-to-step-3000.mp4
Moviepy - Building video /root/RL/A2/output/cartpole/videos/dqn-cartpole-beginning-step-0-to-step-3000.mp4.
Moviepy - Writing video /root/RL/A2/output/cartpole/videos/dqn-cartpole-beginning-step-0-to-step-3000.mp4



The XKEYBOARD keymap compiler (xkbcomp) reports:                 
> Internal error:   Could not resolve keysym XF86AudioPreset
> Internal error:   Could not resolve keysym XF86MonBrightnessCycle
> Internal error:   Could not resolve keysym XF86WWAN
> Internal error:   Could not resolve keysym XF86RFKill
> Internal error:   Could not resolve keysym XF86Keyboard
> Internal error:   Could not resolve keysym XF86RotationLockToggle
> Internal error:   Could not resolve keysym XF86FullScreen
Errors from xkbcomp are not fatal to the X server


Moviepy - Done !
Moviepy - video ready /root/RL/A2/output/cartpole/videos/dqn-cartpole-beginning-step-0-to-step-3000.mp4
MODEL PATH ./output/cartpole/models/dqn_model_3000000.zip
Saving video to /root/RL/A2/output/cartpole/videos/dqn-cartpole-middle-step-0-to-step-3000.mp4
Moviepy - Building video /root/RL/A2/output/cartpole/videos/dqn-cartpole-middle-step-0-to-step-3000.mp4.
Moviepy - Writing video /root/RL/A2/output/cartpole/videos/dqn-cartpole-middle-step-0-to-step-3000.mp4



The XKEYBOARD keymap compiler (xkbcomp) reports:                 
> Internal error:   Could not resolve keysym XF86AudioPreset
> Internal error:   Could not resolve keysym XF86MonBrightnessCycle
> Internal error:   Could not resolve keysym XF86WWAN
> Internal error:   Could not resolve keysym XF86RFKill
> Internal error:   Could not resolve keysym XF86Keyboard
> Internal error:   Could not resolve keysym XF86RotationLockToggle
> Internal error:   Could not resolve keysym XF86FullScreen
Errors from xkbcomp are not fatal to the X server


Moviepy - Done !
Moviepy - video ready /root/RL/A2/output/cartpole/videos/dqn-cartpole-middle-step-0-to-step-3000.mp4
MODEL PATH ./output/cartpole/models/dqn_model_final.zip
Saving video to /root/RL/A2/output/cartpole/videos/dqn-cartpole-end-step-0-to-step-3000.mp4
Moviepy - Building video /root/RL/A2/output/cartpole/videos/dqn-cartpole-end-step-0-to-step-3000.mp4.
Moviepy - Writing video /root/RL/A2/output/cartpole/videos/dqn-cartpole-end-step-0-to-step-3000.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready /root/RL/A2/output/cartpole/videos/dqn-cartpole-end-step-0-to-step-3000.mp4


The XKEYBOARD keymap compiler (xkbcomp) reports:
> Internal error:   Could not resolve keysym XF86AudioPreset
> Internal error:   Could not resolve keysym XF86MonBrightnessCycle
> Internal error:   Could not resolve keysym XF86WWAN
> Internal error:   Could not resolve keysym XF86RFKill
> Internal error:   Could not resolve keysym XF86Keyboard
> Internal error:   Could not resolve keysym XF86RotationLockToggle
> Internal error:   Could not resolve keysym XF86FullScreen
Errors from xkbcomp are not fatal to the X server


In [None]:
display.terminate()