# 1. Setup


In [1]:
from google.colab import drive, files
import os

# Mount Google Drive and switch into the project folder.
# The path is absolute so that re-running this cell still works after the
# working directory has already been changed away from /content.
drive.mount('/content/gdrive')
path = "/content/gdrive/My Drive/MSEC/AIPI590/"
os.chdir(path)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Enter the stable-baselines3 directory (relative to the current working directory)
# and install it in editable mode (-e) with the docs, tests and extra dependency groups.
%cd stable-baselines3 
!pip install -e .[docs,tests,extra] 


In [3]:
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack, VecVideoRecorder, DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import gym


# 2. Train the model

We display a Tensorboard to monitor training, and we save a video.

In [4]:
# make_atari_env is the ready-made generator that builds and wraps the Atari
# environments; the result is then wrapped in a 4-deep frame stack.
# env_id = 'Skiing-v0'  # alternative environment id
env_id = 'ALE/Breakout-v5'
env = VecFrameStack(
    make_atari_env(env_id, n_envs=16, seed=0),
    n_stack=4,
)



In [5]:
# Three A2C agents sharing the same CNN policy and environment; they differ only
# in the advantage-estimation settings passed below. Each agent logs to its own
# TensorBoard directory so training can be monitored (the n-step directory is the
# one opened by the `%tensorboard` cell later in this notebook).
model_gae_vanilla = A2C('CnnPolicy', env, verbose=0,
                        gae_lambda=0,
                        tensorboard_log="./a2c_breakout_gae_vanilla_tensorboard/")
model_gae = A2C('CnnPolicy', env, verbose=0,
                tensorboard_log="./a2c_breakout_gae_tensorboard/")
# NOTE(review): `n_step_ad` is presumably an argument added by the locally
# installed stable-baselines3 checkout - confirm it exists there.
model_nstep = A2C('CnnPolicy', env, verbose=0,
                  n_step_ad=True,
                  tensorboard_log="./a2c_breakout_nstep_tensorboard/")



In [None]:
# Train for 3 million timesteps; passed as an int because `3e6` is a float literal.
model_gae_vanilla.learn(total_timesteps=3_000_000)

In [None]:
# Train for 3 million timesteps; passed as an int because `3e6` is a float literal.
model_gae.learn(total_timesteps=3_000_000)

In [None]:
# Train for 3 million timesteps; passed as an int because `3e6` is a float literal.
model_nstep.learn(total_timesteps=3_000_000)

# 3. Saving the model

Next, we demonstrate saving and loading the policy/model.

In [None]:
# Persist each trained agent under its own checkpoint name.
for trained_model, checkpoint_name in (
    (model_gae_vanilla, "a2c_breakout_gae_vanilla"),
    (model_gae, "a2c_breakout_gae"),
    (model_nstep, "a2c_breakout_nstep"),
):
    trained_model.save(checkpoint_name)

# 4. Evaluate training

In this section we compare the models and include TensorBoards.

In [None]:
# The evaluation environment must stack the same number of frames as the training
# environment (n_stack=4); otherwise the observations do not match what the
# trained policies expect.
eval_env = make_atari_env(env_id, n_envs=4, seed=0)
eval_env = VecFrameStack(eval_env, n_stack=4)

# Evaluate every trained agent over 100 episodes and print mean +/- std reward.
for label, trained_model in (
    ("gae vanilla", model_gae_vanilla),
    ("gae", model_gae),
    ("nstep", model_nstep),
):
    mean_reward, std_reward = evaluate_policy(trained_model, eval_env, n_eval_episodes=100)
    print(f"mean_reward {label}:{mean_reward:.2f} +/- {std_reward:.2f}")

In [None]:
#trained_model_gae = A2C.load("a2c_breakout_gae", verbose=1)
#trained_model_nstep = A2C.load("a2c_breakout_nstep", verbose=1)

Our n-step (=4) modification does not look successful. Next we show a Tensorboard.

In [15]:
# Load the TensorBoard notebook extension so the %tensorboard magic is available.
%load_ext tensorboard

In [17]:
# Launch TensorBoard inline, reading logs from the n-step run's log directory.
%tensorboard --logdir=./a2c_breakout_nstep_tensorboard/

ERROR: Timed out waiting for TensorBoard to start. It may still be running as pid 682.

# 5. Final evaluation

In this section we display the final evaluation video.




In [10]:
# Set up fake display; otherwise rendering will fail.
# Xvfb is launched in the background (trailing `&`) as display :1 with a
# 1024x768, 24-bit screen.
os.system("Xvfb :1 -screen 0 1024x768x24 &")
# Point this process at that display through the DISPLAY environment variable.
os.environ['DISPLAY'] = ':1'

In [11]:
import base64
from pathlib import Path

from IPython import display as ipythondisplay

def show_videos(video_path='', prefix=''):
  """
  Display every matching MP4 file inline in the notebook as an HTML5 video.

  Each file is read fully, base64-encoded and embedded as a data URI, so the
  videos render without being served from disk.

  Taken from https://github.com/eleurent/highway-env

  :param video_path: (str) Path to the folder containing videos
  :param prefix: (str) Filter the videos, showing only those whose file name
    starts with this prefix
  """
  from html import escape  # local import keeps this cell self-contained

  video_tags = []
  # glob() yields files in arbitrary order; sort so the display order is stable.
  for mp4 in sorted(Path(video_path).glob("{}*.mp4".format(prefix))):
    video_b64 = base64.b64encode(mp4.read_bytes())
    # The path is interpolated into an HTML attribute, so escape it.
    video_tags.append('''<video alt="{}" autoplay 
                    loop controls style="height: 400px;">
                    <source src="data:video/mp4;base64,{}" type="video/mp4" />
                </video>'''.format(escape(str(mp4)), video_b64.decode('ascii')))
  ipythondisplay.display(ipythondisplay.HTML(data="<br>".join(video_tags)))

In [12]:
def record_video(env_id, model, video_length=500, prefix='', video_folder='videos/'):
  """
  Roll out `model` in a fresh frame-stacked Atari environment and record a video.

  :param env_id: (str) Environment id passed to make_atari_env
  :param model: (RL model) Object whose predict(obs) returns (action, state)
  :param video_length: (int) Number of environment steps to run and record
  :param prefix: (str) Name prefix given to the recorded video file
  :param video_folder: (str) Folder the video is written to
  """
  eval_env = make_atari_env(env_id, n_envs=4, seed=0)
  try:
    eval_env = VecFrameStack(eval_env, n_stack=4)

    # Start the video at step=0 and record `video_length` steps
    eval_env = VecVideoRecorder(eval_env, video_folder=video_folder,
                                record_video_trigger=lambda step: step == 0,
                                video_length=video_length,
                                name_prefix=prefix)

    obs = eval_env.reset()
    for _ in range(video_length):
      action, _states = model.predict(obs)
      obs, _, _, _ = eval_env.step(action)
  finally:
    # Close the outermost environment even if the rollout fails, so the
    # recorder and the underlying environments are not left open.
    eval_env.close()

In [None]:
# Record a 2000-step rollout for each trained agent under its own file prefix.
for trained_model, video_prefix in (
    (model_gae_vanilla, 'a2c-breakout_gae_vanilla'),
    (model_gae, 'a2c-breakout_gae'),
    (model_nstep, 'a2c-breakout_nstep'),
):
  record_video(env_id=env_id, model=trained_model, video_length=2000, prefix=video_prefix)


Saving video to /content/gdrive/My Drive/MSEC/AIPI590/stable-baselines3/videos/a2c-breakout_gae-step-0-to-step-2000.mp4
Saving video to /content/gdrive/My Drive/MSEC/AIPI590/stable-baselines3/videos/a2c-breakout_nstep-step-0-to-step-2000.mp4


In [13]:
# Recorded files are named "<prefix>-step-<start>-to-step-<end>.mp4". The trailing
# "-" stops the 'a2c-breakout_gae' prefix from also matching the
# 'a2c-breakout_gae_vanilla' recordings, which would display them twice.
show_videos('videos', prefix='a2c-breakout_gae_vanilla-')
show_videos('videos', prefix='a2c-breakout_gae-')
show_videos('videos', prefix='a2c-breakout_nstep-')