### **Setting up the environment**

* __Downgrading tensorflow for `stable-baselines` support.__
* __Installing `highway-env` & `stable-baselines3`.__
* __Setting up virtual display for google colab.__
* __Importing plotting and progress measurement packages.__



In [None]:
# Package download statements.
# Note 1: This time we also use the ffmpeg package for handling video-recording-related tasks.
# Note 2: The package versions are generic; no specific package versions need to be downloaded.
!apt-get update >& /dev/null
!pip install pyvirtualdisplay >& /dev/null
!apt-get install -y xvfb python-opengl ffmpeg >& /dev/null
!pip install highway_env
# Importing gym and highway_env for loading different environment scenarios.
import gym
import highway_env

# Agent related import statements.
!pip install stable-baselines3
# HER: stands for Hindsight Experience Replay; in stable-baselines3 it is a
# wrapper class for algorithms like TD3, SAC, DDPG etc.

# Note: HER works only on goal environments, i.e. only when the gym env inherits from gym.GoalEnv.

# SAC: stands for Soft Actor-Critic. This algorithm optimizes a stochastic policy
# with an off-policy approach. The policy is trained to maximize a trade-off between
# expected reward and entropy, i.e. the randomness of the policy.
from stable_baselines3 import HER, DQN, SAC, DDPG, TD3

# tqdm: gives progress bars to loops.
from tqdm.notebook import trange

In [None]:
# Similar to the earlier rendering procedure: creating a virtual display.
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display
from gym.wrappers import Monitor
from pathlib import Path
import base64

# Declaring the display screen size for rendering the highway-env environment.
# A pyvirtualdisplay Display of 1366x768 is created with visible=0 and then started.
display = Display(visible=0, size=(1366, 768))
display.start()

# These functions are also available in the utils.py file of the '/highway-env/scripts/'
# directory in the highway-env GitHub repository. They can be imported directly with the
# commands below, but we define them here to fix a specific size for the recorded videos.
# Note: the commands for using these functions directly are stated below.
# Also, we have changed these functions slightly; therefore, refer to the documentation.

# !git clone https://github.com/eleurent/highway-env.git
# import sys
# sys.path.insert(0, './highway-env/scripts/')
# from utils import record_videos, show_videos, capture_intermediate_frames

def wrap_env(env):
    '''
    Wrap `env` in a gym Monitor that records the agent's interactions as video.

    The Monitor writes into './video' (with force=True) and is given a
    video callable that answers True for every episode, so each episode
    is recorded. Returns the Monitor-wrapped environment.
    '''
    def record_every_episode(episode):
        # The episode id is ignored on purpose: no episode is skipped.
        return True

    video_dir = './video'
    return Monitor(env, video_dir, force=True, video_callable=record_every_episode)


def show_video():
    '''
    Read the stored videos and display them inline with the code cells.

    Every '*.mp4' file found in './video' is base64-encoded and embedded in
    an HTML <video> tag; the tags are joined with <br> and rendered with a
    single IPython display call. Files are visited in sorted path order so
    the clips are shown in a stable, reproducible sequence.
    '''
    html = []
    # Path.glob yields entries in arbitrary filesystem order; sorting keeps the
    # displayed clips in a deterministic (name-sorted) order across runs.
    for mp4 in sorted(Path('./video').glob("*.mp4")):
        video_b64 = base64.b64encode(mp4.read_bytes())
        html.append('''<video alt="{}" autoplay
                      loop controls style="height: 212px;">
                      <source src="data:video/mp4;base64,{}" type="video/mp4" />
                 </video>'''.format(mp4, video_b64.decode('ascii')))
    ipythondisplay.display(ipythondisplay.HTML(data="<br>".join(html)))


def capture_intermediate_frames(env):
    '''
    Forward rendered frames to the Monitor's video recorder.

    Assigns the `capture_frame` method of `env.video_recorder` to the
    `automatic_rendering_callback` attribute of `env.unwrapped`.
    '''
    frame_sink = env.video_recorder.capture_frame
    base_env = env.unwrapped
    base_env.automatic_rendering_callback = frame_sink

### **Model training for Soft Actor Critic (SAC) agent**

In [None]:
# Parking scenario used for training the agent.
env = gym.make("parking-v0")

# HER is given SAC (an off-policy RL algorithm) as the model class to wrap.
# Stable-baselines3 needs the max episode length to be set (max_episode_length).
# 'MlpPolicy' selects an MLP-based actor-critic policy; the net_arch entry in
# policy_kwargs requests three hidden layers of 256 units each.
model = HER(
    'MlpPolicy',
    env,
    SAC,
    # Hindsight replay settings.
    n_sampled_goal=4,
    goal_selection_strategy='future',
    online_sampling=True,
    max_episode_length=100,
    # Learning settings.
    buffer_size=int(1e6),
    batch_size=256,
    learning_rate=1e-3,
    gamma=0.95,
    policy_kwargs=dict(net_arch=[256, 256, 256]),
    verbose=1,
)
# The argument is total_timesteps: the number of timesteps to train on
# (32768, i.e. approximately 33k).
model.learn(32768)

In [None]:
# If you are interested in saving the trained model,
# use the 'save' function to save the model & 'load'
# to load the model back into memory.
# model.save("./her_model_parking")
# We can also delete the existing model instance if we want to use the saved instance.
# del model
# model = HER.load('./her_model_parking', env=env)

### **Displaying output for the trained SAC agent**

In [None]:
# Fresh parking environment wrapped so that every episode is recorded to video.
env = wrap_env(gym.make("parking-v0"))

try:
    for episode in trange(5, desc="Output Episodes"):
        obs, done = env.reset(), False
        # capture_intermediate_frames is called inside the loop, after each
        # reset, so that multiple iterations of goal completion are captured
        # by the Monitor instance. It must actually be *called* with env:
        # naming the function without parentheses is a no-op expression.
        # NOTE(review): presumably the Monitor creates a new video_recorder on
        # every reset, which is why this is repeated per episode - confirm.
        capture_intermediate_frames(env)

        # Roll out the trained policy until the episode terminates.
        while not done:
            action, _ = model.predict(obs)
            obs, reward, done, info = env.step(action)
finally:
    # Always close the wrapped env, even if the rollout is interrupted.
    env.close()


In [None]:
# Outputting all the goal completion videos.
show_video()

### **Downloading the created agent videos**

In [None]:
# Zipping the video folder for the given SAC agent.
!zip -r /content/file.zip /content/video
# downloading the file resource.
from google.colab import files
files.download("/content/file.zip")