### **Setting up the environment**

* __Downgrading tensorflow for `stable-baselines` support.__
* __Installing `highway-env` & `stable-baselines`.__
* __Setting up virtual display for google colab.__
* __Importing plotting and progress measurement packages.__



In [None]:
# Downgrading the tensorflow version for compatibility with the stable-baselines package.
# stable-baselines hasn't been made compatible with Tensorflow-2.0,
# hence it can't be used directly with the Tensorflow version installed in Colab.
!pip install tensorflow==1.15.3
# After this, restart the runtime and print the tensorflow version.

Collecting tensorflow==1.15.3
[?25l  Downloading https://files.pythonhosted.org/packages/02/36/9a02e27f0ec248b676a380ffe910c1858e3af3027c0d4d513dd0b56a5613/tensorflow-1.15.3-cp36-cp36m-manylinux2010_x86_64.whl (110.5MB)
[K     |████████████████████████████████| 110.5MB 36kB/s 
Collecting tensorboard<1.16.0,>=1.15.0
[?25l  Downloading https://files.pythonhosted.org/packages/1e/e9/d3d747a97f7188f48aa5eda486907f3b345cd409f0a0850468ba867db246/tensorboard-1.15.0-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 54.8MB/s 
Collecting tensorflow-estimator==1.15.1
[?25l  Downloading https://files.pythonhosted.org/packages/de/62/2ee9cd74c9fa2fa450877847ba560b260f5d0fb70ee0595203082dafcc9d/tensorflow_estimator-1.15.1-py2.py3-none-any.whl (503kB)
[K     |████████████████████████████████| 512kB 54.2MB/s 
Collecting gast==0.2.2
  Downloading https://files.pythonhosted.org/packages/4e/35/11749bf99b2d4e3cceb4d55ca22590b0d7c2c62b9de38ac4a4a7f4687421/gast-0.2.2.tar.gz
Collec

In [None]:
# Confirming that the TensorFlow version has been downgraded from Tensorflow-2.0
# (run this after the tensorflow==1.15.3 install and the runtime restart).
import tensorflow as tf
print(tf.__version__)
# Expected output: 1.15.3

1.15.3


In [None]:
# Package download statements.
# Note 1: This time we also use the ffmpeg package for handling video-recording tasks.
# Note 2: The package versions are generic; no specific package versions need to be downloaded.
!pip install git+https://github.com/eleurent/highway-env
!apt-get update
!pip install pyvirtualdisplay
!apt-get install -y xvfb python-opengl ffmpeg
# Importing gym and highway_env for loading different environment scenarios.
import gym
import highway_env

# Agent related import statements.
!pip install stable-baselines==2.10.0
# HER: stands for Hindsight Experience Replay. In stable-baselines it is a
# wrapper for algorithms like TD3, SAC, DDPG etc.

# Note: HER works only with goal environments, i.e. gym envs that inherit from gym.GoalEnv.

# Agent related pip install command.
!pip install stable-baselines[mpi] --upgrade
# Restart the runtime one more time 'ONLY IF' the DDPG import fails,
# so that the mpi4py package setup can complete.

# SAC: stands for Soft Actor-Critic. This algorithm optimizes a stochastic policy
# with an off-policy approach. The policy is trained to maximize a trade-off between
# expected reward and entropy, i.e. the randomness of the policy.
from stable_baselines import HER, DQN, SAC, DDPG, TD3

# tqdm: gives progress bars to loops.
from tqdm.notebook import trange

Collecting git+https://github.com/eleurent/highway-env
  Cloning https://github.com/eleurent/highway-env to /tmp/pip-req-build-x0861r8d
  Running command git clone -q https://github.com/eleurent/highway-env /tmp/pip-req-build-x0861r8d
Collecting pygame
[?25l  Downloading https://files.pythonhosted.org/packages/01/da/4ff439558641a26dd29b04c25947e6c0ace041f56b2aa2ef1134edab06b8/pygame-2.0.1-cp36-cp36m-manylinux1_x86_64.whl (11.8MB)
[K     |████████████████████████████████| 11.8MB 270kB/s 
Building wheels for collected packages: highway-env
  Building wheel for highway-env (setup.py) ... [?25l[?25hdone
  Created wheel for highway-env: filename=highway_env-1.0.dev0-cp36-none-any.whl size=80900 sha256=07a36e9548a41006e4394891f1df62cc93597a427097c5b8c7fdb6fce4fdd6b5
  Stored in directory: /tmp/pip-ephem-wheel-cache-bf6dubfj/wheels/e6/10/d8/02a077ca221bbac1c6fc12c1370c2f773a8cd602d4be3df0cc
Successfully built highway-env
Installing collected packages: pygame, highway-env
Successfully inst

In [None]:
# Similar to the earlier rendering procedures: creating a virtual display.
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display
from gym.wrappers import Monitor
from pathlib import Path
import base64

# Create and start a virtual display of 1366x768 for rendering the highway-env
# environment; visible=0 is passed so that no window is shown.
# NOTE(review): presumably required because Colab has no physical screen — confirm.
display = Display(visible=0, size=(1366, 768))
display.start()

# These functions are also available in the utils.py file under the '/highway-env/scripts/'
# directory of the highway-env GitHub repository. They can be imported directly with the
# commands below, but we define them here to fix a specific size for the recorded videos.
# Note: the commands for using the original functions directly are given below.
# Also, we have changed these functions slightly, so refer to the documentation.

# !git clone https://github.com/eleurent/highway-env.git
# import sys
# sys.path.insert(0, './highway-env/scripts/')
# from utils import record_videos, show_videos, capture_intermediate_frames

def wrap_env(env):
    '''
    Wrap `env` in a gym Monitor so that the agent's interactions are recorded as video.

    The Monitor writes to './video' with force=True, and the video callback
    answers True for every episode index, so each episode is recorded.
    Returns the Monitor-wrapped environment.
    '''
    def record_every_episode(episode):
        # The episode index is ignored: always request a recording.
        return True

    return Monitor(env, './video', force=True, video_callable=record_every_episode)


def show_video():
    '''
    Read the stored videos and display them inline with the code cells.

    Every '*.mp4' file found in './video' is base64-encoded and embedded in an
    HTML <video> tag; the tags are joined with <br> and shown in one display call.
    The files are processed in sorted path order: Path.glob() yields entries in
    arbitrary order, which would otherwise show the episodes shuffled.
    '''
    html = []
    for mp4 in sorted(Path('./video').glob("*.mp4")):
        video_b64 = base64.b64encode(mp4.read_bytes())
        html.append('''<video alt="{}" autoplay
                      loop controls style="height: 212px;">
                      <source src="data:video/mp4;base64,{}" type="video/mp4" />
                 </video>'''.format(mp4, video_b64.decode('ascii')))
    ipythondisplay.display(ipythondisplay.HTML(data="<br>".join(html)))


def capture_intermediate_frames(env):
    '''
    Send the frames rendered by the environment to the Monitor's video recorder.

    Sets `automatic_rendering_callback` on `env.unwrapped` to the
    `capture_frame` callable of `env.video_recorder`. Returns nothing.
    '''
    frame_recorder = env.video_recorder.capture_frame
    env.unwrapped.automatic_rendering_callback = frame_recorder

### **Model training for Soft Actor Critic (SAC) agent**

In [None]:
# Environment the agent is trained on.
env = gym.make("parking-v0")

# HER wraps the SAC model class. 'MlpPolicy' selects the MLP policy, and
# policy_kwargs sets its layer sizes to [256, 256, 256].
her_kwargs = {
    'n_sampled_goal': 4,
    'goal_selection_strategy': 'future',
    'verbose': 1,
    'buffer_size': 1000000,
    'learning_rate': 1e-3,
    'gamma': 0.9,
    'batch_size': 256,
    'policy_kwargs': {'layers': [256, 256, 256]},
}
model = HER('MlpPolicy', env, SAC, **her_kwargs)

# total_timesteps: the number of timesteps to train on (32768, roughly 33k).
model.learn(32768)





Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use keras.layers.Dense instead.






Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where




------------------------------------------
| current_lr              | 0.001        |
| ent_coef                | 0.0042772014 |
| ent_coef_loss           | -0.47107145  |
| entropy                 | 1.3999009    |
| episodes                | 100          |
| fps                     | 20           |
| mean 100 episode reward | -47.3        |
| n_updates               | 9590         |
| policy_loss             | 2.6379743    |
| qf1_loss                | 0.008341698  |
| qf2_loss                | 0.008334148  |
| success rate            | 0.0505       |
| time_elapsed            | 474          |
| total timesteps         | 9689         |
| value_loss              | 0.009654267  |
-------------

<stable_baselines.sac.sac.SAC at 0x7f245a556f98>

In [None]:
# If you are interested in saving the trained model,
# use the 'save' function to save it and 'load'
# to load it back into memory.
# model.save("./her_model_parking")
# We can also delete the existing model instance if we want to reload the saved one.
# del model
# model = HER.load('./her_model_parking', env=env)

### **Displaying output for the trained SAC agent**

In [None]:
# Fresh parking environment wrapped in a Monitor so that every episode is recorded.
env = wrap_env(gym.make("parking-v0"))

for episode in trange(5, desc="Output Episodes"):
    obs, done = env.reset(), False
    # Register the frame-capture callback for this episode. It has to be an
    # actual call: the bare function name is a no-op expression and records
    # no intermediate frames. It is done inside the loop, after reset(), so
    # that every episode is captured.
    # NOTE(review): presumably the Monitor replaces its video_recorder on
    # each reset — confirm.
    capture_intermediate_frames(env)

    # Roll out the trained policy until the episode terminates.
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)

# Close the Monitor-wrapped environment once all episodes are done.
env.close()


HBox(children=(FloatProgress(value=0.0, description='Output Episodes', max=5.0, style=ProgressStyle(descriptio…




In [None]:
# Outputting all the goal completion videos inline
# (embeds every .mp4 file found in './video').
show_video()

### **Downloading the created agent videos**

In [None]:
# Zipping the video folder of the trained SAC agent.
!zip -r /content/file.zip /content/video
# downloading the file resource.
from google.colab import files
files.download("/content/file.zip")

  adding: content/video/ (stored 0%)
  adding: content/video/openaigym.video.1.101.video000004.mp4 (deflated 13%)
  adding: content/video/openaigym.video.1.101.video000002.meta.json (deflated 60%)
  adding: content/video/openaigym.video.1.101.video000001.meta.json (deflated 60%)
  adding: content/video/openaigym.episode_batch.1.101.stats.json (deflated 43%)
  adding: content/video/openaigym.video.1.101.video000003.meta.json (deflated 60%)
  adding: content/video/openaigym.video.1.101.video000002.mp4 (deflated 12%)
  adding: content/video/openaigym.video.1.101.video000004.meta.json (deflated 60%)
  adding: content/video/openaigym.video.1.101.video000000.mp4 (deflated 6%)
  adding: content/video/openaigym.video.1.101.video000001.mp4 (deflated 7%)
  adding: content/video/openaigym.video.1.101.video000003.mp4 (deflated 16%)
  adding: content/video/openaigym.video.1.101.video000000.meta.json (deflated 60%)
  adding: content/video/openaigym.manifest.1.101.manifest.json (deflated 72%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>