<a href="https://colab.research.google.com/github/ankitabuntolia/DRL/blob/main/02_Technical_Tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

---
Before you start exploring this notebook, check whether GPU support is enabled.
To enable the GPU backend for your notebook, go to **Edit** → **Notebook Settings** and set **Hardware accelerator** to **GPU**.
GPU support is not required for this notebook, but it will be necessary for future exercises.

---


# Install OpenAI Gym and dependencies to render the environments

In [None]:
# Install some dependencies
# xvfb is presumably the backend of the pyvirtualdisplay Display started in the
# import cell; ffmpeg is invoked by concatenate_videos to merge the recordings.
!apt update
!apt-get install -y xvfb x11-utils ffmpeg
# NOTE(review): gym is installed unpinned, but the cells below rely on
# gym.wrappers.Monitor, env.seed() and the 4-tuple return of env.step() --
# presumably an older gym release is required; confirm and pin the version.
!pip install gym pyvirtualdisplay==0.2.* PyOpenGL==3.1.* PyOpenGL-accelerate==3.1.*
# Install some environments
!pip install gym[box2d]
!pip install gym[atari]
!pip install procgen
# Bonus PyBullet (open source alternative to MuJoCo)
!pip install stable-baselines3[extra] pybullet

# Import necessary modules

In [None]:
%matplotlib inline

# file handling and misc utilities
import os, shutil
import glob, io, base64
from tqdm.notebook import tqdm as tqdm

# Environment import and set logger level to display error only
import gym
from gym import logger as gymlogger, envs
gymlogger.set_level(40) #error only

# Monitor wrapper to capture videos
from gym.wrappers import Monitor

# PyBullet environment registration
import pybullet_envs

# Plotting and notebook imports
from IPython.display import HTML, clear_output
from IPython import display

# start virtual display
# A hidden (visible=0) 640x480 display is created and started here --
# presumably so the environments can render without a physical screen
# (TODO confirm against the pyvirtualdisplay docs).
from pyvirtualdisplay import Display
pydisplay = Display(visible=0, size=(640, 480))
pydisplay.start()

# Utility methods to capture and show videos

In [None]:
"""
Utility functions to enable video recording of gym environment and displaying it
"""
def wrap_env(env_id, env):
    """
    Attach a video-recording Monitor to `env`.

    Recordings are written to ./videos/<env_id>. A directory left there by an
    earlier run is removed first, so it only holds files from this run.

    Returns a tuple (wrapped environment, output directory).
    """
    video_dir = "./videos/{}".format(env_id)
    # start from an empty output directory
    if os.path.exists(video_dir):
        shutil.rmtree(video_dir)
    os.makedirs(video_dir, exist_ok=True)
    return Monitor(env, video_dir, force=True), video_dir

def _run_ffmpeg(args):
    """Run ffmpeg with the given argument list; return True if it exited with 0."""
    # local import: the notebook's import cell does not import subprocess
    import subprocess

    try:
        # Argument list + no shell: paths with spaces or shell metacharacters
        # are passed through untouched.
        return subprocess.run(["ffmpeg", *args], check=False).returncode == 0
    except OSError:
        # ffmpeg is not installed or cannot be executed
        return False


def concatenate_videos(video_dir):
    """
    Merge all mp4 videos in video_dir into a single 'merged_video.mp4'.

    Each clip is first remuxed to an MPEG-TS file, the TS files are joined
    with ffmpeg's concat protocol, and on success the individual clips are
    deleted. Clips are merged in sorted filename order so the result is
    deterministic.

    Args:
        video_dir: directory containing the recorded *.mp4 files.

    Returns:
        Path of the merged video, or None when there is nothing to merge or
        ffmpeg failed (in which case the original clips are left untouched).
        If the directory only holds a previously merged video, its path is
        returned as is.
    """
    outfile = os.path.join(video_dir, 'merged_video.mp4')
    # Never feed an earlier merge result back in as an input clip.
    mp4list = sorted(
        f for f in glob.glob(os.path.join(video_dir, '*.mp4')) if f != outfile
    )
    if not mp4list:
        return outfile if os.path.exists(outfile) else None

    tmpfiles = []
    try:
        # remux every clip to an intermediate transport stream
        for idx, src in enumerate(mp4list, start=1):
            tmp = os.path.join(video_dir, f"temp{idx}.ts")
            tmpfiles.append(tmp)
            converted = _run_ffmpeg([
                "-y", "-i", src,
                "-c", "copy", "-bsf:v", "h264_mp4toannexb", "-f", "mpegts", tmp,
            ])
            if not converted:
                return None
        # join the transport streams; -y overwrites a stale merged file
        merged = _run_ffmpeg([
            "-y", "-i", "concat:" + "|".join(tmpfiles),
            "-c", "copy", "-bsf:a", "aac_adtstoasc", outfile,
        ])
    finally:
        # cleanup of intermediates, whether or not ffmpeg succeeded
        for tmp in tmpfiles:
            if os.path.exists(tmp):
                os.remove(tmp)

    if not merged:
        return None
    # the clips are only removed once the merged file has been written
    for src in mp4list:
        os.remove(src)
    return outfile

def show_video(video_dir):
    """
    Show video in the output of a code cell.

    All recordings in `video_dir` are merged via concatenate_videos and the
    result is embedded as a base64 data URI in an HTML <video> element.
    Prints "Could not find video" when no merged file is available.
    """
    # merge all videos
    mp4 = concatenate_videos(video_dir)
    # Also check the file really exists: a path alone is always truthy.
    if not (mp4 and os.path.exists(mp4)):
        print("Could not find video")
        return
    # read-only access is enough; the context manager closes the handle
    with open(mp4, 'rb') as video_file:
        encoded = base64.b64encode(video_file.read())
    display.display(HTML(data='''<video alt="test" autoplay 
                    loop controls style="height: 400px;">
                    <source src="data:video/mp4;base64,{0}" type="video/mp4" />
                </video>'''.format(encoded.decode('ascii'))))

# (Optional) Setup Google Drive mount to store your results

In [None]:
# Set to False to skip mounting Google Drive (the import below needs google.colab).
mount_google_drive = True
if mount_google_drive:
    from google.colab import drive
    drive.mount('/content/drive')
    # A bare expression inside an `if` block is not echoed by the notebook,
    # so print the listing explicitly to confirm the mount worked.
    print(os.listdir('/content/drive/My Drive/'))

# List all registered environments

In [None]:
# Gather the id of every spec held in gym's registry ...
envids = [registered.id for registered in envs.registry.all()]
# ... and print them one per line in alphabetical order.
for envid in sorted(envids):
    print(envid)

## Create an agent

Since we are interested in the environments themselves, we use a very simple agent that chooses a random action in every state.

In [None]:
class RandomAgent:
    """Baseline agent that ignores what it observes and acts at random."""

    def __init__(self, action_space):
        # Space the actions are drawn from; only its sample() method is used.
        self.action_space = action_space

    def act(self, observation):
        """Return a randomly sampled action; `observation` is not used."""
        space = self.action_space
        return space.sample()

# Interacting with the environment

In [None]:
def run_environment(env_id, n_episodes=10):
    """
    Let a RandomAgent play `n_episodes` episodes and show the recorded video.

    Prints the observation and action space, wraps the environment with
    wrap_env for video recording, runs the episodes, prints the average
    episode reward and finally displays the video via show_video.

    Args:
        env_id: id passed to gym.make, e.g. "CartPole-v1".
        n_episodes: number of episodes to run.
    """
    # Make the environment by calling gym.make with the name of our
    # requested environment. You can directly instantiate an environment
    # as well, e.g. if you made your own.
    if "procgen" in env_id:
        env = gym.make(env_id, render_mode="rgb_array")
        # NOTE(review): presumably needed so the Monitor wrapper accepts the
        # procgen env for recording -- confirm.
        env.metadata["render.modes"] = ["human", "rgb_array"]
    else:
        env = gym.make(env_id)

    # Print state and action space
    print(f"State Space:  {env.observation_space}")
    print(f"Action Space: {env.action_space}")

    # we wrap our environment to record the video
    # often, wrappers are used to implement frame stacking or skipping,
    # sticky actions or parallelization
    env, video_dir = wrap_env(env_id, env)

    cum_reward = 0
    try:
        # now we seed the environment to get (somewhat) reproducible results
        if "procgen" not in env_id:
            env.seed(42)

        # create our agent
        agent = RandomAgent(env.action_space)

        # let the agent interact with the environment
        for _ in tqdm(range(n_episodes)):
            # Reset at the start of each episode, so no extra episode is
            # begun after the last one has finished.
            observation = env.reset()
            done = False

            while not done:
                # get the next action from our agent
                action = agent.act(observation)

                # perform that action in the environment
                observation, reward, done, _info = env.step(action)
                cum_reward += reward
    finally:
        # close the environment even if an episode raised
        env.close()

    # print the average reward per episode (guard against n_episodes == 0)
    avg_reward = cum_reward / n_episodes if n_episodes > 0 else 0.0
    print(f"Avg. Reward: {avg_reward}")

    # now lets see what the agent did
    show_video(video_dir)

## Control

In [None]:
# Random agent on CartPole-v1 for the default 10 episodes.
run_environment("CartPole-v1")

In [None]:
# Random agent on MountainCar-v0 for the default 10 episodes.
run_environment("MountainCar-v0")

## Box-2D

In [None]:
# Random agent on LunarLanderContinuous-v2 for the default 10 episodes.
run_environment("LunarLanderContinuous-v2")

## Atari

In [None]:
# Random agent on Breakout-v4 for the default 10 episodes.
run_environment("Breakout-v4")

In [None]:
# Random agent on Boxing-v4 for the default 10 episodes.
run_environment("Boxing-v4")

## ProcGen

In [None]:
# Random agent on procgen CoinRun; only 2 episodes instead of the default 10.
run_environment("procgen:procgen-coinrun-v0", n_episodes=2)

In [None]:
# Random agent on procgen Chaser for the default 10 episodes.
run_environment("procgen:procgen-chaser-v0")

In [None]:
# Random agent on procgen BossFight for the default 10 episodes.
run_environment("procgen:procgen-bossfight-v0")

## PyBullet

In [None]:
# Random agent on HalfCheetahBulletEnv-v0; a single episode only.
run_environment("HalfCheetahBulletEnv-v0", n_episodes=1)