# Advanced Reinforcement Learning - AI in Python

In [None]:
!apt-get install -y \
    libgl1-mesa-dev \
    libgl1-mesa-glx \
    libglew-dev \
    xvfb \
    libosmesa6-dev \
    software-properties-common \
    patchelf

!pip install \
    free-mujoco-py \
    gym==0.21 \
    gym[box2d] \
    pytorch-lightning==1.5.8 \
    optuna \
    pyvirtualdisplay \
    PyOpenGL \
    PyOpenGL-accelerate

In [None]:
from pyvirtualdisplay import Display
# Start a hidden (visible=False) 1400x900 virtual display.
# NOTE(review): presumably required so environments can render frames for
# video recording on a machine without a physical screen - confirm.
Display(visible=False, size=(1400, 900)).start()

In [None]:
import gym

from base64 import b64encode
from IPython.display import HTML
from gym.wrappers import RecordVideo, RecordEpisodeStatistics, FlattenObservation, FilterObservation

In [None]:
# Print the id of every entry returned by Gym's environment registry.
# The loop variable is the global name `env`, which later cells rebind.
for env in gym.envs.registry.all():
  print(env.id)

#### Create helper functions to record and visualize our environments

In [None]:
def display_video(episode=0, video_folder='/content/videos'):
  """Return an inline HTML video player for a recorded episode.

  Args:
    episode: Episode number used to build the file name
      ``rl-video-episode-{episode}.mp4``.
    video_folder: Directory that contains the recording. Defaults to
      ``/content/videos``, the location this function originally hard-coded.

  Returns:
    An ``HTML`` object whose ``<video>`` element embeds the file contents as
    a base64 ``data:`` URL.

  Raises:
    OSError: Propagated from ``open`` when the file is missing or unreadable.
  """
  video_path = f'{video_folder}/rl-video-episode-{episode}.mp4'
  # Read-only binary mode is sufficient (no write permission needed), and the
  # context manager guarantees the file handle is closed after reading.
  with open(video_path, "rb") as video_file:
    video_bytes = video_file.read()
  video_url = f"data:video/mp4;base64,{b64encode(video_bytes).decode()}"
  return HTML(f"<video width=600 controls><source src='{video_url}'></video>")

def test_environment(env):
  """Play one episode with randomly sampled actions and show its video.

  Args:
    env: Environment exposing ``reset()``, ``step(action)`` returning a
      4-tuple whose third item is the done flag, and ``action_space.sample()``.

  Returns:
    Whatever ``display_video()`` returns when called with its defaults.
  """
  env.reset()
  while True:
    random_action = env.action_space.sample()
    _observation, _reward, finished, _info = env.step(random_action)
    if finished:
      break
  # The environment is intentionally left open here (no env.close() call).
  return display_video()


def create_environment(name):
  """Create the Gym environment ``name`` wrapped for recording and statistics.

  The environment is wrapped first in ``RecordVideo`` (writing to
  ``./videos``) and then in ``RecordEpisodeStatistics``.

  Args:
    name: Environment id passed to ``gym.make``.

  Returns:
    The wrapped environment.
  """
  def _is_multiple_of_fifty(episode_index):
    # Trigger passed as `episode_trigger`: true for 0, 50, 100, ...
    return episode_index % 50 == 0

  recorded_env = RecordVideo(
      gym.make(name),
      video_folder='./videos',
      episode_trigger=_is_multiple_of_fifty,
  )
  return RecordEpisodeStatistics(recorded_env)

## Introduction to the tasks in this course

#### Lunar lander

In [None]:
# Build the wrapped LunarLander-v2 environment and play one random-action
# episode; the cell's last expression is the video player that is returned.
env = create_environment('LunarLander-v2')
test_environment(env)

In [None]:
# Summarise the current `env`: the observation space's shape, one random
# sample drawn from the observation space, and the action space's `n`
# attribute (NOTE(review): presumably the number of discrete actions - confirm).
print(f"""
  Observation dimensions: {env.observation_space.shape}.
  Sample observation: {env.observation_space.sample()}.
  Actions: {env.action_space.n}.
""")

#### Robotic arm: Pick and place the block

In [None]:
# Build the wrapped FetchPickAndPlaceDense-v1 environment and play one
# random-action episode; the cell's last expression is the returned video player.
env = create_environment('FetchPickAndPlaceDense-v1')
test_environment(env)

In [None]:
# Draw one random sample from the observation space of the current `env`.
# It is iterated and indexed by key below, so a dict-like sample is expected.
sample_observation = env.observation_space.sample()

print("Sample observation:")
# Print every key of the sample next to the value stored under it.
for key in sample_observation:
  print(key, sample_observation[key])

print("Sample action")
# Print one random sample drawn from the action space.
print(env.action_space.sample())

#### Robotic arm: Push the block to the target

In [None]:
# Build the wrapped FetchPushDense-v1 environment and play one random-action
# episode; the cell's last expression is the returned video player.
env = create_environment('FetchPushDense-v1')
test_environment(env)

In [None]:
# Draw one random sample from the observation space of the current `env`.
# It is iterated and indexed by key below, so a dict-like sample is expected.
sample_observation = env.observation_space.sample()

print("Sample observation:")
# Print every key of the sample next to the value stored under it.
for key in sample_observation:
  print(key, sample_observation[key])

print("Sample action")
# Print one random sample drawn from the action space.
print(env.action_space.sample())

#### Shadow hand: Manipulate block

In [None]:
# Build the wrapped HandManipulateBlockRotateXYZTouchSensorsDense-v1
# environment and play one random-action episode; the cell's last expression
# is the returned video player.
env = create_environment('HandManipulateBlockRotateXYZTouchSensorsDense-v1')
test_environment(env)

In [None]:
# Draw one random sample from the observation space of the current `env`.
# It is iterated and indexed by key below, so a dict-like sample is expected.
sample_observation = env.observation_space.sample()

print("Sample observation:")
# Print every key of the sample next to the value stored under it.
for key in sample_observation:
  print(key, sample_observation[key])

print("Sample action")
# Print one random sample drawn from the action space.
print(env.action_space.sample())

#### Shadow hand: Manipulate egg

In [None]:
# Build the wrapped HandManipulateEggFullDense-v0 environment and play one
# random-action episode; the cell's last expression is the returned video player.
env = create_environment('HandManipulateEggFullDense-v0')
test_environment(env)

In [None]:
# Draw one random sample from the observation space of the current `env`.
# It is iterated and indexed by key below, so a dict-like sample is expected.
sample_observation = env.observation_space.sample()

print("Sample observation:")
# Print every key of the sample next to the value stored under it.
for key in sample_observation:
  print(key, sample_observation[key])

print("Sample action")
# Print one random sample drawn from the action space.
print(env.action_space.sample())

#### Shadow hand: Manipulate pen

In [None]:
# Build the wrapped HandManipulatePenRotateDense-v0 environment and play one
# random-action episode; the cell's last expression is the returned video player.
env = create_environment('HandManipulatePenRotateDense-v0')
test_environment(env)

In [None]:
# Draw one random sample from the observation space of the current `env`.
# It is iterated and indexed by key below, so a dict-like sample is expected.
sample_observation = env.observation_space.sample()

print("Sample observation:")
# Print every key of the sample next to the value stored under it.
for key in sample_observation:
  print(key, sample_observation[key])

print("Sample action")
# Print one random sample drawn from the action space.
print(env.action_space.sample())

#### Shadow hand: Manipulate block (sparse reward)

In [None]:
# Build the wrapped HandManipulateBlockRotateXYZTouchSensors-v1 environment
# (the id without the "Dense" suffix) and play one random-action episode; the
# cell's last expression is the returned video player.
env = create_environment('HandManipulateBlockRotateXYZTouchSensors-v1')
test_environment(env)

In [None]:
# Draw one random sample from the observation space of the current `env`.
# It is iterated and indexed by key below, so a dict-like sample is expected.
sample_observation = env.observation_space.sample()

print("Sample observation:")
# Print every key of the sample next to the value stored under it.
for key in sample_observation:
  print(key, sample_observation[key])

print("Sample action")
# Print one random sample drawn from the action space.
print(env.action_space.sample())