# Acme: Quickstart
# <div align="left">[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deepmind/acme/examples/quickstart.ipynb)</div>

This is a quick guide to installing Acme and a very simple example of running a D4PG agent.

## Installation

In the first few cells we'll start by installing all of the necessary dependencies (and a few optional ones).

In [0]:
#@title Install necessary dependencies.

# Install the core Acme package, then its optional extras one at a time:
# `reverb`, `tf`, and `envs`.
!pip install dm-acme
!pip install dm-acme[reverb]
!pip install dm-acme[tf]
!pip install dm-acme[envs]

# Clear the cell output so the pip logs above do not clutter the notebook.
from IPython.display import clear_output
clear_output()

### Install dm_control

The next cell installs the environments provided by `dm_control`, which requires an institutional MuJoCo license. This step is optional: without it you won't be able to use the `dm_cartpole` environment below, but you can still follow this colab using the `gym` environments. To install `dm_control`, expand the following cell, paste the contents of your license file into `mjkey`, and run the cell.

Alternatively, Colab supports using a Jupyter kernel on your local machine which can be accomplished by following the guidelines here: https://research.google.com/colaboratory/local-runtimes.html. This will allow you to install `dm_control` by following instructions in https://github.com/deepmind/dm_control and using a personal MuJoCo license.


In [0]:
#@title Add your License
#@test {"skip": true}
# Paste the contents of your MuJoCo license key between the triple quotes.
# Surrounding whitespace is stripped before the key is written to disk below.
mjkey = """
""".strip()

# Directory the MuJoCo binaries and license are placed in. The string keeps a
# literal "$HOME"; presumably the shell expands it once the value is
# interpolated into the `!` commands below -- TODO confirm.
mujoco_dir = "$HOME/.mujoco"

# Install OpenGL dependencies
!apt-get update && apt-get install -y --no-install-recommends \
  libgl1-mesa-glx libosmesa6 libglew2.0

# Get MuJoCo binaries
# The archive is downloaded as mujoco.zip and unpacked into `mujoco_dir`,
# overwriting existing files without prompting (`-o`).
!wget -q https://www.roboti.us/download/mujoco200_linux.zip -O mujoco.zip
!unzip -o -q mujoco.zip -d "$mujoco_dir"

# Copy over MuJoCo license
# Writes the `mjkey` string defined above to mjkey.txt inside `mujoco_dir`.
!echo "$mjkey" > "$mujoco_dir/mjkey.txt"

# Install dm_control
!pip install dm_control

# Configure dm_control to use the OSMesa rendering backend
%env MUJOCO_GL=osmesa

# Check that the installation succeeded by loading a task and rendering it once.
try:
  from dm_control import suite
  env = suite.load('cartpole', 'swingup')
  pixels = env.physics.render()
except Exception as e:
  # Raise the user-facing hint as the primary error and chain the original
  # exception as its cause, so the traceback reports both in the right order.
  raise RuntimeError(
      'Something went wrong during installation. Check the shell output above '
      'for more information.') from e
else:
  # Success: hide the installation logs and drop the temporary names so they
  # do not leak into later cells.
  from IPython.display import clear_output
  clear_output()
  del suite, env, pixels

## Import Modules

Now we can import all the relevant modules.

In [0]:
#python3

%%capture
import copy
import IPython


from acme import environment_loop
from acme import networks
from acme.adders import reverb as adders
from acme.agents import actors_tf2 as actors
from acme.datasets import reverb as datasets
from acme.wrappers import gym_wrapper
from acme import specs
from acme import wrappers
from acme.agents import d4pg
from acme.agents import agent
from acme.utils import tf2_utils
from acme.utils import loggers

import gym
import dm_env
import matplotlib.pyplot as plt
import numpy as np
import reverb
import sonnet as snt
import tensorflow as tf

# Import dm_control if it exists. It is optional: `suite` is only used by the
# `dm_cartpole` environment, so a failed import is deliberately ignored.
try:
  from dm_control import suite
except (OSError, ModuleNotFoundError):
  # Several exception types must be given as a parenthesized tuple in Python 3;
  # the bare comma form is Python 2 syntax.
  pass


## Load an environment

We can now load an environment. In what follows we'll create an environment and grab the environment's specifications.

In [0]:
environment_name = 'gym_mountaincar'  # @param ['dm_cartpole', 'gym_mountaincar']

# Build the selected environment wrapped in `SinglePrecisionWrapper`, and
# define a matching `render(env)` helper that returns one rendered frame.
if 'dm_cartpole' in environment_name:
  environment = wrappers.SinglePrecisionWrapper(
      suite.load('cartpole', 'balance'))

  def render(env):
    """Render the environment's physics from camera 0."""
    physics = env._physics  # pylint: disable=protected-access
    return physics.render(camera_id=0)

elif 'gym_mountaincar' in environment_name:
  environment = wrappers.SinglePrecisionWrapper(
      gym_wrapper.GymWrapper(gym.make('MountainCarContinuous-v0')))

  def render(env):
    """Render the underlying gym environment in 'rgb_array' mode."""
    gym_env = env.environment
    return gym_env.render(mode='rgb_array')

else:
  raise ValueError(f'Unknown environment: {environment_name}.')

# Derive the environment spec used to size the agent's networks.
environment_spec = specs.make_environment_spec(environment)


## Create a D4PG agent

In [0]:
#@title Build agent networks

# Total number of action dimensions, taken from the shape of the action spec.
action_spec = environment_spec.actions
num_dimensions = np.prod(action_spec.shape, dtype=int)

# Shared observation network; here simply a state-less operation.
observation_network = tf2_utils.batch_concat

# Deterministic policy network: the layers below are applied in sequence.
policy_layers = [
    networks.LayerNormMLP((256, 256, 256), activate_final=True),
    networks.NearZeroInitializedLinear(num_dimensions),
    networks.TanhToSpec(action_spec),
]
policy_network = snt.Sequential(policy_layers)

# Distributional critic network. The multiplexer comes first and concatenates
# the observations/actions before they reach the MLP and the value head.
critic_layers = [
    networks.CriticMultiplexer(),
    networks.LayerNormMLP((512, 512, 256), activate_final=True),
    networks.DiscreteValuedHead(vmin=-150., vmax=150., num_atoms=51),
]
critic_network = snt.Sequential(critic_layers)


In [0]:
# Terminal logger that receives the agent's diagnostics.
agent_logger = loggers.TerminalLogger(time_delta=10, label='agent')

# Assemble the D4PG agent from the environment spec and the networks built
# above; checkpointing is switched off.
agent = d4pg.D4PG(
    environment_spec=environment_spec,
    observation_network=observation_network,
    policy_network=policy_network,
    critic_network=critic_network,
    checkpoint=False,
    logger=agent_logger,
)

## Run a training loop

In [0]:
# Terminal logger for the diagnostics produced by the environment loop.
env_loop_logger = loggers.TerminalLogger(time_delta=10, label='env_loop')

# Connect the agent to the environment and run it for five episodes.
env_loop = environment_loop.EnvironmentLoop(
    environment, agent, logger=env_loop_logger)
env_loop.run(num_episodes=5)

## (Optional) Visualize an evaluation loop


In [0]:
# Install and import the necessary dependencies for visualization

# System packages: xvfb and ffmpeg.
!sudo apt-get install -y xvfb ffmpeg
# Python packages; gym and pyglet are pinned to specific versions.
!pip install 'gym==0.10.11'
!pip install imageio
!pip install PILLOW
!pip install 'pyglet==1.3.2'
!pip install pyvirtualdisplay

import pyvirtualdisplay
import imageio
import base64

# Set up a virtual display for rendering OpenAI gym environments.
display = pyvirtualdisplay.Display(visible=0, size=(1400, 900)).start()

# Hide the installation logs. NOTE(review): this relies on `clear_output`
# having been imported by an earlier cell -- it is not imported in this one.
clear_output()

In [0]:
def display_video(frames, filename='temp.mp4'):
  """Save frames to a video file and return an inline HTML player for it.

  Args:
    frames: Iterable of image frames; each one is handed to the imageio
      writer's `append_data`.
    filename: Path of the video file to write. The file is read back after
      writing in order to embed it in the returned HTML.

  Returns:
    An `IPython.display.HTML` object whose `<video>` tag embeds the file as a
    base64-encoded data URI.
  """
  # Write the frames out at 60 frames per second.
  with imageio.get_writer(filename, fps=60) as writer:
    for frame in frames:
      writer.append_data(frame)
  # Read the encoded video back. The context manager closes the file handle
  # deterministically instead of leaving it to the garbage collector.
  with open(filename, 'rb') as video_file:
    b64_video = base64.b64encode(video_file.read())
  video_tag = ('<video  width="320" height="240" controls alt="test" '
               'src="data:video/mp4;base64,{0}">').format(b64_video.decode())
  return IPython.display.HTML(video_tag)

In [0]:
# Roll the agent out in the environment for a fixed number of steps, capturing
# one rendered frame before every action.
num_steps = 100
timestep = environment.reset()
frames = []

for _ in range(num_steps):
  frame = render(environment)
  frames.append(frame)
  action = agent.select_action(timestep.observation)
  timestep = environment.step(action)

# Encode the collected frames as a video and show it inline.
display_video(np.array(frames))