Copyright 2018 The Dopamine Authors.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

# Dopamine: How to create and train a custom agent

This colab demonstrates how to create a variant of a provided agent (Example 1) and how to create a new agent from
scratch (Example 2).

Run all the cells below in order.

When running the "Install necessary packages" cell below, you may see a notice indicating that you need to restart your runtime. If so, restart it, and then continue running the cells from "Necessary imports and globals" onward.

In [None]:
# !sudo apt update
# !sudo apt install -y python3-dev python3-pip build-essential libasound2-dev \
#                    libsdl2-dev libsdl2-image-dev libsdl2-mixer-dev \
#                    libsdl2-ttf-dev libsmpeg-dev libportmidi-dev libfreetype6-dev
!sudo apt update
!sudo apt install -y python3-dev python3-pip build-essential cmake \
                    libopenmpi-dev libglew-dev libgl1-mesa-dev libosmesa6-dev \
                    patchelf ffmpeg xvfb \
                    libsdl2-dev libsdl2-image-dev libsdl2-mixer-dev \
                    libsdl2-ttf-dev libsmpeg-dev libportmidi-dev libfreetype6-dev
!sudo apt-get install -y build-essential python3-dev python3-pip python3-venv
!sudo apt-get install -y libsdl2-dev libsdl2-image-dev libsdl2-mixer-dev libsdl2-ttf-dev


In [None]:
# @title Install necessary packages.
!pip install -U dopamine-rl
!pip install -U gymnasium
!pip install -U ale_py

Collecting baselines
  Using cached baselines-0.1.5-py3-none-any.whl
Collecting joblib (from baselines)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting zmq (from baselines)
  Using cached zmq-0.0.0-py3-none-any.whl
Collecting dill (from baselines)
  Using cached dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Collecting progressbar2 (from baselines)
  Using cached progressbar2-4.5.0-py3-none-any.whl.metadata (16 kB)
Collecting mpi4py (from baselines)
  Using cached mpi4py-4.0.3.tar.gz (466 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting click (from baselines)
  Using cached click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
INFO: pip is looking at multiple versions of gym[atari,classic-control,mujoco,robotics] to determine which version is compatible with other requirements. This cou

['/home/zosov/anaconda3/envs/rl-project-dopamine/lib/python3.11/site-packages/mujoco']


In [None]:
# NOTE(review): hardcoded absolute path into one user's local conda
# environment; this looks like a leftover debugging cell and will not work on
# other machines — confirm whether it is still needed.
!ls /home/zosov/anaconda3/envs/rl-project-dopamine/lib/m

cmake		    libhistory.a	libpanelw.so	      libtk8.6.so
engines-3	    libhistory.so	libpanelw.so.6	      libtkstub8.6.a
itcl4.2.4	    libhistory.so.8	libpanelw.so.6.4      libtsan.so
libasan.so	    libhistory.so.8.2	libpython3.11.so      libtsan.so.0
libasan.so.6	    libitm.so		libpython3.11.so.1.0  libtsan.so.0.0.0
libasan.so.6.0.0    libitm.so.1		libpython3.so	      libubsan.so
libatomic.so	    libitm.so.1.0.0	libquadmath.so	      libubsan.so.1
libatomic.so.1	    liblsan.so		libquadmath.so.0      libubsan.so.1.0.0
libatomic.so.1.2.0  liblsan.so.0	libquadmath.so.0.0.0  libuuid.a
libbz2.a	    liblsan.so.0.0.0	libreadline.a	      libuuid.so
libbz2.so	    liblzma.so		libreadline.so	      libuuid.so.1
libbz2.so.1.0	    liblzma.so.5	libreadline.so.8      libuuid.so.1.3.0
libbz2.so.1.0.8     liblzma.so.5.6.4	libreadline.so.8.2    libz.a
libcrypto.a	    libmenu.a		libsodium.a	      libz.so
libcrypto.so	    libmenu.so		libsodium.so	      libz.so.1
libcrypto.so.3	    libmenu.so.6	libsodium.so.2

In [1]:
# @title Necessary imports and globals.

import numpy as np
import os
from dopamine.jax.agents.dqn import dqn_agent
from dopamine.discrete_domains import run_experiment
from dopamine.colab import utils as colab_utils
from absl import flags
import gin

# Root directory for logs; each example joins its own sub-directory and GAME
# onto it to build LOG_PATH.
BASE_PATH = './logs'  # @param
# Atari game name: substituted into the gin configs, used as the key of
# `experimental_data`, and shown as the plot title.
GAME = 'Asterix'  # @param

2025-03-10 18:28:22.871308: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-10 18:28:23.723975: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741631304.116822     951 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741631304.206269     951 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-10 18:28:25.110673: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

## Example 1: Train a modified version of DQN
DQN is one of the standard agents provided with Dopamine.
The purpose of this example is to demonstrate how one can modify an existing agent. The modification
we are doing here (choosing actions randomly) is for illustrative purposes: it will clearly perform very
poorly.

In [None]:
# @title Create an agent based on DQN, but choosing actions randomly.

# Log directory for Example 1.
# NOTE(review): the Example 2 cell rebinds LOG_PATH, so re-running this
# example's "Load the training logs" cell after Example 2 (without re-running
# this cell first) would read from the sticky-agent directory instead.
LOG_PATH = os.path.join(BASE_PATH, 'random_dqn', GAME)

# The JAX `dqn_agent` module imported at the top of the notebook names its
# agent class `JaxDQNAgent`; it has no `DQNAgent` attribute.
class MyRandomDQNAgent(dqn_agent.JaxDQNAgent):
  """DQN agent whose `step` discards the parent's action and acts randomly.

  Only `__init__` and `step` are overridden here; every other method,
  including `begin_episode`, is inherited unchanged from the parent class.
  """

  def __init__(self, num_actions):
    """This maintains all the DQN default argument values.

    Args:
      num_actions: int, number of actions; forwarded to the parent constructor.
    """
    super().__init__(num_actions)

  def step(self, reward, observation):
    """Calls the step function of the parent class, but returns a random action.

    Args:
      reward: forwarded unchanged to the parent's `step`.
      observation: forwarded unchanged to the parent's `step`.

    Returns:
      int, an action index drawn uniformly from [0, self.num_actions).
    """
    # The parent's step still runs; only the action it returns is thrown away.
    super().step(reward, observation)
    return np.random.randint(self.num_actions)

def create_random_dqn_agent(unused_sess, environment, summary_writer=None):
  """Agent factory in the form the Runner class expects.

  Args:
    unused_sess: ignored.
    environment: object whose `action_space.n` gives the number of actions.
    summary_writer: ignored.

  Returns:
    A MyRandomDQNAgent sized to the environment's action space.
  """
  del unused_sess, summary_writer  # Neither is needed to build the agent.
  action_count = environment.action_space.n
  return MyRandomDQNAgent(num_actions=action_count)

# Gin bindings for this example. The '{}' placeholder in the game_name binding
# is filled with GAME by str.format below.
random_dqn_config = """
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
atari_lib.create_atari_environment.game_name = '{}'
atari_lib.create_atari_environment.sticky_actions = True
run_experiment.Runner.num_iterations = 200
run_experiment.Runner.training_steps = 10
run_experiment.Runner.max_steps_per_episode = 100
ReplayBuffer.max_capacity = 1_000
ReplayBuffer.batch_size = 32
""".format(GAME)
# NOTE(review): with skip_unknown=False gin presumably rejects bindings to
# configurables it does not know — confirm `ReplayBuffer` is registered by the
# imports at the top of the notebook.
gin.parse_config(random_dqn_config, skip_unknown=False)

# Create the runner class with this agent. We use very small numbers of steps
# to terminate quickly, as this is mostly meant for demonstrating how one can
# use the framework.
random_dqn_runner = run_experiment.TrainRunner(LOG_PATH, create_random_dqn_agent)

In [None]:
# @title Train MyRandomDQNAgent.
print('Will train agent, please be patient, may be a while...')
# Runs the TrainRunner built in the previous cell; presumably it writes its
# logs under the LOG_PATH it was constructed with.
random_dqn_runner.run_experiment()
print('Done training!')

In [None]:
# @title Load the training logs.
# Read back the 'train_episode_returns' summaries stored under LOG_PATH.
random_dqn_data = colab_utils.read_experiment(
    LOG_PATH, verbose=True, summary_keys=['train_episode_returns'])
# Tag the rows of this run; 'agent' is the hue column used by the plot cells.
random_dqn_data['agent'] = 'MyRandomDQN'
random_dqn_data['run_number'] = 1
# Results are stored per game; Example 2 merges its own data into this entry.
experimental_data = {GAME:  random_dqn_data}


In [None]:
# @title Plot training results.

import seaborn as sns
import matplotlib.pyplot as plt

# Plot 'train_episode_returns' against 'iteration', coloured by the 'agent'
# column of the data collected for GAME.
fig, ax = plt.subplots(figsize=(16,8))
sns.lineplot(
    x='iteration', y='train_episode_returns', hue='agent',
    data=experimental_data[GAME], ax=ax)
plt.title(GAME)
plt.show()

## Example 2: Train an agent built from scratch.
The purpose of this example is to demonstrate how one can create an agent from scratch. The agent
created here is meant to demonstrate the bare minimum functionality that is expected from agents. It is
selecting actions in a very suboptimal way, so it will clearly do poorly.

In [None]:
# @title Create a completely new agent from scratch.

# Log directory for Example 2. This rebinds the LOG_PATH name that Example 1
# also uses, so cells that read LOG_PATH see whichever assignment ran last.
LOG_PATH = os.path.join(BASE_PATH, 'sticky_agent', GAME)

class StickyAgent:
  """This agent randomly selects an action and sticks to it. It will change
  actions with probability switch_prob.

  Rewards and observations are ignored; the agent keeps no learned state.
  """

  def __init__(self, sess, num_actions, switch_prob=0.1):
    """Stores the settings and draws the initial action.

    Args:
      sess: stored on the instance but never used by this class.
      num_actions: int, number of available actions; actions are drawn from
        [0, num_actions).
      switch_prob: float, probability of re-drawing the action each time one
        is requested.
    """
    self._sess = sess
    self.summary_writer = None
    self._num_actions = num_actions
    self._switch_prob = switch_prob
    self._last_action = np.random.randint(num_actions)
    # NOTE(review): presumably read or toggled by the Runner — confirm.
    self.eval_mode = False

  def _choose_action(self):
    """Returns the current action, re-drawing it first with prob. switch_prob."""
    # np.random.random() lies in [0, 1), so a strict comparison fires with
    # probability exactly switch_prob and never when switch_prob is 0.
    if np.random.random() < self._switch_prob:
      self._last_action = np.random.randint(self._num_actions)
    return self._last_action

  def bundle_and_checkpoint(self, checkpoint_dir, iteration):
    """No-op: the agent saves nothing and returns None."""
    del checkpoint_dir, iteration  # Unused.

  def unbundle(self, checkpoint_dir, checkpoint_version, data):
    """No-op: the agent restores nothing and returns None."""
    del checkpoint_dir, checkpoint_version, data  # Unused.

  def begin_episode(self, observation):
    """Returns the first action of an episode; the observation is ignored."""
    del observation  # Unused.
    return self._choose_action()

  def end_episode(self, reward):
    """No-op: the final reward is ignored."""
    del reward  # Unused.

  def step(self, reward, observation):
    """Returns the next action; reward and observation are ignored."""
    del reward, observation  # Unused.
    return self._choose_action()

def create_sticky_agent(sess, environment, summary_writer=None):
  """Agent factory in the form the Runner class expects.

  Args:
    sess: passed straight through to StickyAgent.
    environment: object whose `action_space.n` gives the number of actions.
    summary_writer: ignored.

  Returns:
    A StickyAgent that re-draws its action with probability 0.2.
  """
  del summary_writer  # StickyAgent is built without it.
  return StickyAgent(
      sess,
      num_actions=environment.action_space.n,
      switch_prob=0.2,
  )

# Gin bindings for this example; same settings as Example 1. The '{}'
# placeholder in the game_name binding is filled with GAME by str.format.
# NOTE(review): StickyAgent does not reference a ReplayBuffer anywhere in its
# definition, so the two ReplayBuffer bindings look carried over from
# Example 1 — confirm whether they are needed here.
sticky_config = """
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
atari_lib.create_atari_environment.game_name = '{}'
atari_lib.create_atari_environment.sticky_actions = True
run_experiment.Runner.num_iterations = 200
run_experiment.Runner.training_steps = 10
run_experiment.Runner.max_steps_per_episode = 100
ReplayBuffer.max_capacity = 1_000
ReplayBuffer.batch_size = 32
""".format(GAME)
gin.parse_config(sticky_config, skip_unknown=False)

# Create the runner class with this agent. We use very small numbers of steps
# to terminate quickly, as this is mostly meant for demonstrating how one can
# use the framework.
sticky_runner = run_experiment.TrainRunner(LOG_PATH, create_sticky_agent)

In [None]:
# @title Train StickyAgent.
print('Will train sticky agent, please be patient, may be a while...')
# Runs the TrainRunner built in the previous cell; presumably it writes its
# logs under the LOG_PATH it was constructed with.
sticky_runner.run_experiment()
print('Done training!')

In [None]:
# @title Load the training logs.
# Read back the 'train_episode_returns' summaries stored under LOG_PATH.
sticky_data = colab_utils.read_experiment(
    LOG_PATH, verbose=True, summary_keys=['train_episode_returns'])
# Tag the rows of this run; 'agent' is the hue column used by the plot cells.
sticky_data['agent'] = 'StickyAgent'
sticky_data['run_number'] = 1
# Combine with the data already stored for GAME so both agents can be plotted
# together. Requires the Example 1 "Load the training logs" cell to have run.
# NOTE(review): the entry is rebuilt from its own previous value, so confirm
# that re-running this cell cannot duplicate rows.
experimental_data[GAME] = experimental_data[GAME].merge(sticky_data,
                                                        how='outer')

In [None]:
# @title Plot training results.

import seaborn as sns
import matplotlib.pyplot as plt

# Same plot as in Example 1: 'train_episode_returns' against 'iteration',
# coloured by 'agent'. The data for GAME includes the StickyAgent rows once
# the merge in the previous cell has run.
fig, ax = plt.subplots(figsize=(16,8))
sns.lineplot(
    x='iteration', y='train_episode_returns', hue='agent',
    data=experimental_data[GAME], ax=ax)
plt.title(GAME)
plt.show()