# DQN with CartPole-v1 Environment

In [0]:
# Installing the gym packgae
!pip -q install gym
# Installing Open MPI required to run stable-baselines
!apt install cmake libopenmpi-dev zlib1g-dev 
# Installing stable-baselines package for DQN implementation
!pip install stable-baselines

# This is an openAI package that provides several environments that can be user to train our reinforcement learning agent
import gym
# Package to plot figures/graphs
import matplotlib as plt

# Importing the DummyVecEnv to initialize a vectorized environment that acts as a wrapper
from stable_baselines.common.vec_env import DummyVecEnv

# Importing the method that will decide how the policy will be calculated (MLP or CNN)
from stable_baselines.deepq.policies import MlpPolicy, CnnPolicy

# Importing the DQN model library
from stable_baselines import DQN

# The following methods create an environment that is provided by the OpenAI gym library
env = gym.make('CartPole-v1')
env = DummyVecEnv([lambda: env])

# Model is initialized here and the method used to calculate policy is given along with the environment setup
model = DQN(MlpPolicy, env, verbose=1)

# Actual training of the DQN netwrok happens here, timesteps denote the number of steps the agent will learn for before terminating.
model.learn(total_timesteps=25000)

# Code to render the agent interacting with environment based on what it has learned
# Unable to render on colad
# obs = env.reset()
# while True:

# Predict action and next state from observations
#   action, _states = model.predict(obs)

# Get observation, reward and if the step is done by performing an action on the environment
#   obs, rewards, dones, info = env.step(action)
#   #     plt.imshow(env.render(mode='rgb_array'))
#   #     plt.show()

Reading package lists... Done
Building dependency tree       
Reading state information... Done
zlib1g-dev is already the newest version (1:1.2.11.dfsg-0ubuntu2).
zlib1g-dev set to manually installed.
The following additional packages will be installed:
  autotools-dev cmake-data file ibverbs-providers libarchive13 libfabric1
  libhwloc-dev libhwloc-plugins libhwloc5 libibverbs-dev libibverbs1
  libjsoncpp1 libltdl-dev libltdl7 liblzo2-2 libmagic-mgc libmagic1
  libnl-3-200 libnl-route-3-200 libnuma-dev libnuma1 libopenmpi2 libpciaccess0
  libpsm-infinipath1 librdmacm1 librhash0 libtool libuv1 ocl-icd-libopencl1
  openmpi-bin openmpi-common
Suggested packages:
  cmake-doc ninja-build lrzip libhwloc-contrib-plugins libtool-doc openmpi-doc
  pciutils autoconf automaken gcj-jdk
The following NEW packages will be installed:
  autotools-dev cmake cmake-data file ibverbs-providers libarchive13
  libfabric1 libhwloc-dev libhwloc-plugins libhwloc5 libibverbs-dev
  libibverbs1 libjsoncpp1 liblt

  result = entry_point.load(False)


--------------------------------------
| % time spent exploring  | 34       |
| episodes                | 100      |
| mean 100 episode reward | 16.9     |
| steps                   | 1668     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 200      |
| mean 100 episode reward | 30.2     |
| steps                   | 4692     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 300      |
| mean 100 episode reward | 131      |
| steps                   | 17820    |
--------------------------------------


<stable_baselines.deepq.dqn.DQN at 0x7f3bc27d6c50>

# DQN with Breakout-v0 Environment

In [2]:
# Installation steps are commented out here; uncomment them if this cell is run
# in a fresh runtime where the packages are not installed yet.
# # Installing the gym package
# !pip -q install gym
# # Installing Open MPI required to run stable-baselines
# !apt install cmake libopenmpi-dev zlib1g-dev
# # Installing stable-baselines package for DQN implementation
# !pip install stable-baselines

# Import package to create atari environment
from stable_baselines.common.cmd_util import make_atari

# Importing the method that will decide how the policy will be calculated (MLP or CNN)
from stable_baselines.deepq.policies import MlpPolicy, CnnPolicy

# Importing the DQN model library
from stable_baselines import DQN

# Initialize and create the environment model
env = make_atari('BreakoutNoFrameskip-v0')

# Create the DQN model network by setting up the agent's policy and the environment.
# Atari environments produce image observations, so the convolutional policy is used
# here (as in the stable-baselines DQN Atari example) instead of the MLP policy.
model = DQN(CnnPolicy, env, verbose=1)

# Actual training of the DQN network happens here; total_timesteps is the number of
# environment steps the agent will learn for before terminating.
model.learn(total_timesteps=1000000)

# Code to render the agent interacting with the environment based on what it has learned.
# Unable to render on Colab, so the loop is left commented out.
# NOTE(review): the snippet uses `plt`, which this cell does not import — it needs
# `import matplotlib.pyplot as plt` in scope before being uncommented.
# obs = env.reset()
# while True:
#     # Predict action and next state from observations
#     action, _states = model.predict(obs)
#
#     # Get observation, reward and whether the episode is done by performing the action on the environment
#     obs, rewards, dones, info = env.step(action)
#     # plt.imshow(env.render(mode='rgb_array'))
#     # plt.show()

  result = entry_point.load(False)


--------------------------------------
| % time spent exploring  | 81       |
| episodes                | 100      |
| mean 100 episode reward | 1.4      |
| steps                   | 18803    |
--------------------------------------
--------------------------------------
| % time spent exploring  | 63       |
| episodes                | 200      |
| mean 100 episode reward | 1.2      |
| steps                   | 37230    |
--------------------------------------
--------------------------------------
| % time spent exploring  | 43       |
| episodes                | 300      |
| mean 100 episode reward | 1.4      |
| steps                   | 57202    |
--------------------------------------
--------------------------------------
| % time spent exploring  | 22       |
| episodes                | 400      |
| mean 100 episode reward | 1.6      |
| steps                   | 78851    |
--------------------------------------
--------------------------------------
| % time spent exploring 

KeyboardInterrupt: ignored