In [3]:
import os
from time import sleep

import gym
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import VecFrameStack


#### Test Environment

In [2]:
# Prerequisite: download and unzip http://www.atarimania.com/roms/Roms.rar inside the ../ROMS folder.
# Id of the Gym environment to build, kept in a variable so it can be referenced by name.
environment_name = 'Breakout-v0'
# Build one environment instance for the quick inspection cells in this section.
env = gym.make(environment_name)

In [3]:
# Inspect the environment's action space (bare expression, so the notebook displays it).
env.action_space

Discrete(4)

In [4]:
# Inspect the environment's observation space (bare expression, so the notebook displays it).
env.observation_space

Box([[[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 ...

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]], [[[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 ...

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
 

In [5]:
# Sanity-check the environment by playing a few episodes with randomly
# sampled actions and printing the total reward of each episode.
episodes = 5
try:
    for episode in range(1, episodes + 1):
        obs = env.reset()
        score = 0
        done = False

        # Keep stepping until the environment reports the episode is over.
        while not done:
            env.render()
            action = env.action_space.sample()  # random action, no policy involved
            obs, reward, done, info = env.step(action)
            score += reward
        print(f'Episode:{episode}, Score:{score}')
finally:
    # Always release the environment, even if the loop raises or the
    # cell is interrupted part-way through an episode.
    env.close()

Episode:1, Score:1.0
Episode:2, Score:2.0
Episode:3, Score:3.0
Episode:4, Score:4.0
Episode:5, Score:1.0


#### Vectorize Environment and Train Model

In [7]:
# Train on 4 environments at the same time to improve learning speed.
# The environments are seeded so their randomness is repeatable across runs.
env = make_atari_env('Breakout-v0', n_envs=4, seed=0)
# Stack 4 consecutive frames into each observation.
env = VecFrameStack(env, n_stack=4)

In [4]:
# Directory that receives the TensorBoard logs.
log_path = os.path.join('Training', 'Logs')
# A2C agent with the 'CnnPolicy' policy on the vectorized environment.
model = A2C(
    policy='CnnPolicy',
    env=env,
    tensorboard_log=log_path,
    verbose=1,
)

NameError: name 'env' is not defined

In [13]:
# Train the model for 100,000 timesteps; the call's return value is shown as the cell output.
model.learn(total_timesteps=100000)

Logging to Training/Logs/A2C_1
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 305      |
|    ep_rew_mean        | 2.03     |
| time/                 |          |
|    fps                | 191      |
|    iterations         | 100      |
|    time_elapsed       | 10       |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -1.33    |
|    explained_variance | 0.299    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.195    |
|    value_loss         | 0.0626   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 306      |
|    ep_rew_mean        | 2.05     |
| time/                 |          |
|    fps                | 224      |
|    iterations         | 200      |
|    time_elapsed       | 17       |
|    total_timesteps    | 4000     |
| train

<stable_baselines3.a2c.a2c.A2C at 0x7f75326f3130>

#### Save and Reload Model 

In [5]:
# Path under which the trained model is stored.
a2c_path = os.path.join(
    'Training',
    'Saved_Models',
    'A2C_Breakout_Model',
)
# Persist the model to that path.
model.save(path=a2c_path)


NameError: name 'model' is not defined

In [15]:
# Remove the in-memory model binding; the following cell rebinds `model` by loading it from disk.
del model

In [8]:
# Load the saved A2C model from a2c_path, passing the current env as its environment.
model = A2C.load(a2c_path, env)

Wrapping the env in a VecTransposeImage.


#### Evaluate and Test

In [12]:
# Evaluation runs on a single seeded environment, with 4 frames stacked per observation.
env = VecFrameStack(
    make_atari_env('Breakout-v0', n_envs=1, seed=0),
    n_stack=4,
)

# Bare expression so the notebook displays the value returned by evaluate_policy.
evaluate_policy(
    model=model,
    env=env,
    n_eval_episodes=10,
    render=True,
)


(6.6, 1.624807680927192)

In [11]:
# Close the environment currently bound to `env`.
env.close()