# Import Dependencies

In [3]:
import os
import gymnasium as gym
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env

In [4]:
# Import the Arcade Learning Environment bindings and register them with
# Gymnasium; the "ALE/..." environment ID used below is created after this step.
import ale_py
gym.register_envs(ale_py)

# Test Environment

In [5]:
# Gymnasium ID of the Atari game; reused later when building the vectorised envs.
env_name = "ALE/Breakout-v5"
# Single, un-vectorised environment for the sanity checks in this section.
# Optionally pass render_mode='human' to gym.make to display the game while it runs.
env = gym.make(env_name)

In [6]:
# Start a new episode. The call returns an (observation, info) pair: here a
# (210, 160, 3) uint8 frame and a dict with the remaining lives and frame counters.
env.reset()

(array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        ...,
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]], shape=(210, 160, 3), dtype=uint8),
 {'lives': 5, 'episode_frame_number': 0, 'frame_number': 0})

In [7]:
# Inspect the action space: the set of actions the agent can choose from at each step.
env.action_space

Discrete(4)

In [8]:
# Inspect the observation space: value range, shape and dtype of each frame.
env.observation_space

Box(0, 255, (210, 160, 3), uint8)

In [9]:
# Smoke-test the environment: play a few episodes with uniformly random
# actions and print the total reward collected in each one.
episodes = 5

try:
    for episode in range(1, episodes + 1):
        state, info = env.reset()
        done = False
        score = 0

        while not done:
            # Random policy: sample any valid action from the action space.
            action = env.action_space.sample()
            next_state, reward, terminated, truncated, info = env.step(action)

            # The episode is over when either flag returned by step() is set.
            done = terminated or truncated
            score += reward
            state = next_state

        print(f"Episode {episode} score: {score}")
finally:
    # Always release the environment, even if the loop raises or the cell is
    # interrupted part-way through an episode.
    env.close()

Episode 1 score: 2.0
Episode 2 score: 0.0
Episode 3 score: 0.0
Episode 4 score: 3.0
Episode 5 score: 1.0


# Vectorise Environment and Train Model

In [12]:
# Training environment: 4 seeded copies of the Atari game run as one
# vectorised env, wrapped so that each observation stacks 4 frames.
env = VecFrameStack(
    make_atari_env(env_name, n_envs=4, seed=0),
    n_stack=4,
)

In [12]:
# Reset the vectorised env. Unlike the single env above, only the observation
# array is returned (no info dict); the innermost axis holds the 4 stacked frames.
env.reset()

array([[[[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],

        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],

        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],

        ...,

        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],

        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],

        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]]],


       [[[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0

In [14]:
# TensorBoard logs for each training run are written under training/logs.
log_path = os.path.join("training", "logs")

# A2C agent with a CNN policy, bound to the vectorised training env.
model = A2C(
    policy="CnnPolicy",
    env=env,
    verbose=1,
    tensorboard_log=log_path,
)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [15]:
# Train for 100k environment steps; progress tables are printed because the
# model was created with verbose=1. The trained model is the cell's output.
model.learn(total_timesteps=100_000)

Logging to training\logs\A2C_1
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 233      |
|    ep_rew_mean        | 1.74     |
| time/                 |          |
|    fps                | 166      |
|    iterations         | 100      |
|    time_elapsed       | 12       |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -1.38    |
|    explained_variance | 0.414    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.518    |
|    value_loss         | 0.281    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 237      |
|    ep_rew_mean        | 1.87     |
| time/                 |          |
|    fps                | 175      |
|    iterations         | 200      |
|    time_elapsed       | 22       |
|    total_timesteps    | 4000     |
| train

<stable_baselines3.a2c.a2c.A2C at 0x285adc3b3b0>

# Save and Reload Model

In [ ]:
# Location the trained model is saved to and later reloaded from.
a2c_path = os.path.join("training", "saved models", "A2C_Breakout_Model")

In [16]:
# Persist the trained model to disk at a2c_path.
model.save(a2c_path)

In [ ]:
# Drop the in-memory model so that the reload in the next cell demonstrably
# restores it from the saved file rather than reusing the trained object.
del model

In [13]:
# Restore the saved agent from disk and attach it to the current env.
model = A2C.load(a2c_path, env=env)

Wrapping the env in a VecTransposeImage.


# Evaluate and Test

In [14]:
# Evaluation environment: a single seeded Atari env with the same 4-frame
# stacking that was used for training.
env = VecFrameStack(
    make_atari_env(env_name, n_envs=1, seed=0),
    n_stack=4,
)

In [15]:
# Run the loaded agent for 10 episodes; the cell displays the resulting
# (mean reward, reward standard deviation) pair.
evaluate_policy(
    model=model,
    env=env,
    n_eval_episodes=10,
    render=True,
)

(np.float64(6.2), np.float64(1.2489995996796797))