In [4]:
import gym
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
import os

  from .autonotebook import tqdm as notebook_tqdm


# Test Environment #

In [2]:
# Shell out to atari_py's ROM importer, pointing it at the relative directory ./ROMS/ROMS.
# NOTE(review): the gym.make output below reports "A.L.E ... version 0.7.4" — confirm that
# atari_py is still the package whose ROMs need importing for this setup.
!python -m atari_py.import_roms ./ROMS/ROMS

In [3]:
# Gym environment id used for the random-play smoke test in this section.
env_name = "Breakout-v0"
# Build one environment from that id. The name `env` is rebound to other
# environments in later cells, so its meaning depends on which cells have run.
env = gym.make(env_name)

A.L.E: Arcade Learning Environment (version 0.7.4+069f8bd)
[Powered by Stella]


In [None]:
# Reset the environment; the return value is shown as the cell output.
# (The random-play loop further down treats this return value as the first observation.)
env.reset()

In [13]:
# Display the action space; the recorded output for this cell is Discrete(4).
env.action_space

Discrete(4)

In [None]:
# Display the observation space of the environment created above.
env.observation_space

In [8]:
# Smoke-test the raw environment: play a few episodes with uniformly random
# actions, rendering every step, and print each episode's total reward.
episodes = 5
try:
    for episode in range(1, episodes + 1):
        obs = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            # Random baseline: no policy is involved at this point.
            action = env.action_space.sample()
            obs, reward, done, info = env.step(action)
            score += reward
        print("Episodes {} Score {}".format(episode, score))
finally:
    # Release the environment (and its render window) even when the cell is
    # interrupted from the kernel menu or env.step()/env.render() raises
    # part-way through an episode.
    env.close()
    

Episodes 1 Score 0.0
Episodes 2 Score 0.0
Episodes 3 Score 2.0
Episodes 4 Score 1.0
Episodes 5 Score 0.0


# Vectorize Environment and Train Model #

In [13]:
# Training environment: four Breakout environments created with seed 0,
# wrapped by VecFrameStack with n_stack=4.
env = VecFrameStack(
    make_atari_env("Breakout-v0", n_envs=4, seed=0),
    n_stack=4,
)

In [5]:
# Preview the vectorized environment.
# NOTE(review): no reset() is called on this env before rendering anywhere in
# this notebook — confirm the backend can render a frame without one.
env.render()

In [7]:
# Close the environment after the preview render.
# NOTE(review): read top to bottom, the next code cell passes this same `env`
# to A2C for training. The execution counts (In [7] here, In [13]/[14] for env
# creation and model construction) suggest this cell ran before the env was
# re-created. Confirm the env is still usable after close(), or re-create it
# before training so Restart & Run All works.
env.close()

In [14]:
# TensorBoard logs are written under Training/Logs, relative to the working directory.
log_path = os.path.join("Training", "Logs")
# A2C with the CNN policy on whatever `env` is currently bound to.
model = A2C(
    policy="CnnPolicy",
    env=env,
    verbose=1,
    tensorboard_log=log_path,
)

Using cuda device
Wrapping the env in a VecTransposeImage.


In [15]:
# Train for 500,000 timesteps. This is the expensive cell: the recorded log
# shows roughly 256 fps and about 1,700 s elapsed at 436,000 timesteps.
# The call is the last expression in the cell, so its return value is what
# produces the A2C repr at the end of the output.
model.learn(total_timesteps=500000)

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 706      |
|    ep_rew_mean        | 10.7     |
| time/                 |          |
|    fps                | 256      |
|    iterations         | 21800    |
|    time_elapsed       | 1700     |
|    total_timesteps    | 436000   |
| train/                |          |
|    entropy_loss       | -0.151   |
|    explained_variance | 0.31     |
|    learning_rate      | 0.0007   |
|    n_updates          | 21799    |
|    policy_loss        | -0.0351  |
|    value_loss         | 0.191    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 707      |
|    ep_rew_mean        | 10.7     |
| time/                 |          |
|    fps                | 256      |
|    iterations         | 21900    |
|    time_elapsed       | 1708     |
|    total_timesteps    | 438000   |
| train/                |          |
|

<stable_baselines3.a2c.a2c.A2C at 0x7fdf8be05280>

# Save and reload Model #

In [16]:
# Where the trained model is saved; the load cell below reuses `a2c_path`.
a2c_path = os.path.join("Training", "Saved Models", "A2C_Breakout_Model")
model.save(path=a2c_path)

In [17]:
# Remove the in-memory model binding — presumably so the following cell
# demonstrably restores the model from disk rather than reusing this object.
del model

In [18]:
# Restore the saved model from a2c_path and attach whatever `env` is currently
# bound to (in top-to-bottom order, the 4-env training environment).
model = A2C.load(path=a2c_path, env=env)

Wrapping the env in a VecTransposeImage.


# Evaluate and test #

In [19]:
# Evaluation environment: a single Breakout env with seed 0, wrapped by
# VecFrameStack with n_stack=4 (same stacking as the training env).
# NOTE(review): this rebinds `env` without closing the environment it
# previously referred to — confirm whether the training env should be closed first.
env = VecFrameStack(make_atari_env("Breakout-v0", n_envs=1, seed=0), n_stack=4)

In [28]:
# Evaluate the loaded model over 50 episodes with rendering enabled.
# The cell displays the returned 2-tuple; per the Stable-Baselines3 docs this
# is (mean reward, std of reward) — the recorded run shows (11.96, 4.65...).
evaluate_policy(model, env, n_eval_episodes=50, render=True)

(11.96, 4.651709363234122)

In [29]:
# Close the evaluation environment (the object `env` was last bound to).
env.close()