# 1. Import Dependencies

In [3]:
import gym
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
import os

# 2. Test Environment

In [4]:
# Gym id of the Atari game explored in this notebook.
environment_name = 'Breakout-v0'

In [5]:
# Create a single Gym environment from the id stored in `environment_name`;
# it is used below for manual inspection and a random-action smoke test.
env = gym.make(environment_name)

In [6]:
# Display the action space (the recorded output shows four discrete actions).
env.action_space

Discrete(4)

In [7]:
# Display the observation space: per the recorded output, a Box of
# 3-channel image frames with values ranging from 0 to 255.
env.observation_space

Box([[[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 ...

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]], [[[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 ...

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [255 255 255]
  [255 255 255]
  [255 255 255]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
 

In [11]:
# Smoke-test the environment by playing a few episodes with random actions
# and printing the total reward collected in each one.
episodes = 5
try:
    for episode in range(1, episodes + 1):
        env.reset()  # the initial observation is not needed by a random policy
        done = False
        score = 0

        while not done:
            env.render()
            action = env.action_space.sample()
            # step() returns (observation, reward, done, info); only the
            # reward and the termination flag matter here.
            _obs, reward, done, _info = env.step(action)
            score += reward
        print('Episode:{} Score:{}'.format(episode, score))
finally:
    # Always release the environment (and its render window), even when the
    # loop raises or the kernel is interrupted mid-episode.
    env.close()

Episode:1 Score:1.0
Episode:2 Score:3.0
Episode:3 Score:4.0
Episode:4 Score:1.0
Episode:5 Score:0.0


# 3. Vectorise Environment and Train Model

In [12]:
# Build the training environment in one re-runnable cell: four parallel Atari
# environments, then a wrapper that stacks the last four frames of each.
# Creating and wrapping `env` together means re-running the cell can never
# stack frames on top of an already frame-stacked environment.
# NOTE(review): Stable-Baselines3's Atari examples use "...NoFrameskip-v4" ids
# because the Atari preprocessing applies its own frame skip -- confirm that
# the "Breakout-v0" id held in `environment_name` is intended here.
env = make_atari_env(environment_name, n_envs=4, seed=0)
env = VecFrameStack(env, n_stack=4)

In [19]:
# Directory (relative to the working directory) that receives training logs.
log_dir_parts = ('Training', 'Logs')
log_path = os.path.join(*log_dir_parts)

In [20]:
# Create the A2C agent with a CNN policy on the vectorised environment;
# training metrics are logged under `log_path` for TensorBoard.
model = A2C(
    policy="CnnPolicy",
    env=env,
    verbose=1,
    tensorboard_log=log_path,
)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [21]:
# Train the agent. The step budget counts environment steps across all
# parallel environments (see `total_timesteps` in the recorded log).
n_training_steps = 400_000
model.learn(total_timesteps=n_training_steps)

Logging to Training\Logs\A2C_1
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 262      |
|    ep_rew_mean        | 1.12     |
| time/                 |          |
|    fps                | 25       |
|    iterations         | 100      |
|    time_elapsed       | 77       |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -1.38    |
|    explained_variance | -0.459   |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.0299  |
|    value_loss         | 0.00541  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 275      |
|    ep_rew_mean        | 1.39     |
| time/                 |          |
|    fps                | 38       |
|    iterations         | 200      |
|    time_elapsed       | 103      |
|    total_timesteps    | 4000     |
| train

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 363      |
|    ep_rew_mean        | 3.35     |
| time/                 |          |
|    fps                | 69       |
|    iterations         | 1400     |
|    time_elapsed       | 401      |
|    total_timesteps    | 28000    |
| train/                |          |
|    entropy_loss       | -0.858   |
|    explained_variance | 0.959    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1399     |
|    policy_loss        | 0.0474   |
|    value_loss         | 0.0535   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 380      |
|    ep_rew_mean        | 3.76     |
| time/                 |          |
|    fps                | 70       |
|    iterations         | 1500     |
|    time_elapsed       | 424      |
|    total_timesteps    | 30000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 436      |
|    ep_rew_mean        | 4.94     |
| time/                 |          |
|    fps                | 76       |
|    iterations         | 2800     |
|    time_elapsed       | 733      |
|    total_timesteps    | 56000    |
| train/                |          |
|    entropy_loss       | -0.629   |
|    explained_variance | 0.809    |
|    learning_rate      | 0.0007   |
|    n_updates          | 2799     |
|    policy_loss        | -0.0805  |
|    value_loss         | 0.0819   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 449      |
|    ep_rew_mean        | 5.18     |
| time/                 |          |
|    fps                | 76       |
|    iterations         | 2900     |
|    time_elapsed       | 757      |
|    total_timesteps    | 58000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 544      |
|    ep_rew_mean        | 7.1      |
| time/                 |          |
|    fps                | 79       |
|    iterations         | 4200     |
|    time_elapsed       | 1057     |
|    total_timesteps    | 84000    |
| train/                |          |
|    entropy_loss       | -0.0294  |
|    explained_variance | 0.727    |
|    learning_rate      | 0.0007   |
|    n_updates          | 4199     |
|    policy_loss        | 0.0215   |
|    value_loss         | 0.236    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 540      |
|    ep_rew_mean        | 7        |
| time/                 |          |
|    fps                | 79       |
|    iterations         | 4300     |
|    time_elapsed       | 1080     |
|    total_timesteps    | 86000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 545      |
|    ep_rew_mean        | 7.14     |
| time/                 |          |
|    fps                | 80       |
|    iterations         | 5500     |
|    time_elapsed       | 1359     |
|    total_timesteps    | 110000   |
| train/                |          |
|    entropy_loss       | -0.351   |
|    explained_variance | 0.609    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5499     |
|    policy_loss        | -0.00753 |
|    value_loss         | 0.0817   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 549      |
|    ep_rew_mean        | 7.32     |
| time/                 |          |
|    fps                | 80       |
|    iterations         | 5600     |
|    time_elapsed       | 1382     |
|    total_timesteps    | 112000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 572      |
|    ep_rew_mean        | 7.48     |
| time/                 |          |
|    fps                | 81       |
|    iterations         | 6900     |
|    time_elapsed       | 1689     |
|    total_timesteps    | 138000   |
| train/                |          |
|    entropy_loss       | -0.277   |
|    explained_variance | 0.943    |
|    learning_rate      | 0.0007   |
|    n_updates          | 6899     |
|    policy_loss        | 0.0939   |
|    value_loss         | 0.044    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 563      |
|    ep_rew_mean        | 7.36     |
| time/                 |          |
|    fps                | 81       |
|    iterations         | 7000     |
|    time_elapsed       | 1713     |
|    total_timesteps    | 140000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 575      |
|    ep_rew_mean        | 7.99     |
| time/                 |          |
|    fps                | 82       |
|    iterations         | 8200     |
|    time_elapsed       | 1999     |
|    total_timesteps    | 164000   |
| train/                |          |
|    entropy_loss       | -0.126   |
|    explained_variance | 0.703    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8199     |
|    policy_loss        | 0.0646   |
|    value_loss         | 0.212    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 582      |
|    ep_rew_mean        | 8.08     |
| time/                 |          |
|    fps                | 82       |
|    iterations         | 8300     |
|    time_elapsed       | 2023     |
|    total_timesteps    | 166000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 596      |
|    ep_rew_mean        | 8.43     |
| time/                 |          |
|    fps                | 81       |
|    iterations         | 9600     |
|    time_elapsed       | 2350     |
|    total_timesteps    | 192000   |
| train/                |          |
|    entropy_loss       | -0.331   |
|    explained_variance | 0.683    |
|    learning_rate      | 0.0007   |
|    n_updates          | 9599     |
|    policy_loss        | 0.00882  |
|    value_loss         | 0.182    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 602      |
|    ep_rew_mean        | 8.47     |
| time/                 |          |
|    fps                | 81       |
|    iterations         | 9700     |
|    time_elapsed       | 2374     |
|    total_timesteps    | 194000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 647      |
|    ep_rew_mean        | 9.65     |
| time/                 |          |
|    fps                | 80       |
|    iterations         | 11000    |
|    time_elapsed       | 2742     |
|    total_timesteps    | 220000   |
| train/                |          |
|    entropy_loss       | -0.438   |
|    explained_variance | 0.308    |
|    learning_rate      | 0.0007   |
|    n_updates          | 10999    |
|    policy_loss        | -0.00253 |
|    value_loss         | 0.236    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 634      |
|    ep_rew_mean        | 9.36     |
| time/                 |          |
|    fps                | 80       |
|    iterations         | 11100    |
|    time_elapsed       | 2766     |
|    total_timesteps    | 222000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 693      |
|    ep_rew_mean        | 10.3     |
| time/                 |          |
|    fps                | 80       |
|    iterations         | 12400    |
|    time_elapsed       | 3064     |
|    total_timesteps    | 248000   |
| train/                |          |
|    entropy_loss       | -0.175   |
|    explained_variance | 0.859    |
|    learning_rate      | 0.0007   |
|    n_updates          | 12399    |
|    policy_loss        | -0.0206  |
|    value_loss         | 0.0815   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 698      |
|    ep_rew_mean        | 10.5     |
| time/                 |          |
|    fps                | 80       |
|    iterations         | 12500    |
|    time_elapsed       | 3088     |
|    total_timesteps    | 250000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 673      |
|    ep_rew_mean        | 10.2     |
| time/                 |          |
|    fps                | 81       |
|    iterations         | 13800    |
|    time_elapsed       | 3400     |
|    total_timesteps    | 276000   |
| train/                |          |
|    entropy_loss       | -0.262   |
|    explained_variance | 0.815    |
|    learning_rate      | 0.0007   |
|    n_updates          | 13799    |
|    policy_loss        | -0.0122  |
|    value_loss         | 0.21     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 677      |
|    ep_rew_mean        | 10.3     |
| time/                 |          |
|    fps                | 81       |
|    iterations         | 13900    |
|    time_elapsed       | 3423     |
|    total_timesteps    | 278000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 703      |
|    ep_rew_mean        | 10.6     |
| time/                 |          |
|    fps                | 81       |
|    iterations         | 15200    |
|    time_elapsed       | 3741     |
|    total_timesteps    | 304000   |
| train/                |          |
|    entropy_loss       | -0.293   |
|    explained_variance | 0.154    |
|    learning_rate      | 0.0007   |
|    n_updates          | 15199    |
|    policy_loss        | 0.235    |
|    value_loss         | 0.226    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 693      |
|    ep_rew_mean        | 10.4     |
| time/                 |          |
|    fps                | 81       |
|    iterations         | 15300    |
|    time_elapsed       | 3765     |
|    total_timesteps    | 306000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 671      |
|    ep_rew_mean        | 10.2     |
| time/                 |          |
|    fps                | 81       |
|    iterations         | 16600    |
|    time_elapsed       | 4090     |
|    total_timesteps    | 332000   |
| train/                |          |
|    entropy_loss       | -0.246   |
|    explained_variance | 0.666    |
|    learning_rate      | 0.0007   |
|    n_updates          | 16599    |
|    policy_loss        | -0.0352  |
|    value_loss         | 0.259    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 676      |
|    ep_rew_mean        | 10.4     |
| time/                 |          |
|    fps                | 81       |
|    iterations         | 16700    |
|    time_elapsed       | 4106     |
|    total_timesteps    | 334000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 712      |
|    ep_rew_mean        | 11       |
| time/                 |          |
|    fps                | 84       |
|    iterations         | 18000    |
|    time_elapsed       | 4275     |
|    total_timesteps    | 360000   |
| train/                |          |
|    entropy_loss       | -0.122   |
|    explained_variance | 0.889    |
|    learning_rate      | 0.0007   |
|    n_updates          | 17999    |
|    policy_loss        | 0.00715  |
|    value_loss         | 0.0454   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 720      |
|    ep_rew_mean        | 11.2     |
| time/                 |          |
|    fps                | 84       |
|    iterations         | 18100    |
|    time_elapsed       | 4290     |
|    total_timesteps    | 362000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 749      |
|    ep_rew_mean        | 12.2     |
| time/                 |          |
|    fps                | 86       |
|    iterations         | 19400    |
|    time_elapsed       | 4465     |
|    total_timesteps    | 388000   |
| train/                |          |
|    entropy_loss       | -0.532   |
|    explained_variance | 0.692    |
|    learning_rate      | 0.0007   |
|    n_updates          | 19399    |
|    policy_loss        | -0.0139  |
|    value_loss         | 0.214    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 747      |
|    ep_rew_mean        | 12       |
| time/                 |          |
|    fps                | 87       |
|    iterations         | 19500    |
|    time_elapsed       | 4479     |
|    total_timesteps    | 390000   |
| train/                |          |
|

<stable_baselines3.a2c.a2c.A2C at 0x247eb80b310>

# 4. Save and Reload Model

In [22]:
# Location (relative to the working directory) where the trained agent is saved.
model_path_parts = ('Training', 'Saved Models', 'A2C_model')
a2c_path = os.path.join(*model_path_parts)

In [23]:
# Persist the trained agent to disk at `a2c_path`.
model.save(a2c_path)

In [24]:
# Drop the in-memory agent so the following cells have to restore it from the
# saved file rather than reuse the trained object.
del model

In [25]:
# Rebuild the environment with a single instance (n_envs=1) and the same
# four-frame stacking used for training, so that it can be passed to
# evaluate_policy. The game id is taken from `environment_name` to keep it
# in sync with the rest of the notebook.
env = make_atari_env(environment_name, n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=4)

In [26]:
# Restore the saved agent from disk and attach it to the evaluation environment.
model = A2C.load(a2c_path, env=env)

Wrapping the env in a VecTransposeImage.


# 5. Evaluate and Test

In [27]:
# Run the reloaded agent for 10 rendered episodes; the cell displays the
# resulting (mean episode reward, standard deviation) pair.
evaluate_policy(
    model,
    env,
    n_eval_episodes=10,
    render=True,
)

  logger.warn(


(13.8, 6.446704584514479)

In [30]:
#pass

In [29]:
# Shut down the evaluation environment now that the rendered run is finished.
env.close()