# Import Dependencies

In [1]:
import gymnasium as gym
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
import os

# Test Environment

In [7]:
# Single Breakout environment for a visual smoke test; 'human' render mode opens a game window.
env = gym.make(
    'Breakout-v0',
    render_mode='human',
)

In [None]:
# Gymnasium requires reset() before the first step(); stepping a freshly
# created environment raises a ResetNeeded error.
env.reset()
# Take one random action; the cell displays the resulting
# (obs, reward, terminated, truncated, info) tuple.
env.step(env.action_space.sample())

In [None]:
# Play a few episodes with uniformly random actions and print each episode's total reward.
episodes = 5
for episode in range(1, episodes + 1):
    # Gymnasium's reset() returns an (observation, info) pair, not a bare observation.
    obs, info = env.reset()
    done = False
    score = 0
    while not done:
        # No explicit env.render() call: with render_mode='human' the frame is
        # drawn automatically on every step().
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        # An episode is over when the game ends (terminated) OR when it is cut
        # off by a step limit (truncated); checking only one can step a finished env.
        done = terminated or truncated
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))

In [None]:
# Close the test environment before `env` is rebound to the vectorised training environment.
env.close()

# Vectorise env and train model

In [7]:
# Four seeded Breakout environments run as one vectorised env, with the
# last four frames stacked into each observation.
env = VecFrameStack(
    make_atari_env('Breakout-v0', n_envs=4, seed=0),
    n_stack=4,
)

In [6]:
# Directory passed to the model as its TensorBoard log location (joined portably).
log_path = os.path.join('Training', 'Logs')
# A2C agent with the CNN policy, trained on the vectorised environment built above.
model = A2C(
    policy='CnnPolicy',
    env=env,
    tensorboard_log=log_path,
    verbose=1,
)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [19]:
# Train for one million environment steps. This is a long-running cell:
# the captured log below shows roughly 110-125 steps per second on CPU.
model.learn(total_timesteps=1_000_000)

Logging to Training\Logs\A2C_2
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 267      |
|    ep_rew_mean        | 1.37     |
| time/                 |          |
|    fps                | 118      |
|    iterations         | 100      |
|    time_elapsed       | 16       |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -1.38    |
|    explained_variance | 0.00283  |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.00464 |
|    value_loss         | 0.00192  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 284      |
|    ep_rew_mean        | 1.63     |
| time/                 |          |
|    fps                | 120      |
|    iterations         | 200      |
|    time_elapsed       | 33       |
|    total_timesteps    | 4000     |
| train

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 336      |
|    ep_rew_mean        | 2.67     |
| time/                 |          |
|    fps                | 118      |
|    iterations         | 1400     |
|    time_elapsed       | 236      |
|    total_timesteps    | 28000    |
| train/                |          |
|    entropy_loss       | -1.24    |
|    explained_variance | 0.823    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1399     |
|    policy_loss        | -0.0228  |
|    value_loss         | 0.0868   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 347      |
|    ep_rew_mean        | 2.89     |
| time/                 |          |
|    fps                | 118      |
|    iterations         | 1500     |
|    time_elapsed       | 254      |
|    total_timesteps    | 30000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 445      |
|    ep_rew_mean        | 5.14     |
| time/                 |          |
|    fps                | 120      |
|    iterations         | 2800     |
|    time_elapsed       | 463      |
|    total_timesteps    | 56000    |
| train/                |          |
|    entropy_loss       | -0.125   |
|    explained_variance | 0.958    |
|    learning_rate      | 0.0007   |
|    n_updates          | 2799     |
|    policy_loss        | -0.0158  |
|    value_loss         | 0.0644   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 441      |
|    ep_rew_mean        | 5.14     |
| time/                 |          |
|    fps                | 120      |
|    iterations         | 2900     |
|    time_elapsed       | 481      |
|    total_timesteps    | 58000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 511      |
|    ep_rew_mean        | 6.48     |
| time/                 |          |
|    fps                | 120      |
|    iterations         | 4200     |
|    time_elapsed       | 695      |
|    total_timesteps    | 84000    |
| train/                |          |
|    entropy_loss       | -0.246   |
|    explained_variance | 0.839    |
|    learning_rate      | 0.0007   |
|    n_updates          | 4199     |
|    policy_loss        | 0.00202  |
|    value_loss         | 0.0222   |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 506       |
|    ep_rew_mean        | 6.37      |
| time/                 |           |
|    fps                | 121       |
|    iterations         | 4300      |
|    time_elapsed       | 709       |
|    total_timesteps    | 86000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 513      |
|    ep_rew_mean        | 6.39     |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 5500     |
|    time_elapsed       | 882      |
|    total_timesteps    | 110000   |
| train/                |          |
|    entropy_loss       | -0.043   |
|    explained_variance | 0.762    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5499     |
|    policy_loss        | 0.00142  |
|    value_loss         | 0.0978   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 500      |
|    ep_rew_mean        | 6.24     |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 5600     |
|    time_elapsed       | 898      |
|    total_timesteps    | 112000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 571      |
|    ep_rew_mean        | 7.52     |
| time/                 |          |
|    fps                | 122      |
|    iterations         | 6900     |
|    time_elapsed       | 1128     |
|    total_timesteps    | 138000   |
| train/                |          |
|    entropy_loss       | -0.137   |
|    explained_variance | 0.839    |
|    learning_rate      | 0.0007   |
|    n_updates          | 6899     |
|    policy_loss        | 0.00354  |
|    value_loss         | 0.0267   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 563      |
|    ep_rew_mean        | 7.45     |
| time/                 |          |
|    fps                | 122      |
|    iterations         | 7000     |
|    time_elapsed       | 1143     |
|    total_timesteps    | 140000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 575      |
|    ep_rew_mean        | 7.71     |
| time/                 |          |
|    fps                | 121      |
|    iterations         | 8300     |
|    time_elapsed       | 1364     |
|    total_timesteps    | 166000   |
| train/                |          |
|    entropy_loss       | -0.144   |
|    explained_variance | 0.837    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8299     |
|    policy_loss        | -0.0518  |
|    value_loss         | 0.172    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 573      |
|    ep_rew_mean        | 7.71     |
| time/                 |          |
|    fps                | 121      |
|    iterations         | 8400     |
|    time_elapsed       | 1380     |
|    total_timesteps    | 168000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 618      |
|    ep_rew_mean        | 8.8      |
| time/                 |          |
|    fps                | 120      |
|    iterations         | 9700     |
|    time_elapsed       | 1604     |
|    total_timesteps    | 194000   |
| train/                |          |
|    entropy_loss       | -0.229   |
|    explained_variance | -0.345   |
|    learning_rate      | 0.0007   |
|    n_updates          | 9699     |
|    policy_loss        | 0.00737  |
|    value_loss         | 0.184    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 629      |
|    ep_rew_mean        | 9        |
| time/                 |          |
|    fps                | 120      |
|    iterations         | 9800     |
|    time_elapsed       | 1619     |
|    total_timesteps    | 196000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 675      |
|    ep_rew_mean        | 10.1     |
| time/                 |          |
|    fps                | 122      |
|    iterations         | 11000    |
|    time_elapsed       | 1799     |
|    total_timesteps    | 220000   |
| train/                |          |
|    entropy_loss       | -0.0689  |
|    explained_variance | 0.0156   |
|    learning_rate      | 0.0007   |
|    n_updates          | 10999    |
|    policy_loss        | -0.00747 |
|    value_loss         | 0.167    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 679      |
|    ep_rew_mean        | 10.1     |
| time/                 |          |
|    fps                | 122      |
|    iterations         | 11100    |
|    time_elapsed       | 1816     |
|    total_timesteps    | 222000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 669      |
|    ep_rew_mean        | 9.91     |
| time/                 |          |
|    fps                | 121      |
|    iterations         | 12300    |
|    time_elapsed       | 2019     |
|    total_timesteps    | 246000   |
| train/                |          |
|    entropy_loss       | -0.0633  |
|    explained_variance | 0.185    |
|    learning_rate      | 0.0007   |
|    n_updates          | 12299    |
|    policy_loss        | -0.00491 |
|    value_loss         | 0.464    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 670      |
|    ep_rew_mean        | 9.9      |
| time/                 |          |
|    fps                | 121      |
|    iterations         | 12400    |
|    time_elapsed       | 2036     |
|    total_timesteps    | 248000   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 662       |
|    ep_rew_mean        | 9.6       |
| time/                 |           |
|    fps                | 121       |
|    iterations         | 13700     |
|    time_elapsed       | 2260      |
|    total_timesteps    | 274000    |
| train/                |           |
|    entropy_loss       | -0.0158   |
|    explained_variance | 0.955     |
|    learning_rate      | 0.0007    |
|    n_updates          | 13699     |
|    policy_loss        | -0.000163 |
|    value_loss         | 0.0869    |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 677      |
|    ep_rew_mean        | 9.95     |
| time/                 |          |
|    fps                | 121      |
|    iterations         | 13800    |
|    time_elapsed       | 2275     |
|    total_timesteps    | 276000   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 691      |
|    ep_rew_mean        | 10.2     |
| time/                 |          |
|    fps                | 120      |
|    iterations         | 15000    |
|    time_elapsed       | 2479     |
|    total_timesteps    | 300000   |
| train/                |          |
|    entropy_loss       | -0.0033  |
|    explained_variance | -1.32    |
|    learning_rate      | 0.0007   |
|    n_updates          | 14999    |
|    policy_loss        | 4.17e-05 |
|    value_loss         | 0.161    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 690      |
|    ep_rew_mean        | 10.2     |
| time/                 |          |
|    fps                | 120      |
|    iterations         | 15100    |
|    time_elapsed       | 2496     |
|    total_timesteps    | 302000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 644      |
|    ep_rew_mean        | 9.13     |
| time/                 |          |
|    fps                | 121      |
|    iterations         | 16400    |
|    time_elapsed       | 2704     |
|    total_timesteps    | 328000   |
| train/                |          |
|    entropy_loss       | -0.0154  |
|    explained_variance | 0.754    |
|    learning_rate      | 0.0007   |
|    n_updates          | 16399    |
|    policy_loss        | 3.37e-05 |
|    value_loss         | 0.0205   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 634      |
|    ep_rew_mean        | 8.93     |
| time/                 |          |
|    fps                | 121      |
|    iterations         | 16500    |
|    time_elapsed       | 2719     |
|    total_timesteps    | 330000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 694      |
|    ep_rew_mean        | 10.2     |
| time/                 |          |
|    fps                | 122      |
|    iterations         | 17800    |
|    time_elapsed       | 2917     |
|    total_timesteps    | 356000   |
| train/                |          |
|    entropy_loss       | -0.108   |
|    explained_variance | 0.322    |
|    learning_rate      | 0.0007   |
|    n_updates          | 17799    |
|    policy_loss        | 0.0147   |
|    value_loss         | 0.247    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 684      |
|    ep_rew_mean        | 10.1     |
| time/                 |          |
|    fps                | 122      |
|    iterations         | 17900    |
|    time_elapsed       | 2932     |
|    total_timesteps    | 358000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 670      |
|    ep_rew_mean        | 9.99     |
| time/                 |          |
|    fps                | 122      |
|    iterations         | 19200    |
|    time_elapsed       | 3131     |
|    total_timesteps    | 384000   |
| train/                |          |
|    entropy_loss       | -0.0848  |
|    explained_variance | 0.657    |
|    learning_rate      | 0.0007   |
|    n_updates          | 19199    |
|    policy_loss        | 0.0054   |
|    value_loss         | 0.385    |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 685       |
|    ep_rew_mean        | 10.4      |
| time/                 |           |
|    fps                | 122       |
|    iterations         | 19300     |
|    time_elapsed       | 3146      |
|    total_timesteps    | 386000    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 724      |
|    ep_rew_mean        | 11.4     |
| time/                 |          |
|    fps                | 123      |
|    iterations         | 20500    |
|    time_elapsed       | 3330     |
|    total_timesteps    | 410000   |
| train/                |          |
|    entropy_loss       | -0.155   |
|    explained_variance | 0.0764   |
|    learning_rate      | 0.0007   |
|    n_updates          | 20499    |
|    policy_loss        | 0.0233   |
|    value_loss         | 0.204    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 733      |
|    ep_rew_mean        | 11.5     |
| time/                 |          |
|    fps                | 123      |
|    iterations         | 20600    |
|    time_elapsed       | 3345     |
|    total_timesteps    | 412000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 720      |
|    ep_rew_mean        | 11.1     |
| time/                 |          |
|    fps                | 123      |
|    iterations         | 21900    |
|    time_elapsed       | 3541     |
|    total_timesteps    | 438000   |
| train/                |          |
|    entropy_loss       | -0.0834  |
|    explained_variance | 0.641    |
|    learning_rate      | 0.0007   |
|    n_updates          | 21899    |
|    policy_loss        | 0.00629  |
|    value_loss         | 0.223    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 721      |
|    ep_rew_mean        | 11.1     |
| time/                 |          |
|    fps                | 123      |
|    iterations         | 22000    |
|    time_elapsed       | 3555     |
|    total_timesteps    | 440000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 728      |
|    ep_rew_mean        | 11.3     |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 23200    |
|    time_elapsed       | 3735     |
|    total_timesteps    | 464000   |
| train/                |          |
|    entropy_loss       | -0.0684  |
|    explained_variance | 0.6      |
|    learning_rate      | 0.0007   |
|    n_updates          | 23199    |
|    policy_loss        | 0.00681  |
|    value_loss         | 0.143    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 733      |
|    ep_rew_mean        | 11.4     |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 23300    |
|    time_elapsed       | 3750     |
|    total_timesteps    | 466000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 705      |
|    ep_rew_mean        | 11.1     |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 24600    |
|    time_elapsed       | 3948     |
|    total_timesteps    | 492000   |
| train/                |          |
|    entropy_loss       | -0.15    |
|    explained_variance | 0.805    |
|    learning_rate      | 0.0007   |
|    n_updates          | 24599    |
|    policy_loss        | 0.0388   |
|    value_loss         | 0.113    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 712      |
|    ep_rew_mean        | 11.3     |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 24700    |
|    time_elapsed       | 3963     |
|    total_timesteps    | 494000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 754      |
|    ep_rew_mean        | 11.9     |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 26000    |
|    time_elapsed       | 4161     |
|    total_timesteps    | 520000   |
| train/                |          |
|    entropy_loss       | -0.233   |
|    explained_variance | 0.919    |
|    learning_rate      | 0.0007   |
|    n_updates          | 25999    |
|    policy_loss        | -0.00768 |
|    value_loss         | 0.106    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 752      |
|    ep_rew_mean        | 11.9     |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 26100    |
|    time_elapsed       | 4176     |
|    total_timesteps    | 522000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 552      |
|    ep_rew_mean        | 7.68     |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 27400    |
|    time_elapsed       | 4379     |
|    total_timesteps    | 548000   |
| train/                |          |
|    entropy_loss       | -0.0429  |
|    explained_variance | 0.1      |
|    learning_rate      | 0.0007   |
|    n_updates          | 27399    |
|    policy_loss        | 0.000582 |
|    value_loss         | 0.0615   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 614      |
|    ep_rew_mean        | 9.17     |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 27500    |
|    time_elapsed       | 4398     |
|    total_timesteps    | 550000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 777      |
|    ep_rew_mean        | 12.8     |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 28800    |
|    time_elapsed       | 4623     |
|    total_timesteps    | 576000   |
| train/                |          |
|    entropy_loss       | -0.0228  |
|    explained_variance | 0.776    |
|    learning_rate      | 0.0007   |
|    n_updates          | 28799    |
|    policy_loss        | 3.68e-05 |
|    value_loss         | 0.036    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 778      |
|    ep_rew_mean        | 12.7     |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 28900    |
|    time_elapsed       | 4642     |
|    total_timesteps    | 578000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 767      |
|    ep_rew_mean        | 12.3     |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 30200    |
|    time_elapsed       | 4869     |
|    total_timesteps    | 604000   |
| train/                |          |
|    entropy_loss       | -0.126   |
|    explained_variance | 0.808    |
|    learning_rate      | 0.0007   |
|    n_updates          | 30199    |
|    policy_loss        | 0.00417  |
|    value_loss         | 0.111    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 756      |
|    ep_rew_mean        | 12.1     |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 30300    |
|    time_elapsed       | 4885     |
|    total_timesteps    | 606000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 794      |
|    ep_rew_mean        | 13.6     |
| time/                 |          |
|    fps                | 122      |
|    iterations         | 31600    |
|    time_elapsed       | 5159     |
|    total_timesteps    | 632000   |
| train/                |          |
|    entropy_loss       | -0.104   |
|    explained_variance | 0.778    |
|    learning_rate      | 0.0007   |
|    n_updates          | 31599    |
|    policy_loss        | 0.00617  |
|    value_loss         | 0.274    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 795      |
|    ep_rew_mean        | 13.8     |
| time/                 |          |
|    fps                | 122      |
|    iterations         | 31700    |
|    time_elapsed       | 5179     |
|    total_timesteps    | 634000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 783      |
|    ep_rew_mean        | 13       |
| time/                 |          |
|    fps                | 120      |
|    iterations         | 32900    |
|    time_elapsed       | 5466     |
|    total_timesteps    | 658000   |
| train/                |          |
|    entropy_loss       | -0.146   |
|    explained_variance | -1.72    |
|    learning_rate      | 0.0007   |
|    n_updates          | 32899    |
|    policy_loss        | 0.00933  |
|    value_loss         | 0.326    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 798      |
|    ep_rew_mean        | 13.2     |
| time/                 |          |
|    fps                | 120      |
|    iterations         | 33000    |
|    time_elapsed       | 5496     |
|    total_timesteps    | 660000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 798      |
|    ep_rew_mean        | 13.5     |
| time/                 |          |
|    fps                | 117      |
|    iterations         | 34200    |
|    time_elapsed       | 5797     |
|    total_timesteps    | 684000   |
| train/                |          |
|    entropy_loss       | -0.204   |
|    explained_variance | 0.501    |
|    learning_rate      | 0.0007   |
|    n_updates          | 34199    |
|    policy_loss        | 0.038    |
|    value_loss         | 0.492    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 800      |
|    ep_rew_mean        | 13.5     |
| time/                 |          |
|    fps                | 117      |
|    iterations         | 34300    |
|    time_elapsed       | 5820     |
|    total_timesteps    | 686000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 803      |
|    ep_rew_mean        | 13.5     |
| time/                 |          |
|    fps                | 115      |
|    iterations         | 35600    |
|    time_elapsed       | 6159     |
|    total_timesteps    | 712000   |
| train/                |          |
|    entropy_loss       | -0.0533  |
|    explained_variance | 0.935    |
|    learning_rate      | 0.0007   |
|    n_updates          | 35599    |
|    policy_loss        | -0.00259 |
|    value_loss         | 0.0796   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 807      |
|    ep_rew_mean        | 13.6     |
| time/                 |          |
|    fps                | 115      |
|    iterations         | 35700    |
|    time_elapsed       | 6180     |
|    total_timesteps    | 714000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 788      |
|    ep_rew_mean        | 13.2     |
| time/                 |          |
|    fps                | 113      |
|    iterations         | 36900    |
|    time_elapsed       | 6476     |
|    total_timesteps    | 738000   |
| train/                |          |
|    entropy_loss       | -0.0965  |
|    explained_variance | 0.707    |
|    learning_rate      | 0.0007   |
|    n_updates          | 36899    |
|    policy_loss        | -0.00545 |
|    value_loss         | 0.204    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 793      |
|    ep_rew_mean        | 13.3     |
| time/                 |          |
|    fps                | 113      |
|    iterations         | 37000    |
|    time_elapsed       | 6504     |
|    total_timesteps    | 740000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 845      |
|    ep_rew_mean        | 14.1     |
| time/                 |          |
|    fps                | 112      |
|    iterations         | 38200    |
|    time_elapsed       | 6791     |
|    total_timesteps    | 764000   |
| train/                |          |
|    entropy_loss       | -0.0928  |
|    explained_variance | 0.779    |
|    learning_rate      | 0.0007   |
|    n_updates          | 38199    |
|    policy_loss        | 0.025    |
|    value_loss         | 0.0306   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 867      |
|    ep_rew_mean        | 14.6     |
| time/                 |          |
|    fps                | 112      |
|    iterations         | 38300    |
|    time_elapsed       | 6812     |
|    total_timesteps    | 766000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 859      |
|    ep_rew_mean        | 15.2     |
| time/                 |          |
|    fps                | 110      |
|    iterations         | 39600    |
|    time_elapsed       | 7141     |
|    total_timesteps    | 792000   |
| train/                |          |
|    entropy_loss       | -0.124   |
|    explained_variance | -0.103   |
|    learning_rate      | 0.0007   |
|    n_updates          | 39599    |
|    policy_loss        | 0.00766  |
|    value_loss         | 0.334    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 861      |
|    ep_rew_mean        | 15.2     |
| time/                 |          |
|    fps                | 110      |
|    iterations         | 39700    |
|    time_elapsed       | 7164     |
|    total_timesteps    | 794000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 853      |
|    ep_rew_mean        | 14.5     |
| time/                 |          |
|    fps                | 109      |
|    iterations         | 40900    |
|    time_elapsed       | 7459     |
|    total_timesteps    | 818000   |
| train/                |          |
|    entropy_loss       | -0.0144  |
|    explained_variance | 0.652    |
|    learning_rate      | 0.0007   |
|    n_updates          | 40899    |
|    policy_loss        | -0.0006  |
|    value_loss         | 0.041    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 864      |
|    ep_rew_mean        | 14.9     |
| time/                 |          |
|    fps                | 109      |
|    iterations         | 41000    |
|    time_elapsed       | 7489     |
|    total_timesteps    | 820000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 856      |
|    ep_rew_mean        | 15.2     |
| time/                 |          |
|    fps                | 108      |
|    iterations         | 42200    |
|    time_elapsed       | 7793     |
|    total_timesteps    | 844000   |
| train/                |          |
|    entropy_loss       | -0.0653  |
|    explained_variance | 0.886    |
|    learning_rate      | 0.0007   |
|    n_updates          | 42199    |
|    policy_loss        | 0.00842  |
|    value_loss         | 0.0478   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 854      |
|    ep_rew_mean        | 15.1     |
| time/                 |          |
|    fps                | 108      |
|    iterations         | 42300    |
|    time_elapsed       | 7815     |
|    total_timesteps    | 846000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 867      |
|    ep_rew_mean        | 15.3     |
| time/                 |          |
|    fps                | 107      |
|    iterations         | 43600    |
|    time_elapsed       | 8122     |
|    total_timesteps    | 872000   |
| train/                |          |
|    entropy_loss       | -0.163   |
|    explained_variance | 0.795    |
|    learning_rate      | 0.0007   |
|    n_updates          | 43599    |
|    policy_loss        | -0.161   |
|    value_loss         | 0.106    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 863      |
|    ep_rew_mean        | 15.1     |
| time/                 |          |
|    fps                | 107      |
|    iterations         | 43700    |
|    time_elapsed       | 8144     |
|    total_timesteps    | 874000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 874      |
|    ep_rew_mean        | 15.6     |
| time/                 |          |
|    fps                | 106      |
|    iterations         | 45000    |
|    time_elapsed       | 8461     |
|    total_timesteps    | 900000   |
| train/                |          |
|    entropy_loss       | -0.284   |
|    explained_variance | 0.927    |
|    learning_rate      | 0.0007   |
|    n_updates          | 44999    |
|    policy_loss        | -0.0153  |
|    value_loss         | 0.0226   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 877      |
|    ep_rew_mean        | 15.7     |
| time/                 |          |
|    fps                | 106      |
|    iterations         | 45100    |
|    time_elapsed       | 8489     |
|    total_timesteps    | 902000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 902      |
|    ep_rew_mean        | 16.5     |
| time/                 |          |
|    fps                | 105      |
|    iterations         | 46400    |
|    time_elapsed       | 8817     |
|    total_timesteps    | 928000   |
| train/                |          |
|    entropy_loss       | -0.329   |
|    explained_variance | 0.909    |
|    learning_rate      | 0.0007   |
|    n_updates          | 46399    |
|    policy_loss        | -0.00363 |
|    value_loss         | 0.127    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 889      |
|    ep_rew_mean        | 16.1     |
| time/                 |          |
|    fps                | 105      |
|    iterations         | 46500    |
|    time_elapsed       | 8840     |
|    total_timesteps    | 930000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 865      |
|    ep_rew_mean        | 15.6     |
| time/                 |          |
|    fps                | 104      |
|    iterations         | 47700    |
|    time_elapsed       | 9107     |
|    total_timesteps    | 954000   |
| train/                |          |
|    entropy_loss       | -0.148   |
|    explained_variance | 0.829    |
|    learning_rate      | 0.0007   |
|    n_updates          | 47699    |
|    policy_loss        | 0.00935  |
|    value_loss         | 0.275    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 872      |
|    ep_rew_mean        | 15.7     |
| time/                 |          |
|    fps                | 104      |
|    iterations         | 47800    |
|    time_elapsed       | 9128     |
|    total_timesteps    | 956000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 912      |
|    ep_rew_mean        | 16.1     |
| time/                 |          |
|    fps                | 104      |
|    iterations         | 49100    |
|    time_elapsed       | 9412     |
|    total_timesteps    | 982000   |
| train/                |          |
|    entropy_loss       | -0.118   |
|    explained_variance | 0.722    |
|    learning_rate      | 0.0007   |
|    n_updates          | 49099    |
|    policy_loss        | 0.0191   |
|    value_loss         | 0.273    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 913      |
|    ep_rew_mean        | 16.3     |
| time/                 |          |
|    fps                | 104      |
|    iterations         | 49200    |
|    time_elapsed       | 9436     |
|    total_timesteps    | 984000   |
| train/                |          |
|

<stable_baselines3.a2c.a2c.A2C at 0x2466a0e4350>

# Save and Reload Model

In [8]:
# Checkpoint location (no file extension given here) for the A2C Breakout
# agent; built with os.path.join so the separator follows the host OS.
a2c_path = os.path.join(
    'Training',
    'Saved Models',
    'A2C_Breakout_Model_1M',
)

In [None]:
# Persist the trained agent to the checkpoint path defined in a2c_path.
model.save(a2c_path)

In [None]:
# Drop the in-memory agent so that the following cell has to restore it
# from the saved checkpoint rather than reusing the trained object.
del model

In [9]:
# Restore the agent from the checkpoint at a2c_path and attach the current
# vectorised environment to it.
model = A2C.load(a2c_path, env=env)

Wrapping the env in a VecTransposeImage.


# Evaluate and Test

In [10]:
# Build a fresh single-environment Breakout vec env (seed 0) for evaluation.
env = make_atari_env(
    env_id='Breakout-v0',
    n_envs=1,
    seed=0,
)

In [11]:
# Stack 4 consecutive frames, the same n_stack used for the training env,
# so the observations match what the model was trained on.
env = VecFrameStack(venv=env, n_stack=4)

In [10]:
# Run 10 evaluation episodes; the call is the cell's last expression, so the
# returned tuple is shown as the cell output.
# NOTE(review): render=True only shows a window if the env was created with a
# render mode that supports on-screen display — confirm for this env.
evaluate_policy(
    model,
    env,
    n_eval_episodes=10,
    render=True,
)

(11.5, 6.360031446463139)

In [17]:
# Close the evaluation environment now that evaluation has finished.
env.close()