## 1. Import Dependencies

In [2]:
!pip install atari_py



In [1]:
from typing import Any, Dict

import torch as th

In [1]:
import gym
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
import os

# Added for video logs
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.logger import Video

## 2. Test Environment

In [5]:
!python -m atari_py.import_roms .\ROMS

copying adventure.bin from .\ROMS\Adventure (1980) (Atari, Warren Robinett) (CX2613, CX2613P) (PAL).bin to C:\Users\danie\anaconda3\lib\site-packages\atari_py\atari_roms\adventure.bin
copying air_raid.bin from .\ROMS\Air Raid (Men-A-Vision) (PAL) ~.bin to C:\Users\danie\anaconda3\lib\site-packages\atari_py\atari_roms\air_raid.bin
copying alien.bin from .\ROMS\Alien (1982) (20th Century Fox Video Games, Douglas 'Dallas North' Neubauer) (11006) ~.bin to C:\Users\danie\anaconda3\lib\site-packages\atari_py\atari_roms\alien.bin
copying amidar.bin from .\ROMS\Amidar (1982) (Parker Brothers, Ed Temple) (PB5310) ~.bin to C:\Users\danie\anaconda3\lib\site-packages\atari_py\atari_roms\amidar.bin
copying assault.bin from .\ROMS\Assault (AKA Sky Alien) (1983) (Bomb - Onbase) (CA281).bin to C:\Users\danie\anaconda3\lib\site-packages\atari_py\atari_roms\assault.bin
copying asterix.bin from .\ROMS\Asterix (AKA Taz) (1984) (Atari, Jerome Domurat, Steve Woita) (CX2696).bin to C:\Users\danie\anaconda3\l

In [18]:
environment_name = 'Breakout-v0'
env = gym.make(environment_name, render_mode = 'human')

In [164]:
env.reset()

array([[[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       ...,

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]]], dtype=uint8)

In [19]:
episodes = 5
for episode in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0
    
    while not done: 
        #env.render() not supported anymore
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))
env.close()

Episode:1 Score:2.0


KeyboardInterrupt: 

## 3. Vectorise Environment and Train Model

In [26]:
env = make_atari_env('Breakout-v0', n_envs = 4, seed = 0)
env = VecFrameStack(env, n_stack=4)

In [27]:
env.render()

In [48]:
log_path = os.path.join('Training', 'Logs')
model = A2C('CnnPolicy', env, verbose = 1, tensorboard_log = log_path)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [49]:
model.learn(total_timesteps = 2000000)

Logging to Training\Logs\A2C_2
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 277      |
|    ep_rew_mean        | 1.46     |
| time/                 |          |
|    fps                | 161      |
|    iterations         | 100      |
|    time_elapsed       | 12       |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -1.39    |
|    explained_variance | 0.0844   |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.207    |
|    value_loss         | 0.253    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 292      |
|    ep_rew_mean        | 1.78     |
| time/                 |          |
|    fps                | 186      |
|    iterations         | 200      |
|    time_elapsed       | 21       |
|    total_timesteps    | 4000     |
| train

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 358      |
|    ep_rew_mean        | 3.12     |
| time/                 |          |
|    fps                | 208      |
|    iterations         | 1400     |
|    time_elapsed       | 134      |
|    total_timesteps    | 28000    |
| train/                |          |
|    entropy_loss       | -1.02    |
|    explained_variance | 0.712    |
|    learning_rate      | 0.0007   |
|    n_updates          | 1399     |
|    policy_loss        | -0.315   |
|    value_loss         | 0.225    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 364      |
|    ep_rew_mean        | 3.33     |
| time/                 |          |
|    fps                | 209      |
|    iterations         | 1500     |
|    time_elapsed       | 143      |
|    total_timesteps    | 30000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 442      |
|    ep_rew_mean        | 5.09     |
| time/                 |          |
|    fps                | 214      |
|    iterations         | 2800     |
|    time_elapsed       | 261      |
|    total_timesteps    | 56000    |
| train/                |          |
|    entropy_loss       | -0.938   |
|    explained_variance | 0.937    |
|    learning_rate      | 0.0007   |
|    n_updates          | 2799     |
|    policy_loss        | 0.304    |
|    value_loss         | 0.158    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 441      |
|    ep_rew_mean        | 4.99     |
| time/                 |          |
|    fps                | 214      |
|    iterations         | 2900     |
|    time_elapsed       | 270      |
|    total_timesteps    | 58000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 440      |
|    ep_rew_mean        | 5.04     |
| time/                 |          |
|    fps                | 216      |
|    iterations         | 4200     |
|    time_elapsed       | 387      |
|    total_timesteps    | 84000    |
| train/                |          |
|    entropy_loss       | -0.969   |
|    explained_variance | 0.567    |
|    learning_rate      | 0.0007   |
|    n_updates          | 4199     |
|    policy_loss        | -0.0105  |
|    value_loss         | 0.0269   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 436      |
|    ep_rew_mean        | 5        |
| time/                 |          |
|    fps                | 216      |
|    iterations         | 4300     |
|    time_elapsed       | 396      |
|    total_timesteps    | 86000    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 509      |
|    ep_rew_mean        | 6.36     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 5600     |
|    time_elapsed       | 512      |
|    total_timesteps    | 112000   |
| train/                |          |
|    entropy_loss       | -0.22    |
|    explained_variance | 0.89     |
|    learning_rate      | 0.0007   |
|    n_updates          | 5599     |
|    policy_loss        | 0.00252  |
|    value_loss         | 0.0869   |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 509       |
|    ep_rew_mean        | 6.44      |
| time/                 |           |
|    fps                | 217       |
|    iterations         | 5700      |
|    time_elapsed       | 523       |
|    total_timesteps    | 114000    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 520      |
|    ep_rew_mean        | 6.81     |
| time/                 |          |
|    fps                | 217      |
|    iterations         | 6900     |
|    time_elapsed       | 633      |
|    total_timesteps    | 138000   |
| train/                |          |
|    entropy_loss       | -0.388   |
|    explained_variance | 0.807    |
|    learning_rate      | 0.0007   |
|    n_updates          | 6899     |
|    policy_loss        | 0.0537   |
|    value_loss         | 0.0844   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 532      |
|    ep_rew_mean        | 6.9      |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 7000     |
|    time_elapsed       | 641      |
|    total_timesteps    | 140000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 576      |
|    ep_rew_mean        | 8.05     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 8300     |
|    time_elapsed       | 758      |
|    total_timesteps    | 166000   |
| train/                |          |
|    entropy_loss       | -0.239   |
|    explained_variance | 0.933    |
|    learning_rate      | 0.0007   |
|    n_updates          | 8299     |
|    policy_loss        | 0.0337   |
|    value_loss         | 0.0661   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 572      |
|    ep_rew_mean        | 7.94     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 8400     |
|    time_elapsed       | 768      |
|    total_timesteps    | 168000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 564      |
|    ep_rew_mean        | 7.73     |
| time/                 |          |
|    fps                | 219      |
|    iterations         | 9700     |
|    time_elapsed       | 885      |
|    total_timesteps    | 194000   |
| train/                |          |
|    entropy_loss       | -0.258   |
|    explained_variance | 0.643    |
|    learning_rate      | 0.0007   |
|    n_updates          | 9699     |
|    policy_loss        | -0.0455  |
|    value_loss         | 0.297    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 570      |
|    ep_rew_mean        | 7.92     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 9800     |
|    time_elapsed       | 895      |
|    total_timesteps    | 196000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 608      |
|    ep_rew_mean        | 8.51     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 11000    |
|    time_elapsed       | 1004     |
|    total_timesteps    | 220000   |
| train/                |          |
|    entropy_loss       | -0.131   |
|    explained_variance | 0.717    |
|    learning_rate      | 0.0007   |
|    n_updates          | 10999    |
|    policy_loss        | 0.0359   |
|    value_loss         | 0.165    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 595      |
|    ep_rew_mean        | 8.11     |
| time/                 |          |
|    fps                | 219      |
|    iterations         | 11100    |
|    time_elapsed       | 1013     |
|    total_timesteps    | 222000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 623      |
|    ep_rew_mean        | 8.78     |
| time/                 |          |
|    fps                | 219      |
|    iterations         | 12400    |
|    time_elapsed       | 1131     |
|    total_timesteps    | 248000   |
| train/                |          |
|    entropy_loss       | -0.267   |
|    explained_variance | 0.787    |
|    learning_rate      | 0.0007   |
|    n_updates          | 12399    |
|    policy_loss        | 0.133    |
|    value_loss         | 0.0868   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 630      |
|    ep_rew_mean        | 9.03     |
| time/                 |          |
|    fps                | 219      |
|    iterations         | 12500    |
|    time_elapsed       | 1140     |
|    total_timesteps    | 250000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 638      |
|    ep_rew_mean        | 9.2      |
| time/                 |          |
|    fps                | 219      |
|    iterations         | 13800    |
|    time_elapsed       | 1258     |
|    total_timesteps    | 276000   |
| train/                |          |
|    entropy_loss       | -0.222   |
|    explained_variance | 0.825    |
|    learning_rate      | 0.0007   |
|    n_updates          | 13799    |
|    policy_loss        | 0.0216   |
|    value_loss         | 0.286    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 643      |
|    ep_rew_mean        | 9.51     |
| time/                 |          |
|    fps                | 219      |
|    iterations         | 13900    |
|    time_elapsed       | 1268     |
|    total_timesteps    | 278000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 708      |
|    ep_rew_mean        | 11.1     |
| time/                 |          |
|    fps                | 219      |
|    iterations         | 15200    |
|    time_elapsed       | 1386     |
|    total_timesteps    | 304000   |
| train/                |          |
|    entropy_loss       | -0.11    |
|    explained_variance | 0.819    |
|    learning_rate      | 0.0007   |
|    n_updates          | 15199    |
|    policy_loss        | 0.0506   |
|    value_loss         | 0.17     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 708      |
|    ep_rew_mean        | 11.1     |
| time/                 |          |
|    fps                | 219      |
|    iterations         | 15300    |
|    time_elapsed       | 1395     |
|    total_timesteps    | 306000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 689      |
|    ep_rew_mean        | 10.3     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 16500    |
|    time_elapsed       | 1507     |
|    total_timesteps    | 330000   |
| train/                |          |
|    entropy_loss       | -0.15    |
|    explained_variance | 0.932    |
|    learning_rate      | 0.0007   |
|    n_updates          | 16499    |
|    policy_loss        | 0.033    |
|    value_loss         | 0.047    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 688      |
|    ep_rew_mean        | 10.4     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 16600    |
|    time_elapsed       | 1516     |
|    total_timesteps    | 332000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 724      |
|    ep_rew_mean        | 11.2     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 17900    |
|    time_elapsed       | 1635     |
|    total_timesteps    | 358000   |
| train/                |          |
|    entropy_loss       | -0.195   |
|    explained_variance | 0.873    |
|    learning_rate      | 0.0007   |
|    n_updates          | 17899    |
|    policy_loss        | 0.00572  |
|    value_loss         | 0.083    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 735      |
|    ep_rew_mean        | 11.5     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 18000    |
|    time_elapsed       | 1644     |
|    total_timesteps    | 360000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 742      |
|    ep_rew_mean        | 12.2     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 19200    |
|    time_elapsed       | 1753     |
|    total_timesteps    | 384000   |
| train/                |          |
|    entropy_loss       | -0.0941  |
|    explained_variance | 0.892    |
|    learning_rate      | 0.0007   |
|    n_updates          | 19199    |
|    policy_loss        | 0.132    |
|    value_loss         | 0.154    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 740      |
|    ep_rew_mean        | 12.1     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 19300    |
|    time_elapsed       | 1762     |
|    total_timesteps    | 386000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 811      |
|    ep_rew_mean        | 13.7     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 20500    |
|    time_elapsed       | 1876     |
|    total_timesteps    | 410000   |
| train/                |          |
|    entropy_loss       | -0.0953  |
|    explained_variance | 0.822    |
|    learning_rate      | 0.0007   |
|    n_updates          | 20499    |
|    policy_loss        | 0.012    |
|    value_loss         | 0.189    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 804      |
|    ep_rew_mean        | 13.5     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 20600    |
|    time_elapsed       | 1885     |
|    total_timesteps    | 412000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 724      |
|    ep_rew_mean        | 11.5     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 21900    |
|    time_elapsed       | 2002     |
|    total_timesteps    | 438000   |
| train/                |          |
|    entropy_loss       | -0.154   |
|    explained_variance | 0.429    |
|    learning_rate      | 0.0007   |
|    n_updates          | 21899    |
|    policy_loss        | -0.0613  |
|    value_loss         | 0.33     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 735      |
|    ep_rew_mean        | 11.7     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 22000    |
|    time_elapsed       | 2012     |
|    total_timesteps    | 440000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 736      |
|    ep_rew_mean        | 12       |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 23300    |
|    time_elapsed       | 2129     |
|    total_timesteps    | 466000   |
| train/                |          |
|    entropy_loss       | -0.124   |
|    explained_variance | 0.849    |
|    learning_rate      | 0.0007   |
|    n_updates          | 23299    |
|    policy_loss        | -0.0184  |
|    value_loss         | 0.0569   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 742      |
|    ep_rew_mean        | 12.2     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 23400    |
|    time_elapsed       | 2138     |
|    total_timesteps    | 468000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 821      |
|    ep_rew_mean        | 14       |
| time/                 |          |
|    fps                | 219      |
|    iterations         | 24600    |
|    time_elapsed       | 2246     |
|    total_timesteps    | 492000   |
| train/                |          |
|    entropy_loss       | -0.0943  |
|    explained_variance | 0.145    |
|    learning_rate      | 0.0007   |
|    n_updates          | 24599    |
|    policy_loss        | 0.0186   |
|    value_loss         | 0.762    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 812      |
|    ep_rew_mean        | 13.8     |
| time/                 |          |
|    fps                | 219      |
|    iterations         | 24700    |
|    time_elapsed       | 2255     |
|    total_timesteps    | 494000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 806      |
|    ep_rew_mean        | 13.6     |
| time/                 |          |
|    fps                | 219      |
|    iterations         | 26000    |
|    time_elapsed       | 2373     |
|    total_timesteps    | 520000   |
| train/                |          |
|    entropy_loss       | -0.167   |
|    explained_variance | 0.687    |
|    learning_rate      | 0.0007   |
|    n_updates          | 25999    |
|    policy_loss        | -0.043   |
|    value_loss         | 0.317    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 811      |
|    ep_rew_mean        | 13.6     |
| time/                 |          |
|    fps                | 219      |
|    iterations         | 26100    |
|    time_elapsed       | 2382     |
|    total_timesteps    | 522000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 804      |
|    ep_rew_mean        | 13.5     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 27400    |
|    time_elapsed       | 2503     |
|    total_timesteps    | 548000   |
| train/                |          |
|    entropy_loss       | -0.15    |
|    explained_variance | 0.395    |
|    learning_rate      | 0.0007   |
|    n_updates          | 27399    |
|    policy_loss        | -0.0597  |
|    value_loss         | 0.564    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 794      |
|    ep_rew_mean        | 13.3     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 27500    |
|    time_elapsed       | 2512     |
|    total_timesteps    | 550000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 847      |
|    ep_rew_mean        | 15       |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 28800    |
|    time_elapsed       | 2631     |
|    total_timesteps    | 576000   |
| train/                |          |
|    entropy_loss       | -0.183   |
|    explained_variance | 0.508    |
|    learning_rate      | 0.0007   |
|    n_updates          | 28799    |
|    policy_loss        | 0.0158   |
|    value_loss         | 0.155    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 845      |
|    ep_rew_mean        | 14.9     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 28900    |
|    time_elapsed       | 2640     |
|    total_timesteps    | 578000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 815      |
|    ep_rew_mean        | 14       |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 30200    |
|    time_elapsed       | 2759     |
|    total_timesteps    | 604000   |
| train/                |          |
|    entropy_loss       | -0.352   |
|    explained_variance | 0.828    |
|    learning_rate      | 0.0007   |
|    n_updates          | 30199    |
|    policy_loss        | -0.0659  |
|    value_loss         | 0.407    |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 823       |
|    ep_rew_mean        | 14.3      |
| time/                 |           |
|    fps                | 218       |
|    iterations         | 30300     |
|    time_elapsed       | 2769      |
|    total_timesteps    | 606000    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 821      |
|    ep_rew_mean        | 14.5     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 31500    |
|    time_elapsed       | 2880     |
|    total_timesteps    | 630000   |
| train/                |          |
|    entropy_loss       | -0.0531  |
|    explained_variance | 0.806    |
|    learning_rate      | 0.0007   |
|    n_updates          | 31499    |
|    policy_loss        | 0.00498  |
|    value_loss         | 0.0562   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 819      |
|    ep_rew_mean        | 14.4     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 31600    |
|    time_elapsed       | 2889     |
|    total_timesteps    | 632000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 856      |
|    ep_rew_mean        | 15.5     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 32800    |
|    time_elapsed       | 2999     |
|    total_timesteps    | 656000   |
| train/                |          |
|    entropy_loss       | -0.0899  |
|    explained_variance | -2.16    |
|    learning_rate      | 0.0007   |
|    n_updates          | 32799    |
|    policy_loss        | 0.0661   |
|    value_loss         | 0.234    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 872      |
|    ep_rew_mean        | 15.9     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 32900    |
|    time_elapsed       | 3008     |
|    total_timesteps    | 658000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 857      |
|    ep_rew_mean        | 15.4     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 34200    |
|    time_elapsed       | 3127     |
|    total_timesteps    | 684000   |
| train/                |          |
|    entropy_loss       | -0.0726  |
|    explained_variance | 0.809    |
|    learning_rate      | 0.0007   |
|    n_updates          | 34199    |
|    policy_loss        | -0.00992 |
|    value_loss         | 0.236    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 851      |
|    ep_rew_mean        | 15.1     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 34300    |
|    time_elapsed       | 3136     |
|    total_timesteps    | 686000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 898      |
|    ep_rew_mean        | 16.7     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 35500    |
|    time_elapsed       | 3248     |
|    total_timesteps    | 710000   |
| train/                |          |
|    entropy_loss       | -0.124   |
|    explained_variance | 0.615    |
|    learning_rate      | 0.0007   |
|    n_updates          | 35499    |
|    policy_loss        | -0.0188  |
|    value_loss         | 0.3      |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 885      |
|    ep_rew_mean        | 16.4     |
| time/                 |          |
|    fps                | 218      |
|    iterations         | 35600    |
|    time_elapsed       | 3258     |
|    total_timesteps    | 712000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 863      |
|    ep_rew_mean        | 15.2     |
| time/                 |          |
|    fps                | 217      |
|    iterations         | 36900    |
|    time_elapsed       | 3390     |
|    total_timesteps    | 738000   |
| train/                |          |
|    entropy_loss       | -0.0512  |
|    explained_variance | 0.588    |
|    learning_rate      | 0.0007   |
|    n_updates          | 36899    |
|    policy_loss        | -0.00439 |
|    value_loss         | 0.593    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 866      |
|    ep_rew_mean        | 15.3     |
| time/                 |          |
|    fps                | 217      |
|    iterations         | 37000    |
|    time_elapsed       | 3400     |
|    total_timesteps    | 740000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 803      |
|    ep_rew_mean        | 13.7     |
| time/                 |          |
|    fps                | 216      |
|    iterations         | 38300    |
|    time_elapsed       | 3535     |
|    total_timesteps    | 766000   |
| train/                |          |
|    entropy_loss       | -0.14    |
|    explained_variance | 0.767    |
|    learning_rate      | 0.0007   |
|    n_updates          | 38299    |
|    policy_loss        | -0.0289  |
|    value_loss         | 0.313    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 801      |
|    ep_rew_mean        | 13.5     |
| time/                 |          |
|    fps                | 216      |
|    iterations         | 38400    |
|    time_elapsed       | 3546     |
|    total_timesteps    | 768000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 830      |
|    ep_rew_mean        | 15.4     |
| time/                 |          |
|    fps                | 215      |
|    iterations         | 39700    |
|    time_elapsed       | 3683     |
|    total_timesteps    | 794000   |
| train/                |          |
|    entropy_loss       | -0.264   |
|    explained_variance | 0.231    |
|    learning_rate      | 0.0007   |
|    n_updates          | 39699    |
|    policy_loss        | -0.0306  |
|    value_loss         | 0.0744   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 826      |
|    ep_rew_mean        | 15.2     |
| time/                 |          |
|    fps                | 215      |
|    iterations         | 39800    |
|    time_elapsed       | 3693     |
|    total_timesteps    | 796000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 885      |
|    ep_rew_mean        | 16       |
| time/                 |          |
|    fps                | 214      |
|    iterations         | 41000    |
|    time_elapsed       | 3819     |
|    total_timesteps    | 820000   |
| train/                |          |
|    entropy_loss       | -0.038   |
|    explained_variance | -3.88    |
|    learning_rate      | 0.0007   |
|    n_updates          | 40999    |
|    policy_loss        | 0.00172  |
|    value_loss         | 0.846    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 884      |
|    ep_rew_mean        | 15.9     |
| time/                 |          |
|    fps                | 214      |
|    iterations         | 41100    |
|    time_elapsed       | 3830     |
|    total_timesteps    | 822000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 867      |
|    ep_rew_mean        | 15.5     |
| time/                 |          |
|    fps                | 213      |
|    iterations         | 42400    |
|    time_elapsed       | 3967     |
|    total_timesteps    | 848000   |
| train/                |          |
|    entropy_loss       | -0.148   |
|    explained_variance | -4.19    |
|    learning_rate      | 0.0007   |
|    n_updates          | 42399    |
|    policy_loss        | 0.00928  |
|    value_loss         | 0.209    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 868      |
|    ep_rew_mean        | 15.5     |
| time/                 |          |
|    fps                | 213      |
|    iterations         | 42500    |
|    time_elapsed       | 3978     |
|    total_timesteps    | 850000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 882      |
|    ep_rew_mean        | 16.6     |
| time/                 |          |
|    fps                | 212      |
|    iterations         | 43800    |
|    time_elapsed       | 4116     |
|    total_timesteps    | 876000   |
| train/                |          |
|    entropy_loss       | -0.171   |
|    explained_variance | 0.732    |
|    learning_rate      | 0.0007   |
|    n_updates          | 43799    |
|    policy_loss        | -0.0162  |
|    value_loss         | 0.317    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 893      |
|    ep_rew_mean        | 17       |
| time/                 |          |
|    fps                | 212      |
|    iterations         | 43900    |
|    time_elapsed       | 4127     |
|    total_timesteps    | 878000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 845      |
|    ep_rew_mean        | 15       |
| time/                 |          |
|    fps                | 211      |
|    iterations         | 45200    |
|    time_elapsed       | 4267     |
|    total_timesteps    | 904000   |
| train/                |          |
|    entropy_loss       | -0.193   |
|    explained_variance | -0.0417  |
|    learning_rate      | 0.0007   |
|    n_updates          | 45199    |
|    policy_loss        | 0.00582  |
|    value_loss         | 0.0832   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 848      |
|    ep_rew_mean        | 15.2     |
| time/                 |          |
|    fps                | 211      |
|    iterations         | 45300    |
|    time_elapsed       | 4278     |
|    total_timesteps    | 906000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 912      |
|    ep_rew_mean        | 17.8     |
| time/                 |          |
|    fps                | 211      |
|    iterations         | 46500    |
|    time_elapsed       | 4406     |
|    total_timesteps    | 930000   |
| train/                |          |
|    entropy_loss       | -0.224   |
|    explained_variance | 0.531    |
|    learning_rate      | 0.0007   |
|    n_updates          | 46499    |
|    policy_loss        | 0.0376   |
|    value_loss         | 0.0658   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 894      |
|    ep_rew_mean        | 17.2     |
| time/                 |          |
|    fps                | 211      |
|    iterations         | 46600    |
|    time_elapsed       | 4416     |
|    total_timesteps    | 932000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 760      |
|    ep_rew_mean        | 13.4     |
| time/                 |          |
|    fps                | 210      |
|    iterations         | 47800    |
|    time_elapsed       | 4545     |
|    total_timesteps    | 956000   |
| train/                |          |
|    entropy_loss       | -0.0861  |
|    explained_variance | 0.378    |
|    learning_rate      | 0.0007   |
|    n_updates          | 47799    |
|    policy_loss        | -0.00755 |
|    value_loss         | 0.337    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 757      |
|    ep_rew_mean        | 13       |
| time/                 |          |
|    fps                | 210      |
|    iterations         | 47900    |
|    time_elapsed       | 4555     |
|    total_timesteps    | 958000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 847      |
|    ep_rew_mean        | 14.9     |
| time/                 |          |
|    fps                | 209      |
|    iterations         | 49200    |
|    time_elapsed       | 4694     |
|    total_timesteps    | 984000   |
| train/                |          |
|    entropy_loss       | -0.209   |
|    explained_variance | 0.335    |
|    learning_rate      | 0.0007   |
|    n_updates          | 49199    |
|    policy_loss        | 0.0244   |
|    value_loss         | 0.0885   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 828      |
|    ep_rew_mean        | 14.5     |
| time/                 |          |
|    fps                | 209      |
|    iterations         | 49300    |
|    time_elapsed       | 4705     |
|    total_timesteps    | 986000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 883      |
|    ep_rew_mean        | 15.8     |
| time/                 |          |
|    fps                | 208      |
|    iterations         | 50500    |
|    time_elapsed       | 4833     |
|    total_timesteps    | 1010000  |
| train/                |          |
|    entropy_loss       | -0.208   |
|    explained_variance | 0.834    |
|    learning_rate      | 0.0007   |
|    n_updates          | 50499    |
|    policy_loss        | 0.0366   |
|    value_loss         | 0.0578   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 873      |
|    ep_rew_mean        | 15.2     |
| time/                 |          |
|    fps                | 208      |
|    iterations         | 50600    |
|    time_elapsed       | 4843     |
|    total_timesteps    | 1012000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 854      |
|    ep_rew_mean        | 15.2     |
| time/                 |          |
|    fps                | 208      |
|    iterations         | 51900    |
|    time_elapsed       | 4983     |
|    total_timesteps    | 1038000  |
| train/                |          |
|    entropy_loss       | -0.303   |
|    explained_variance | 0.896    |
|    learning_rate      | 0.0007   |
|    n_updates          | 51899    |
|    policy_loss        | -0.265   |
|    value_loss         | 0.156    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 851      |
|    ep_rew_mean        | 15.3     |
| time/                 |          |
|    fps                | 208      |
|    iterations         | 52000    |
|    time_elapsed       | 4994     |
|    total_timesteps    | 1040000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 766      |
|    ep_rew_mean        | 13.6     |
| time/                 |          |
|    fps                | 207      |
|    iterations         | 53200    |
|    time_elapsed       | 5125     |
|    total_timesteps    | 1064000  |
| train/                |          |
|    entropy_loss       | -0.187   |
|    explained_variance | 0.824    |
|    learning_rate      | 0.0007   |
|    n_updates          | 53199    |
|    policy_loss        | -0.00979 |
|    value_loss         | 0.333    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 762      |
|    ep_rew_mean        | 13.5     |
| time/                 |          |
|    fps                | 207      |
|    iterations         | 53300    |
|    time_elapsed       | 5136     |
|    total_timesteps    | 1066000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 807      |
|    ep_rew_mean        | 14.4     |
| time/                 |          |
|    fps                | 206      |
|    iterations         | 54600    |
|    time_elapsed       | 5276     |
|    total_timesteps    | 1092000  |
| train/                |          |
|    entropy_loss       | -0.175   |
|    explained_variance | 0.774    |
|    learning_rate      | 0.0007   |
|    n_updates          | 54599    |
|    policy_loss        | 0.00156  |
|    value_loss         | 0.196    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 791      |
|    ep_rew_mean        | 13.9     |
| time/                 |          |
|    fps                | 206      |
|    iterations         | 54700    |
|    time_elapsed       | 5287     |
|    total_timesteps    | 1094000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 889      |
|    ep_rew_mean        | 16.3     |
| time/                 |          |
|    fps                | 206      |
|    iterations         | 56000    |
|    time_elapsed       | 5428     |
|    total_timesteps    | 1120000  |
| train/                |          |
|    entropy_loss       | -0.0962  |
|    explained_variance | 0.891    |
|    learning_rate      | 0.0007   |
|    n_updates          | 55999    |
|    policy_loss        | -0.0237  |
|    value_loss         | 0.202    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 887      |
|    ep_rew_mean        | 16.3     |
| time/                 |          |
|    fps                | 206      |
|    iterations         | 56100    |
|    time_elapsed       | 5439     |
|    total_timesteps    | 1122000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 919      |
|    ep_rew_mean        | 18       |
| time/                 |          |
|    fps                | 205      |
|    iterations         | 57400    |
|    time_elapsed       | 5579     |
|    total_timesteps    | 1148000  |
| train/                |          |
|    entropy_loss       | -0.0412  |
|    explained_variance | 0.464    |
|    learning_rate      | 0.0007   |
|    n_updates          | 57399    |
|    policy_loss        | -0.00393 |
|    value_loss         | 0.13     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 920      |
|    ep_rew_mean        | 17.7     |
| time/                 |          |
|    fps                | 205      |
|    iterations         | 57500    |
|    time_elapsed       | 5591     |
|    total_timesteps    | 1150000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 859      |
|    ep_rew_mean        | 16.6     |
| time/                 |          |
|    fps                | 205      |
|    iterations         | 58800    |
|    time_elapsed       | 5731     |
|    total_timesteps    | 1176000  |
| train/                |          |
|    entropy_loss       | -0.174   |
|    explained_variance | 0.864    |
|    learning_rate      | 0.0007   |
|    n_updates          | 58799    |
|    policy_loss        | -0.054   |
|    value_loss         | 0.221    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 867      |
|    ep_rew_mean        | 16.9     |
| time/                 |          |
|    fps                | 205      |
|    iterations         | 58900    |
|    time_elapsed       | 5741     |
|    total_timesteps    | 1178000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 874      |
|    ep_rew_mean        | 16.2     |
| time/                 |          |
|    fps                | 204      |
|    iterations         | 60200    |
|    time_elapsed       | 5882     |
|    total_timesteps    | 1204000  |
| train/                |          |
|    entropy_loss       | -0.126   |
|    explained_variance | 0.764    |
|    learning_rate      | 0.0007   |
|    n_updates          | 60199    |
|    policy_loss        | 0.0218   |
|    value_loss         | 0.0485   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 866      |
|    ep_rew_mean        | 16       |
| time/                 |          |
|    fps                | 204      |
|    iterations         | 60300    |
|    time_elapsed       | 5893     |
|    total_timesteps    | 1206000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 908      |
|    ep_rew_mean        | 17.3     |
| time/                 |          |
|    fps                | 204      |
|    iterations         | 61500    |
|    time_elapsed       | 6024     |
|    total_timesteps    | 1230000  |
| train/                |          |
|    entropy_loss       | -0.104   |
|    explained_variance | -4.65    |
|    learning_rate      | 0.0007   |
|    n_updates          | 61499    |
|    policy_loss        | 0.00763  |
|    value_loss         | 0.212    |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 920       |
|    ep_rew_mean        | 17.6      |
| time/                 |           |
|    fps                | 204       |
|    iterations         | 61600     |
|    time_elapsed       | 6036      |
|    total_timesteps    | 1232000   |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 921      |
|    ep_rew_mean        | 17.6     |
| time/                 |          |
|    fps                | 203      |
|    iterations         | 62800    |
|    time_elapsed       | 6166     |
|    total_timesteps    | 1256000  |
| train/                |          |
|    entropy_loss       | -0.14    |
|    explained_variance | 0.88     |
|    learning_rate      | 0.0007   |
|    n_updates          | 62799    |
|    policy_loss        | -0.0513  |
|    value_loss         | 0.191    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 910      |
|    ep_rew_mean        | 17.4     |
| time/                 |          |
|    fps                | 203      |
|    iterations         | 62900    |
|    time_elapsed       | 6177     |
|    total_timesteps    | 1258000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 896      |
|    ep_rew_mean        | 16.6     |
| time/                 |          |
|    fps                | 203      |
|    iterations         | 64200    |
|    time_elapsed       | 6319     |
|    total_timesteps    | 1284000  |
| train/                |          |
|    entropy_loss       | -0.197   |
|    explained_variance | 0.572    |
|    learning_rate      | 0.0007   |
|    n_updates          | 64199    |
|    policy_loss        | -0.00879 |
|    value_loss         | 0.133    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 913      |
|    ep_rew_mean        | 17.4     |
| time/                 |          |
|    fps                | 203      |
|    iterations         | 64300    |
|    time_elapsed       | 6330     |
|    total_timesteps    | 1286000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 950      |
|    ep_rew_mean        | 17.9     |
| time/                 |          |
|    fps                | 202      |
|    iterations         | 65600    |
|    time_elapsed       | 6471     |
|    total_timesteps    | 1312000  |
| train/                |          |
|    entropy_loss       | -0.0294  |
|    explained_variance | 0.907    |
|    learning_rate      | 0.0007   |
|    n_updates          | 65599    |
|    policy_loss        | 0.000917 |
|    value_loss         | 0.0482   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 940      |
|    ep_rew_mean        | 17.8     |
| time/                 |          |
|    fps                | 202      |
|    iterations         | 65700    |
|    time_elapsed       | 6482     |
|    total_timesteps    | 1314000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 868      |
|    ep_rew_mean        | 16.1     |
| time/                 |          |
|    fps                | 202      |
|    iterations         | 67000    |
|    time_elapsed       | 6624     |
|    total_timesteps    | 1340000  |
| train/                |          |
|    entropy_loss       | -0.0942  |
|    explained_variance | 0.743    |
|    learning_rate      | 0.0007   |
|    n_updates          | 66999    |
|    policy_loss        | 0.0313   |
|    value_loss         | 0.263    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 868      |
|    ep_rew_mean        | 16       |
| time/                 |          |
|    fps                | 202      |
|    iterations         | 67100    |
|    time_elapsed       | 6635     |
|    total_timesteps    | 1342000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 838      |
|    ep_rew_mean        | 15       |
| time/                 |          |
|    fps                | 201      |
|    iterations         | 68400    |
|    time_elapsed       | 6777     |
|    total_timesteps    | 1368000  |
| train/                |          |
|    entropy_loss       | -0.153   |
|    explained_variance | 0.499    |
|    learning_rate      | 0.0007   |
|    n_updates          | 68399    |
|    policy_loss        | 0.00126  |
|    value_loss         | 0.624    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 841      |
|    ep_rew_mean        | 15.2     |
| time/                 |          |
|    fps                | 201      |
|    iterations         | 68500    |
|    time_elapsed       | 6789     |
|    total_timesteps    | 1370000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 922      |
|    ep_rew_mean        | 17.8     |
| time/                 |          |
|    fps                | 201      |
|    iterations         | 69700    |
|    time_elapsed       | 6919     |
|    total_timesteps    | 1394000  |
| train/                |          |
|    entropy_loss       | -0.00469 |
|    explained_variance | 0.781    |
|    learning_rate      | 0.0007   |
|    n_updates          | 69699    |
|    policy_loss        | 0.000315 |
|    value_loss         | 0.268    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 943      |
|    ep_rew_mean        | 18.4     |
| time/                 |          |
|    fps                | 201      |
|    iterations         | 69800    |
|    time_elapsed       | 6930     |
|    total_timesteps    | 1396000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 917      |
|    ep_rew_mean        | 17.1     |
| time/                 |          |
|    fps                | 201      |
|    iterations         | 71100    |
|    time_elapsed       | 7071     |
|    total_timesteps    | 1422000  |
| train/                |          |
|    entropy_loss       | -0.028   |
|    explained_variance | -3.72    |
|    learning_rate      | 0.0007   |
|    n_updates          | 71099    |
|    policy_loss        | 0.00288  |
|    value_loss         | 0.236    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 925      |
|    ep_rew_mean        | 17.4     |
| time/                 |          |
|    fps                | 201      |
|    iterations         | 71200    |
|    time_elapsed       | 7082     |
|    total_timesteps    | 1424000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 890      |
|    ep_rew_mean        | 16.2     |
| time/                 |          |
|    fps                | 200      |
|    iterations         | 72400    |
|    time_elapsed       | 7212     |
|    total_timesteps    | 1448000  |
| train/                |          |
|    entropy_loss       | -0.0494  |
|    explained_variance | 0.757    |
|    learning_rate      | 0.0007   |
|    n_updates          | 72399    |
|    policy_loss        | 0.00122  |
|    value_loss         | 0.0822   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 887      |
|    ep_rew_mean        | 16.4     |
| time/                 |          |
|    fps                | 200      |
|    iterations         | 72500    |
|    time_elapsed       | 7223     |
|    total_timesteps    | 1450000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 919      |
|    ep_rew_mean        | 17.7     |
| time/                 |          |
|    fps                | 200      |
|    iterations         | 73700    |
|    time_elapsed       | 7354     |
|    total_timesteps    | 1474000  |
| train/                |          |
|    entropy_loss       | -0.223   |
|    explained_variance | 0.565    |
|    learning_rate      | 0.0007   |
|    n_updates          | 73699    |
|    policy_loss        | -0.093   |
|    value_loss         | 0.833    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 916      |
|    ep_rew_mean        | 17.7     |
| time/                 |          |
|    fps                | 200      |
|    iterations         | 73800    |
|    time_elapsed       | 7364     |
|    total_timesteps    | 1476000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 929      |
|    ep_rew_mean        | 17.6     |
| time/                 |          |
|    fps                | 200      |
|    iterations         | 75000    |
|    time_elapsed       | 7496     |
|    total_timesteps    | 1500000  |
| train/                |          |
|    entropy_loss       | -0.153   |
|    explained_variance | 0.666    |
|    learning_rate      | 0.0007   |
|    n_updates          | 74999    |
|    policy_loss        | 0.0442   |
|    value_loss         | 0.144    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 942      |
|    ep_rew_mean        | 18.1     |
| time/                 |          |
|    fps                | 200      |
|    iterations         | 75100    |
|    time_elapsed       | 7507     |
|    total_timesteps    | 1502000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 925      |
|    ep_rew_mean        | 17.4     |
| time/                 |          |
|    fps                | 199      |
|    iterations         | 76400    |
|    time_elapsed       | 7650     |
|    total_timesteps    | 1528000  |
| train/                |          |
|    entropy_loss       | -0.0699  |
|    explained_variance | 0.463    |
|    learning_rate      | 0.0007   |
|    n_updates          | 76399    |
|    policy_loss        | 0.0446   |
|    value_loss         | 0.0995   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 919      |
|    ep_rew_mean        | 17.2     |
| time/                 |          |
|    fps                | 199      |
|    iterations         | 76500    |
|    time_elapsed       | 7661     |
|    total_timesteps    | 1530000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 943      |
|    ep_rew_mean        | 18.2     |
| time/                 |          |
|    fps                | 199      |
|    iterations         | 77700    |
|    time_elapsed       | 7793     |
|    total_timesteps    | 1554000  |
| train/                |          |
|    entropy_loss       | -0.0873  |
|    explained_variance | 0.578    |
|    learning_rate      | 0.0007   |
|    n_updates          | 77699    |
|    policy_loss        | -0.0195  |
|    value_loss         | 1.73     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 952      |
|    ep_rew_mean        | 18.4     |
| time/                 |          |
|    fps                | 199      |
|    iterations         | 77800    |
|    time_elapsed       | 7804     |
|    total_timesteps    | 1556000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 924      |
|    ep_rew_mean        | 17.6     |
| time/                 |          |
|    fps                | 199      |
|    iterations         | 79000    |
|    time_elapsed       | 7937     |
|    total_timesteps    | 1580000  |
| train/                |          |
|    entropy_loss       | -0.0437  |
|    explained_variance | -0.0107  |
|    learning_rate      | 0.0007   |
|    n_updates          | 78999    |
|    policy_loss        | 0.00268  |
|    value_loss         | 0.299    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 962      |
|    ep_rew_mean        | 18.6     |
| time/                 |          |
|    fps                | 199      |
|    iterations         | 79100    |
|    time_elapsed       | 7948     |
|    total_timesteps    | 1582000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 967      |
|    ep_rew_mean        | 18.9     |
| time/                 |          |
|    fps                | 198      |
|    iterations         | 80300    |
|    time_elapsed       | 8080     |
|    total_timesteps    | 1606000  |
| train/                |          |
|    entropy_loss       | -0.053   |
|    explained_variance | 0.888    |
|    learning_rate      | 0.0007   |
|    n_updates          | 80299    |
|    policy_loss        | -0.00172 |
|    value_loss         | 0.0216   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 968      |
|    ep_rew_mean        | 19       |
| time/                 |          |
|    fps                | 198      |
|    iterations         | 80400    |
|    time_elapsed       | 8091     |
|    total_timesteps    | 1608000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 957      |
|    ep_rew_mean        | 19.2     |
| time/                 |          |
|    fps                | 198      |
|    iterations         | 81600    |
|    time_elapsed       | 8223     |
|    total_timesteps    | 1632000  |
| train/                |          |
|    entropy_loss       | -0.0903  |
|    explained_variance | 0.875    |
|    learning_rate      | 0.0007   |
|    n_updates          | 81599    |
|    policy_loss        | 0.00257  |
|    value_loss         | 0.0437   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 960      |
|    ep_rew_mean        | 19.4     |
| time/                 |          |
|    fps                | 198      |
|    iterations         | 81700    |
|    time_elapsed       | 8234     |
|    total_timesteps    | 1634000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 912      |
|    ep_rew_mean        | 17.4     |
| time/                 |          |
|    fps                | 198      |
|    iterations         | 83000    |
|    time_elapsed       | 8376     |
|    total_timesteps    | 1660000  |
| train/                |          |
|    entropy_loss       | -0.123   |
|    explained_variance | 0.322    |
|    learning_rate      | 0.0007   |
|    n_updates          | 82999    |
|    policy_loss        | -0.0456  |
|    value_loss         | 1.18     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 912      |
|    ep_rew_mean        | 17.4     |
| time/                 |          |
|    fps                | 198      |
|    iterations         | 83100    |
|    time_elapsed       | 8387     |
|    total_timesteps    | 1662000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 924      |
|    ep_rew_mean        | 17.8     |
| time/                 |          |
|    fps                | 197      |
|    iterations         | 84300    |
|    time_elapsed       | 8518     |
|    total_timesteps    | 1686000  |
| train/                |          |
|    entropy_loss       | -0.0481  |
|    explained_variance | -0.291   |
|    learning_rate      | 0.0007   |
|    n_updates          | 84299    |
|    policy_loss        | -0.0219  |
|    value_loss         | 0.121    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 923      |
|    ep_rew_mean        | 17.6     |
| time/                 |          |
|    fps                | 197      |
|    iterations         | 84400    |
|    time_elapsed       | 8529     |
|    total_timesteps    | 1688000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 937      |
|    ep_rew_mean        | 17.7     |
| time/                 |          |
|    fps                | 197      |
|    iterations         | 85600    |
|    time_elapsed       | 8662     |
|    total_timesteps    | 1712000  |
| train/                |          |
|    entropy_loss       | -0.11    |
|    explained_variance | 0.727    |
|    learning_rate      | 0.0007   |
|    n_updates          | 85599    |
|    policy_loss        | 0.0382   |
|    value_loss         | 0.0809   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 938      |
|    ep_rew_mean        | 17.9     |
| time/                 |          |
|    fps                | 197      |
|    iterations         | 85700    |
|    time_elapsed       | 8673     |
|    total_timesteps    | 1714000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 920      |
|    ep_rew_mean        | 17.6     |
| time/                 |          |
|    fps                | 197      |
|    iterations         | 87000    |
|    time_elapsed       | 8818     |
|    total_timesteps    | 1740000  |
| train/                |          |
|    entropy_loss       | -0.0298  |
|    explained_variance | 0.644    |
|    learning_rate      | 0.0007   |
|    n_updates          | 86999    |
|    policy_loss        | 0.00227  |
|    value_loss         | 0.629    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 905      |
|    ep_rew_mean        | 17.5     |
| time/                 |          |
|    fps                | 197      |
|    iterations         | 87100    |
|    time_elapsed       | 8830     |
|    total_timesteps    | 1742000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 937      |
|    ep_rew_mean        | 18.2     |
| time/                 |          |
|    fps                | 197      |
|    iterations         | 88400    |
|    time_elapsed       | 8973     |
|    total_timesteps    | 1768000  |
| train/                |          |
|    entropy_loss       | -0.03    |
|    explained_variance | 0.769    |
|    learning_rate      | 0.0007   |
|    n_updates          | 88399    |
|    policy_loss        | -0.00171 |
|    value_loss         | 0.155    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 938      |
|    ep_rew_mean        | 18.2     |
| time/                 |          |
|    fps                | 197      |
|    iterations         | 88500    |
|    time_elapsed       | 8984     |
|    total_timesteps    | 1770000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 899      |
|    ep_rew_mean        | 17.1     |
| time/                 |          |
|    fps                | 196      |
|    iterations         | 89800    |
|    time_elapsed       | 9128     |
|    total_timesteps    | 1796000  |
| train/                |          |
|    entropy_loss       | -0.0389  |
|    explained_variance | 0.738    |
|    learning_rate      | 0.0007   |
|    n_updates          | 89799    |
|    policy_loss        | 0.00322  |
|    value_loss         | 0.101    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 906      |
|    ep_rew_mean        | 17.1     |
| time/                 |          |
|    fps                | 196      |
|    iterations         | 89900    |
|    time_elapsed       | 9139     |
|    total_timesteps    | 1798000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 938      |
|    ep_rew_mean        | 17.6     |
| time/                 |          |
|    fps                | 196      |
|    iterations         | 91200    |
|    time_elapsed       | 9282     |
|    total_timesteps    | 1824000  |
| train/                |          |
|    entropy_loss       | -0.0634  |
|    explained_variance | 0.92     |
|    learning_rate      | 0.0007   |
|    n_updates          | 91199    |
|    policy_loss        | -0.0449  |
|    value_loss         | 0.0301   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 949      |
|    ep_rew_mean        | 17.8     |
| time/                 |          |
|    fps                | 196      |
|    iterations         | 91300    |
|    time_elapsed       | 9293     |
|    total_timesteps    | 1826000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 990      |
|    ep_rew_mean        | 19.7     |
| time/                 |          |
|    fps                | 196      |
|    iterations         | 92600    |
|    time_elapsed       | 9437     |
|    total_timesteps    | 1852000  |
| train/                |          |
|    entropy_loss       | -0.0963  |
|    explained_variance | 0.544    |
|    learning_rate      | 0.0007   |
|    n_updates          | 92599    |
|    policy_loss        | -0.00389 |
|    value_loss         | 0.0898   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1e+03    |
|    ep_rew_mean        | 20.5     |
| time/                 |          |
|    fps                | 196      |
|    iterations         | 92700    |
|    time_elapsed       | 9448     |
|    total_timesteps    | 1854000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 968      |
|    ep_rew_mean        | 19.2     |
| time/                 |          |
|    fps                | 196      |
|    iterations         | 94000    |
|    time_elapsed       | 9590     |
|    total_timesteps    | 1880000  |
| train/                |          |
|    entropy_loss       | -0.00563 |
|    explained_variance | -0.277   |
|    learning_rate      | 0.0007   |
|    n_updates          | 93999    |
|    policy_loss        | 0.00151  |
|    value_loss         | 0.237    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 970      |
|    ep_rew_mean        | 19.5     |
| time/                 |          |
|    fps                | 196      |
|    iterations         | 94100    |
|    time_elapsed       | 9601     |
|    total_timesteps    | 1882000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 923      |
|    ep_rew_mean        | 17.7     |
| time/                 |          |
|    fps                | 195      |
|    iterations         | 95300    |
|    time_elapsed       | 9734     |
|    total_timesteps    | 1906000  |
| train/                |          |
|    entropy_loss       | -0.155   |
|    explained_variance | 0.378    |
|    learning_rate      | 0.0007   |
|    n_updates          | 95299    |
|    policy_loss        | -0.0281  |
|    value_loss         | 0.0454   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 920      |
|    ep_rew_mean        | 17.9     |
| time/                 |          |
|    fps                | 195      |
|    iterations         | 95400    |
|    time_elapsed       | 9745     |
|    total_timesteps    | 1908000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 948      |
|    ep_rew_mean        | 19.1     |
| time/                 |          |
|    fps                | 195      |
|    iterations         | 96700    |
|    time_elapsed       | 9888     |
|    total_timesteps    | 1934000  |
| train/                |          |
|    entropy_loss       | -0.0689  |
|    explained_variance | 0.56     |
|    learning_rate      | 0.0007   |
|    n_updates          | 96699    |
|    policy_loss        | 0.00716  |
|    value_loss         | 0.0529   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 962      |
|    ep_rew_mean        | 19.4     |
| time/                 |          |
|    fps                | 195      |
|    iterations         | 96800    |
|    time_elapsed       | 9899     |
|    total_timesteps    | 1936000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 987      |
|    ep_rew_mean        | 19.6     |
| time/                 |          |
|    fps                | 195      |
|    iterations         | 98000    |
|    time_elapsed       | 10032    |
|    total_timesteps    | 1960000  |
| train/                |          |
|    entropy_loss       | -0.101   |
|    explained_variance | 0.643    |
|    learning_rate      | 0.0007   |
|    n_updates          | 97999    |
|    policy_loss        | -0.0356  |
|    value_loss         | 1        |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 984      |
|    ep_rew_mean        | 19.6     |
| time/                 |          |
|    fps                | 195      |
|    iterations         | 98100    |
|    time_elapsed       | 10043    |
|    total_timesteps    | 1962000  |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 981      |
|    ep_rew_mean        | 19       |
| time/                 |          |
|    fps                | 195      |
|    iterations         | 99300    |
|    time_elapsed       | 10176    |
|    total_timesteps    | 1986000  |
| train/                |          |
|    entropy_loss       | -0.132   |
|    explained_variance | 0.742    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99299    |
|    policy_loss        | -0.0305  |
|    value_loss         | 0.408    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 988      |
|    ep_rew_mean        | 19.2     |
| time/                 |          |
|    fps                | 195      |
|    iterations         | 99400    |
|    time_elapsed       | 10187    |
|    total_timesteps    | 1988000  |
| train/                |          |
|

<stable_baselines3.a2c.a2c.A2C at 0x2bc4e598760>

## 4. Save and Reload Model

First thing to do: Save the new 2M Model

In [100]:
a2c_path = os.path.join('Training', 'Saved Models', 'A2C_2M_Model')
model.save(a2c_path)

In [51]:
del model

In [21]:
a2c_path = os.path.join('Training', 'Saved Models', 'A2C_2M_Model')

In [22]:
model = A2C.load(a2c_path, env)

Wrapping the env in a VecTransposeImage.


## 5. Evaluate and Test

In [20]:
env = make_atari_env('Breakout-v0', n_envs = 2, seed = 0)
env = VecFrameStack(env, n_stack = 4)

In [23]:
env.render()

In [24]:
evaluate_policy(model, env, n_eval_episodes=10, render = True)

(21.1, 8.619164692706596)

In [41]:
env.close()

### Tensorboard Video Logs

In [27]:
class VideoRecorderCallback(BaseCallback):
    def __init__(self, eval_env: gym.Env, render_freq: int, n_eval_episodes: int = 1, deterministic: bool = True):
        """
        Records a video of an agent's trajectory traversing ``eval_env`` and logs it to TensorBoard

        :param eval_env: A gym environment from which the trajectory is recorded
        :param render_freq: Render the agent's trajectory every eval_freq call of the callback.
        :param n_eval_episodes: Number of episodes to render
        :param deterministic: Whether to use deterministic or stochastic policy
        """
        super().__init__()
        self._eval_env = eval_env
        self._render_freq = render_freq
        self._n_eval_episodes = n_eval_episodes
        self._deterministic = deterministic

    def _on_step(self) -> bool:
        if self.n_calls % self._render_freq == 0:
            screens = []

            def grab_screens(_locals: Dict[str, Any], _globals: Dict[str, Any]) -> None:
                """
                Renders the environment in its current state, recording the screen in the captured `screens` list

                :param _locals: A dictionary containing all local variables of the callback's scope
                :param _globals: A dictionary containing all global variables of the callback's scope
                """
                screen = self._eval_env.render(mode="rgb_array")
                # PyTorch uses CxHxW vs HxWxC gym (and tensorflow) image convention
                screens.append(screen.transpose(2, 0, 1))

            evaluate_policy(
                self.model,
                self._eval_env,
                callback=grab_screens,
                n_eval_episodes=self._n_eval_episodes,
                deterministic=self._deterministic,
            )
            self.logger.record(
                "trajectory/video",
                Video(th.ByteTensor([screens]), fps=40),
                exclude=("stdout", "log", "json", "csv"),
            )
        return True

In [28]:
log_path = os.path.join('Training', 'Logs')
model = A2C('CnnPolicy', env, verbose = 1, tensorboard_log = log_path)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [29]:
environment_name = 'Breakout-v0'
env = gym.make(environment_name, render_mode = 'human')

In [30]:
env.render()

Error: render(mode='human') is deprecated. Please supply `render_mode` when constructing your environment, e.g., gym.make(ID, render_mode='human'). The new `render_mode` keyword argument supports DPI scaling, audio, and native framerates.

In [31]:
video_recorder = VideoRecorderCallback(gym.make(environment_name), render_freq=5000)

In [32]:
model.learn(total_timesteps=300000, callback=video_recorder)

Logging to Training\Logs\A2C_5
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 287      |
|    ep_rew_mean        | 1.56     |
| time/                 |          |
|    fps                | 158      |
|    iterations         | 100      |
|    time_elapsed       | 12       |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -1.23    |
|    explained_variance | -0.156   |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.25    |
|    value_loss         | 0.222    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 292      |
|    ep_rew_mean        | 1.71     |
| time/                 |          |
|    fps                | 185      |
|    iterations         | 200      |
|    time_elapsed       | 21       |
|    total_timesteps    | 4000     |
| train



ValueError: Error: Unexpected observation shape (1, 210, 160, 3) for Box environment, please use (4, 84, 84) or (n_env, 4, 84, 84) for the observation shape.

## Viewing Logs in Tensorboard

In [64]:
training_log_path = os.path.join(log_path, 'A2C_2')

In [65]:
training_log_path

'Training\\Logs\\A2C_2'

In [66]:
!tensorboard --logdir={training_log_path}

^C
