# Import Dependencies

In [1]:
import gymnasium as gym
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
import os




In [2]:
env = gym.make("Breakout-v4", render_mode = "human")
# making the environment
# This environment is made to visualize the game (no further use in the project)

NameNotFound: Environment `Breakout` doesn't exist.

In [None]:
env.reset()
# this code will return the observations of the environment

In [5]:
env.action_space
# this will return the number of actions and this actions are discrete.

Discrete(4)

In [6]:
env.observation_space
# returns the observation space of the environment
# RGB image of the environment is returned 

Box(0, 255, (210, 160, 3), uint8)

# Testing the environment

In [82]:
# running the environment with random actions
episodes = 5
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        action = env.action_space.sample()
        state, reward, done, truncated, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))
env.close()

Episode:1 Score:5.0
Episode:2 Score:0.0
Episode:3 Score:2.0
Episode:4 Score:1.0
Episode:5 Score:1.0


In [81]:
env.close()

# Vectorise Environment and Train Model

In [8]:
# Vectorized environments are environments that run multiple independent copies of the same environment in parallel using multiprocessing.
env = make_atari_env("Breakout-v4", n_envs=4, seed=0)
# creates a vector environment for atari games
env = VecFrameStack(env, n_stack=4)
# stacking up all the environment together
env.metadata['render_fps'] = 30 
# setting the rendering fps

In [9]:
env.reset()

array([[[[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],

        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],

        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],

        ...,

        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],

        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],

        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]]],


       [[[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0

In [10]:
env.render("human")
# rendering the environment in render mode = human
# the environment rendered here is the vector environment created by stacking 4 envs 

In [11]:
log_path = os.path.join('C://', 'Users', 'vyshn', 'Documents System', '6th Sem', 'RL', 'RL_Projects', 'Training', 'Logs')
# setting the log path for tensorboard log
model = A2C('CnnPolicy', env, verbose = 1, tensorboard_log = log_path) 
# initializing the A2C model with resspect to vector environment

Using cpu device
Wrapping the env in a VecTransposeImage.


In [6]:
model.learn(total_timesteps = 100000)
# training the model

Logging to C:Users\vyshn\Documents System\6th Sem\RL\RL_Projects\Training\Logs\A2C_6
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 268      |
|    ep_rew_mean        | 1.29     |
| time/                 |          |
|    fps                | 72       |
|    iterations         | 100      |
|    time_elapsed       | 27       |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -1.38    |
|    explained_variance | 0.00734  |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.199    |
|    value_loss         | 0.143    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 275      |
|    ep_rew_mean        | 1.43     |
| time/                 |          |
|    fps                | 72       |
|    iterations         | 200      |
|    time_elapsed       | 5

<stable_baselines3.a2c.a2c.A2C at 0x19abc253f90>

In [12]:
a2c_path = os.path.join('C:\\', 'Users', 'vyshn', 'Documents System', '6th Sem', 'RL', 'RL_Projects', 'Training', 'Saved Models', 'A2C_Breakout_Model')
# setting the path for saving the model

In [13]:
a2c_path

'C:\\Users\\vyshn\\Documents System\\6th Sem\\RL\\RL_Projects\\Training\\Saved Models\\A2C_Breakout_Model'

In [7]:
model.save(a2c_path)
# saving the model

In [14]:
del model
# deleting the model

In [3]:
env = make_atari_env('Breakout-v4', n_envs=1, seed=0)
# no. of env = 1
env = VecFrameStack(env, n_stack=4)
env.metadata['render_fps'] = 30
# making environment for testing the model (just one environment is made)

NameNotFound: Environment `Breakout` doesn't exist.

In [16]:
model = A2C.load(a2c_path, env)
# loading back the model from the saved path

Wrapping the env in a VecTransposeImage.


In [19]:
evaluate_policy(model, env, n_eval_episodes = 10, render = True)
# evaluating the model

(7.3, 2.1931712199461306)

In [20]:
obs = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render("human")
# model playing the game (breakout ball)

KeyboardInterrupt: 

In [21]:
env.close()