# 1. Importing

In [None]:
import gym 
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
import os

# 2. Testing Environment

In [None]:
# Download http://www.atarimania.com/roms/Roms.rar and extract it into your working folder
# so that the Breakout environment can be tested and used.

In [None]:
!python -m atari_py.import_roms .\ROMS
# If this fails because of a missing DLL file (ale_c.dll), installing atari_py from conda-forge fixed it for me.
# Run this from a conda command line: conda install -c conda-forge atari_py

In [None]:
# Gym environment ID passed to gym.make() for the single-environment test.
environment_name = "Breakout-v0"

In [None]:
# Create one (non-vectorized) instance of the environment for manual testing.
env = gym.make(environment_name)

In [None]:
# Reset the environment to its initial state; the call returns the first observation.
env.reset()

In [None]:
# Inspect the action space: a discrete space with 4 possible actions.
env.action_space

In [None]:
# Inspect the observation space: presumably an image (check the printed space to confirm).
env.observation_space

In [None]:
# Smoke-test the environment: play a few episodes with randomly sampled
# actions and print the total reward collected in each episode.
episodes = 5
try:
    for episode in range(1, episodes + 1):
        obs = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            action = env.action_space.sample()
            # Classic Gym step API: (observation, reward, done, info).
            obs, reward, done, info = env.step(action)
            score += reward
        print('Episode: {}, Score: {}'.format(episode, score))
finally:
    # Always release the render window, even if a step raises or the
    # kernel is interrupted part-way through an episode.
    env.close()
    

# 3. Vectorizing and Training Model
 ### We vectorize the environment so that several copies run in parallel, which speeds up agent training.

In [None]:
# Build a vectorized Breakout environment with 4 parallel copies (fixed seed
# for reproducibility), then wrap it so each observation stacks 4 frames.
env = make_atari_env('Breakout-v0', n_envs=4, seed=0)
env = VecFrameStack(env, n_stack=4)

In [None]:
# Render the vectorized environment; the image should show the 4 parallel environments.
env.render()

In [None]:
# Close the environment's render window.
env.close()

In [None]:
# TensorBoard logs for this run are written under Training/Logs.
log_path = os.path.join('Training', 'Logs')

# A2C agent with a CNN policy (image observations), bound to the
# vectorized training environment; verbose=1 enables progress output.
model = A2C(
    policy='CnnPolicy',
    env=env,
    verbose=1,
    tensorboard_log=log_path,
)

In [None]:
# Train the agent for a budget of 100,000 timesteps.
model.learn(total_timesteps=100000)

# 4. Save and Reload Model

In [None]:
# Location under 'Training/Saved Models' where the trained A2C model is stored.
a2c_path = os.path.join('Training', 'Saved Models', 'A2C_100K_model')

In [None]:
# Persist the trained model to a2c_path.
model.save(a2c_path)

In [None]:
# Drop the in-memory model so that the reload below really comes from disk.
del model

In [None]:
# A single environment is enough for testing/evaluation.
env = make_atari_env('Breakout-v0', n_envs=1, seed=0)
# n_stack must stay at 4 because that is how the model was trained.
env = VecFrameStack(env, n_stack=4)

In [None]:
# Reload the saved model from a2c_path and attach the evaluation environment to it.
model = A2C.load(a2c_path, env)

# 5. Evaluating and Testing

In [None]:
# Run 10 evaluation episodes with rendering enabled. Per the Stable-Baselines3
# docs the call returns (mean reward, std of reward), which the cell displays.
evaluate_policy(model, env, n_eval_episodes=10, render=True)

In [None]:
# Close the evaluation environment and its render window.
env.close()