# Same notebook as breakout.ipynb, but using OpenAI's atari wrappers instead

In [None]:
import sys
sys.path.append('..')

In [None]:
from agents.ddqn import *
from environments.openai import *
from utils.train import *
from utils.logger import *
from utils.render import *

In [None]:
import gym
import matplotlib.pyplot as plt
import numpy as np
import torch
import pandas as pd

In [None]:
# Build the raw Gym environment first, then wrap it; `raw_env` is kept around
# because later cells read its observation/action spaces directly.
ENV_ID = 'BreakoutNoFrameskip-v4'
raw_env = gym.make(ENV_ID)
env = Wrap_Deepmind(raw_env)

### Scaling hyperparameters in accordance with environment parameter changes
#### num_steps += 1 represents 4 frames (so really num_steps += 4)

In [None]:
# Print the size of the raw environment's action space (`action_space.n`);
# the value is used to choose the network parameters in the next cell.
print(raw_env.action_space.n)

In [None]:
# Sizes of the network's input and output.
in_channels = 4  # channels fed to the first conv layer (presumably the 4 stacked frames)
num_actions = 4  # width of the final layer; keep in sync with raw_env.action_space.n printed above

# Layers of the agent's network, in forward order. Every layer is referenced
# through `torch.nn` so this cell relies only on the explicit `import torch`,
# not on an `nn` name leaking in through one of the star imports.
net_params = [
    torch.nn.Conv2d(in_channels, 32, kernel_size=8, stride=4, padding=0),
    torch.nn.ReLU(),
    torch.nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=0),
    torch.nn.ReLU(),
    torch.nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0),
    torch.nn.ReLU(),
    torch.nn.Flatten(),
    # 64 * 7 * 7 is the flattened conv output for an 84x84 input
    # (84 -> 20 -> 9 -> 7 per side). Assumes Wrap_Deepmind emits 84x84
    # frames -- TODO confirm.
    torch.nn.Linear(64 * 7 * 7, 512),
    torch.nn.ReLU(),
    torch.nn.Linear(512, num_actions),
]

In [None]:
# initialize agent
# The spaces are taken from the raw (unwrapped) environment.
observation_space = raw_env.observation_space
action_space = raw_env.action_space

memory_size = 1000000  # ONE MILLION

# Keyword arguments forwarded to DQNAgent. The meaning of each key is defined
# by DQNAgent itself (see agents/ddqn); only the values are set here.
params = {
    'epsilon': 1.0,
    'epsilon_min': 0.1,
    'epsilon_decay': None,
    'eps_ff': 1000000,
    'eps_interval': 0.9,
    'eps_start': 1.0,
    'gamma': 0.99,
    'alpha': 6e-5,
    'network_params': net_params,
    'memory_size': memory_size,
    # Use the first GPU when CUDA is available; otherwise fall back to the CPU
    # so the notebook still runs on machines without a GPU.
    # NOTE(review): assumes DQNAgent accepts 'cpu' as a device string -- confirm.
    'device': 'cuda:0' if torch.cuda.is_available() else 'cpu',
    'batch_size': 32,
    'target_net_updates': 1000,
}

agent = DQNAgent(observation_space, action_space, **params)

In [None]:
# Display the agent's network (the notebook echoes a cell's last expression).
agent.network

In [None]:
# Logger instance that is passed to the training call through training_params.
# NOTE(review): 'training_info' is presumably the log name or path -- confirm
# against utils.logger.
logger = Logger('training_info')

In [None]:
# Directory handed to the training call as 'save_dir'.
save_dir = '../models/breakout/'

# Keyword arguments forwarded to standard_train. The meaning of each key is
# defined by standard_train (see utils/train); only the values are set here.
training_params = {
    'total_steps': 15000000,
    'logger': logger,
    'save_freq': 1000000,
    'e_verbose': 1000000,
    # Previously 'space invaders ddqn' (copy-paste leftover): this notebook
    # trains on Breakout and writes under the breakout save_dir.
    'file_name': 'breakout ddqn',
    'save_dir': save_dir,
}

### Deepmind Atari Preprocessing:
* Extremely high RAM usage
* Frameskips between stacked inputs
    * Each input spans 16 frames
* Inputs have overlap
    * (x1, x2, x3, x4) --env.step--> (x2, x3, x4, x5)
    * x1 and x2 differ by 4 frames
* READ: https://danieltakeshi.github.io/2016/11/25/frame-skipping-and-preprocessing-for-deep-q-networks-on-atari-2600-games/

### My Atari Preprocessing:
* Lower RAM usage
* No frameskips between stacked inputs. 
    * Each input spans 4 frames
* No overlap between frames unless environment termination
    * (x1, x2, x3, x4) --env.step--> (x5, x6, x7, x8)

In [None]:
# Run training on the wrapped environment with the settings collected in
# training_params.
standard_train(agent, env, **training_params)

# If this doesn't work, change noop, monitor, and firereset; all are useless. Also make it so the user can specify which environments to use.

In [None]:
# Checkpoint locations used by the cells below.
# NOTE(review): both paths point at the spaceinvaders model directory although
# this notebook trains Breakout -- looks like a copy-paste leftover; confirm
# the intended location before saving.
path = '../models/spaceinvaders/si ddqn 2.pth'  # not referenced by any other visible cell
save = '../models/spaceinvaders/si ddqn 2 final.pth'

In [None]:
# Save the agent to the `save` path (what gets serialized is defined by
# DQNAgent.save).
agent.save(save)

In [None]:
# Render the agent in the wrapped environment.
# NOTE(review): `save` and `3` are passed positionally -- presumably a
# checkpoint path and an episode count; confirm against utils.render.
render_agent(agent, env, save, 3)