# OpenAI Space Invaders

## State 0: Import Libraries

In [9]:
import numpy as np
import tensorflow as tf
import retro
import random

from collections import deque

from skimage import transform
from skimage.color import rgb2gray

import matplotlib.pyplot as plt

import warnings

## State 1: Creating the Environment

In [10]:
# Create the Space Invaders (Atari 2600) environment through Gym Retro.
env = retro.make(game='SpaceInvaders-Atari2600')

# Report the observation space and the number of available actions.
print("The size of our frame is: ", env.observation_space)
print("The action size is :", env.action_space.n)

# One-hot action table: row i has a 1 in column i and 0 everywhere else,
# i.e. [1,0,0,...], [0,1,0,...], ...
possible_actions = np.eye(env.action_space.n, dtype=int)

RuntimeError: Cannot create multiple emulator instances per process

## State 2: Preprocessing

In [11]:
def preprocess_frame(frame):
    """Turn a raw game frame into a cropped, resized grayscale image.

    Args:
        frame: Colour image array accepted by ``rgb2gray``
            (presumably the (height, width, 3) RGB observation returned by
            the environment -- confirm against the caller).

    Returns:
        The grayscale frame, cropped and resized to 110 x 84.
    """
    # Grayscale the frame (removes the colour channel).
    gray = rgb2gray(frame)

    # Crop the frame: drop 8 rows at the top, 12 at the bottom,
    # 4 columns on the left and 12 on the right.
    cropped_frame = gray[8:-12, 4:-12]

    # NOTE(review): the original computed `cropped_frame / 255.0` into a
    # variable that was never used, so the resized image was always built
    # from the un-divided crop. That effective behaviour is kept here.
    # Per the scikit-image docs, rgb2gray already yields floats in [0, 1]
    # for uint8 RGB input, so a further /255 would presumably double-scale
    # the pixels -- confirm the input dtype before reinstating it.

    # Resize to the network's input resolution.
    preprocessed_frame = transform.resize(cropped_frame, [110, 84])

    return preprocessed_frame


# Backward-compatible alias for the original (misspelled) function name.
preprocess_fram = preprocess_frame

In [12]:
# Number of consecutive frames that make up one stacked state.
stack_size = 4

# Rolling buffer of the most recent frames, pre-filled with blank 110x84 frames.
# `np.int` was removed in NumPy 1.24; the builtin `int` is the type it aliased.
# `maxlen` follows `stack_size` so the two can never disagree.
stacked_frames = deque(
    [np.zeros((110, 84), dtype=int) for _ in range(stack_size)],
    maxlen=stack_size,
)

def stack_frames(stacked_frames, state, is_new_episode):
    """Preprocess ``state`` and combine it with recent frames into one array.

    Args:
        stacked_frames: deque holding the most recent preprocessed frames.
            On a continuing episode it is appended to in place; on a new
            episode it is ignored and a fresh deque is returned instead.
        state: Raw frame passed to ``preprocess_frame``.
        is_new_episode: True when ``state`` is the first frame of an episode.

    Returns:
        A ``(stacked_state, stacked_frames)`` tuple: the frames stacked along
        a new axis 2, and the deque that now holds those frames.
    """
    frame = preprocess_frame(state)

    if is_new_episode:
        # No history yet: fill the whole window with copies of the first
        # frame. Building the deque directly avoids the zero-filled
        # placeholder (and the removed `np.int` dtype) the original created
        # only to overwrite, and keeps the window tied to `stack_size`.
        stacked_frames = deque([frame] * stack_size, maxlen=stack_size)
    else:
        # The bounded deque automatically discards the oldest frame.
        stacked_frames.append(frame)

    # Stack the frames along a new last axis.
    stacked_state = np.stack(stacked_frames, axis=2)

    return stacked_state, stacked_frames

## State 3: Hyperparameter setup

In [None]:
# --- Model hyperparameters ---
state_size = [110, 84, 4]         # input is a stack of 4 frames of 110x84
action_size = env.action_space.n  # 8
learning_rate = 2.5e-4

# --- Training hyperparameters ---
total_episodes = 50
max_steps = 50_000
batch_size = 64

# --- Exploration parameters ---
explore_start = 1.0   # highest exploration probability
explore_stop = 0.01   # lowest exploration probability
decay_rate = 1e-5     # decay applied to the exploration probability

# --- Q-learning hyperparameters ---
gamma = 0.9

# --- Memory hyperparameters ---
# Number of experiences placed in memory when it is first initialised.
pretrain_length = batch_size
# Upper bound on the number of stored experiences.
memory_size = 1_000_000

# --- Preprocessing hyperparameters ---
stack_size = 4

# --- Training flag ---
training = False


