In [1]:
import tensorflow as tf
import numpy as np

import retro

from skimage import transform
from skimage.color import rgb2gray

import matplotlib.pyplot as plt

from collections import deque

import random

import warnings

warnings.filterwarnings('ignore')

In [2]:
# Create the retro emulator environment for the Atari 2600 Space Invaders game.
# Later cells read its observation/action spaces from this `env` object.
env = retro.make(game='SpaceInvaders-Atari2600')

In [6]:
print("Frame Size:", env.observation_space)
print("Action Size is", env.action_space.n)

# One-hot encoded versions of our actions: row i is the vector for action i.
# np.identity already yields the integer matrix, so no list round-trip is needed.
possible_actions = np.identity(env.action_space.n, dtype=int)

# Backward-compatible alias: the name was originally misspelled, keep it so any
# cell that still refers to `pssible_actions` continues to work.
pssible_actions = possible_actions

Frame Size: Box(210, 160, 3)
Action Size is 8


In [8]:
def preprocess_frame(frame):
    """Turn a raw RGB game frame into a small grayscale image.

    The frame is converted to grayscale, the screen borders are cropped away
    and the result is resized to 110x84.

    Parameters
    ----------
    frame : ndarray
        RGB frame as returned by the environment (rows, columns, 3).
        Assumed to be an integer (uint8) image -- TODO confirm for other envs.

    Returns
    -------
    ndarray
        Array of shape (110, 84) holding float grayscale intensities.

    Notes
    -----
    NOTE(review): the pixel scale is now [0, 1] instead of roughly [0, 1/255];
    weights trained with the previous scaling will see differently scaled input.
    """
    # rgb2gray rescales integer images to floats in [0, 1], so the frame is
    # already normalised here; dividing by 255 again would squash every pixel
    # into roughly [0, 0.004].
    gray = rgb2gray(frame)

    # Drop 8 rows at the top, 12 rows at the bottom, 4 columns on the left
    # and 12 columns on the right.
    cropped_frame = gray[8:-12, 4:-12]

    preprocessed_frame = transform.resize(cropped_frame, [110, 84])
    return preprocessed_frame

In [16]:
# Number of consecutive frames that make up one stacked state.
stack_size = 4

# Rolling buffer of the most recent frames, pre-filled with blank 110x84 frames.
# maxlen follows stack_size so the buffer length and the constant cannot drift apart.
stacked_frames = deque(
    [np.zeros((110, 84), dtype=int) for _ in range(stack_size)],
    maxlen=stack_size,
)

def stack_frames(stacked_frames, state, is_new_episode):
    """Preprocess a raw frame and push it onto the rolling frame stack.

    Parameters
    ----------
    stacked_frames : collections.deque
        Current stack of preprocessed frames. When ``is_new_episode`` is false
        it is appended to in place; otherwise a fresh deque replaces it.
    state : ndarray
        Raw frame, passed unchanged to ``preprocess_frame``.
    is_new_episode : bool
        True for the first frame of an episode.

    Returns
    -------
    stacked_state : ndarray
        The frames of the stack joined along a new last axis (axis 2).
    stacked_frames : collections.deque
        The updated stack, to be passed back in on the next call.
    """
    # The raw frame arrives as `state`; preprocess that parameter.
    frame = preprocess_frame(state)

    if is_new_episode:
        # No history yet: fill the whole stack with copies of the first frame.
        stacked_frames = deque([frame] * stack_size, maxlen=stack_size)
    else:
        # maxlen makes the deque drop its oldest frame automatically.
        stacked_frames.append(frame)

    stacked_state = np.stack(stacked_frames, axis=2)

    return stacked_state, stacked_frames

In [17]:
### PREPROCESSING HYPERPARAMETERS
stack_size = 4                 # Number of frames stacked

### MODEL HYPERPARAMETERS
# Input is a stack of preprocessed frames: 110 rows (height) x 84 columns (width) x stack_size frames.
state_size = [110, 84, stack_size]
action_size = env.action_space.n # Number of possible actions (8 for this environment)
learning_rate =  0.00025      # Alpha (aka learning rate)

### TRAINING HYPERPARAMETERS
total_episodes = 50            # Total episodes for training
max_steps = 50000              # Max possible steps in an episode
batch_size = 64                # Batch size

# Exploration parameters for epsilon greedy strategy
explore_start = 1.0            # Exploration probability at start
explore_stop = 0.01            # Minimum exploration probability
decay_rate = 0.00001           # Exponential decay rate for exploration probability

# Q learning hyperparameters
gamma = 0.9                    # Discounting rate

### MEMORY HYPERPARAMETERS
pretrain_length = batch_size   # Number of experiences stored in the Memory when initialized for the first time
memory_size = 1000000          # Number of experiences the Memory can keep

### SET THIS TO FALSE IF YOU JUST WANT TO SEE THE TRAINED AGENT (NO TRAINING)
training = False

### SET THIS TO TRUE IF YOU WANT TO RENDER THE ENVIRONMENT
episode_render = False