In [8]:
from IPython.display import HTML

In [9]:
import tensorflow as tf      # Deep Learning library
import numpy as np           # Handle matrices
from vizdoom import *        # Doom Environment

import random                # Handling random number generation
import time                  # Handling time calculation
from skimage import transform# Help us to preprocess the frames

from collections import deque# Ordered collection with ends
import matplotlib.pyplot as plt # Display graphs

import warnings # This ignore all the warning messages that are normally printed during the training because of skiimage
warnings.filterwarnings('ignore')

Step 2: Create our environment 🎮

A monster is spawned randomly somewhere along the opposite wall.
Player can only go left/right and shoot.
1 hit is enough to kill the monster.
Episode finishes when monster is killed or on timeout (300). 

REWARDS:

+101 for killing the monster

-5 for missing
Episode ends after killing the monster or on timeout.
living reward = -1

In [10]:
def create_environment():
    """
    Here we create our environment.

    Builds a DoomGame, loads the "basic.cfg" configuration, points it at the
    "basic.wad" scenario and calls init() on it.

    Returns:
        (game, possible_actions): the DoomGame instance and a list of three
        one-hot button vectors in the order [left, right, shoot].
    """
    doom = DoomGame()

    # Configuration first, then the scenario file (the basic scenario here)
    doom.load_config("basic.cfg")
    doom.set_doom_scenario_path("basic.wad")

    doom.init()

    # One one-hot row per action: row 0 = left, row 1 = right, row 2 = shoot
    n_actions = 3
    one_hot_actions = [
        [1 if column == row else 0 for column in range(n_actions)]
        for row in range(n_actions)
    ]

    return doom, one_hot_actions

def test_environment(episodes=10):
    """
    Here we perform random actions to test the environment.

    Creates a game through create_environment(), plays `episodes` episodes
    choosing a random action at every step, and prints each action, the
    reward returned for it, and the total reward at the end of each episode.

    Args:
        episodes: number of episodes to play (default 10).

    Returns:
        None. The game is always closed before returning, even if a step
        raises.
    """
    # Reuse the shared setup instead of duplicating config/scenario loading
    game, actions = create_environment()

    try:
        for _ in range(episodes):
            game.new_episode()
            while not game.is_episode_finished():
                action = random.choice(actions)
                print(action)
                # Keep the value returned by make_action under the same name
                # that is printed on the next line
                reward = game.make_action(action)
                print("\treward:", reward)
                time.sleep(0.02)
            # get_total_reward is a method: call it so the number is printed
            # rather than the bound-method repr
            print("Result: ", game.get_total_reward())
            time.sleep(2)
    finally:
        game.close()
        

In [11]:
# Build the game once for the notebook; the hyperparameter cell further down
# reads the number of available buttons from `game`.
game, possible_actions = create_environment()

preprocessing

In [12]:

def preprocess_frame(frame):
    """
    preprocess_frame:
    Take a frame, crop it, normalize it and resize it to 84x84.

    Steps, in order:
        1. Crop: drop the first 30 and last 10 rows, and 30 columns on each
           side (the roof is removed because it contains no information).
        2. Normalize: divide the pixel values by 255.0.
        3. Resize: transform.resize(...) to [84, 84].

    No greyscale conversion happens here; per the original note it is
    already done in the vizdoom config.

    Returns:
        The preprocessed frame produced by transform.resize.
    """
    # Region of interest without the roof and the side borders
    roi = frame[30:-10, 30:-30]

    # Scale pixel values by 1/255
    scaled = roi / 255.0

    # Shrink to the 84x84 input size
    return transform.resize(scaled, [84, 84])

stack frames

Stacking frames is really important because it gives our neural network a sense of motion.

-First we preprocess frame
-Then we append the frame to the deque that automatically removes the oldest frame
-Finally we build the stacked state

This is how stacking works:

-For the first frame of an episode, we feed 4 copies of that frame
-At each timestep, we add the new frame to the deque and then stack its contents to form a new stacked state
-And so on for every following timestep
-If we're done, we create a new stack from 4 copies of the new first frame (because we are in a new episode).

In [13]:
stack_size = 4  # 4 frames in a stack

# Init the deque with one zero image per slot.
# The builtin `int` replaces `np.int`, which was only an alias for it and has
# been removed from NumPy. maxlen follows stack_size instead of a literal 4.
stacked_frames = deque(
    [np.zeros((84, 84), dtype=int) for _ in range(stack_size)],
    maxlen=stack_size,
)


# NOTE: this function used to be named `stacked_frames` as well, so defining it
# overwrote the deque created just above. It is named `stack_frames` so that
# both the deque and the function exist after this cell has run.
def stack_frames(stacked_frames, state, is_new_episode):
    """
    Preprocess `state` and merge it into the rolling stack of frames.

    Args:
        stacked_frames: deque holding the most recent frames. It is appended
            to in place when is_new_episode is False and replaced by a fresh
            deque when is_new_episode is True.
        state: raw frame, passed to preprocess_frame().
        is_new_episode: True for the first frame of an episode.

    Returns:
        (stacked_state, stacked_frames): the frames stacked along axis 2, and
        the deque to pass in on the next call.
    """
    frame = preprocess_frame(state)

    if is_new_episode:
        # New episode: start from a fresh deque in which the first frame
        # fills every slot
        stacked_frames = deque([frame] * stack_size, maxlen=stack_size)
    else:
        # Appending to a full deque automatically removes the oldest frame
        stacked_frames.append(frame)

    # Build the stacked state; the frames are laid out along the last axis
    stacked_state = np.stack(stacked_frames, axis=2)

    return stacked_state, stacked_frames

Step 4: Set up our hyperparameters ⚗️
In this part we'll set up our different hyperparameters. But when you implement a neural network by yourself, you will not define all the hyperparameters at once but progressively.

First, you begin by defining the neural networks hyperparameters when you implement the model.
Then, you'll add the training hyperparameters when you implement the training algorithm.

In [14]:
### MODEL HYPERPARAMETERS
state_size = [84, 84, 4]    # input is a stack of 4 frames with 84x84 pixels each
action_size = game.get_available_buttons_size()   # should be 3 possible actions: left, right, shoot
learning_rate = 0.0002      # alpha / learning rate

### TRAINING HYPERPARAMETERS
# NOTE(review): the training loop is not visible here; presumably these are
# the number of episodes, the step cap per episode and the minibatch size.
total_episodes = 500
max_steps = 100
batch_size = 64


# Exploration parameters for epsilon greedy strategy
explore_start = 1.0   # exploration probability at start
explore_stop = 0.01   # minimum exploration probability
decay_rate = 0.0001   # exponential decay rate for exploration prob

# Q Learning hyperparameters
gamma = 0.95     # Discounting rate


### MEMORY HYPERPARAMETERS
pretrain_length = batch_size   # Number of experiences stored in the Memory when initialized for the first time
# NOTE(review): if each experience stores 84x84x4 float states, 1,000,000 of
# them may not fit in RAM — confirm against the Memory implementation.
memory_size = 1000000          # Number of experiences the Memory can keep


### MODIFY THIS TO FALSE IF YOU JUST WANT TO SEE THE TRAINED AGENT
training = True


## TURN THIS TO TRUE IF YOU WANT TO RENDER THE ENVIRONMENT
episode_render = False


Step 5: Create our Deep Q-learning Neural Network model 🧠
Model: this is our Deep Q-learning model:

-We take a stack of 4 frames as input

-It passes through 3 convnets

-Then it is flattened

-Finally it passes through 2 FC layers

-It outputs a Q value for each action

In [15]:
class DQNetwork:
    """
    Deep Q-Network graph definition (work in progress).

    Only the input placeholders are created so far; no convolutional or
    fully connected layers are built by this class yet.
    """

    def __init__(self, state_size, action_size, learning_rate, name = 'DQNetwork'):
        """
        Store the hyperparameters and create the placeholders under `name`.

        Args:
            state_size: sequence of input dimensions, unpacked after the
                batch dimension (e.g. [84, 84, 4]).
            action_size: number of possible actions; sets the width of the
                actions placeholder.
            learning_rate: stored on the instance; not used by the code
                written so far.
            name: variable scope in which the placeholders are created.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate

        with tf.variable_scope(name):
            # We create the placeholders.
            # *state_size unpacks each element of state_size, so with
            # state_size = [84, 84, 4] this is like writing [None, 84, 84, 4]
            self.inputs_ = tf.placeholder(tf.float32, [None, *state_size], name = "inputs")

            # Width comes from action_size rather than a hard-coded 3, so the
            # constructor argument is actually honoured
            self.actions_ = tf.placeholder(tf.float32, [None, action_size], name = "actions_")

            # Remember that target_Q is R(s,a) + gamma * max Qhat(s', a')
            self.target_Q = tf.placeholder(tf.float32, [None], name = "target")

            # TODO: first convnet (CNN -> BatchNormalization -> ELU).
            # Input is 84x84.
                    

In [21]:
# (Scratch cell cleared: it contained only stray `s` keystrokes and raised
# "NameError: name 's' is not defined", which breaks Restart & Run All.)