In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import vizdoom
import os
import time
import keras
import random
from keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
from keras.models import Sequential
from keras.optimizers import SGD
from collections import deque
import numpy as np
from skimage import transform
from IPython.display import display, clear_output

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Create the ViZDoom game instance that the rest of the notebook drives.
game = vizdoom.DoomGame()
# Load settings from the scenario config file.
# NOTE(review): the network below expects 84x84x4 stacked frames, which presumes
# the config yields a single-channel screen buffer -- confirm in basic.cfg.
game.load_config("doom/basic.cfg")
# Uncomment to show the game window while the agent plays:
#game.set_window_visible(True)
# Set the scenario file explicitly (presumably overriding any path given in the
# config -- confirm against the ViZDoom docs).
game.set_doom_scenario_path("doom/basic.wad")

In [3]:
# --- Training schedule ---
# Number of episodes to play.
num_episodes = 500
# Passed to game.set_episode_timeout() as the per-episode limit.
num_steps = 100

# --- Experience replay ---
# Maximum number of transitions kept in the replay deque.
replay_buffer_size = 10 ** 6
# Number of transitions sampled for each training step.
batch_size = 64

# --- Optimisation ---
# Step size handed to the SGD optimizer.
learning_rate = 0.001
# Gamma: weight applied to the predicted next-state Q-values in the target.
discount_factor = 0.7

In [4]:
# Q-network: three ELU conv stages (the first two followed by 2x2 max-pooling),
# then two dense ELU layers and a linear head with one output per game button.
# Input is a stack of four 84x84 frames.
model = Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), activation="elu", padding="valid",
           input_shape=(84, 84, 4)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), activation="elu", padding="valid"),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=128, kernel_size=(3, 3), activation="elu", padding="valid"),
    Flatten(),
    Dense(units=512, activation="elu"),
    Dense(units=128, activation="elu"),
    # Linear output: raw Q-value estimates, one per available button.
    Dense(units=game.get_available_buttons_size(), activation=None),
])
model.summary()

# Regress Q-values with mean squared error, optimised by plain SGD.
model.compile(optimizer=SGD(lr=learning_rate), loss="mse")

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 82, 82, 32)        1184      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 41, 41, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 39, 39, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 19, 19, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 17, 17, 128)       73856     
_________________________________________________________________
flatten_1 (Flatten)          (None, 36992)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               18940416  
__________

In [5]:
def preprocess_frame(frame):
    """Crop, rescale and resize one screen buffer for the network.

    The frame loses its first 30 and last 10 rows (the original author notes
    the roof carries no information) and 30 columns on each side. The result
    is divided by 255.0 and resized to 84x84 with ``skimage.transform.resize``.

    Args:
        frame: screen buffer array, indexed as [row, column, ...].
            Assumes 8-bit intensities in 0-255 -- TODO confirm against the
            screen format configured for the game.

    Returns:
        The resized array with its first two dimensions equal to 84x84.
    """
    roi = frame[30:-10, 30:-30]
    return transform.resize(roi / 255.0, [84, 84])

# Experience replay memory of (state, action, reward, next_state, done) tuples.
# The deque drops the oldest transition once maxlen is reached.
replay_buffer = deque(maxlen=replay_buffer_size)

# Size the action vector from the game itself so it always matches the width
# of the network's output layer (previously hard-coded to 3).
num_actions = game.get_available_buttons_size()

game.set_episode_timeout(num_steps)
game.init()

try:
    for episode in range(num_episodes):
        game.new_episode()

        # Init frame stack: repeat the first frame four times so the very first
        # state already has the (84, 84, 4) shape the network expects.
        frame_stack = deque(maxlen=4)
        initial_frame = preprocess_frame(game.get_state().screen_buffer)
        for _ in range(4):
            frame_stack.append(initial_frame)
        # float32 halves replay memory compared with float64; presumably Keras
        # casts inputs to its default float type (float32) anyway.
        state = np.stack(frame_stack, axis=2).astype(np.float32)

        # Initialised per episode (not per step) so both names are always
        # defined when the episode summary is displayed below.
        display_str = "-- Episode {}/{} --\n".format(episode, num_episodes)
        loss = 0

        while True:
            # Get action with highest Q-value for current state.
            # NOTE(review): the policy is purely greedy; there is no exploration
            # (e.g. epsilon-greedy). Consider adding it.
            q_values = model.predict_on_batch(np.expand_dims(state, axis=0))
            action = int(np.argmax(q_values))
            action_one_hot = [False] * num_actions
            action_one_hot[action] = True

            # Take action and get reward
            reward = game.make_action(action_one_hot)
            done = game.is_episode_finished()

            if done:
                # The original code never called get_state() once the episode
                # had finished, so no next frame is read here. Reuse the current
                # state as a placeholder; its Q-values are masked out of the
                # target below.
                new_state = state
            else:
                frame_stack.append(preprocess_frame(game.get_state().screen_buffer))
                new_state = np.stack(frame_stack, axis=2).astype(np.float32)

            # Store experience, including the terminal transition, so the
            # final reward of each episode can be learned from.
            replay_buffer.append((state, action, reward, new_state, done))
            state = new_state

            # Train network on experiences
            if len(replay_buffer) >= batch_size:
                replay_batch      = random.sample(replay_buffer, batch_size)
                replay_state      = np.array([r[0] for r in replay_batch])
                replay_action     = np.array([r[1] for r in replay_batch])
                replay_reward     = np.array([r[2] for r in replay_batch], dtype=np.float32)
                replay_next_state = np.array([r[3] for r in replay_batch])
                replay_done       = np.array([r[4] for r in replay_batch], dtype=bool)

                # max_a' Q(s', a'); zero for terminal transitions, which have
                # no future return to bootstrap from.
                next_q_max = np.max(model.predict_on_batch(replay_next_state), axis=1)
                next_q_max = np.where(replay_done, 0.0, next_q_max)

                # Start from the network's own predictions so the error is zero
                # for every action that was NOT taken, then overwrite only the
                # taken action with the Bellman target:
                #   Q_target[s, a] = reward + gamma * max_a' Q(s', a')
                Q_target = np.array(model.predict_on_batch(replay_state))
                Q_target[np.arange(batch_size), replay_action] = (
                    replay_reward + discount_factor * next_q_max
                )

                loss = model.train_on_batch(replay_state, Q_target)

            # Break if episode is over
            if done:
                break

        # Report the loss of the most recent training step of this episode
        # (0 if no training step has run yet).
        display_str += "Loss: {}\n".format(loss)
        clear_output(wait=True)
        display(display_str)

    print("Done!")
finally:
    # Always release the game instance, even if training is interrupted
    # (e.g. KeyboardInterrupt) or raises.
    game.close()

'-- Episode 96/500 --\nLoss: nan\n'

KeyboardInterrupt: 