In [58]:
import random
import time
from collections import deque

import numpy as np
import tensorflow as tf
from skimage import transform
from vizdoom import *

  from ._conv import register_converters as _register_converters


# Setup environment 

In [11]:
def create_env():
    """Configure a ViZDoom game for the "basic" scenario and list its actions.

    The game is only configured here; ``game.init()`` is NOT called, so the
    caller has to initialise the game before starting an episode.

    Returns:
        tuple: ``(game, possible_action)`` where ``game`` is the configured
        ``DoomGame`` and ``possible_action`` is the list
        ``[shoot, left, right]`` of one-hot button vectors.
    """
    game = DoomGame()
    game.load_config("./ViZDoom-1.1.5pre-Win-Python36-x86_64/ViZDoom-1.1.5pre-Win-Python36-x86_64/vizdoom/scenarios/basic.cfg")
    game.set_doom_scenario_path("./ViZDoom-1.1.5pre-Win-Python36-x86_64/ViZDoom-1.1.5pre-Win-Python36-x86_64/vizdoom/scenarios/basic.wad")

    # One-hot button vectors.
    # NOTE(review): presumably ordered like the buttons declared in basic.cfg — confirm.
    shoot = [0, 0, 1]
    left = [1, 0, 0]
    right = [0, 1, 0]
    possible_action = [shoot, left, right]

    # The original returned the undefined name `possible`, which raised NameError.
    return game, possible_action

In [9]:
def test():
    game = DoomGame()
    game.load_config("./ViZDoom-1.1.5pre-Win-Python36-x86_64/ViZDoom-1.1.5pre-Win-Python36-x86_64/vizdoom/scenarios/basic.cfg")
    game.set_doom_scenario_path("./ViZDoom-1.1.5pre-Win-Python36-x86_64/ViZDoom-1.1.5pre-Win-Python36-x86_64/vizdoom/scenarios/basic.wad")
#     game.set_mode(Mode.ASYNC_PLAYER)

    game.init()

    shoot = [0,0,1]
    left = [1,0,0]
    right = [0,1,0]

    episodes = 10
    for i in range(episodes):
        game.new_episode()
        while not game.is_episode_finished():
            state = game.get_state()
            img = state.screen_buffer
            misc = state.game_variables
            action = random.choice([shoot, left, right]) # A random agent
            reward = game.make_action(action)
            time.sleep(0.02) # Just to make it slower
            
            
        print("Total reward: " + str(game.get_total_reward()))
    game.close()

In [57]:
class Inp_data:
    """Rolling buffer that keeps the most recent frames (four at most)."""

    def __init__(self):
        # Bounded deque: once four frames are held, appending evicts the oldest.
        self.que = deque(maxlen=4)

    def add(self, frame):
        """Append ``frame`` and return all buffered frames stacked on a new axis 0.

        The result holds as many frames as are currently buffered, so it has
        fewer than four entries along axis 0 until four frames were added.
        """
        self.que.append(frame)
        buffered = list(self.que)
        return np.stack(buffered, axis=0)

In [56]:
def preprocess(frame):
    """Reduce a raw frame to a cropped, rescaled 84x84 image.

    Steps: average over axis 0 (integer result), crop 30 rows from the top,
    10 from the bottom and 30 columns from each side, divide by 255, then
    resize to 84x84 with ``skimage.transform.resize``.

    Args:
        frame: Array whose axis 0 is averaged away.
            NOTE(review): presumably a channel-first (C, H, W) screen buffer
            with values in 0..255 — confirm against the caller.

    Returns:
        The resized 84x84 image produced by ``transform.resize``.
    """
    # `np.int` was only an alias of the builtin `int`; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so use the builtin directly.
    gray = frame.mean(axis=0, dtype=int)
    cropped = gray[30:-10, 30:-30]
    scaled = cropped / 255
    return transform.resize(scaled, [84, 84])

In [59]:
# Dimensions of one network input; Dqn builds its input placeholder as
# [None, imgsize[0], imgsize[1], imgsize[2]].
# NOTE(review): presumably (height, width, stacked frames) — confirm.
imgsize = [84, 84, 4]

# NOTE(review): presumably the number of transitions sampled per training
# step; it is not referenced in the cells visible here — confirm usage.
batch_size = 64


In [163]:
class Dqn:
    """Convolutional Q-network built inside its own TensorFlow variable scope.

    Graph layout: three conv -> batch-norm -> ELU stages, a flatten, a
    512-unit ELU dense layer, and a linear output layer with one value per
    action. The loss is the mean squared difference between the fed targets
    and the output value selected by the action mask.
    """

    def __init__(self, scope, n_actions=3):
        """Build the graph under ``tf.variable_scope(scope)``.

        Args:
            scope: Name of the variable scope that holds every variable of
                this network.
            n_actions: Width of the action mask and of the output layer.
                Defaults to 3, the width the action placeholder had before
                this parameter existed.
        """
        self.scope = scope
        with tf.variable_scope(scope):
            self._input = tf.placeholder(dtype=tf.float32, shape=[None, imgsize[0], imgsize[1], imgsize[2]], name="input")
            # Mask multiplied element-wise with the outputs.
            # NOTE(review): presumably one-hot per row — confirm against the caller.
            self._action_id = tf.placeholder(dtype=tf.float32, shape=[None, n_actions], name="actionId")
            self._target = tf.placeholder(dtype=tf.float32, shape=[None], name="target")

            self._conv1 = tf.layers.conv2d(self._input,
                                     filters=32,
                                     kernel_size=[8,8],
                                     strides=[4,4],
                                     padding="VALID",
                                     kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                     name="conv1")

            # NOTE(review): no `training=` argument is passed to the batch-norm
            # layers, so they use the library default — confirm this is intended.
            self._conv1_batch = tf.layers.batch_normalization(self._conv1, trainable=True, name="b_conv1")
            self._conv1_out = tf.nn.elu(self._conv1_batch, name="e_conv1")

            self._conv2 = tf.layers.conv2d(self._conv1_out,
                                     filters=64,
                                     kernel_size=[4,4],
                                     strides=[2,2],
                                     padding="VALID",
                                     kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                     name="conv2")

            self._conv2_batch = tf.layers.batch_normalization(self._conv2, trainable=True, name="b_conv2")
            self._conv2_out = tf.nn.elu(self._conv2_batch, name="e_conv2")

            self._conv3 = tf.layers.conv2d(self._conv2_out,
                                     filters=128,
                                     kernel_size=[4,4],
                                     strides=[2,2],
                                     padding="VALID",
                                     kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                     name="conv3")

            self._conv3_batch = tf.layers.batch_normalization(self._conv3, trainable=True, name="b_conv3")
            self._conv3_out = tf.nn.elu(self._conv3_batch, name="e_conv3")

            self._flat = tf.layers.flatten(self._conv3_out, name="flat")

            self._dense1 = tf.layers.dense(self._flat,
                            units=512,
                            activation=tf.nn.elu,
                            kernel_initializer=tf.contrib.layers.xavier_initializer())

            # One linear output per action. The original used 512 units here,
            # which cannot be multiplied with the [None, 3] action mask below.
            self._out = tf.layers.dense(self._dense1,
                            units=n_actions,
                            kernel_initializer=tf.contrib.layers.xavier_initializer())
            # Keep the old public attribute name working; `predict` reads `_out`.
            self.out = self._out

            # Masked sum over the action axis: with a one-hot mask this is the
            # output value of the chosen action.
            self._predict = tf.reduce_sum(tf.multiply(self._out, self._action_id), axis=1)

            self._loss = tf.reduce_mean(tf.squared_difference(self._target, self._predict))
            self._optimizer = tf.train.RMSPropOptimizer(
                learning_rate=0.00025, decay=0.99, momentum=0.0, epsilon=1e-6
            ).minimize(self._loss)

    def predict(self, sess, frames):
        """Run the output layer for ``frames``.

        Args:
            sess: TensorFlow session used to evaluate the graph.
            frames: Batch fed to the input placeholder.

        Returns:
            The evaluated output layer: one row per input, one column per action.
        """
        return sess.run(self._out, feed_dict={self._input: frames})

    def update(self, sess, states, action_ids, targets):
        """Run one optimisation step.

        Args:
            sess: TensorFlow session used to evaluate the graph.
            states: Batch fed to the input placeholder.
            action_ids: Action masks, one row per state.
            targets: Target values, one per state.

        Returns:
            The result of ``sess.run`` on ``[loss, train_op]``; the first
            element is the loss value.
        """
        feed = {self._input: states, self._action_id: action_ids, self._target: targets}
        return sess.run([self._loss, self._optimizer], feed_dict=feed)

In [159]:
def copy_model_weights(sess, q_target, q_predict):
    """Copy the trainable variables of one network into another.

    Variables are selected by name prefix, sorted by name, and paired
    position by position.

    Args:
        sess: TensorFlow session used to run the assignments.
        q_target: Name prefix (variable scope) of the variables to overwrite.
        q_predict: Name prefix (variable scope) of the variables to copy from.

    Raises:
        ValueError: If the two prefixes select a different number of
            variables, since the pairing would then be meaningless.
    """
    trainable = tf.trainable_variables()
    # The original called the non-existent `str.startwith` for the source side.
    target_vars = sorted((v for v in trainable if v.name.startswith(q_target)), key=lambda v: v.name)
    predict_vars = sorted((v for v in trainable if v.name.startswith(q_predict)), key=lambda v: v.name)

    if len(target_vars) != len(predict_vars):
        raise ValueError(
            "variable count mismatch: %d under %r vs %d under %r"
            % (len(target_vars), q_target, len(predict_vars), q_predict)
        )

    # Build every assignment first and execute them in a single run call.
    # Note that each call of this function creates new assign ops.
    assign_ops = [tf.assign(dst, src) for dst, src in zip(target_vars, predict_vars)]
    sess.run(assign_ops)

In [160]:
class ReplayMemory:
    """Fixed-capacity store of transitions with uniform random sampling."""

    def __init__(self, size):
        # Bounded deque: when `size` items are held, appending drops the oldest.
        self.memory = deque(maxlen=size)

    def store(self, sarsd):
        """Append one item to the memory."""
        self.memory.append(sarsd)

    def sample(self, batch_size):
        """Return ``batch_size`` distinct stored items chosen at random.

        ``random.sample`` raises ``ValueError`` when ``batch_size`` is larger
        than the number of stored items.
        """
        chosen = random.sample(self.memory, batch_size)
        return chosen

In [161]:
# Reset the default graph before anything is built, so that re-running the
# notebook cells does not add to a graph left over from an earlier run.
tf.reset_default_graph()
# Session shared by the cells that follow; it must be created after the reset.
sess = tf.InteractiveSession()