In [1]:
import cv2
import random
import numpy as np
import argparse
import tensorflow as tf

from game import wrapped_flappy_bird as game
from util.models import Actor, Critic
from util.replay_buffer import ReplayBuffer, PrioritizedReplayBuffer
from util.schedules import LinearSchedule
from util.tf_util import scope_vars, update_target

In [8]:
import easydict
# Notebook-wide settings. n_actions sizes the action vector handed to the game
# and the output layer of the policy network.
args = easydict.EasyDict({"n_actions": 2})

print(args)

{'n_actions': 2}


In [3]:
# Start a game and advance one frame with the `do_nothing` action
# (a one-hot vector with index 0 set).
game_state = game.GameState()
do_nothing = np.eye(args.n_actions)[0]
x_t, r_0, terminal = game_state.frame_step(do_nothing)

# Shrink the frame to 80x80, drop colour, then binarize:
# pixels above 1 become 255, everything else 0.
x_t = cv2.resize(x_t, (80, 80))
x_t = cv2.cvtColor(x_t, cv2.COLOR_BGR2GRAY)
ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)

# The initial state is the first frame repeated four times on the last axis.
s_t = np.stack([x_t] * 4, axis=2)  # s_t : 80 * 80 * 4

In [4]:
import tensorflow.contrib.layers as layers
def Actor(img_in, num_actions, scope, reuse=False):
    """Build a convolutional policy network and return its action probabilities.

    The convolutional stack is the one described in
    https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf

    Note: defining this function rebinds the name ``Actor`` that the first
    cell imports from ``util.models``.

    Args:
        img_in: tensor fed to the first convolution.
        num_actions: number of units in the final fully connected layer.
        scope: name of the outermost variable scope.
        reuse: forwarded to ``tf.variable_scope`` for the outer scope.

    Returns:
        The softmax of the final fully connected layer's output.
    """
    # (num_outputs, kernel_size, stride) for each convolution, applied in order.
    conv_specs = ((32, 8, 4), (64, 4, 2), (64, 3, 1))

    with tf.variable_scope(scope, reuse=reuse):
        features = img_in
        with tf.variable_scope("convnet"):
            # original architecture
            for n_filters, kernel, step in conv_specs:
                features = layers.convolution2d(
                    features,
                    num_outputs=n_filters,
                    kernel_size=kernel,
                    stride=step,
                    activation_fn=tf.nn.relu,
                )
        features = layers.flatten(features)

        with tf.variable_scope("action_value"):
            hidden = layers.fully_connected(features, num_outputs=512, activation_fn=tf.nn.relu)
            # No activation here: the raw outputs are normalised by the softmax below.
            logits = layers.fully_connected(hidden, num_outputs=num_actions, activation_fn=None)

        with tf.variable_scope("action_prob"):
            probs = tf.nn.softmax(logits)
        return probs

In [6]:
# Placeholder for a batch of states, each 80 x 80 x 4.
s = tf.placeholder(tf.float32, shape=[None, 80, 80, 4], name="state")
# Action probabilities from the policy network: [None, n_actions]
actor_eval = Actor(img_in=s, num_actions=args.n_actions, scope="actor_eval")

In [11]:
# Open an interactive session and initialise every variable created so far.
sess = tf.InteractiveSession()
init_op = tf.global_variables_initializer()
sess.run(init_op)

In [27]:
# Forward pass of the policy network on the single initial state; [0] picks the
# only row of the batch. Kept under its own name so the result is not thrown
# away by the assignment to `a` below (previously both wrote to `a`).
actor_probs = actor_eval.eval(feed_dict={s: [s_t]})[0]

# Hand-written two-action distribution used by the sampling cell that follows.
a = np.array([0.3, 0.7])

In [42]:
# Draw one index, weighting each index by the corresponding entry of `a`.
action_ids = np.arange(len(a))
np.random.choice(action_ids, p=a)

1

In [55]:
# 4 x 4 matrix of uniform samples from [0, 1).
a = np.random.random_sample((4, 4))
a

array([[ 0.82965391,  0.64452692,  0.00580582,  0.6433026 ],
       [ 0.58212653,  0.7342581 ,  0.18471371,  0.20681278],
       [ 0.63554106,  0.82074312,  0.32437369,  0.16361654],
       [ 0.50036648,  0.22341574,  0.55833854,  0.07370358]])

In [57]:
# Flatten the matrix into a 1-D array (row-major order).
np.reshape(a, -1)

array([ 0.82965391,  0.64452692,  0.00580582,  0.6433026 ,  0.58212653,
        0.7342581 ,  0.18471371,  0.20681278,  0.63554106,  0.82074312,
        0.32437369,  0.16361654,  0.50036648,  0.22341574,  0.55833854,
        0.07370358])

In [58]:
# One-hot encode four indices with a depth of 2.
a = tf.one_hot(indices=[0, 1, 1, 0], depth=2)

In [62]:
# `s` is a rank-4 batch of 80 x 80 x 4 states. tf.layers.dense only contracts the
# last axis of its input, so feeding `s` directly yields one output per pixel
# position ([None, 80, 80, k]) rather than one per state. Collapse each state
# to a single vector first so `a_prob` is [None, 2] and `v` is [None, 1].
s_flat = tf.reshape(s, [-1, int(np.prod(s.get_shape().as_list()[1:]))])

w_init = tf.random_normal_initializer(0., .1)
with tf.variable_scope('actor'):
    # Hidden layer followed by a softmax over the two actions.
    l_a = tf.layers.dense(s_flat, 200, tf.nn.relu6, kernel_initializer=w_init, name='la')
    a_prob = tf.layers.dense(l_a, 2, tf.nn.softmax, kernel_initializer=w_init, name='ap')
with tf.variable_scope('critic'):
    # Hidden layer followed by a single linear unit.
    l_c = tf.layers.dense(s_flat, 100, tf.nn.relu6, kernel_initializer=w_init, name='lc')
    v = tf.layers.dense(l_c, 1, kernel_initializer=w_init, name='v')  # state value

In [71]:
# Restart the game and step once with the `do_nothing` action to get the first frame.
game_state = game.GameState()
do_nothing = np.asarray([1, 0])
x_t, r_0, done = game_state.frame_step(do_nothing)

# Preprocess: 80x80 resize -> grayscale -> binary threshold (values above 1 map to 255).
x_t = cv2.resize(x_t, (80, 80))
x_t = cv2.cvtColor(x_t, cv2.COLOR_BGR2GRAY)
ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)

# Repeat the frame four times along a new trailing axis to form the state.
s_t = np.dstack((x_t, x_t, x_t, x_t))  # s_t : 80 * 80 * 4

In [80]:
# Collects states; each entry is appended with a leading axis of length 1.
buffer_s = list()

In [84]:
# Add a leading axis of length 1 so stored states can later be stacked on axis 0.
# Note: every run of this cell appends another entry.
buffer_s.append(np.expand_dims(s_t, axis=0))

In [83]:
# Shape of the first stored state.
np.shape(buffer_s[0])

(1, 80, 80, 4)

In [85]:
# Join all stored states along the leading axis and inspect the batch shape.
np.concatenate(buffer_s, axis=0).shape

(2, 80, 80, 4)

In [86]:
import torch