In [None]:
import tflearn
import tensorflow as tf

class Actor(object):
    """Actor network plus a slowly tracking target copy (TF1 graph mode).

    Two structurally identical networks are built with tflearn: the online
    network (``inputs`` -> ``out``) and the target network
    (``target_inputs`` -> ``target_out``). The online network is trained from
    externally supplied action gradients; the target network is only ever
    written through ``initialize_target`` and ``update_target_network``.

    Args:
        sess: session object used to run every op built here.
        state_dim: sequence describing the state shape; only length-1
            shapes are supported by ``create_actor_network``.
        action_dim: size of the action vector (output layer width).
        learning_rate: learning rate handed to the Adam optimizer.
        tau: soft-update mixing factor for the target network.
        batch_size: divisor applied to the accumulated actor gradients.
    """

    def __init__(self, sess, state_dim, action_dim, learning_rate, tau, batch_size):
        # The length of state_dim might be 1 or 3 (height, width, channels);
        # only the length-1 case is implemented below.
        self.sess = sess
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.learning_rate = learning_rate
        self.tau = tau
        self.batch_size = batch_size

        # Online network. Its parameters are whatever is trainable right
        # after it has been built.
        self.inputs, self.out = self.create_actor_network()
        self.network_params = tf.trainable_variables()

        # Target network. Its parameters are the trainable variables that
        # were appended after the online network's.
        self.target_inputs, self.target_out = self.create_actor_network()
        self.target_network_params = \
            tf.trainable_variables()[len(self.network_params):]

        # Soft update: target <- tau * online + (1 - tau) * target.
        # The whole blend must sit inside assign(); adding the (1 - tau) term
        # to the assign's result would leave the variable holding only
        # tau * online.
        self.update_target_network_params = [
            target_param.assign(
                tf.multiply(param, self.tau)
                + tf.multiply(target_param, 1. - self.tau)
            )
            for param, target_param in zip(self.network_params,
                                           self.target_network_params)
        ]

        # Hard copy: target <- online.
        self.initialize_target_network_params = [
            target_param.assign(param)
            for param, target_param in zip(self.network_params,
                                           self.target_network_params)
        ]

        # Gradient of some external objective w.r.t. the action, fed at
        # train time.
        self.action_gradient = tf.placeholder(tf.float32, [None, self.action_dim])

        # Back-propagate the negated action gradient through the actor and
        # scale by 1 / batch_size. The negation means the optimizer's
        # descent step moves the parameters along +action_gradient.
        self.actor_gradients = [
            tf.div(grad, self.batch_size)
            for grad in tf.gradients(self.out, self.network_params,
                                     -self.action_gradient)
        ]

        self.optimize = tf.train.AdamOptimizer(self.learning_rate).\
            apply_gradients(zip(self.actor_gradients, self.network_params))

        self.num_trainable_vars = \
            len(self.network_params) + len(self.target_network_params)

    def create_actor_network(self):
        """Build one actor network and return ``(inputs, out)``.

        The network is input -> FC(400) -> ReLU -> FC(300) -> ReLU ->
        FC(action_dim) with tanh activation; the last layer's weights are
        initialised uniformly in [-0.003, 0.003].

        Raises:
            AssertionError: if ``state_dim`` does not have length 1.
        """
        if len(self.state_dim) != 1:
            # Raised explicitly (not via `assert`) so the check survives
            # `python -O`; the exception type and message are unchanged.
            raise AssertionError("wrong state dim input")

        inputs = tflearn.input_data(shape=[None, *self.state_dim])
        net = tflearn.fully_connected(inputs, 400, weight_decay=0.0)
        net = tflearn.activations.relu(net)
        net = tflearn.fully_connected(net, 300, weight_decay=0.0)
        net = tflearn.activations.relu(net)

        w_init = tflearn.initializations.uniform(minval=-0.003, maxval=0.003)
        out = tflearn.fully_connected(net, self.action_dim, activation='tanh',
                                      weights_init=w_init, weight_decay=0.0)

        return inputs, out

    def train(self, inputs, a_gradient):
        """Run one optimizer step, feeding states and action gradients."""
        self.sess.run(self.optimize, feed_dict={
            self.inputs: inputs,
            self.action_gradient: a_gradient
        })

    def predict(self, inputs):
        """Return the online network's output for ``inputs``."""
        return self.sess.run(self.out, feed_dict={
            self.inputs: inputs
        })

    def predict_target(self, inputs):
        """Return the target network's output for ``inputs``."""
        return self.sess.run(self.target_out, feed_dict={
            self.target_inputs: inputs
        })

    def update_target_network(self):
        """Run the soft-update ops that blend online weights into the target."""
        self.sess.run(self.update_target_network_params)

    def get_num_trainable_vars(self):
        """Return the combined count of online and target parameters."""
        return self.num_trainable_vars

    def initialize_target(self):
        """Run the ops that copy the online weights into the target network."""
        self.sess.run(self.initialize_target_network_params)

In [1]:
import tflearn

In [5]:
import numpy as np

In [None]:
# Scratch cell: calls tflearn's uniform initializer with default arguments;
# the result is not bound to any name.
tflearn.initializations.uniform()

In [2]:
import tensorflow as tf

In [11]:
# Scratch cell: reduce_sum applied to the Python scalar 3; the echoed output
# below shows a shape-() int32 tensor rather than a number.
tf.reduce_sum(3)

<tf.Tensor 'Sum_2:0' shape=() dtype=int32>

In [12]:
# Scratch cell: prints a float literal written in scientific notation.
print(1.9807614e-05)

1.9807614e-05


In [14]:
# Scratch cell: blends two values with weights 0.001 and 0.999.
# NOTE(review): presumably a hand check of a soft target update
# (tau * online + (1 - tau) * target) with tau = 0.001 -- confirm.
0.037248693*0.001+0.037241638*0.999

0.037241645055