In [1]:
import os
import tensorflow as tf
import numpy as np
from tensorflow.initializers import random_uniform
class Actor(object):
    """Actor (policy) network for DDPG, built with the TensorFlow 1.x graph API.

    The constructor builds the network inside a variable scope called ``name``,
    collects that scope's trainable variables into ``self.params`` and creates
    ``self.optimize``: an Adam update that applies the gradients of ``self.mu``
    with respect to ``self.params``, weighted by the negated values fed into
    ``self.action_gradients`` and divided by ``batch_size``.

    Args:
        lr: Learning rate handed to ``tf.train.AdamOptimizer``.
        n_actions: Number of units in the output layer.
        name: Variable scope name; also used to select ``self.params``.
        input_dims: Sequence of per-sample input dimensions; it is unpacked
            into the input placeholder's shape after the batch axis.
        sess: Object whose ``run`` method is used by ``predict`` and ``train``.
        fc1_dims: Number of units in the first hidden layer.
        fc2_dims: Number of units in the second hidden layer.
        action_bound: Stored on the instance only.
            NOTE(review): it is never applied to ``self.mu`` in this class;
            confirm whether the output should be scaled by it.
        batch_size: Divisor applied to every actor gradient.
        chkpt_dir: NOTE(review): accepted but not used anywhere in this class.
    """

    def __init__(self, lr, n_actions, name, input_dims, sess, fc1_dims,
                 fc2_dims, action_bound, batch_size=64, chkpt_dir='tmp/ddpg'):
        self.lr = lr
        self.n_actions = n_actions
        self.name = name
        self.fc1_dims = fc1_dims
        self.fc2_dims = fc2_dims
        self.input_dims = input_dims
        self.batch_size = batch_size
        self.sess = sess
        self.action_bound = action_bound
        self.build_network()
        self.params = tf.trainable_variables(scope=self.name)
        # The incoming action gradients are negated before being used as the
        # upstream gradient of mu.
        self.unnormalized_actor_gradients = tf.gradients(self.mu, self.params, -self.action_gradients)
        # Plain `/` replaces the deprecated tf.div (the deprecation notice
        # recommends the operator or tf.math.divide).
        self.actor_gradients = [grad / self.batch_size for grad in self.unnormalized_actor_gradients]
        self.optimize = tf.train.AdamOptimizer(self.lr).apply_gradients(zip(self.actor_gradients, self.params))

    @staticmethod
    def _hidden_layer(inputs, units, bound):
        """Dense -> batch normalization -> tanh, initialized from U(-bound, bound)."""
        dense = tf.layers.dense(inputs, units=units,
                                kernel_initializer=random_uniform(-bound, bound),
                                bias_initializer=random_uniform(-bound, bound))
        # NOTE(review): no `training` argument is passed, so the library
        # default mode is used for batch normalization -- confirm.
        normalized = tf.layers.batch_normalization(dense)
        return tf.nn.tanh(normalized)

    def build_network(self):
        """Create the placeholders and layers; sets ``input``, ``action_gradients`` and ``mu``."""
        with tf.variable_scope(self.name):
            self.input = tf.placeholder(tf.float32, shape=[None, *self.input_dims], name='inputs')
            self.action_gradients = tf.placeholder(tf.float32, shape=[None, self.n_actions], name='gradients')

            # Each initializer bound is 1 / sqrt(<layer size>). Layers are
            # created in the same order as before, so the auto-generated
            # variable names (dense, dense_1, ...) do not change.
            f1 = 1. / np.sqrt(self.fc1_dims)
            hidden1 = self._hidden_layer(self.input, self.fc1_dims, f1)

            f2 = 1. / np.sqrt(self.fc2_dims)
            hidden2 = self._hidden_layer(hidden1, self.fc2_dims, f2)

            # The third hidden layer is fixed at 32 units; its bound is derived
            # from fc2_dims and is reused for the output layer.
            f3 = 1. / np.sqrt(self.fc2_dims)
            hidden3 = self._hidden_layer(hidden2, 32, f3)

            self.mu = tf.layers.dense(hidden3, units=self.n_actions, activation='tanh',
                                      kernel_initializer=random_uniform(-f3, f3),
                                      bias_initializer=random_uniform(-f3, f3))

    def predict(self, inputs):
        """Run ``self.mu`` in the session with ``inputs`` fed to the input placeholder."""
        return self.sess.run(self.mu, feed_dict={self.input: inputs})

    def train(self, inputs, gradients):
        """Run one ``self.optimize`` step, feeding ``inputs`` and the action ``gradients``."""
        self.sess.run(self.optimize, feed_dict={self.input: inputs, self.action_gradients: gradients})
class Federated_Server(object):
    """Server side of a federated setup: owns one Actor and averages robot parameters into it.

    Attributes:
        sess: ``tf.Session`` created by the constructor and shared with the actor.
        actor: The server's ``Actor`` (built with learning rate 1 and action bound 1).
        numberOfR: Number of robot slots (fixed at 20).
        actors_params: One parameter list per robot slot, each initialised as
            a separate copy of the server actor's parameter list.
        robot_sents: Per-robot weights used by ``federation`` (all zeros initially).

    Args:
        name_actor: Variable scope name for the server actor.
        name_critic: NOTE(review): accepted but not used in this class.
        input_dims: Forwarded to ``Actor`` as its ``input_dims``.
        n_actions: Forwarded to ``Actor`` as its ``n_actions``.
        layer1_size: Forwarded to ``Actor`` as ``fc1_dims``.
        layer2_size: Forwarded to ``Actor`` as ``fc2_dims``.
    """

    def __init__(self, name_actor, name_critic, input_dims, n_actions=7, layer1_size=32, layer2_size=32):
        self.sess = tf.Session()
        self.actor = Actor(1, n_actions, name_actor, input_dims, self.sess, layer1_size, layer2_size, 1)
        self.numberOfR = 20
        # Every robot gets its own list. `[params] * n` would store n
        # references to the server's own list, so any write to one slot (or to
        # the server list) would show up in all of them.
        self.actors_params = [list(self.actor.params) for _ in range(self.numberOfR)]
        self.robot_sents = np.zeros(self.numberOfR)
        self.sess.run(tf.global_variables_initializer())

    def federation(self):
        """Blend the robots' parameters into the server actor's parameter list.

        For every parameter index ``i`` the new entry is::

            (params[i] + sum_j actors_params[j][i] * robot_sents[j]) / numberOfR

        The sum is accumulated in a local variable, so the robots' entries are
        never modified in place while they are being read.

        NOTE(review): when the entries are ``tf.Variable`` objects (as set up
        in ``__init__``) this arithmetic builds new graph tensors and stores
        those in ``self.actor.params``; it does not assign to the variables.
        Confirm whether an explicit assign run through the session is intended.

        Returns:
            ``self.actor.params`` (the same list object, with replaced entries).
        """
        for i in range(len(self.actor.params)):
            aggregate = self.actor.params[i]
            for j in range(self.numberOfR):
                aggregate = aggregate + self.actors_params[j][i] * self.robot_sents[j]
            self.actor.params[i] = aggregate / self.numberOfR

        return self.actor.params
# Build the server for a 148-dimensional input and 8 actions, then list the
# server actor's trainable variables one per line. federation() is not
# invoked in this cell.
Server_robot = Federated_Server(
    name_actor='server_actor',
    name_critic='server_critic',
    input_dims=[148],
    n_actions=8,
    layer1_size=32,
    layer2_size=32,
)
print("q")
for server_param in Server_robot.actor.params:
    print(server_param)




Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use keras.layers.BatchNormalization instead.  In particular, `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` should not be used (consult the `tf.keras.layers.batch_normalization` documentation).

Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


q
<tf.Variable 'server_actor/dense/kernel:0' shape=(148, 32) dtype=float32_ref>
<tf.Variable 'server_actor/dense/bias:0' shape=(32,) dtype=float32_ref>
<tf.Variable 'server_actor/batch_normalization/gamma:0' shape=(32,) dtype=float32_ref>
<tf.Variable 'server_actor/batch_normalization/beta:0' shape=(32,) dtype=float32_ref>
<tf.Variable 'server_actor/dense_1/kernel:0' shape=(32, 32) dtype=float32_ref>
<tf.Variable 'server_actor/dense_1/bias:0' shape=(32,) dtype=float32_ref>
<tf.Variable 'server_actor/batch_normalization_1/gamma:0' shape=(32,) dtype=float32

In [6]:
import numpy as np
numberOfRobots = 5

# Draw every robot's 8 action values first, then every robot's 2 move values,
# all uniformly between -1.0 and 1.0 (same draw order as a per-robot loop).
act_robots = np.random.uniform(-1.0, 1.0, size=(numberOfRobots, 8))
act_robot_moves = np.random.uniform(-1.0, 1.0, size=(numberOfRobots, 2))

# Flatten both tables row by row into 1-D vectors.
act_robots = act_robots.ravel()
act_robot_moves = act_robot_moves.ravel()

print(act_robot_moves)

[ 0.10072173 -0.98242935 -0.42917585 -0.06212133  0.2034406  -0.43761538
  0.21211754 -0.27313871 -0.59773997 -0.53521827]
