From 3dafe5c7eeb4c5034ecd3ab4b6697702feb7f01d Mon Sep 17 00:00:00 2001
From: Arthur Juliani
Date: Fri, 15 Jun 2018 14:22:36 -0700
Subject: [PATCH] Fix for Discrete observations + Curiosity

---
 python/unitytrainers/models.py      |  2 +-
 python/unitytrainers/ppo/models.py  | 38 ++++++++++++++++++++---------
 python/unitytrainers/ppo/trainer.py |  3 +++
 3 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/python/unitytrainers/models.py b/python/unitytrainers/models.py
index 0e673d67b6..83574a93c2 100755
--- a/python/unitytrainers/models.py
+++ b/python/unitytrainers/models.py
@@ -150,7 +150,7 @@ def create_discrete_observation_encoder(observation_input, s_size, h_size, activ
         :param num_layers: number of hidden layers to create.
         :return: List of hidden layer tensors.
         """
-        with tf.name_scope(scope):
+        with tf.variable_scope(scope):
             vector_in = tf.reshape(observation_input, [-1])
             state_onehot = tf.one_hot(vector_in, s_size)
             hidden = state_onehot
diff --git a/python/unitytrainers/ppo/models.py b/python/unitytrainers/ppo/models.py
index 12601c0a96..a06875e0e0 100644
--- a/python/unitytrainers/ppo/models.py
+++ b/python/unitytrainers/ppo/models.py
@@ -91,18 +91,34 @@ def create_curiosity_encoders(self):
                 encoded_next_state_list.append(hidden_next_visual)
 
         if self.o_size > 0:
-            # Create input op for next (t+1) vector observation.
-            self.next_vector_in = tf.placeholder(shape=[None, self.o_size], dtype=tf.float32,
-                                                 name='next_vector_observation')
 
             # Create the encoder ops for current and next vector input. Not that these encoders are siamese.
-            encoded_vector_obs = self.create_continuous_observation_encoder(self.vector_in,
-                                                                            self.curiosity_enc_size,
-                                                                            self.swish, 2, "vector_obs_encoder",
-                                                                            False)
-            encoded_next_vector_obs = self.create_continuous_observation_encoder(self.next_vector_in,
-                                                                                 self.curiosity_enc_size,
-                                                                                 self.swish, 2, "vector_obs_encoder",
-                                                                                 True)
+            if self.brain.vector_observation_space_type == "continuous":
+                # Create input op for next (t+1) vector observation.
+                self.next_vector_in = tf.placeholder(shape=[None, self.o_size], dtype=tf.float32,
+                                                     name='next_vector_observation')
+
+                encoded_vector_obs = self.create_continuous_observation_encoder(self.vector_in,
+                                                                                self.curiosity_enc_size,
+                                                                                self.swish, 2, "vector_obs_encoder",
+                                                                                False)
+                encoded_next_vector_obs = self.create_continuous_observation_encoder(self.next_vector_in,
+                                                                                     self.curiosity_enc_size,
+                                                                                     self.swish, 2,
+                                                                                     "vector_obs_encoder",
+                                                                                     True)
+            else:
+                self.next_vector_in = tf.placeholder(shape=[None, 1], dtype=tf.int32,
+                                                     name='next_vector_observation')
+
+                encoded_vector_obs = self.create_discrete_observation_encoder(self.vector_in, self.o_size,
+                                                                              self.curiosity_enc_size,
+                                                                              self.swish, 2, "vector_obs_encoder",
+                                                                              False)
+                encoded_next_vector_obs = self.create_discrete_observation_encoder(self.next_vector_in, self.o_size,
+                                                                                   self.curiosity_enc_size,
+                                                                                   self.swish, 2, "vector_obs_encoder",
+                                                                                   True)
 
             encoded_state_list.append(encoded_vector_obs)
             encoded_next_state_list.append(encoded_next_vector_obs)
@@ -138,7 +154,7 @@ def create_forward_model(self, encoded_state, encoded_next_state):
         combined_input = tf.concat([encoded_state, self.selected_actions], axis=1)
         hidden = tf.layers.dense(combined_input, 256, activation=self.swish)
         # We compare against the concatenation of all observation streams, hence `self.v_size+1`.
-        pred_next_state = tf.layers.dense(hidden, self.curiosity_enc_size * (self.v_size+1), activation=None)
+        pred_next_state = tf.layers.dense(hidden, self.curiosity_enc_size * (self.v_size + 1), activation=None)
 
         squared_difference = 0.5 * tf.reduce_sum(tf.squared_difference(pred_next_state, encoded_next_state), axis=1)
         self.intrinsic_reward = tf.clip_by_value(self.curiosity_strength * squared_difference, 0, 1)
diff --git a/python/unitytrainers/ppo/trainer.py b/python/unitytrainers/ppo/trainer.py
index fa583e7e07..549c5e9d63 100755
--- a/python/unitytrainers/ppo/trainer.py
+++ b/python/unitytrainers/ppo/trainer.py
@@ -432,6 +432,9 @@ def update_model(self):
             else:
                 feed_dict[self.model.vector_in] = np.array(buffer['vector_obs'][start:end]).reshape(
                     [-1, self.brain.num_stacked_vector_observations])
+                if self.use_curiosity:
+                    feed_dict[self.model.next_vector_in] = np.array(buffer['next_vector_in'][start:end]) \
+                        .reshape([-1, self.brain.num_stacked_vector_observations])
             if self.use_visual_obs:
                 for i, _ in enumerate(self.model.visual_in):
                     _obs = np.array(buffer['visual_obs%d' % i][start:end])
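Note on the tf.name_scope -> tf.variable_scope change in models.py: the curiosity module builds its vector-observation encoders as a siamese pair, calling the same encoder function twice with the same scope string and reuse=True on the second call. In TF 1.x, tf.name_scope prefixes only op names and is ignored when variables are created, so the scope argument never reached the variable names: encoders built under different scopes could collide on, or silently share, the same variables. tf.variable_scope prefixes the variable names themselves, which makes the scope/reuse arguments behave as intended. A minimal sketch of the pattern, with hypothetical names (encode, "main_encoder", "curiosity_encoder" are illustrative, not the ml-agents code):

    import tensorflow as tf  # TF 1.x, as used at this commit

    def encode(x, scope, reuse):
        # tf.variable_scope prefixes *variable* names, so "main_encoder/hidden/kernel"
        # and "curiosity_encoder/hidden/kernel" stay distinct, while reuse=True on a
        # second call shares the first call's weights (the siamese pattern).
        # Under tf.name_scope the prefix is dropped from variable names, and both
        # scopes would contend for the same "hidden/kernel" variable.
        with tf.variable_scope(scope, reuse=reuse):
            return tf.layers.dense(x, 128, activation=tf.nn.elu, name="hidden")

    obs = tf.placeholder(tf.float32, [None, 8], name="obs")
    next_obs = tf.placeholder(tf.float32, [None, 8], name="next_obs")

    enc_main = encode(obs, "main_encoder", reuse=False)        # its own weights
    enc_state = encode(obs, "curiosity_encoder", reuse=False)  # fresh weights
    enc_next_state = encode(next_obs, "curiosity_encoder", reuse=True)  # shared with enc_state

The same reasoning applies to the discrete branch added above: create_discrete_observation_encoder is called twice with the "vector_obs_encoder" scope and reuse False/True, which only yields a shared encoder pair once the function opens a tf.variable_scope.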
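For the forward-model hunk, the intrinsic reward computed from the prediction error reads as r = clip(strength * 0.5 * ||pred_next_state - encoded_next_state||^2, 0, 1), one scalar per batch entry. A NumPy sketch of just that arithmetic (the batch size, encoding width, and curiosity_strength value below are made up for illustration):

    import numpy as np

    curiosity_strength = 0.01                    # hypothetical config value
    pred_next_state = np.random.randn(4, 128)    # batch of 4, encoding width 128
    encoded_next_state = np.random.randn(4, 128)

    # Mirrors 0.5 * tf.reduce_sum(tf.squared_difference(...), axis=1)
    squared_difference = 0.5 * np.sum((pred_next_state - encoded_next_state) ** 2, axis=1)
    # Mirrors tf.clip_by_value(self.curiosity_strength * squared_difference, 0, 1)
    intrinsic_reward = np.clip(curiosity_strength * squared_difference, 0, 1)

The encoding width is curiosity_enc_size * (v_size + 1): one curiosity_enc_size-wide block per visual stream plus one for the vector-observation stream, which is why the discrete encoder above is also built with curiosity_enc_size hidden units.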