diff --git a/python/unitytrainers/models.py b/python/unitytrainers/models.py index 0e673d67b6..83574a93c2 100755 --- a/python/unitytrainers/models.py +++ b/python/unitytrainers/models.py @@ -150,7 +150,7 @@ def create_discrete_observation_encoder(observation_input, s_size, h_size, activ :param num_layers: number of hidden layers to create. :return: List of hidden layer tensors. """ - with tf.name_scope(scope): + with tf.variable_scope(scope): vector_in = tf.reshape(observation_input, [-1]) state_onehot = tf.one_hot(vector_in, s_size) hidden = state_onehot diff --git a/python/unitytrainers/ppo/models.py b/python/unitytrainers/ppo/models.py index 9cc6093b28..0943b68ac5 100644 --- a/python/unitytrainers/ppo/models.py +++ b/python/unitytrainers/ppo/models.py @@ -91,18 +91,34 @@ def create_curiosity_encoders(self): encoded_next_state_list.append(hidden_next_visual) if self.o_size > 0: - # Create input op for next (t+1) vector observation. - self.next_vector_in = tf.placeholder(shape=[None, self.o_size], dtype=tf.float32, - name='next_vector_observation') # Create the encoder ops for current and next vector input. Not that these encoders are siamese. - encoded_vector_obs = self.create_continuous_observation_encoder(self.vector_in, - self.curiosity_enc_size, - self.swish, 2, "vector_obs_encoder", False) - encoded_next_vector_obs = self.create_continuous_observation_encoder(self.next_vector_in, - self.curiosity_enc_size, - self.swish, 2, "vector_obs_encoder", - True) + if self.brain.vector_observation_space_type == "continuous": + # Create input op for next (t+1) vector observation. + self.next_vector_in = tf.placeholder(shape=[None, self.o_size], dtype=tf.float32, + name='next_vector_observation') + + encoded_vector_obs = self.create_continuous_observation_encoder(self.vector_in, + self.curiosity_enc_size, + self.swish, 2, "vector_obs_encoder", + False) + encoded_next_vector_obs = self.create_continuous_observation_encoder(self.next_vector_in, + self.curiosity_enc_size, + self.swish, 2, + "vector_obs_encoder", + True) + else: + self.next_vector_in = tf.placeholder(shape=[None, 1], dtype=tf.int32, + name='next_vector_observation') + + encoded_vector_obs = self.create_discrete_observation_encoder(self.vector_in, self.o_size, + self.curiosity_enc_size, + self.swish, 2, "vector_obs_encoder", + False) + encoded_next_vector_obs = self.create_discrete_observation_encoder(self.next_vector_in, self.o_size, + self.curiosity_enc_size, + self.swish, 2, "vector_obs_encoder", + True) encoded_state_list.append(encoded_vector_obs) encoded_next_state_list.append(encoded_next_vector_obs) diff --git a/python/unitytrainers/ppo/trainer.py b/python/unitytrainers/ppo/trainer.py index fa583e7e07..549c5e9d63 100755 --- a/python/unitytrainers/ppo/trainer.py +++ b/python/unitytrainers/ppo/trainer.py @@ -432,6 +432,9 @@ def update_model(self): else: feed_dict[self.model.vector_in] = np.array(buffer['vector_obs'][start:end]).reshape( [-1, self.brain.num_stacked_vector_observations]) + if self.use_curiosity: + feed_dict[self.model.next_vector_in] = np.array(buffer['next_vector_in'][start:end]) \ + .reshape([-1, self.brain.num_stacked_vector_observations]) if self.use_visual_obs: for i, _ in enumerate(self.model.visual_in): _obs = np.array(buffer['visual_obs%d' % i][start:end])