From 2cebcd3a7061578d570715d158c7dd2162f6a0f3 Mon Sep 17 00:00:00 2001
From: Arthur Juliani
Date: Fri, 15 Jun 2018 15:35:18 -0700
Subject: [PATCH] Fix for visual observation w/ curiosity

---
 python/unitytrainers/ppo/models.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/unitytrainers/ppo/models.py b/python/unitytrainers/ppo/models.py
index 12601c0a96..9cc6093b28 100644
--- a/python/unitytrainers/ppo/models.py
+++ b/python/unitytrainers/ppo/models.py
@@ -137,8 +137,9 @@ def create_forward_model(self, encoded_state, encoded_next_state):
         """
         combined_input = tf.concat([encoded_state, self.selected_actions], axis=1)
         hidden = tf.layers.dense(combined_input, 256, activation=self.swish)
-        # We compare against the concatenation of all observation streams, hence `self.v_size+1`.
-        pred_next_state = tf.layers.dense(hidden, self.curiosity_enc_size * (self.v_size+1), activation=None)
+        # We compare against the concatenation of all observation streams, hence `self.v_size + int(self.o_size > 0)`.
+        pred_next_state = tf.layers.dense(hidden, self.curiosity_enc_size * (self.v_size + int(self.o_size > 0)),
+                                          activation=None)
         squared_difference = 0.5 * tf.reduce_sum(tf.squared_difference(pred_next_state, encoded_next_state), axis=1)
         self.intrinsic_reward = tf.clip_by_value(self.curiosity_strength * squared_difference, 0, 1)
 
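
Note (not part of the patch): a minimal numpy sketch of the shape arithmetic the fix restores. `v_size` (number of visual observation streams), `o_size` (vector observation width), and `curiosity_enc_size` are the names used in the patched file; the batch size, concrete sizes, and dummy encodings below are illustrative assumptions, not the trainer's actual encoders. `encoded_next_state` concatenates one `curiosity_enc_size`-wide encoding per visual stream, plus one more only when vector observations exist, so the forward model's output layer must match that width:

    # Sketch of the output-width calculation; numpy stands in for the TF graph.
    import numpy as np

    curiosity_enc_size = 128
    v_size = 2   # two visual observation streams
    o_size = 0   # no vector observations -- the case the old code broke
    batch = 4    # illustrative batch size

    # One dummy encoding per visual stream; shapes are what matter here.
    encoded_streams = [np.zeros((batch, curiosity_enc_size)) for _ in range(v_size)]
    if o_size > 0:
        # Vector observations contribute one additional encoded stream.
        encoded_streams.append(np.zeros((batch, curiosity_enc_size)))

    encoded_next_state = np.concatenate(encoded_streams, axis=1)

    # Old width assumed a vector stream always exists:
    old_width = curiosity_enc_size * (v_size + 1)
    # Fixed width counts the vector stream only when o_size > 0:
    new_width = curiosity_enc_size * (v_size + int(o_size > 0))

    assert encoded_next_state.shape[1] == new_width   # 256: widths agree
    assert encoded_next_state.shape[1] != old_width   # 384: old code mismatched

With visual-only observations (`o_size == 0`), the old `self.v_size + 1` made `pred_next_state` one stream wider than `encoded_next_state`, so the `tf.squared_difference(pred_next_state, encoded_next_state)` call failed on the shape mismatch; counting the vector stream conditionally restores agreement.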