diff --git a/python/trainer_config.yaml b/python/trainer_config.yaml
index d2ac3bca79..dd76dbf9a4 100644
--- a/python/trainer_config.yaml
+++ b/python/trainer_config.yaml
@@ -130,12 +130,6 @@ Ball3DHardBrain:
     gamma: 0.995
     beta: 0.001
 
-BouncerBrain:
-    normalize: true
-    max_steps: 5.0e5
-    num_layers: 2
-    hidden_units: 56
-
 TennisBrain:
     normalize: true
 
diff --git a/python/unitytrainers/ppo/trainer.py b/python/unitytrainers/ppo/trainer.py
index 549c5e9d63..482d40180a 100755
--- a/python/unitytrainers/ppo/trainer.py
+++ b/python/unitytrainers/ppo/trainer.py
@@ -204,6 +204,9 @@ def generate_intrinsic_rewards(self, curr_info, next_info):
         :return: Intrinsic rewards for all agents.
         """
         if self.use_curiosity:
+            if curr_info.agents != next_info.agents:
+                raise UnityTrainerException("Training with Curiosity-driven exploration"
+                                            " and On-Demand Decision making is currently not supported.")
             feed_dict = {self.model.batch_size: len(curr_info.vector_observations), self.model.sequence_length: 1}
             if self.is_continuous_action:
                 feed_dict[self.model.output] = next_info.previous_vector_actions