diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index 401dc4b1fb..3e2b913207 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -254,7 +254,7 @@ def create_torch_policy( behavior_spec, self.trainer_settings, condition_sigma_on_obs=False, # Faster training for PPO - separate_critic=behavior_spec.action_spec.is_continuous(), + separate_critic=True, # Match network architecture with TF ) return policy