diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index d8864d7552..e9e917f1ef 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -422,6 +422,7 @@ def update_policy(self): number_experiences=len(self.training_buffer.update_buffer["actions"]), mean_return=float(np.mean(self.cumulative_returns_since_policy_update)), ) + self.cumulative_returns_since_policy_update = [] n_sequences = max( int(self.trainer_parameters["batch_size"] / self.policy.sequence_length), 1 )