From af07a7df25651e711ef1be0fda125af3a4c694b0 Mon Sep 17 00:00:00 2001
From: Ervin Teng <ervin@unity3d.com>
Date: Mon, 10 Jun 2019 18:35:22 -0700
Subject: [PATCH] Clear cumulative_returns_since_policy_update

Previously, the mean rewards in the CSV file lagged far behind the rest of the code because this buffer was never cleared.
---
 ml-agents/mlagents/trainers/ppo/trainer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
index d8864d7552..e9e917f1ef 100644
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -422,6 +422,7 @@ def update_policy(self):
             number_experiences=len(self.training_buffer.update_buffer["actions"]),
             mean_return=float(np.mean(self.cumulative_returns_since_policy_update)),
         )
+        self.cumulative_returns_since_policy_update = []
         n_sequences = max(
             int(self.trainer_parameters["batch_size"] / self.policy.sequence_length), 1
         )