From 95a51cd005fce59392c0615a638425b204650e45 Mon Sep 17 00:00:00 2001
From: Chris Elion <chris.elion@unity3d.com>
Date: Mon, 26 Oct 2020 16:27:39 -0700
Subject: [PATCH 1/4] use int64 steps, check for NaN actions

Still needs tests and equivalent handling for torch.
---
 ml-agents/mlagents/trainers/policy/tf_policy.py        | 10 +++++++++-
 ml-agents/mlagents/trainers/tf/models.py               |  8 ++++----
 .../mlagents/trainers/tf/tensorflow_to_barracuda.py    |  2 ++
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py
index 47789d1e92..80c36c47c2 100644
--- a/ml-agents/mlagents/trainers/policy/tf_policy.py
+++ b/ml-agents/mlagents/trainers/policy/tf_policy.py
@@ -270,8 +270,16 @@ def get_action(
         )
 
         self.save_memories(global_agent_ids, run_out.get("memory_out"))
+        action = run_out.get("action")
+        # Fast NaN check on the action
+        # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background.
+        d = np.sum(action)
+        has_nan = np.isnan(d)
+        if has_nan:
+            raise RuntimeError("NaN action detected.")
+
         return ActionInfo(
-            action=run_out.get("action"),
+            action=action,
             value=run_out.get("value"),
             outputs=run_out,
             agent_ids=decision_requests.agent_id,
diff --git a/ml-agents/mlagents/trainers/tf/models.py b/ml-agents/mlagents/trainers/tf/models.py
index 679687ea19..8cbf08b866 100644
--- a/ml-agents/mlagents/trainers/tf/models.py
+++ b/ml-agents/mlagents/trainers/tf/models.py
@@ -42,10 +42,10 @@ class ModelUtils:
     def create_global_steps():
         """Creates TF ops to track and increment global training step."""
         global_step = tf.Variable(
-            0, name="global_step", trainable=False, dtype=tf.int32
+            0, name="global_step", trainable=False, dtype=tf.int64
         )
         steps_to_increment = tf.placeholder(
-            shape=[], dtype=tf.int32, name="steps_to_increment"
+            shape=[], dtype=tf.int64, name="steps_to_increment"
         )
         increment_step = tf.assign(global_step, tf.add(global_step, steps_to_increment))
         return global_step, increment_step, steps_to_increment
@@ -195,7 +195,7 @@ def create_normalizer(vector_obs: tf.Tensor) -> NormalizerTensors:
             "normalization_steps",
             [],
             trainable=False,
-            dtype=tf.int32,
+            dtype=tf.int64,
             initializer=tf.zeros_initializer(),
         )
         running_mean = tf.get_variable(
@@ -244,7 +244,7 @@ def create_normalizer_update(
         # Based on Welford's algorithm for running mean and standard deviation, for batch updates. Discussion here:
         # https://stackoverflow.com/questions/56402955/whats-the-formula-for-welfords-algorithm-for-variance-std-with-batch-updates
         steps_increment = tf.shape(vector_input)[0]
-        total_new_steps = tf.add(steps, steps_increment)
+        total_new_steps = tf.add(steps, tf.cast(steps_increment, dtype=tf.int64))
 
         # Compute the incremental update and divide by the number of new steps.
         input_to_old_mean = tf.subtract(vector_input, running_mean)
diff --git a/ml-agents/mlagents/trainers/tf/tensorflow_to_barracuda.py b/ml-agents/mlagents/trainers/tf/tensorflow_to_barracuda.py
index 80d5371812..59838bf021 100644
--- a/ml-agents/mlagents/trainers/tf/tensorflow_to_barracuda.py
+++ b/ml-agents/mlagents/trainers/tf/tensorflow_to_barracuda.py
@@ -702,6 +702,8 @@ def get_tensor_data(tensor):
         data = tensor.float_val
     if tensor.int_val:
         data = np.array(tensor.int_val, dtype=float)
+    if tensor.int64_val:
+        data = np.array(tensor.int64_val, dtype=float)
     if tensor.bool_val:
         data = np.array(tensor.bool_val, dtype=float)
     return np.array(data).reshape(dims)

From 3d0607bb11cfae0c11461a6178e26a879383c3e3 Mon Sep 17 00:00:00 2001
From: Chris Elion <chris.elion@unity3d.com>
Date: Mon, 26 Oct 2020 17:37:25 -0700
Subject: [PATCH 2/4] fix unit test

---
 ml-agents/mlagents/trainers/policy/tf_policy.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py
index 80c36c47c2..f10299dd90 100644
--- a/ml-agents/mlagents/trainers/policy/tf_policy.py
+++ b/ml-agents/mlagents/trainers/policy/tf_policy.py
@@ -273,10 +273,11 @@ def get_action(
         action = run_out.get("action")
         # Fast NaN check on the action
         # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background.
-        d = np.sum(action)
-        has_nan = np.isnan(d)
-        if has_nan:
-            raise RuntimeError("NaN action detected.")
+        if action is not None:
+            d = np.sum(action)
+            has_nan = np.isnan(d)
+            if has_nan:
+                raise RuntimeError("NaN action detected.")
 
         return ActionInfo(
             action=action,

From bcc0ba00e9c6c91d0521333d4841d929d8005479 Mon Sep 17 00:00:00 2001
From: Ruo-Ping Dong <ruoping.dong@unity3d.com>
Date: Thu, 12 Nov 2020 18:35:00 -0500
Subject: [PATCH 3/4] Check int overflow / NaN action for torch and add tests
 (#4646)

* check nan action for torch

* step overflow test

* use int tensor for global step in torch
---
 ml-agents/mlagents/trainers/policy/policy.py  | 10 ++++++++++
 .../mlagents/trainers/policy/tf_policy.py     | 11 ++--------
 .../mlagents/trainers/policy/torch_policy.py  |  1 +
 .../tests/tensorflow/test_nn_policy.py        | 20 +++++++++++++++++++
 .../trainers/tests/torch/test_policy.py       |  8 ++++++++
 ml-agents/mlagents/trainers/torch/networks.py |  4 +++-
 6 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/ml-agents/mlagents/trainers/policy/policy.py b/ml-agents/mlagents/trainers/policy/policy.py
index 0e23f10ec8..833caecf9f 100644
--- a/ml-agents/mlagents/trainers/policy/policy.py
+++ b/ml-agents/mlagents/trainers/policy/policy.py
@@ -132,6 +132,16 @@ def get_action(
     ) -> ActionInfo:
         raise NotImplementedError
 
+    @staticmethod
+    def check_nan_action(action: Optional[np.ndarray]) -> None:
+        # Fast NaN check: NaN propagates through np.sum, so one scalar isnan() covers every element.
+        # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background.
+        if action is not None:
+            d = np.sum(action)
+            has_nan = np.isnan(d)
+            if has_nan:
+                raise RuntimeError("NaN action detected.")
+
     @abstractmethod
     def update_normalization(self, vector_obs: np.ndarray) -> None:
         pass
diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py
index f10299dd90..7c35d01005 100644
--- a/ml-agents/mlagents/trainers/policy/tf_policy.py
+++ b/ml-agents/mlagents/trainers/policy/tf_policy.py
@@ -270,17 +270,10 @@ def get_action(
         )
 
         self.save_memories(global_agent_ids, run_out.get("memory_out"))
-        action = run_out.get("action")
-        # Fast NaN check on the action
-        # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background.
-        if action is not None:
-            d = np.sum(action)
-            has_nan = np.isnan(d)
-            if has_nan:
-                raise RuntimeError("NaN action detected.")
+        self.check_nan_action(run_out.get("action"))
 
         return ActionInfo(
-            action=action,
+            action=run_out.get("action"),
             value=run_out.get("value"),
             outputs=run_out,
             agent_ids=decision_requests.agent_id,
diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py
index 7e7fe521d5..5e6e07b674 100644
--- a/ml-agents/mlagents/trainers/policy/torch_policy.py
+++ b/ml-agents/mlagents/trainers/policy/torch_policy.py
@@ -229,6 +229,7 @@ def get_action(
             decision_requests, global_agent_ids
         )  # pylint: disable=assignment-from-no-return
         self.save_memories(global_agent_ids, run_out.get("memory_out"))
+        self.check_nan_action(run_out.get("action"))
         return ActionInfo(
             action=run_out.get("action"),
             value=run_out.get("value"),
diff --git a/ml-agents/mlagents/trainers/tests/tensorflow/test_nn_policy.py b/ml-agents/mlagents/trainers/tests/tensorflow/test_nn_policy.py
index 3308df44ec..6134619e8e 100644
--- a/ml-agents/mlagents/trainers/tests/tensorflow/test_nn_policy.py
+++ b/ml-agents/mlagents/trainers/tests/tensorflow/test_nn_policy.py
@@ -265,5 +265,25 @@ def test_min_visual_size():
                 enc_func(vis_input, 32, ModelUtils.swish, 1, "test", False)
 
 
+def test_step_overflow():
+    behavior_spec = mb.setup_test_behavior_specs(
+        use_discrete=True, use_visual=False, vector_action_space=[2], vector_obs_space=1
+    )
+
+    policy = TFPolicy(
+        0,
+        behavior_spec,
+        TrainerSettings(network_settings=NetworkSettings(normalize=True)),
+        create_tf_graph=False,
+    )
+    policy.create_input_placeholders()
+    policy.initialize()
+
+    policy.set_step(2 ** 31 - 1)
+    assert policy.get_current_step() == 2 ** 31 - 1
+    policy.increment_step(3)
+    assert policy.get_current_step() == 2 ** 31 + 2
+
+
 if __name__ == "__main__":
     pytest.main()
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_policy.py b/ml-agents/mlagents/trainers/tests/torch/test_policy.py
index 192d0dd229..b21af43fa5 100644
--- a/ml-agents/mlagents/trainers/tests/torch/test_policy.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_policy.py
@@ -140,3 +140,11 @@ def test_sample_actions(rnn, visual, discrete):
 
     if rnn:
         assert memories.shape == (1, 1, policy.m_size)
+
+
+def test_step_overflow():
+    policy = create_policy_mock(TrainerSettings())
+    policy.set_step(2 ** 31 - 1)
+    assert policy.get_current_step() == 2 ** 31 - 1  # step = 2147483647
+    policy.increment_step(3)
+    assert policy.get_current_step() == 2 ** 31 + 2  # step = 2147483650
diff --git a/ml-agents/mlagents/trainers/torch/networks.py b/ml-agents/mlagents/trainers/torch/networks.py
index b89029e404..e9d4a5d96f 100644
--- a/ml-agents/mlagents/trainers/torch/networks.py
+++ b/ml-agents/mlagents/trainers/torch/networks.py
@@ -488,7 +488,9 @@ def update_normalization(self, vector_obs: List[torch.Tensor]) -> None:
 class GlobalSteps(nn.Module):
     def __init__(self):
         super().__init__()
-        self.__global_step = nn.Parameter(torch.Tensor([0]), requires_grad=False)
+        self.__global_step = nn.Parameter(
+            torch.Tensor([0]).to(torch.int64), requires_grad=False
+        )
 
     @property
     def current_step(self):

From 0c9d4b8560f77501c810f8951d05fd29c249381a Mon Sep 17 00:00:00 2001
From: Ruo-Ping Dong <ruoping.dong@unity3d.com>
Date: Fri, 13 Nov 2020 11:29:26 -0800
Subject: [PATCH 4/4] Update changelog

---
 com.unity.ml-agents/CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
index 59030104a8..a9bb1b6b58 100755
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
@@ -47,6 +47,8 @@ if they are called recursively (for example, if they call `Agent.EndEpisode()`).
 Previously, this would result in an infinite loop and cause the editor to hang. (#4573)
 #### ml-agents / ml-agents-envs / gym-unity (Python)
 - Fixed an issue where runs could not be resumed when using TensorFlow and Ghost Training. (#4593)
+- Changed the tensor type of the step count from int32 to int64 to fix an overflow when the step
+count exceeds 2^31 - 1. Previous TensorFlow checkpoints are incompatible and cannot be loaded. (#4607)
 
 
 ## [1.5.0-preview] - 2020-10-14