From 95a51cd005fce59392c0615a638425b204650e45 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Mon, 26 Oct 2020 16:27:39 -0700 Subject: [PATCH 1/4] use int64 steps, check for NaN actions still needs tests and handle torch --- ml-agents/mlagents/trainers/policy/tf_policy.py | 10 +++++++++- ml-agents/mlagents/trainers/tf/models.py | 8 ++++---- .../mlagents/trainers/tf/tensorflow_to_barracuda.py | 2 ++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py index 47789d1e92..80c36c47c2 100644 --- a/ml-agents/mlagents/trainers/policy/tf_policy.py +++ b/ml-agents/mlagents/trainers/policy/tf_policy.py @@ -270,8 +270,16 @@ def get_action( ) self.save_memories(global_agent_ids, run_out.get("memory_out")) + action = run_out.get("action") + # Fast NaN check on the action + # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background. + d = np.sum(action) + has_nan = np.isnan(d) + if has_nan: + raise RuntimeError("NaN action detected.") + return ActionInfo( - action=run_out.get("action"), + action=action, value=run_out.get("value"), outputs=run_out, agent_ids=decision_requests.agent_id, diff --git a/ml-agents/mlagents/trainers/tf/models.py b/ml-agents/mlagents/trainers/tf/models.py index 679687ea19..8cbf08b866 100644 --- a/ml-agents/mlagents/trainers/tf/models.py +++ b/ml-agents/mlagents/trainers/tf/models.py @@ -42,10 +42,10 @@ class ModelUtils: def create_global_steps(): """Creates TF ops to track and increment global training step.""" global_step = tf.Variable( - 0, name="global_step", trainable=False, dtype=tf.int32 + 0, name="global_step", trainable=False, dtype=tf.int64 ) steps_to_increment = tf.placeholder( - shape=[], dtype=tf.int32, name="steps_to_increment" + shape=[], dtype=tf.int64, name="steps_to_increment" ) increment_step = tf.assign(global_step, tf.add(global_step, steps_to_increment)) return global_step, increment_step, 
steps_to_increment @@ -195,7 +195,7 @@ def create_normalizer(vector_obs: tf.Tensor) -> NormalizerTensors: "normalization_steps", [], trainable=False, - dtype=tf.int32, + dtype=tf.int64, initializer=tf.zeros_initializer(), ) running_mean = tf.get_variable( @@ -244,7 +244,7 @@ def create_normalizer_update( # Based on Welford's algorithm for running mean and standard deviation, for batch updates. Discussion here: # https://stackoverflow.com/questions/56402955/whats-the-formula-for-welfords-algorithm-for-variance-std-with-batch-updates steps_increment = tf.shape(vector_input)[0] - total_new_steps = tf.add(steps, steps_increment) + total_new_steps = tf.add(steps, tf.cast(steps_increment, dtype=tf.int64)) # Compute the incremental update and divide by the number of new steps. input_to_old_mean = tf.subtract(vector_input, running_mean) diff --git a/ml-agents/mlagents/trainers/tf/tensorflow_to_barracuda.py b/ml-agents/mlagents/trainers/tf/tensorflow_to_barracuda.py index 80d5371812..59838bf021 100644 --- a/ml-agents/mlagents/trainers/tf/tensorflow_to_barracuda.py +++ b/ml-agents/mlagents/trainers/tf/tensorflow_to_barracuda.py @@ -702,6 +702,8 @@ def get_tensor_data(tensor): data = tensor.float_val if tensor.int_val: data = np.array(tensor.int_val, dtype=float) + if tensor.int64_val: + data = np.array(tensor.int64_val, dtype=float) if tensor.bool_val: data = np.array(tensor.bool_val, dtype=float) return np.array(data).reshape(dims) From 3d0607bb11cfae0c11461a6178e26a879383c3e3 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Mon, 26 Oct 2020 17:37:25 -0700 Subject: [PATCH 2/4] fix unit test --- ml-agents/mlagents/trainers/policy/tf_policy.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py index 80c36c47c2..f10299dd90 100644 --- a/ml-agents/mlagents/trainers/policy/tf_policy.py +++ b/ml-agents/mlagents/trainers/policy/tf_policy.py @@ -273,10 +273,11 @@ 
def get_action( action = run_out.get("action") # Fast NaN check on the action # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background. - d = np.sum(action) - has_nan = np.isnan(d) - if has_nan: - raise RuntimeError("NaN action detected.") + if action is not None: + d = np.sum(action) + has_nan = np.isnan(d) + if has_nan: + raise RuntimeError("NaN action detected.") return ActionInfo( action=action, From bcc0ba00e9c6c91d0521333d4841d929d8005479 Mon Sep 17 00:00:00 2001 From: Ruo-Ping Dong Date: Thu, 12 Nov 2020 18:35:00 -0500 Subject: [PATCH 3/4] Check int overflow/ nan action for torch and add tests (#4646) * check nan action for torch * step overflow test * use int tensor for global step in torch --- ml-agents/mlagents/trainers/policy/policy.py | 10 ++++++++++ .../mlagents/trainers/policy/tf_policy.py | 11 ++-------- .../mlagents/trainers/policy/torch_policy.py | 1 + .../tests/tensorflow/test_nn_policy.py | 20 +++++++++++++++++++ .../trainers/tests/torch/test_policy.py | 8 ++++++++ ml-agents/mlagents/trainers/torch/networks.py | 4 +++- 6 files changed, 44 insertions(+), 10 deletions(-) diff --git a/ml-agents/mlagents/trainers/policy/policy.py b/ml-agents/mlagents/trainers/policy/policy.py index 0e23f10ec8..833caecf9f 100644 --- a/ml-agents/mlagents/trainers/policy/policy.py +++ b/ml-agents/mlagents/trainers/policy/policy.py @@ -132,6 +132,16 @@ def get_action( ) -> ActionInfo: raise NotImplementedError + @staticmethod + def check_nan_action(action: Optional[np.ndarray]) -> None: + # Fast NaN check on the action + # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background. 
+ if action is not None: + d = np.sum(action) + has_nan = np.isnan(d) + if has_nan: + raise RuntimeError("NaN action detected.") + @abstractmethod def update_normalization(self, vector_obs: np.ndarray) -> None: pass diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py index f10299dd90..7c35d01005 100644 --- a/ml-agents/mlagents/trainers/policy/tf_policy.py +++ b/ml-agents/mlagents/trainers/policy/tf_policy.py @@ -270,17 +270,10 @@ def get_action( ) self.save_memories(global_agent_ids, run_out.get("memory_out")) - action = run_out.get("action") - # Fast NaN check on the action - # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background. - if action is not None: - d = np.sum(action) - has_nan = np.isnan(d) - if has_nan: - raise RuntimeError("NaN action detected.") + self.check_nan_action(run_out.get("action")) return ActionInfo( - action=action, + action=run_out.get("action"), value=run_out.get("value"), outputs=run_out, agent_ids=decision_requests.agent_id, diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py index 7e7fe521d5..5e6e07b674 100644 --- a/ml-agents/mlagents/trainers/policy/torch_policy.py +++ b/ml-agents/mlagents/trainers/policy/torch_policy.py @@ -229,6 +229,7 @@ def get_action( decision_requests, global_agent_ids ) # pylint: disable=assignment-from-no-return self.save_memories(global_agent_ids, run_out.get("memory_out")) + self.check_nan_action(run_out.get("action")) return ActionInfo( action=run_out.get("action"), value=run_out.get("value"), diff --git a/ml-agents/mlagents/trainers/tests/tensorflow/test_nn_policy.py b/ml-agents/mlagents/trainers/tests/tensorflow/test_nn_policy.py index 3308df44ec..6134619e8e 100644 --- a/ml-agents/mlagents/trainers/tests/tensorflow/test_nn_policy.py +++ b/ml-agents/mlagents/trainers/tests/tensorflow/test_nn_policy.py @@ -265,5 +265,25 @@ def 
test_min_visual_size(): enc_func(vis_input, 32, ModelUtils.swish, 1, "test", False) +def test_step_overflow(): + behavior_spec = mb.setup_test_behavior_specs( + use_discrete=True, use_visual=False, vector_action_space=[2], vector_obs_space=1 + ) + + policy = TFPolicy( + 0, + behavior_spec, + TrainerSettings(network_settings=NetworkSettings(normalize=True)), + create_tf_graph=False, + ) + policy.create_input_placeholders() + policy.initialize() + + policy.set_step(2 ** 31 - 1) + assert policy.get_current_step() == 2 ** 31 - 1 + policy.increment_step(3) + assert policy.get_current_step() == 2 ** 31 + 2 + + if __name__ == "__main__": pytest.main() diff --git a/ml-agents/mlagents/trainers/tests/torch/test_policy.py b/ml-agents/mlagents/trainers/tests/torch/test_policy.py index 192d0dd229..b21af43fa5 100644 --- a/ml-agents/mlagents/trainers/tests/torch/test_policy.py +++ b/ml-agents/mlagents/trainers/tests/torch/test_policy.py @@ -140,3 +140,11 @@ def test_sample_actions(rnn, visual, discrete): if rnn: assert memories.shape == (1, 1, policy.m_size) + + +def test_step_overflow(): + policy = create_policy_mock(TrainerSettings()) + policy.set_step(2 ** 31 - 1) + assert policy.get_current_step() == 2 ** 31 - 1 # step = 2147483647 + policy.increment_step(3) + assert policy.get_current_step() == 2 ** 31 + 2 # step = 2147483650 diff --git a/ml-agents/mlagents/trainers/torch/networks.py b/ml-agents/mlagents/trainers/torch/networks.py index b89029e404..e9d4a5d96f 100644 --- a/ml-agents/mlagents/trainers/torch/networks.py +++ b/ml-agents/mlagents/trainers/torch/networks.py @@ -488,7 +488,9 @@ def update_normalization(self, vector_obs: List[torch.Tensor]) -> None: class GlobalSteps(nn.Module): def __init__(self): super().__init__() - self.__global_step = nn.Parameter(torch.Tensor([0]), requires_grad=False) + self.__global_step = nn.Parameter( + torch.Tensor([0]).to(torch.int64), requires_grad=False + ) @property def current_step(self): From 0c9d4b8560f77501c810f8951d05fd29c249381a 
Mon Sep 17 00:00:00 2001 From: Ruo-Ping Dong Date: Fri, 13 Nov 2020 11:29:26 -0800 Subject: [PATCH 4/4] Update changelog --- com.unity.ml-agents/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 59030104a8..a9bb1b6b58 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -47,6 +47,8 @@ if they are called recursively (for example, if they call `Agent.EndEpisode()`). Previously, this would result in an infinite loop and cause the editor to hang. (#4573) #### ml-agents / ml-agents-envs / gym-unity (Python) - Fixed an issue where runs could not be resumed when using TensorFlow and Ghost Training. (#4593) +- Changed the tensor type of the step count from int32 to int64 to fix an overflow when the step count +exceeds 2^31 - 1. Existing TensorFlow checkpoints are incompatible with this change and cannot be loaded. (#4607) ## [1.5.0-preview] - 2020-10-14