2 changes: 2 additions & 0 deletions com.unity.ml-agents/CHANGELOG.md
@@ -47,6 +47,8 @@ if they are called recursively (for example, if they call `Agent.EndEpisode()`).
Previously, this would result in an infinite loop and cause the editor to hang. (#4573)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- Fixed an issue where runs could not be resumed when using TensorFlow and Ghost Training. (#4593)
+- Changed the tensor type of the step count from int32 to int64 to address overflow when the step
+  count exceeds 2^31. Previous TensorFlow checkpoints become incompatible and cannot be loaded. (#4607)


## [1.5.0-preview] - 2020-10-14
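The practical impact of the dtype change, as a quick NumPy sketch (illustrative only, not part of the diff): a signed 32-bit step counter wraps once it passes 2^31 - 1, while int64 has headroom for any realistic training run.

```python
import numpy as np

# int32 tops out at 2**31 - 1; arithmetic past that wraps around
# (recent NumPy also emits a RuntimeWarning).
step = np.int32(2**31 - 1)
print(int(step))                # 2147483647
print(int(step + np.int32(3)))  # wraps to -2147483646 (the overflow being fixed)

# The same counter as int64 keeps counting correctly.
step64 = np.int64(2**31 - 1) + 3
print(int(step64))              # 2147483650
```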
10 changes: 10 additions & 0 deletions ml-agents/mlagents/trainers/policy/policy.py
@@ -141,6 +141,16 @@ def get_action(
    ) -> ActionInfo:
        raise NotImplementedError

+    @staticmethod
+    def check_nan_action(action: Optional[np.ndarray]) -> None:
+        # Fast NaN check on the action: NaN propagates through np.sum, so one
+        # isnan call on the scalar sum covers the whole array.
+        # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background.
+        if action is not None:
+            d = np.sum(action)
+            has_nan = np.isnan(d)
+            if has_nan:
+                raise RuntimeError("NaN action detected.")

    @abstractmethod
    def update_normalization(self, vector_obs: np.ndarray) -> None:
        pass
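Why summing works as a NaN check: NaN propagates through np.sum, so a single np.isnan on the scalar sum detects a NaN anywhere in the action without building a boolean mask over the whole array. A minimal illustration (hypothetical values):

```python
import numpy as np

action = np.array([0.1, float("nan"), 0.3])

# NaN propagates through the sum, so one scalar check suffices.
assert np.isnan(np.sum(action))

# Equivalent result, but allocates an intermediate mask for large arrays:
assert np.isnan(action).any()
```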
2 changes: 2 additions & 0 deletions ml-agents/mlagents/trainers/policy/tf_policy.py
@@ -270,6 +270,8 @@ def get_action(
        )

        self.save_memories(global_agent_ids, run_out.get("memory_out"))
+        self.check_nan_action(run_out.get("action"))

        return ActionInfo(
            action=run_out.get("action"),
            value=run_out.get("value"),
1 change: 1 addition & 0 deletions ml-agents/mlagents/trainers/policy/torch_policy.py
@@ -235,6 +235,7 @@ def get_action(
            decision_requests, global_agent_ids
        )  # pylint: disable=assignment-from-no-return
        self.save_memories(global_agent_ids, run_out.get("memory_out"))
+        self.check_nan_action(run_out.get("action"))
        return ActionInfo(
            action=run_out.get("action"),
            value=run_out.get("value"),
20 changes: 20 additions & 0 deletions ml-agents/mlagents/trainers/tests/tensorflow/test_nn_policy.py
@@ -265,5 +265,25 @@ def test_min_visual_size():
        enc_func(vis_input, 32, ModelUtils.swish, 1, "test", False)


+def test_step_overflow():
+    behavior_spec = mb.setup_test_behavior_specs(
+        use_discrete=True, use_visual=False, vector_action_space=[2], vector_obs_space=1
+    )

+    policy = TFPolicy(
+        0,
+        behavior_spec,
+        TrainerSettings(network_settings=NetworkSettings(normalize=True)),
+        create_tf_graph=False,
+    )
+    policy.create_input_placeholders()
+    policy.initialize()

+    policy.set_step(2 ** 31 - 1)
+    assert policy.get_current_step() == 2 ** 31 - 1
+    policy.increment_step(3)
+    assert policy.get_current_step() == 2 ** 31 + 2


if __name__ == "__main__":
    pytest.main()
8 changes: 8 additions & 0 deletions ml-agents/mlagents/trainers/tests/torch/test_policy.py
@@ -145,3 +145,11 @@ def test_sample_actions(rnn, visual, discrete):

    if rnn:
        assert memories.shape == (1, 1, policy.m_size)


+def test_step_overflow():
+    policy = create_policy_mock(TrainerSettings())
+    policy.set_step(2 ** 31 - 1)
+    assert policy.get_current_step() == 2 ** 31 - 1  # step = 2147483647
+    policy.increment_step(3)
+    assert policy.get_current_step() == 2 ** 31 + 2  # step = 2147483650
8 changes: 4 additions & 4 deletions ml-agents/mlagents/trainers/tf/models.py
@@ -42,10 +42,10 @@ class ModelUtils:
    def create_global_steps():
        """Creates TF ops to track and increment global training step."""
        global_step = tf.Variable(
-            0, name="global_step", trainable=False, dtype=tf.int32
+            0, name="global_step", trainable=False, dtype=tf.int64
        )
        steps_to_increment = tf.placeholder(
-            shape=[], dtype=tf.int32, name="steps_to_increment"
+            shape=[], dtype=tf.int64, name="steps_to_increment"
        )
        increment_step = tf.assign(global_step, tf.add(global_step, steps_to_increment))
        return global_step, increment_step, steps_to_increment
@@ -195,7 +195,7 @@ def create_normalizer(vector_obs: tf.Tensor) -> NormalizerTensors:
"normalization_steps",
[],
trainable=False,
dtype=tf.int32,
dtype=tf.int64,
initializer=tf.zeros_initializer(),
)
running_mean = tf.get_variable(
@@ -244,7 +244,7 @@ def create_normalizer_update(
        # Based on Welford's algorithm for running mean and standard deviation, for batch updates. Discussion here:
        # https://stackoverflow.com/questions/56402955/whats-the-formula-for-welfords-algorithm-for-variance-std-with-batch-updates
        steps_increment = tf.shape(vector_input)[0]
-        total_new_steps = tf.add(steps, steps_increment)
+        total_new_steps = tf.add(steps, tf.cast(steps_increment, dtype=tf.int64))

        # Compute the incremental update and divide by the number of new steps.
        input_to_old_mean = tf.subtract(vector_input, running_mean)
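For reference, the normalizer above implements the batch form of Welford's mean update: new_mean = old_mean + sum(x_i - old_mean) / total_new_steps, where the step count must now be cast because tf.shape returns int32 while the counter is int64. A NumPy sketch of just the mean update (simplified from the TF graph; variable names are mine):

```python
import numpy as np

def update_running_mean(running_mean, steps, batch):
    # Fold an entire batch into the running mean in one update.
    steps_increment = batch.shape[0]
    total_new_steps = np.int64(steps) + steps_increment  # int64 avoids the int32 wrap
    new_mean = running_mean + (batch - running_mean).sum(axis=0) / total_new_steps
    return new_mean, total_new_steps

mean, steps = np.zeros(2), np.int64(0)
mean, steps = update_running_mean(mean, steps, np.array([[1.0, 2.0], [3.0, 4.0]]))
print(mean, steps)  # [2. 3.] 2, the batch mean, since the old count was 0
```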
2 changes: 2 additions & 0 deletions ml-agents/mlagents/trainers/tf/tensorflow_to_barracuda.py
@@ -702,6 +702,8 @@ def get_tensor_data(tensor):
        data = tensor.float_val
    if tensor.int_val:
        data = np.array(tensor.int_val, dtype=float)
+    if tensor.int64_val:
+        data = np.array(tensor.int64_val, dtype=float)
    if tensor.bool_val:
        data = np.array(tensor.bool_val, dtype=float)
    return np.array(data).reshape(dims)
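For context, get_tensor_data unpacks a TensorFlow TensorProto, which stores constants in type-specific repeated fields (float_val, int_val, int64_val, bool_val); without the new branch, an int64 tensor's values would never be read. A small sketch of the same pattern (assumes the tensorflow package is installed; the fallback chain mirrors the converter's but is not copied from it):

```python
import numpy as np
from tensorflow.core.framework import tensor_pb2

proto = tensor_pb2.TensorProto()
proto.int64_val.append(2 ** 31 + 2)  # values past int32 land in int64_val

# Pick whichever repeated field is populated, as the converter does.
data = proto.float_val or proto.int_val or proto.int64_val or proto.bool_val
print(np.array(data, dtype=float))  # [2.14748365e+09]
```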
4 changes: 3 additions & 1 deletion ml-agents/mlagents/trainers/torch/networks.py
@@ -488,7 +488,9 @@ def update_normalization(self, vector_obs: List[torch.Tensor]) -> None:
class GlobalSteps(nn.Module):
    def __init__(self):
        super().__init__()
-        self.__global_step = nn.Parameter(torch.Tensor([0]), requires_grad=False)
+        self.__global_step = nn.Parameter(
+            torch.Tensor([0]).to(torch.int64), requires_grad=False
+        )

    @property
    def current_step(self):
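One subtlety on the PyTorch side: the old counter, torch.Tensor([0]), was float32, which stops representing consecutive integers exactly at 2^24, well before the int32 limit, so increments would eventually be lost silently. A short demonstration (illustrative, not from the diff):

```python
import torch

# float32 runs out of integer precision at 2**24: adding 1 is a no-op.
f = torch.tensor([2.0 ** 24], dtype=torch.float32)
print(int((f + 1).item()))  # 16777216, the increment is silently dropped

# int64 keeps exact counts far past 2**31.
i = torch.tensor([2 ** 31 - 1], dtype=torch.int64)
print(int((i + 3).item()))  # 2147483650
```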