Skip to content

Commit

Permalink
Merge pull request #4872 from Unity-Technologies/fix-numti-env-delaye…
Browse files Browse the repository at this point in the history
…d-spawn

[Bug Fix] Fix crash if spawn is delayed in multi-env
  • Loading branch information
vincentpierre authored Jan 22, 2021
2 parents 78c3f31 + 9a99962 commit fd0e092
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 2 deletions.
1 change: 1 addition & 0 deletions com.unity.ml-agents/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ removed when training with a player. The Editor still requires it to be clamped
- Fix a compile warning about using an obsolete enum in `GrpcExtensions.cs`. (#4812)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- Fixed a bug that would cause an exception when `RunOptions` was deserialized via `pickle`. (#4842)
- Fixed a bug that could cause a crash when a behavior first appears partway through training while using multiple environments. (#4872)
- Fixed the computation of entropy for continuous actions. (#4869)


Expand Down
7 changes: 5 additions & 2 deletions ml-agents/mlagents/trainers/subprocess_env_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,11 @@ def set_env_parameters(self, config: Dict = None) -> None:

@property
def training_behaviors(self) -> Dict[BehaviorName, BehaviorSpec]:
# Diff view: the next two lines appear to be the implementation removed by
# this commit (2 deletions); they ask only the first worker for its specs.
self.env_workers[0].send(EnvironmentCommand.BEHAVIOR_SPECS)
return self.env_workers[0].recv().payload
# Diff view: the remaining five lines appear to be the replacement (5 additions).
# Every worker is sent BEHAVIOR_SPECS and its reply payload is merged into a
# single mapping, so a behavior reported by any worker ends up in the result.
# Assuming payloads are plain dicts, a key reported by several workers keeps
# the value from the last worker queried.
result: Dict[BehaviorName, BehaviorSpec] = {}
for worker in self.env_workers:
worker.send(EnvironmentCommand.BEHAVIOR_SPECS)
result.update(worker.recv().payload)
return result

def close(self) -> None:
logger.debug("SubprocessEnvManager closing.")
Expand Down
27 changes: 27 additions & 0 deletions ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,33 @@ def test_reset_collects_results_from_all_envs(self, mock_create_worker):
)
assert res == list(map(lambda ew: ew.previous_step, manager.env_workers))

@mock.patch(
    "mlagents.trainers.subprocess_env_manager.SubprocessEnvManager.create_worker"
)
def test_training_behaviors_collects_results_from_all_envs(
    self, mock_create_worker
):
    """Check that ``training_behaviors`` queries every worker and merges the replies.

    Four mocked workers are created, each configured with a response whose
    payload is a one-entry dict ``{"key<worker_id>": worker_id}``. The test
    then verifies that each worker was sent ``BEHAVIOR_SPECS``, that each
    worker's ``recv`` was called, and that the combined result holds the
    entry contributed by every worker.
    """
    num_envs = 4

    def create_worker_mock(worker_id, step_queue, env_factor, engine_c):
        # Give each fake worker a payload keyed by its own id so the merged
        # result can be traced back to the worker that produced each entry.
        payload = {f"key{worker_id}": worker_id}
        response = EnvironmentResponse(EnvironmentCommand.RESET, worker_id, payload)
        return MockEnvWorker(worker_id, response)

    mock_create_worker.side_effect = create_worker_mock
    engine_config = EngineConfig.default_config()
    manager = SubprocessEnvManager(mock_env_factory, engine_config, num_envs)

    merged = manager.training_behaviors

    # Every worker must have been asked for its behavior specs and read from.
    for worker in manager.env_workers:
        worker.send.assert_called_with(EnvironmentCommand.BEHAVIOR_SPECS)
        worker.recv.assert_called()

    # The merged mapping must contain the entry from each individual worker.
    for idx in range(num_envs):
        key = f"key{idx}"
        assert key in merged
        assert merged[key] == idx

@mock.patch(
"mlagents.trainers.subprocess_env_manager.SubprocessEnvManager.create_worker"
)
Expand Down

0 comments on commit fd0e092

Please sign in to comment.