From 49bb76dba6ed5cc1b3f15dba74e5923495e4b4ca Mon Sep 17 00:00:00 2001 From: dongruoping Date: Wed, 31 Jul 2019 18:22:09 -0700 Subject: [PATCH 1/2] fix create_buffer for dc --- ml-agents/mlagents/trainers/tests/mock_brain.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/tests/mock_brain.py b/ml-agents/mlagents/trainers/tests/mock_brain.py index 1fcfbbc710..cb4656482a 100644 --- a/ml-agents/mlagents/trainers/tests/mock_brain.py +++ b/ml-agents/mlagents/trainers/tests/mock_brain.py @@ -129,7 +129,10 @@ def create_buffer(brain_infos, brain_params, sequence_length): buffer[0]["prev_action"].append(current_brain_info.previous_vector_actions[0]) buffer[0]["masks"].append(1.0) buffer[0]["advantages"].append(1.0) - buffer[0]["action_probs"].append(np.ones(buffer[0]["actions"][0].shape)) + if brain_params.vector_action_space_type == "discrete": + buffer[0]["action_probs"].append(np.ones(sum(brain_params.vector_action_space_size))) + else: + buffer[0]["action_probs"].append(np.ones(buffer[0]["actions"][0].shape)) buffer[0]["actions_pre"].append(np.ones(buffer[0]["actions"][0].shape)) buffer[0]["random_normal_epsilon"].append( np.ones(buffer[0]["actions"][0].shape) From 1bcc52864b2994ece36e6cedb757182984ddf566 Mon Sep 17 00:00:00 2001 From: dongruoping Date: Wed, 31 Jul 2019 18:25:30 -0700 Subject: [PATCH 2/2] reformat --- ml-agents/mlagents/trainers/tests/mock_brain.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/tests/mock_brain.py b/ml-agents/mlagents/trainers/tests/mock_brain.py index cb4656482a..75d7dfac33 100644 --- a/ml-agents/mlagents/trainers/tests/mock_brain.py +++ b/ml-agents/mlagents/trainers/tests/mock_brain.py @@ -130,7 +130,9 @@ def create_buffer(brain_infos, brain_params, sequence_length): buffer[0]["masks"].append(1.0) buffer[0]["advantages"].append(1.0) if brain_params.vector_action_space_type == "discrete": - buffer[0]["action_probs"].append(np.ones(sum(brain_params.vector_action_space_size))) + buffer[0]["action_probs"].append( + np.ones(sum(brain_params.vector_action_space_size)) + ) else: buffer[0]["action_probs"].append(np.ones(buffer[0]["actions"][0].shape)) buffer[0]["actions_pre"].append(np.ones(buffer[0]["actions"][0].shape))