diff --git a/UnitySDK/Assets/ML-Agents/Examples/Soccer/TFModels/SoccerTwos.bytes b/UnitySDK/Assets/ML-Agents/Examples/Soccer/TFModels/SoccerTwos.bytes new file mode 100644 index 0000000000..c1f3e59f93 Binary files /dev/null and b/UnitySDK/Assets/ML-Agents/Examples/Soccer/TFModels/SoccerTwos.bytes differ diff --git a/UnitySDK/Assets/ML-Agents/Examples/Soccer/TFModels/SoccerTwos.bytes.meta b/UnitySDK/Assets/ML-Agents/Examples/Soccer/TFModels/SoccerTwos.bytes.meta new file mode 100644 index 0000000000..680b8b1590 --- /dev/null +++ b/UnitySDK/Assets/ML-Agents/Examples/Soccer/TFModels/SoccerTwos.bytes.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 4856a334c6d4a4984ba1cc6610f31b20 +TextScriptImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/config/trainer_config.yaml b/config/trainer_config.yaml index e097ba57ad..cda281d4ef 100644 --- a/config/trainer_config.yaml +++ b/config/trainer_config.yaml @@ -67,10 +67,12 @@ BigWallBrain: normalize: false StrikerBrain: - max_steps: 1.0e5 + max_steps: 5.0e5 + learning_rate: 1e-3 batch_size: 128 - buffer_size: 2048 - beta: 5.0e-3 + num_epoch: 3 + buffer_size: 2000 + beta: 1.0e-2 hidden_units: 256 summary_freq: 2000 time_horizon: 128 @@ -78,10 +80,12 @@ StrikerBrain: normalize: false GoalieBrain: - max_steps: 1.0e5 - batch_size: 128 - buffer_size: 2048 - beta: 5.0e-3 + max_steps: 5.0e5 + learning_rate: 1e-3 + batch_size: 320 + num_epoch: 3 + buffer_size: 2000 + beta: 1.0e-2 hidden_units: 256 summary_freq: 2000 time_horizon: 128 diff --git a/ml-agents/mlagents/trainers/models.py b/ml-agents/mlagents/trainers/models.py index b2aec6890f..d97002b5e9 100644 --- a/ml-agents/mlagents/trainers/models.py +++ b/ml-agents/mlagents/trainers/models.py @@ -153,9 +153,10 @@ def create_discrete_action_masking_layer(all_logits, action_masks, action_size): action_idx = [0] + list(np.cumsum(action_size)) branches_logits = [all_logits[:, action_idx[i]:action_idx[i + 1]] for i in range(len(action_size))] branch_masks = [action_masks[:, action_idx[i]:action_idx[i + 1]] for i in range(len(action_size))] - raw_probs = [tf.multiply(tf.nn.softmax(branches_logits[k]), branch_masks[k]) + (1-branch_masks[k])*1.0e-10 + raw_probs = [tf.multiply(tf.nn.softmax(branches_logits[k]), branch_masks[k]) + 1.0e-10 for k in range(len(action_size))] - normalized_probs = [tf.divide(raw_probs[k], tf.reduce_sum(raw_probs[k], axis=1, keepdims=True)) + normalized_probs = [ + tf.divide(raw_probs[k], tf.reduce_sum(raw_probs[k] + 1.0e-10, axis=1, keepdims=True)) for k in range(len(action_size))] output = tf.concat([tf.multinomial(tf.log(normalized_probs[k]), 1) for k in range(len(action_size))], axis=1) return output, tf.concat([tf.log(normalized_probs[k]) for k in range(len(action_size))], axis=1)