From 585d884ed6de5d6aa1a982d262d6ddb04cd36325 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Tue, 16 Jun 2020 16:25:51 -0700 Subject: [PATCH 01/12] Better hyperparameters for WalkerStatic --- config/sac/WalkerStatic.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/config/sac/WalkerStatic.yaml b/config/sac/WalkerStatic.yaml index ef61a8c054..2274054260 100644 --- a/config/sac/WalkerStatic.yaml +++ b/config/sac/WalkerStatic.yaml @@ -4,8 +4,8 @@ behaviors: hyperparameters: learning_rate: 0.0003 learning_rate_schedule: constant - batch_size: 256 - buffer_size: 500000 + batch_size: 1024 + buffer_size: 2000000 buffer_init_steps: 0 tau: 0.005 steps_per_update: 30.0 @@ -14,15 +14,15 @@ behaviors: reward_signal_steps_per_update: 30.0 network_settings: normalize: true - hidden_units: 512 - num_layers: 4 + hidden_units: 256 + num_layers: 3 vis_encode_type: simple reward_signals: extrinsic: gamma: 0.995 strength: 1.0 keep_checkpoints: 5 - max_steps: 20000000 + max_steps: 15000000 time_horizon: 1000 summary_freq: 30000 threaded: true From b0fa7f27e9ca8fbe72b79dafe28ba743462ab169 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Tue, 16 Jun 2020 16:47:19 -0700 Subject: [PATCH 02/12] Lower beta for Reacher --- config/ppo/Reacher.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/ppo/Reacher.yaml b/config/ppo/Reacher.yaml index 69c821cdd6..55bedbb5d8 100644 --- a/config/ppo/Reacher.yaml +++ b/config/ppo/Reacher.yaml @@ -5,7 +5,7 @@ behaviors: batch_size: 2024 buffer_size: 20240 learning_rate: 0.0003 - beta: 0.005 + beta: 0.001 epsilon: 0.2 lambd: 0.95 num_epoch: 3 From 34b88cef8f4600277ca6ae024d1e6c4c8d38075f Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Tue, 16 Jun 2020 18:43:40 -0700 Subject: [PATCH 03/12] Experimental Pyramids config --- config/sac/Pyramids.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/config/sac/Pyramids.yaml b/config/sac/Pyramids.yaml index cd764a4d34..c11a57ce46 100644 --- a/config/sac/Pyramids.yaml +++ b/config/sac/Pyramids.yaml @@ -5,8 +5,8 @@ behaviors: learning_rate: 0.0003 learning_rate_schedule: constant batch_size: 128 - buffer_size: 500000 - buffer_init_steps: 10000 + buffer_size: 2000000 + buffer_init_steps: 1000 tau: 0.01 steps_per_update: 10.0 save_replay_buffer: false @@ -19,18 +19,18 @@ behaviors: vis_encode_type: simple reward_signals: extrinsic: - gamma: 0.99 + gamma: 0.995 strength: 2.0 gail: gamma: 0.99 - strength: 0.02 + strength: 0.01 encoding_size: 128 learning_rate: 0.0003 use_actions: true use_vail: false demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo keep_checkpoints: 5 - max_steps: 10000000 + max_steps: 2000000 time_horizon: 128 summary_freq: 30000 threaded: true From 2e4f45ae57a7ffac531e3294f07a4a300222d4d4 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Tue, 16 Jun 2020 19:42:33 -0700 Subject: [PATCH 04/12] Bigger networks --- config/sac/Pyramids.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/sac/Pyramids.yaml b/config/sac/Pyramids.yaml index c11a57ce46..ff93da16f2 100644 --- a/config/sac/Pyramids.yaml +++ b/config/sac/Pyramids.yaml @@ -14,8 +14,8 @@ behaviors: reward_signal_steps_per_update: 10.0 network_settings: normalize: false - hidden_units: 256 - num_layers: 2 + hidden_units: 512 + num_layers: 3 vis_encode_type: simple reward_signals: extrinsic: From 6106b09655e1cca4e17765262211df7ddd6441d3 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Tue, 16 Jun 2020 22:18:58 -0700 Subject: [PATCH 05/12] Use curiosity instead of GAIL --- config/sac/Pyramids.yaml | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/config/sac/Pyramids.yaml b/config/sac/Pyramids.yaml index ff93da16f2..91f5f5d461 100644 --- a/config/sac/Pyramids.yaml +++ b/config/sac/Pyramids.yaml @@ -21,16 +21,21 @@ behaviors: extrinsic: gamma: 0.995 strength: 2.0 - gail: + curiosity: gamma: 0.99 - strength: 0.01 - encoding_size: 128 + strength: 0.02 + encoding_size: 256 learning_rate: 0.0003 - use_actions: true - use_vail: false - demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo + # gail: + # gamma: 0.99 + # strength: 0.01 + # encoding_size: 128 + # learning_rate: 0.0003 + # use_actions: true + # use_vail: false + # demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo keep_checkpoints: 5 - max_steps: 2000000 + max_steps: 5000000 time_horizon: 128 summary_freq: 30000 threaded: true From 9a1d9958f6753b961c5ea5802ab0f6283efc9561 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Tue, 16 Jun 2020 22:19:13 -0700 Subject: [PATCH 06/12] Bigger buffer size --- config/sac/Pyramids.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/sac/Pyramids.yaml b/config/sac/Pyramids.yaml index 91f5f5d461..760eed0993 100644 --- a/config/sac/Pyramids.yaml +++ b/config/sac/Pyramids.yaml @@ -5,7 +5,7 @@ behaviors: learning_rate: 0.0003 learning_rate_schedule: constant batch_size: 128 - buffer_size: 2000000 + buffer_size: 5000000 buffer_init_steps: 1000 tau: 0.01 steps_per_update: 10.0 From e24d49dfc261aa8ba57d04a2adac3a271f26a0ed Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 17 Jun 2020 16:15:32 -0700 Subject: [PATCH 07/12] Switch back to GAIL --- config/sac/Pyramids.yaml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/config/sac/Pyramids.yaml b/config/sac/Pyramids.yaml index 760eed0993..6f23eedbe6 100644 --- a/config/sac/Pyramids.yaml +++ b/config/sac/Pyramids.yaml @@ -21,19 +21,19 @@ behaviors: extrinsic: gamma: 0.995 strength: 2.0 - curiosity: - gamma: 0.99 - strength: 0.02 - encoding_size: 256 - learning_rate: 0.0003 - # gail: + # curiosity: # gamma: 0.99 - # strength: 0.01 - # encoding_size: 128 + # strength: 0.02 + # encoding_size: 256 # learning_rate: 0.0003 - # use_actions: true - # use_vail: false - # demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo + gail: + gamma: 0.99 + strength: 0.01 + encoding_size: 128 + learning_rate: 0.0003 + use_actions: true + use_vail: false + demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo keep_checkpoints: 5 max_steps: 5000000 time_horizon: 128 From ce74afda77cf7ac9991e2c72706334a704f2de19 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Thu, 18 Jun 2020 12:05:41 -0700 Subject: [PATCH 08/12] Reduce Reacher batch size --- config/ppo/Reacher.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/ppo/Reacher.yaml b/config/ppo/Reacher.yaml index 55bedbb5d8..ba2274b170 100644 --- a/config/ppo/Reacher.yaml +++ b/config/ppo/Reacher.yaml @@ -2,8 +2,8 @@ behaviors: Reacher: trainer_type: ppo hyperparameters: - batch_size: 2024 - buffer_size: 20240 + batch_size: 512 + buffer_size: 20480 learning_rate: 0.0003 beta: 0.001 epsilon: 0.2 From 583489e9335648d6428de53266f6c691264c3610 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Thu, 18 Jun 2020 12:11:32 -0700 Subject: [PATCH 09/12] Final Pyramids config --- config/sac/Pyramids.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/config/sac/Pyramids.yaml b/config/sac/Pyramids.yaml index 6f23eedbe6..b147779c7c 100644 --- a/config/sac/Pyramids.yaml +++ b/config/sac/Pyramids.yaml @@ -21,11 +21,6 @@ behaviors: extrinsic: gamma: 0.995 strength: 2.0 - # curiosity: - # gamma: 0.99 - # strength: 0.02 - # encoding_size: 256 - # learning_rate: 0.0003 gail: gamma: 0.99 strength: 0.01 From 2d9451250c33a31fc35413e5ce8a092d9adeb984 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Tue, 23 Jun 2020 17:17:55 -0700 Subject: [PATCH 10/12] Reduce Pyramids buffer size --- config/sac/Pyramids.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/sac/Pyramids.yaml b/config/sac/Pyramids.yaml index b147779c7c..3ddc09fc99 100644 --- a/config/sac/Pyramids.yaml +++ b/config/sac/Pyramids.yaml @@ -5,7 +5,7 @@ behaviors: learning_rate: 0.0003 learning_rate_schedule: constant batch_size: 128 - buffer_size: 5000000 + buffer_size: 2000000 buffer_init_steps: 1000 tau: 0.01 steps_per_update: 10.0 From c568601a773a11cdab3bffe09ab401578e38a961 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 24 Jun 2020 11:52:03 -0700 Subject: [PATCH 11/12] Increase buffer size for testing only --- config/sac/Pyramids.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/sac/Pyramids.yaml b/config/sac/Pyramids.yaml index 3ddc09fc99..b147779c7c 100644 --- a/config/sac/Pyramids.yaml +++ b/config/sac/Pyramids.yaml @@ -5,7 +5,7 @@ behaviors: learning_rate: 0.0003 learning_rate_schedule: constant batch_size: 128 - buffer_size: 2000000 + buffer_size: 5000000 buffer_init_steps: 1000 tau: 0.01 steps_per_update: 10.0 From cf9af0d7d7f94d535ff7f84fac324409397132c2 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Wed, 24 Jun 2020 19:55:17 -0700 Subject: [PATCH 12/12] Shorten Pyramids run --- config/sac/Pyramids.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/sac/Pyramids.yaml b/config/sac/Pyramids.yaml index b147779c7c..b0797df503 100644 --- a/config/sac/Pyramids.yaml +++ b/config/sac/Pyramids.yaml @@ -5,7 +5,7 @@ behaviors: learning_rate: 0.0003 learning_rate_schedule: constant batch_size: 128 - buffer_size: 5000000 + buffer_size: 2000000 buffer_init_steps: 1000 tau: 0.01 steps_per_update: 10.0 @@ -30,7 +30,7 @@ behaviors: use_vail: false demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo keep_checkpoints: 5 - max_steps: 5000000 + max_steps: 3000000 time_horizon: 128 summary_freq: 30000 threaded: true