From a4d81d3205fc14e5b0a42c24a1c2666110c98982 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Wed, 17 Jun 2020 12:40:42 -0700 Subject: [PATCH 1/9] update config 3dball --- config/ppo/3DBallHard.yaml | 4 ++-- config/sac/3DBall.yaml | 4 ++-- config/sac/3DBallHard.yaml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/config/ppo/3DBallHard.yaml b/config/ppo/3DBallHard.yaml index 6dc83100f2..c8a1ae74d0 100644 --- a/config/ppo/3DBallHard.yaml +++ b/config/ppo/3DBallHard.yaml @@ -5,7 +5,7 @@ behaviors: batch_size: 1200 buffer_size: 12000 learning_rate: 0.0003 - beta: 0.001 + beta: 0.0001 epsilon: 0.2 lambd: 0.95 num_epoch: 3 @@ -20,7 +20,7 @@ behaviors: gamma: 0.995 strength: 1.0 keep_checkpoints: 5 - max_steps: 5000000 + max_steps: 2000000 time_horizon: 1000 summary_freq: 12000 threaded: true diff --git a/config/sac/3DBall.yaml b/config/sac/3DBall.yaml index 0458c03070..e6f91bc548 100644 --- a/config/sac/3DBall.yaml +++ b/config/sac/3DBall.yaml @@ -5,7 +5,7 @@ behaviors: learning_rate: 0.0003 learning_rate_schedule: constant batch_size: 64 - buffer_size: 12000 + buffer_size: 200000 buffer_init_steps: 0 tau: 0.005 steps_per_update: 10.0 @@ -22,7 +22,7 @@ behaviors: gamma: 0.99 strength: 1.0 keep_checkpoints: 5 - max_steps: 500000 + max_steps: 200000 time_horizon: 1000 summary_freq: 12000 threaded: true diff --git a/config/sac/3DBallHard.yaml b/config/sac/3DBallHard.yaml index 511f375bc2..e34bb93a79 100644 --- a/config/sac/3DBallHard.yaml +++ b/config/sac/3DBallHard.yaml @@ -5,7 +5,7 @@ behaviors: learning_rate: 0.0003 learning_rate_schedule: constant batch_size: 256 - buffer_size: 50000 + buffer_size: 500000 buffer_init_steps: 0 tau: 0.005 steps_per_update: 10.0 From e5bba80de5ba1c20fa64a2972335cb9a3aedf54f Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Wed, 17 Jun 2020 13:38:57 -0700 Subject: [PATCH 2/9] zeroing out beta ppo 3dballhard --- config/ppo/3DBallHard.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/ppo/3DBallHard.yaml b/config/ppo/3DBallHard.yaml index c8a1ae74d0..c936c4cf84 100644 --- a/config/ppo/3DBallHard.yaml +++ b/config/ppo/3DBallHard.yaml @@ -5,7 +5,7 @@ behaviors: batch_size: 1200 buffer_size: 12000 learning_rate: 0.0003 - beta: 0.0001 + beta: 0.0 epsilon: 0.2 lambd: 0.95 num_epoch: 3 From 07ad85aa34b39fe33c39e61c9aa1d9b0cf5d97fd Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Wed, 17 Jun 2020 15:12:36 -0700 Subject: [PATCH 3/9] reduce batch size --- config/ppo/3DBallHard.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/ppo/3DBallHard.yaml b/config/ppo/3DBallHard.yaml index c936c4cf84..02f5d26851 100644 --- a/config/ppo/3DBallHard.yaml +++ b/config/ppo/3DBallHard.yaml @@ -2,10 +2,10 @@ behaviors: 3DBallHard: trainer_type: ppo hyperparameters: - batch_size: 1200 + batch_size: 120 buffer_size: 12000 learning_rate: 0.0003 - beta: 0.0 + beta: 0.001 epsilon: 0.2 lambd: 0.95 num_epoch: 3 From 2c113f48bd728aad05c38223eaa084d6886b61a2 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Wed, 17 Jun 2020 17:00:23 -0700 Subject: [PATCH 4/9] 3dball hard ppo 1M timesteps --- config/ppo/3DBallHard.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/ppo/3DBallHard.yaml b/config/ppo/3DBallHard.yaml index 02f5d26851..207cdd219c 100644 --- a/config/ppo/3DBallHard.yaml +++ b/config/ppo/3DBallHard.yaml @@ -20,7 +20,7 @@ behaviors: gamma: 0.995 strength: 1.0 keep_checkpoints: 5 - max_steps: 2000000 + max_steps: 1000000 time_horizon: 1000 summary_freq: 12000 threaded: true From 5eb188b6f888e9a5a60b3964355eb048f340914f Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Wed, 17 Jun 2020 17:02:16 -0700 Subject: [PATCH 5/9] revert 1M timestep commit --- config/ppo/3DBallHard.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/ppo/3DBallHard.yaml b/config/ppo/3DBallHard.yaml index 207cdd219c..02f5d26851 100644 --- a/config/ppo/3DBallHard.yaml +++ b/config/ppo/3DBallHard.yaml @@ -20,7 +20,7 @@ behaviors: gamma: 0.995 strength: 1.0 keep_checkpoints: 5 - max_steps: 1000000 + max_steps: 2000000 time_horizon: 1000 summary_freq: 12000 threaded: true From e13c4da68b3bb7c4f52a27206786dc801b68d4e3 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Thu, 18 Jun 2020 08:41:42 -0700 Subject: [PATCH 6/9] reduce gamma --- config/ppo/3DBallHard.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/ppo/3DBallHard.yaml b/config/ppo/3DBallHard.yaml index 02f5d26851..5d10be7ebe 100644 --- a/config/ppo/3DBallHard.yaml +++ b/config/ppo/3DBallHard.yaml @@ -17,7 +17,7 @@ behaviors: vis_encode_type: simple reward_signals: extrinsic: - gamma: 0.995 + gamma: 0.99 strength: 1.0 keep_checkpoints: 5 max_steps: 2000000 From 0f38c36ba724a64a3fde80984e0ca3654321b879 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Sat, 20 Jun 2020 08:49:22 -0700 Subject: [PATCH 7/9] learning rate to constant --- config/ppo/3DBallHard.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/ppo/3DBallHard.yaml b/config/ppo/3DBallHard.yaml index 5d10be7ebe..87c532dc5d 100644 --- a/config/ppo/3DBallHard.yaml +++ b/config/ppo/3DBallHard.yaml @@ -9,7 +9,7 @@ behaviors: epsilon: 0.2 lambd: 0.95 num_epoch: 3 - learning_rate_schedule: linear + learning_rate_schedule: constant network_settings: normalize: true hidden_units: 128 From 3b41c2eac792b12aaaf55a2f4dbb7fedae6ef4e9 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Sat, 20 Jun 2020 11:34:25 -0700 Subject: [PATCH 8/9] reduce steps --- config/ppo/3DBallHard.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/ppo/3DBallHard.yaml b/config/ppo/3DBallHard.yaml index 87c532dc5d..89703ecc0d 100644 --- a/config/ppo/3DBallHard.yaml +++ b/config/ppo/3DBallHard.yaml @@ -20,7 +20,7 @@ behaviors: gamma: 0.99 strength: 1.0 keep_checkpoints: 5 - max_steps: 2000000 + max_steps: 500000 time_horizon: 1000 summary_freq: 12000 threaded: true From fc59204c6da43563d59862ea03e41f909c513650 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Sun, 21 Jun 2020 08:39:08 -0700 Subject: [PATCH 9/9] constant to linear LR --- config/ppo/3DBallHard.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/ppo/3DBallHard.yaml b/config/ppo/3DBallHard.yaml index 89703ecc0d..ca921e26f2 100644 --- a/config/ppo/3DBallHard.yaml +++ b/config/ppo/3DBallHard.yaml @@ -9,7 +9,7 @@ behaviors: epsilon: 0.2 lambd: 0.95 num_epoch: 3 - learning_rate_schedule: constant + learning_rate_schedule: linear network_settings: normalize: true hidden_units: 128