diff --git a/config/ppo/Reacher.yaml b/config/ppo/Reacher.yaml index 69c821cdd6..ba2274b170 100644 --- a/config/ppo/Reacher.yaml +++ b/config/ppo/Reacher.yaml @@ -2,10 +2,10 @@ behaviors: Reacher: trainer_type: ppo hyperparameters: - batch_size: 2024 - buffer_size: 20240 + batch_size: 512 + buffer_size: 20480 learning_rate: 0.0003 - beta: 0.005 + beta: 0.001 epsilon: 0.2 lambd: 0.95 num_epoch: 3 diff --git a/config/sac/Pyramids.yaml b/config/sac/Pyramids.yaml index cd764a4d34..b0797df503 100644 --- a/config/sac/Pyramids.yaml +++ b/config/sac/Pyramids.yaml @@ -5,8 +5,8 @@ behaviors: learning_rate: 0.0003 learning_rate_schedule: constant batch_size: 128 - buffer_size: 500000 - buffer_init_steps: 10000 + buffer_size: 2000000 + buffer_init_steps: 1000 tau: 0.01 steps_per_update: 10.0 save_replay_buffer: false @@ -14,23 +14,23 @@ behaviors: reward_signal_steps_per_update: 10.0 network_settings: normalize: false - hidden_units: 256 - num_layers: 2 + hidden_units: 512 + num_layers: 3 vis_encode_type: simple reward_signals: extrinsic: - gamma: 0.99 + gamma: 0.995 strength: 2.0 gail: gamma: 0.99 - strength: 0.02 + strength: 0.01 encoding_size: 128 learning_rate: 0.0003 use_actions: true use_vail: false demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo keep_checkpoints: 5 - max_steps: 10000000 + max_steps: 3000000 time_horizon: 128 summary_freq: 30000 threaded: true diff --git a/config/sac/WalkerStatic.yaml b/config/sac/WalkerStatic.yaml index ef61a8c054..2274054260 100644 --- a/config/sac/WalkerStatic.yaml +++ b/config/sac/WalkerStatic.yaml @@ -4,8 +4,8 @@ behaviors: hyperparameters: learning_rate: 0.0003 learning_rate_schedule: constant - batch_size: 256 - buffer_size: 500000 + batch_size: 1024 + buffer_size: 2000000 buffer_init_steps: 0 tau: 0.005 steps_per_update: 30.0 @@ -14,15 +14,15 @@ behaviors: reward_signal_steps_per_update: 30.0 network_settings: normalize: true - hidden_units: 512 - num_layers: 4 + hidden_units: 256 + num_layers: 3 vis_encode_type: simple reward_signals: extrinsic: gamma: 0.995 strength: 1.0 keep_checkpoints: 5 - max_steps: 20000000 + max_steps: 15000000 time_horizon: 1000 summary_freq: 30000 threaded: true