forked from ikostrikov/pytorch-a2c-ppo-acktr-gail
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request ikostrikov#2 from avivt/master
sync with avivt
- Loading branch information
Showing
17 changed files
with
454 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,19 @@ | ||
# NOTE(review): the hunk header for this span reads "-1,25 +1,19", and the span
# contains both a six-seed "h_bandit-randchoose-v5" --use_graddrop series and a
# six-seed "h_bandit-randchoose-v8" --recurrent-policy series (plus two label
# echoes at the end). It presumably shows the removed and the added lines of
# one script interleaved without +/- markers -- confirm against the repository
# before treating it as a single runnable script.
#
# Every command is started in the background with "&"; "sleep 3" separates the
# seeds and the final "wait" blocks until all background runs have exited.

# Seed 1
python3 main.py --env-name "h_bandit-randchoose-v5" --algo ppo --log-interval 25 --num-steps 100 --num-processes 10 --lr 3e-4 --entropy-coef 0.05 --value-loss-coef 0.5 --ppo-epoch 1 --num-mini-batch 10 --gamma 0.9 --gae-lambda 0.95 --num-env-steps 5000000 --eval-interval 100 --log-dir ./ppo_log --task_steps=20 --obs_recurrent --free_exploration 6 --use_graddrop --seed 1 & | ||
|
||
python3 main.py --env-name "h_bandit-randchoose-v8" --algo ppo --log-interval 25 --num-steps 100 --num-processes 25 --lr 3e-4 --entropy-coef 0.05 --value-loss-coef 0.5 --ppo-epoch 1 --num-mini-batch 25 --gamma 0.9 --gae-lambda 0.95 --num-env-steps 16000000 --eval-interval 100 --log-dir ./ppo_log --task_steps=20 --recurrent-policy --free_exploration 6 --seed 1 & | ||
sleep 3 | ||
|
||
# Seed 2
python3 main.py --env-name "h_bandit-randchoose-v5" --algo ppo --log-interval 25 --num-steps 100 --num-processes 10 --lr 3e-4 --entropy-coef 0.05 --value-loss-coef 0.5 --ppo-epoch 1 --num-mini-batch 10 --gamma 0.9 --gae-lambda 0.95 --num-env-steps 5000000 --eval-interval 100 --log-dir ./ppo_log --task_steps=20 --obs_recurrent --free_exploration 6 --use_graddrop --seed 2 & | ||
|
||
python3 main.py --env-name "h_bandit-randchoose-v8" --algo ppo --log-interval 25 --num-steps 100 --num-processes 25 --lr 3e-4 --entropy-coef 0.05 --value-loss-coef 0.5 --ppo-epoch 1 --num-mini-batch 25 --gamma 0.9 --gae-lambda 0.95 --num-env-steps 16000000 --eval-interval 100 --log-dir ./ppo_log --task_steps=20 --recurrent-policy --free_exploration 6 --seed 2 & | ||
sleep 3 | ||
|
||
# Seed 3 (the v8 command for seeds 3 and 4 passes --lr 1e-4 rather than 3e-4)
python3 main.py --env-name "h_bandit-randchoose-v5" --algo ppo --log-interval 25 --num-steps 100 --num-processes 10 --lr 3e-4 --entropy-coef 0.05 --value-loss-coef 0.5 --ppo-epoch 1 --num-mini-batch 10 --gamma 0.9 --gae-lambda 0.95 --num-env-steps 5000000 --eval-interval 100 --log-dir ./ppo_log --task_steps=20 --obs_recurrent --free_exploration 6 --use_graddrop --seed 3 & | ||
|
||
python3 main.py --env-name "h_bandit-randchoose-v8" --algo ppo --log-interval 25 --num-steps 100 --num-processes 25 --lr 1e-4 --entropy-coef 0.05 --value-loss-coef 0.5 --ppo-epoch 1 --num-mini-batch 25 --gamma 0.9 --gae-lambda 0.95 --num-env-steps 16000000 --eval-interval 100 --log-dir ./ppo_log --task_steps=20 --recurrent-policy --free_exploration 6 --seed 3 & | ||
sleep 3 | ||
|
||
# Seed 4
python3 main.py --env-name "h_bandit-randchoose-v5" --algo ppo --log-interval 25 --num-steps 100 --num-processes 10 --lr 3e-4 --entropy-coef 0.05 --value-loss-coef 0.5 --ppo-epoch 1 --num-mini-batch 10 --gamma 0.9 --gae-lambda 0.95 --num-env-steps 5000000 --eval-interval 100 --log-dir ./ppo_log --task_steps=20 --obs_recurrent --free_exploration 6 --use_graddrop --seed 4 & | ||
|
||
python3 main.py --env-name "h_bandit-randchoose-v8" --algo ppo --log-interval 25 --num-steps 100 --num-processes 25 --lr 1e-4 --entropy-coef 0.05 --value-loss-coef 0.5 --ppo-epoch 1 --num-mini-batch 25 --gamma 0.9 --gae-lambda 0.95 --num-env-steps 16000000 --eval-interval 100 --log-dir ./ppo_log --task_steps=20 --recurrent-policy --free_exploration 6 --seed 4 & | ||
sleep 3 | ||
|
||
# Seed 5 (the v8 command for seeds 5 and 6 passes --ppo-epoch 3 rather than 1)
python3 main.py --env-name "h_bandit-randchoose-v5" --algo ppo --log-interval 25 --num-steps 100 --num-processes 10 --lr 3e-4 --entropy-coef 0.05 --value-loss-coef 0.5 --ppo-epoch 1 --num-mini-batch 10 --gamma 0.9 --gae-lambda 0.95 --num-env-steps 5000000 --eval-interval 100 --log-dir ./ppo_log --task_steps=20 --obs_recurrent --free_exploration 6 --use_graddrop --seed 5 & | ||
|
||
python3 main.py --env-name "h_bandit-randchoose-v8" --algo ppo --log-interval 25 --num-steps 100 --num-processes 25 --lr 3e-4 --entropy-coef 0.05 --value-loss-coef 0.5 --ppo-epoch 3 --num-mini-batch 25 --gamma 0.9 --gae-lambda 0.95 --num-env-steps 16000000 --eval-interval 100 --log-dir ./ppo_log --task_steps=20 --recurrent-policy --free_exploration 6 --seed 5 & | ||
sleep 3 | ||
|
||
# Seed 6
python3 main.py --env-name "h_bandit-randchoose-v5" --algo ppo --log-interval 25 --num-steps 100 --num-processes 10 --lr 3e-4 --entropy-coef 0.05 --value-loss-coef 0.5 --ppo-epoch 1 --num-mini-batch 10 --gamma 0.9 --gae-lambda 0.95 --num-env-steps 5000000 --eval-interval 100 --log-dir ./ppo_log --task_steps=20 --obs_recurrent --free_exploration 6 --use_graddrop --seed 6 & | ||
|
||
python3 main.py --env-name "h_bandit-randchoose-v8" --algo ppo --log-interval 25 --num-steps 100 --num-processes 25 --lr 3e-4 --entropy-coef 0.05 --value-loss-coef 0.5 --ppo-epoch 3 --num-mini-batch 25 --gamma 0.9 --gae-lambda 0.95 --num-env-steps 16000000 --eval-interval 100 --log-dir ./ppo_log --task_steps=20 --recurrent-policy --free_exploration 6 --seed 6 & | ||
# Block until all background runs started above have finished.
wait | ||
|
||
# Experiment labels: one matches the v5 graddrop series, the other the v8
# recurrent-policy series.
echo "graddrop obs recurrent 10 arms and free exploration" | ||
echo "recurrent 25 arms and free exploration, different hyperparams" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Runs PPO training on "h_bandit-randchoose-v5" with --obs_recurrent and
# --use_graddrop for seeds 1 through 6. Every run uses the same arguments;
# only the value passed to --seed changes.

# start_run SEED
#   Starts one training run in the background with the given seed.
start_run() {
  python3 main.py --env-name "h_bandit-randchoose-v5" --algo ppo \
    --log-interval 25 --num-steps 100 --num-processes 10 \
    --lr 3e-4 --entropy-coef 0.05 --value-loss-coef 0.5 \
    --ppo-epoch 1 --num-mini-batch 10 \
    --gamma 0.9 --gae-lambda 0.95 \
    --num-env-steps 5000000 --eval-interval 100 \
    --log-dir ./ppo_log --task_steps=20 \
    --obs_recurrent --free_exploration 6 --use_graddrop \
    --seed "$1" &
}

start_run 1
for seed in 2 3 4 5 6; do
  # Pause 3 seconds before each launch after the first one.
  sleep 3
  start_run "$seed"
done

# Block until every background run has exited, then print the experiment label.
wait

echo "graddrop obs recurrent 10 arms and free exploration"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Runs PPO training on "h_bandit-randchoose-v8" with --recurrent-policy for
# seeds 1 through 6. The learning rate and the PPO epoch count change per pair
# of seeds; all remaining arguments are shared.

# start_run SEED LR PPO_EPOCH
#   Starts one training run in the background. LR is passed to --lr and
#   PPO_EPOCH to --ppo-epoch.
start_run() {
  python3 main.py --env-name "h_bandit-randchoose-v8" --algo ppo \
    --log-interval 25 --num-steps 100 --num-processes 25 \
    --lr "$2" --entropy-coef 0.05 --value-loss-coef 0.5 \
    --ppo-epoch "$3" --num-mini-batch 25 \
    --gamma 0.9 --gae-lambda 0.95 \
    --num-env-steps 6000000 --eval-interval 100 \
    --log-dir ./ppo_log --task_steps=20 \
    --recurrent-policy --free_exploration 6 \
    --seed "$1" &
}

# Launches are spaced 3 seconds apart; there is no pause after the last one.
start_run 1 3e-4 5
sleep 3
start_run 2 3e-4 5
sleep 3
start_run 3 3e-4 7
sleep 3
start_run 4 3e-4 7
sleep 3
start_run 5 1e-3 3
sleep 3
start_run 6 1e-3 3

# Block until every background run has exited, then print the experiment label.
wait

echo "recurrent 25 arms and free exploration, different hyperparams"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Runs PPO training on "h_bandit-randchoose-v8" with --obs_recurrent and
# --use_testgrad for seeds 1 through 6. The value passed to --testgrad_beta
# changes per pair of seeds; all remaining arguments are shared.

# start_run SEED BETA
#   Starts one training run in the background. BETA is passed to
#   --testgrad_beta.
start_run() {
  python3 main.py --env-name "h_bandit-randchoose-v8" --algo ppo \
    --log-interval 25 --num-steps 100 --num-processes 25 \
    --lr 1e-3 --entropy-coef 0.05 --value-loss-coef 0.5 \
    --ppo-epoch 3 --num-mini-batch 25 \
    --gamma 0.9 --gae-lambda 0.95 \
    --num-env-steps 6000000 --eval-interval 100 \
    --log-dir ./ppo_log --task_steps=20 \
    --obs_recurrent --use_testgrad --testgrad_beta "$2" \
    --free_exploration 6 \
    --seed "$1" &
}

# Launches are spaced 3 seconds apart; there is no pause after the last one.
start_run 1 1.0
sleep 3
start_run 2 1.0
sleep 3
start_run 3 0.8
sleep 3
start_run 4 0.8
sleep 3
start_run 5 0.5
sleep 3
start_run 6 0.5

# Block until every background run has exited, then print the experiment notes.
wait

echo "obs recurrent 25 arms and free exploration, different hyperparams"
echo "seed 4 works!!! Seed 4 Iter 2300 five_arms 18.0 ten_arms 18.5 many_arms 14.36"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Runs PPO training on "h_bandit-randchoose-v8" with --obs_recurrent and
# --use_median_grad for seeds 1 through 6. The value passed to
# --grad_noise_ratio changes per pair of seeds; all remaining arguments are
# shared.

# start_run SEED NOISE_RATIO
#   Starts one training run in the background. NOISE_RATIO is passed to
#   --grad_noise_ratio.
start_run() {
  python3 main.py --env-name "h_bandit-randchoose-v8" --algo ppo \
    --log-interval 25 --num-steps 100 --num-processes 25 \
    --lr 1e-3 --entropy-coef 0.05 --value-loss-coef 0.5 \
    --ppo-epoch 3 --num-mini-batch 25 \
    --gamma 0.9 --gae-lambda 0.95 \
    --num-env-steps 6000000 --eval-interval 100 \
    --log-dir ./ppo_log --task_steps=20 \
    --obs_recurrent --use_median_grad --grad_noise_ratio "$2" \
    --free_exploration 6 \
    --seed "$1" &
}

# Launches are spaced 3 seconds apart; there is no pause after the last one.
start_run 1 1.5
sleep 3
start_run 2 1.5
sleep 3
start_run 3 2.5
sleep 3
start_run 4 2.5
sleep 3
start_run 5 2.0
sleep 3
start_run 6 2.0

# Block until every background run has exited, then print the experiment notes.
wait

echo "obs recurrent 25 arms and free exploration, median gradient, different hyperparams"
echo "so far not good"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Runs PPO training on "h_bandit-randchoose-v8" with --obs_recurrent,
# --use_testgrad and --testgrad_beta 0.8 for seeds 1 through 6. Every run uses
# the same arguments; only the value passed to --seed changes.

# start_run SEED
#   Starts one training run in the background with the given seed.
start_run() {
  python3 main.py --env-name "h_bandit-randchoose-v8" --algo ppo \
    --log-interval 25 --num-steps 100 --num-processes 25 \
    --lr 1e-3 --entropy-coef 0.05 --value-loss-coef 0.5 \
    --ppo-epoch 3 --num-mini-batch 25 \
    --gamma 0.9 --gae-lambda 0.95 \
    --num-env-steps 6000000 --eval-interval 100 \
    --log-dir ./ppo_log --task_steps=20 \
    --obs_recurrent --use_testgrad --testgrad_beta 0.8 \
    --free_exploration 6 \
    --seed "$1" &
}

start_run 1
for seed in 2 3 4 5 6; do
  # Pause 3 seconds before each launch after the first one.
  sleep 3
  start_run "$seed"
done

# Block until every background run has exited, then print the experiment notes.
wait

echo "obs recurrent 25 arms and free exploration, testgrad_beta 0.8"
echo "a previous seed gave surprising results, checking if this is consistent"
echo "2 seeds out of 6 are good (13,14)"
Oops, something went wrong.