Skip to content

Commit

Permalink
scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
avivt committed May 29, 2021
1 parent bb02d6a commit eabdc2c
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 1 deletion.
3 changes: 2 additions & 1 deletion a2c_ppo_acktr/scripts/script_rec45.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ sleep 3
python3 main.py --env-name "h_bandit-randchoose-v8" --algo ppo --log-interval 25 --num-steps 100 --num-processes 25 --lr 1e-3 --entropy-coef 0.05 --value-loss-coef 0.5 --ppo-epoch 3 --num-mini-batch 25 --gamma 0.9 --gae-lambda 0.95 --num-env-steps 6000000 --eval-interval 100 --log-dir ./ppo_log --task_steps=20 --obs_recurrent --use_testgrad --testgrad_beta 0.2 --use_testgrad_median --free_exploration 6 --seed 16 &
wait

echo "obs recurrent 25 arms and free exploration, testgrad_"
echo "obs recurrent 25 arms and free exploration, testgrad_median"
echo "Seed 16 Iter 2000 five_arms 16.0 ten_arms 15.6 many_arms 10.18 , didn't work well"
20 changes: 20 additions & 0 deletions a2c_ppo_acktr/scripts/script_rec46.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Experiment sweep: PPO on "h_bandit-randchoose-v8" with --testgrad_beta 0.7.
# Starts one background run of main.py per seed (11 through 16), 3 seconds
# apart, then blocks until every run has exited. All runs share the same
# arguments except --seed, so hyperparameters are edited in exactly one place.
# Uses only POSIX sh constructs, since the script carries no shebang.

launched=""
for seed in 11 12 13 14 15 16; do
  # Stagger start-up: pause between launches, but not before the first one.
  if [ -n "$launched" ]; then
    sleep 3
  fi

  python3 main.py \
    --env-name "h_bandit-randchoose-v8" \
    --algo ppo \
    --log-interval 25 \
    --num-steps 100 \
    --num-processes 25 \
    --lr 1e-3 \
    --entropy-coef 0.05 \
    --value-loss-coef 0.5 \
    --ppo-epoch 3 \
    --num-mini-batch 25 \
    --gamma 0.9 \
    --gae-lambda 0.95 \
    --num-env-steps 6000000 \
    --eval-interval 100 \
    --log-dir ./ppo_log \
    --task_steps=20 \
    --obs_recurrent \
    --use_testgrad \
    --testgrad_beta 0.7 \
    --free_exploration 6 \
    --seed "$seed" &

  # Remember which PID belongs to which seed so failures can be attributed.
  launched="$launched $seed:$!"
done

# Wait for each run individually: a bare `wait` would throw the exit statuses
# away and a crashed run would go unnoticed. Failures are only reported on
# stderr; the script's own exit status is left as it was (that of the final
# echo) so existing invocations keep behaving the same.
# shellcheck disable=SC2086 -- entries are "seed:pid" tokens; splitting is intended
for job in $launched; do
  job_seed=${job%%:*}
  job_pid=${job##*:}
  if ! wait "$job_pid"; then
    printf 'warning: run with seed %s (pid %s) exited with a non-zero status\n' \
      "$job_seed" "$job_pid" >&2
  fi
done

# Free-form notes about this experiment, printed once all runs are done.
echo "obs recurrent 25 arms and free exploration, testgrad_beta 0.7"
echo "a previous seed with 0.8 gave surprising results, checking if this is consistent"
20 changes: 20 additions & 0 deletions a2c_ppo_acktr/scripts/script_rec47.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Experiment sweep: PPO on "h_bandit-randchoose-v8" with --testgrad_beta 0.9.
# Starts one background run of main.py per seed (11 through 16), 3 seconds
# apart, then blocks until every run has exited. All runs share the same
# arguments except --seed, so hyperparameters are edited in exactly one place.
# Uses only POSIX sh constructs, since the script carries no shebang.

launched=""
for seed in 11 12 13 14 15 16; do
  # Stagger start-up: pause between launches, but not before the first one.
  if [ -n "$launched" ]; then
    sleep 3
  fi

  python3 main.py \
    --env-name "h_bandit-randchoose-v8" \
    --algo ppo \
    --log-interval 25 \
    --num-steps 100 \
    --num-processes 25 \
    --lr 1e-3 \
    --entropy-coef 0.05 \
    --value-loss-coef 0.5 \
    --ppo-epoch 3 \
    --num-mini-batch 25 \
    --gamma 0.9 \
    --gae-lambda 0.95 \
    --num-env-steps 6000000 \
    --eval-interval 100 \
    --log-dir ./ppo_log \
    --task_steps=20 \
    --obs_recurrent \
    --use_testgrad \
    --testgrad_beta 0.9 \
    --free_exploration 6 \
    --seed "$seed" &

  # Remember which PID belongs to which seed so failures can be attributed.
  launched="$launched $seed:$!"
done

# Wait for each run individually: a bare `wait` would throw the exit statuses
# away and a crashed run would go unnoticed. Failures are only reported on
# stderr; the script's own exit status is left as it was (that of the final
# echo) so existing invocations keep behaving the same.
# shellcheck disable=SC2086 -- entries are "seed:pid" tokens; splitting is intended
for job in $launched; do
  job_seed=${job%%:*}
  job_pid=${job##*:}
  if ! wait "$job_pid"; then
    printf 'warning: run with seed %s (pid %s) exited with a non-zero status\n' \
      "$job_seed" "$job_pid" >&2
  fi
done

# Free-form notes about this experiment, printed once all runs are done.
echo "obs recurrent 25 arms and free exploration, testgrad_beta 0.9"
echo "a previous seed with 0.8 gave surprising results, checking if this is consistent"

0 comments on commit eabdc2c

Please sign in to comment.