This repository has been archived by the owner on Oct 31, 2023. It is now read-only.

Simple modular configuration refactoring #2

Merged (4 commits) on Sep 7, 2021
2 changes: 1 addition & 1 deletion README.md
@@ -72,7 +72,7 @@ conda activate drqv2

Train the agent:
```diff
-python train.py task=quadruped_walk
+python train.py env_cfg=quadruped_walk
```

> **Contributor** (review comment): let's rename it to just `task`.

Monitor results:
16 changes: 16 additions & 0 deletions cfgs/agent_cfg/drqv2.yaml
```yaml
experiment: exp

agent:
  _target_: drqv2.DrQV2Agent
  obs_shape: ??? # to be specified later
  action_shape: ??? # to be specified later
  device: ${device}
  lr: ${env_cfg.lr}
  critic_target_tau: 0.01
  update_every_steps: 2
  use_tb: ${use_tb}
  num_expl_steps: 2000
  hidden_dim: 1024
  feature_dim: ${env_cfg.feature_dim}
  stddev_schedule: ${env_cfg.stddev_schedule}
  stddev_clip: 0.3
```
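The `???` values are OmegaConf's marker for mandatory fields: they must be set before they are accessed, so the training script has to fill them in before building the agent from `_target_`. A minimal sketch of that flow, assuming a `make_agent` helper and spec objects that are not part of this PR:

```python
# Hypothetical sketch: make_agent, obs_spec, and action_spec are
# illustrative assumptions, not code from this PR.
import hydra


def make_agent(obs_spec, action_spec, cfg):
    # Fill the mandatory (???) fields before instantiation.
    cfg.obs_shape = obs_spec.shape
    cfg.action_shape = action_spec.shape
    # hydra.utils.instantiate constructs the class named in _target_
    # (drqv2.DrQV2Agent), passing the remaining keys as keyword arguments.
    return hydra.utils.instantiate(cfg)
```

The interpolations (`${env_cfg.lr}`, `${device}`, `${use_tb}`) resolve against the composed top-level config, which is why this agent file can stay task-agnostic.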
29 changes: 29 additions & 0 deletions cfgs/config.yaml
```yaml
defaults:
  - agent_cfg: drqv2
  - env_cfg: quadruped_walk
  - override hydra/launcher: submitit_local

# snapshot
save_snapshot: false
# misc
seed: 1
device: cuda
save_video: true
save_train_video: false
use_tb: false

hydra:
  run:
    dir: ./exp_local/${now:%Y.%m.%d}/${now:%H%M%S}_${hydra.job.override_dirname}
  sweep:
    dir: ./exp/${now:%Y.%m.%d}/${now:%H%M}_${agent_cfg.experiment}
    subdir: ${hydra.job.num}
  launcher:
    timeout_min: 4300
    cpus_per_task: 10
    gpus_per_node: 1
    tasks_per_node: 1
    mem_gb: 160
    nodes: 1
    submitit_folder: ./exp/${now:%Y.%m.%d}/${now:%H%M%S}_${agent_cfg.experiment}/.slurm
```
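Here the `defaults` list composes the final config from one entry per group (`agent_cfg`, `env_cfg`), so switching tasks or agents becomes a command-line override rather than an edit. A sketch of typical invocations, assuming standard Hydra override syntax (the task names and seeds are illustrative):

```sh
# Select a different env_cfg entry and override a top-level key.
python train.py env_cfg=cheetah_run seed=2

# Multirun: sweep tasks and seeds; jobs go through the submitit launcher.
python train.py -m env_cfg=walker_run,hopper_hop seed=1,2,3
```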
20 changes: 20 additions & 0 deletions cfgs/env_cfg/acrobot_swingup.yaml
```yaml
# task settings
task: acrobot_swingup
frame_stack: 3
action_repeat: 2
discount: 0.99
# train settings
num_train_frames: 3100000
num_seed_frames: 4000
# eval
eval_every_frames: 10000
num_eval_episodes: 10
# replay buffer
replay_buffer_size: 1000000
replay_buffer_num_workers: 4
nstep: 3
batch_size: 256
# agent
lr: 1e-4
feature_dim: 50
stddev_schedule: 'linear(1.0,0.1,500000)'
```

> **Contributor** (review comment): hm, there is a lot of overlap here; only a few arguments change from file to file. I wonder if there is a way to have only task-specific overrides and still preserve the default config. (See the sketch below.)
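One way to cut that duplication, sketched here as an assumption only (nothing in this PR implements it), is Hydra's config-extension pattern: a shared `base.yaml` inside the `env_cfg` group, with each task file listing just its deltas. Both file names below are hypothetical.

```yaml
# Hypothetical cfgs/env_cfg/base.yaml: the settings shared by every task.
frame_stack: 3
action_repeat: 2
discount: 0.99
num_train_frames: 3100000
num_seed_frames: 4000
eval_every_frames: 10000
num_eval_episodes: 10
replay_buffer_size: 1000000
replay_buffer_num_workers: 4
nstep: 3
batch_size: 256
lr: 1e-4
feature_dim: 50
stddev_schedule: 'linear(1.0,0.1,500000)'
```

```yaml
# Hypothetical cfgs/env_cfg/walker_run.yaml: inherit base, override the deltas.
defaults:
  - base
task: walker_run
nstep: 1
batch_size: 512
```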
20 changes: 20 additions & 0 deletions cfgs/env_cfg/cartpole_swingup_sparse.yaml
```yaml
# task settings
task: cartpole_swingup_sparse
frame_stack: 3
action_repeat: 2
discount: 0.99
# train settings
num_train_frames: 3100000
num_seed_frames: 4000
# eval
eval_every_frames: 10000
num_eval_episodes: 10
# replay buffer
replay_buffer_size: 1000000
replay_buffer_num_workers: 4
nstep: 3
batch_size: 256
# agent
lr: 1e-4
feature_dim: 50
stddev_schedule: 'linear(1.0,0.1,500000)'
```
20 changes: 20 additions & 0 deletions cfgs/env_cfg/cheetah_run.yaml
```yaml
# task settings
task: cheetah_run
frame_stack: 3
action_repeat: 2
discount: 0.99
# train settings
num_train_frames: 3100000
num_seed_frames: 4000
# eval
eval_every_frames: 10000
num_eval_episodes: 10
# replay buffer
replay_buffer_size: 1000000
replay_buffer_num_workers: 4
nstep: 3
batch_size: 256
# agent
lr: 1e-4
feature_dim: 50
stddev_schedule: 'linear(1.0,0.1,500000)'
```
20 changes: 20 additions & 0 deletions cfgs/env_cfg/finger_turn_easy.yaml
```yaml
# task settings
task: finger_turn_easy
frame_stack: 3
action_repeat: 2
discount: 0.99
# train settings
num_train_frames: 3100000
num_seed_frames: 4000
# eval
eval_every_frames: 10000
num_eval_episodes: 10
# replay buffer
replay_buffer_size: 1000000
replay_buffer_num_workers: 4
nstep: 3
batch_size: 256
# agent
lr: 1e-4
feature_dim: 50
stddev_schedule: 'linear(1.0,0.1,500000)'
```
20 changes: 20 additions & 0 deletions cfgs/env_cfg/finger_turn_hard.yaml
```yaml
# task settings
task: finger_turn_hard
frame_stack: 3
action_repeat: 2
discount: 0.99
# train settings
num_train_frames: 3100000
num_seed_frames: 4000
# eval
eval_every_frames: 10000
num_eval_episodes: 10
# replay buffer
replay_buffer_size: 1000000
replay_buffer_num_workers: 4
nstep: 3
batch_size: 256
# agent
lr: 1e-4
feature_dim: 50
stddev_schedule: 'linear(1.0,0.1,500000)'
```
20 changes: 20 additions & 0 deletions cfgs/env_cfg/hopper_hop.yaml
```yaml
# task settings
task: hopper_hop
frame_stack: 3
action_repeat: 2
discount: 0.99
# train settings
num_train_frames: 3100000
num_seed_frames: 4000
# eval
eval_every_frames: 10000
num_eval_episodes: 10
# replay buffer
replay_buffer_size: 1000000
replay_buffer_num_workers: 4
nstep: 3
batch_size: 256
# agent
lr: 1e-4
feature_dim: 50
stddev_schedule: 'linear(1.0,0.1,500000)'
```
20 changes: 20 additions & 0 deletions cfgs/env_cfg/quadruped_run.yaml
```yaml
# task settings
task: quadruped_run
frame_stack: 3
action_repeat: 2
discount: 0.99
# train settings
num_train_frames: 3100000
num_seed_frames: 4000
# eval
eval_every_frames: 10000
num_eval_episodes: 10
# replay buffer
replay_buffer_size: 100000
replay_buffer_num_workers: 4
nstep: 3
batch_size: 256
# agent
lr: 1e-4
feature_dim: 50
stddev_schedule: 'linear(1.0,0.1,500000)'
```
20 changes: 20 additions & 0 deletions cfgs/env_cfg/quadruped_walk.yaml
```yaml
# task settings
task: quadruped_walk
frame_stack: 3
action_repeat: 2
discount: 0.99
# train settings
num_train_frames: 3100000
num_seed_frames: 4000
# eval
eval_every_frames: 10000
num_eval_episodes: 10
# replay buffer
replay_buffer_size: 1000000
replay_buffer_num_workers: 4
nstep: 3
batch_size: 256
# agent
lr: 1e-4
feature_dim: 50
stddev_schedule: 'linear(1.0,0.1,500000)'
```
20 changes: 20 additions & 0 deletions cfgs/env_cfg/reach_duplo.yaml
```yaml
# task settings
task: reach_duplo
frame_stack: 3
action_repeat: 2
discount: 0.99
# train settings
num_train_frames: 3100000
num_seed_frames: 4000
# eval
eval_every_frames: 10000
num_eval_episodes: 10
# replay buffer
replay_buffer_size: 1000000
replay_buffer_num_workers: 4
nstep: 3
batch_size: 256
# agent
lr: 1e-4
feature_dim: 50
stddev_schedule: 'linear(1.0,0.1,500000)'
```
20 changes: 20 additions & 0 deletions cfgs/env_cfg/reacher_easy.yaml
```yaml
# task settings
task: reacher_easy
frame_stack: 3
action_repeat: 2
discount: 0.99
# train settings
num_train_frames: 3100000
num_seed_frames: 4000
# eval
eval_every_frames: 10000
num_eval_episodes: 10
# replay buffer
replay_buffer_size: 1000000
replay_buffer_num_workers: 4
nstep: 3
batch_size: 256
# agent
lr: 1e-4
feature_dim: 50
stddev_schedule: 'linear(1.0,0.1,500000)'
```
20 changes: 20 additions & 0 deletions cfgs/env_cfg/reacher_hard.yaml
```yaml
# task settings
task: reacher_hard
frame_stack: 3
action_repeat: 2
discount: 0.99
# train settings
num_train_frames: 3100000
num_seed_frames: 4000
# eval
eval_every_frames: 10000
num_eval_episodes: 10
# replay buffer
replay_buffer_size: 1000000
replay_buffer_num_workers: 4
nstep: 3
batch_size: 256
# agent
lr: 1e-4
feature_dim: 50
stddev_schedule: 'linear(1.0,0.1,500000)'
```
20 changes: 20 additions & 0 deletions cfgs/env_cfg/walker_run.yaml
```yaml
# task settings
task: walker_run
frame_stack: 3
action_repeat: 2
discount: 0.99
# train settings
num_train_frames: 3100000
num_seed_frames: 4000
# eval
eval_every_frames: 10000
num_eval_episodes: 10
# replay buffer
replay_buffer_size: 1000000
replay_buffer_num_workers: 4
nstep: 1
batch_size: 512
# agent
lr: 1e-4
feature_dim: 50
stddev_schedule: 'linear(1.0,0.1,500000)'
```
59 changes: 0 additions & 59 deletions config.yaml

This file was deleted; its settings now live in the modular cfgs/ files added above.