diff --git a/mighty/configs/environment/dacbench/function_approximation_benchmark.yaml b/mighty/configs/environment/dacbench/function_approximation_benchmark.yaml
deleted file mode 100644
index 36087074..00000000
--- a/mighty/configs/environment/dacbench/function_approximation_benchmark.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-# @package _global_
-
-num_steps: 1e5
-env: FunctionApproximationBenchmark
-env_kwargs: {benchmark: true, dimension: 1}
-env_wrappers: []
-num_envs: 16
\ No newline at end of file
diff --git a/mighty/configs/environment/pufferlib_ocean/memory.yaml b/mighty/configs/environment/pufferlib_ocean/memory.yaml
deleted file mode 100644
index 3c6ff1fd..00000000
--- a/mighty/configs/environment/pufferlib_ocean/memory.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-# @package _global_
-
-num_steps: 50_000
-env: pufferlib.ocean.memory
-env_kwargs: {}
-env_wrappers: []
-num_envs: 1
\ No newline at end of file
diff --git a/mighty/configs/environment/pufferlib_ocean/password.yaml b/mighty/configs/environment/pufferlib_ocean/password.yaml
index 7c36a6c6..2dafd95e 100644
--- a/mighty/configs/environment/pufferlib_ocean/password.yaml
+++ b/mighty/configs/environment/pufferlib_ocean/password.yaml
@@ -4,4 +4,4 @@ num_steps: 50_000
 env: pufferlib.ocean.password
 env_kwargs: {}
 env_wrappers: []
-num_envs: 1
\ No newline at end of file
+num_envs: 64
\ No newline at end of file
diff --git a/mighty/configs/environment/pufferlib_ocean/squared.yaml b/mighty/configs/environment/pufferlib_ocean/squared.yaml
index 10abb6cb..7da47bad 100644
--- a/mighty/configs/environment/pufferlib_ocean/squared.yaml
+++ b/mighty/configs/environment/pufferlib_ocean/squared.yaml
@@ -3,5 +3,5 @@
 num_steps: 50_000
 env: pufferlib.ocean.squared
 env_kwargs: {}
-env_wrappers: [mighty.utils.wrappers.FlattenVecObs]
-num_envs: 1
\ No newline at end of file
+env_wrappers: [mighty.mighty_utils.wrappers.FlattenVecObs]
+num_envs: 64
\ No newline at end of file
diff --git a/mighty/configs/environment/pufferlib_ocean/stochastic.yaml b/mighty/configs/environment/pufferlib_ocean/stochastic.yaml
index d032ccce..4bb8008d 100644
--- a/mighty/configs/environment/pufferlib_ocean/stochastic.yaml
+++ b/mighty/configs/environment/pufferlib_ocean/stochastic.yaml
@@ -4,4 +4,4 @@ num_steps: 50_000
 env: pufferlib.ocean.stochastic
 env_kwargs: {}
 env_wrappers: []
-num_envs: 1
\ No newline at end of file
+num_envs: 64
\ No newline at end of file
diff --git a/mighty/configs/exploration/ez_greedy.yaml b/mighty/configs/exploration/ez_greedy.yaml
index 2e61df6b..45df0c10 100644
--- a/mighty/configs/exploration/ez_greedy.yaml
+++ b/mighty/configs/exploration/ez_greedy.yaml
@@ -1,3 +1,4 @@
 # @package _global_
 algorithm_kwargs:
-  policy_class: mighty.mighty_exploration.EZGreedy
\ No newline at end of file
+  policy_class: mighty.mighty_exploration.EZGreedy
+  policy_kwargs: null
\ No newline at end of file
diff --git a/mighty/configs/ppo_smac.yaml b/mighty/configs/ppo_smac.yaml
deleted file mode 100644
index 40da7c69..00000000
--- a/mighty/configs/ppo_smac.yaml
+++ /dev/null
@@ -1,51 +0,0 @@
-defaults:
-  - _self_
-  - /cluster: local
-  - algorithm: ppo_mujoco
-  - environment: gymnasium/pendulum
-  - search_space: ppo_rs
-  - override hydra/job_logging: colorlog
-  - override hydra/hydra_logging: colorlog
-  - override hydra/help: mighty_help
-  - override hydra/sweeper: HyperSMAC # use Hypersweeper’s RandomSearch
-
-runner: standard
-debug: false
-seed: 0
-output_dir: sweep_smac
-wandb_project: null
-tensorboard_file: null
-experiment_name: ppo_smac
-
-budget: 200000 # Budget for the hyperparameter search
-
-algorithm_kwargs: {}
-
-# Training
-eval_every_n_steps: 1e4 # After how many steps to evaluate.
-n_episodes_eval: 10
-checkpoint: null # Path to load model checkpoint
-save_model_every_n_steps: 5e5
-
-hydra:
-  sweeper:
-    n_trials: 10
-    budget_variable: budget
-    sweeper_kwargs:
-      seeds: [0]
-      optimizer_kwargs:
-        smac_facade:
-          _target_: smac.facade.blackbox_facade.BlackBoxFacade
-          _partial_: true
-        logging_level: 20 # 10 DEBUG, 20 INFO
-        scenario:
-          seed: 42
-          n_trials: ${hydra.sweeper.n_trials}
-          deterministic: true
-          n_workers: 4
-          output_directory: ${hydra.sweep.dir}
-    search_space: ${search_space}
-  run:
-    dir: ${output_dir}/${experiment_name}_${seed}
-  sweep:
-    dir: ${output_dir}/${experiment_name}_${seed}
diff --git a/mighty/configs/sac_smac.yaml b/mighty/configs/sac_smac.yaml
deleted file mode 100644
index 613efd26..00000000
--- a/mighty/configs/sac_smac.yaml
+++ /dev/null
@@ -1,51 +0,0 @@
-defaults:
-  - _self_
-  - /cluster: local
-  - algorithm: sac_mujoco
-  - environment: gymnasium/pendulum
-  - search_space: sac_rs
-  - override hydra/job_logging: colorlog
-  - override hydra/hydra_logging: colorlog
-  - override hydra/help: mighty_help
-  - override hydra/sweeper: HyperSMAC # use Hypersweeper’s RandomSearch
-
-runner: standard
-debug: false
-seed: 0
-output_dir: sweep_smac
-wandb_project: null
-tensorboard_file: null
-experiment_name: ppo_smac
-
-budget: 200000 # Budget for the hyperparameter search
-
-algorithm_kwargs: {}
-
-# Training
-eval_every_n_steps: 1e4 # After how many steps to evaluate.
-n_episodes_eval: 10
-checkpoint: null # Path to load model checkpoint
-save_model_every_n_steps: 5e5
-
-hydra:
-  sweeper:
-    n_trials: 10
-    budget_variable: budget
-    sweeper_kwargs:
-      seeds: [0]
-      optimizer_kwargs:
-        smac_facade:
-          _target_: smac.facade.blackbox_facade.BlackBoxFacade
-          _partial_: true
-        logging_level: 20 # 10 DEBUG, 20 INFO
-        scenario:
-          seed: 42
-          n_trials: ${hydra.sweeper.n_trials}
-          deterministic: true
-          n_workers: 4
-          output_directory: ${hydra.sweep.dir}
-    search_space: ${search_space}
-  run:
-    dir: ${output_dir}/${experiment_name}_${seed}
-  sweep:
-    dir: ${output_dir}/${experiment_name}_${seed}
diff --git a/mighty/configs/search_space/dqn_rs.yaml b/mighty/configs/search_space/dqn_rs.yaml
deleted file mode 100644
index 2a910e72..00000000
--- a/mighty/configs/search_space/dqn_rs.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-hyperparameters:
-  algorithm_kwargs.learning_rate:
-    type: uniform_float
-    upper: 0.1
-    lower: 1.0e-06
-    default: 0.0003
-    log: true
-  algorithm_kwargs.gamma:
-    type: uniform_float
-    lower: 0.9
-    upper: 0.9999
-    log: false
-  algorithm_kwargs.batch_size:
-    type: categorical
-    choices: [32, 64, 128, 256]
\ No newline at end of file
diff --git a/mighty/configs/search_space/dqn_template.yaml b/mighty/configs/search_space/dqn_template.yaml
deleted file mode 100644
index 51d23767..00000000
--- a/mighty/configs/search_space/dqn_template.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-# @package hydra.sweeper.search_space
-hyperparameters:
-  algorithm_kwargs.n_units:
-    type: ordinal
-    sequence: [4,8,16,32,64,128,256,512]
-  algorithm_kwargs.soft_update_weight:
-    type: uniform_float
-    lower: 0
-    upper: 1
-    default_value: 1
-
diff --git a/mighty/configs/search_space/ppo_rs.yaml b/mighty/configs/search_space/ppo_rs.yaml
deleted file mode 100644
index 9ae950a9..00000000
--- a/mighty/configs/search_space/ppo_rs.yaml
+++ /dev/null
@@ -1,41 +0,0 @@
-# configs/search_space/ppo_rs.yaml
-hyperparameters:
-  # match the keys under algorithm_kwargs in your PPO config
-  algorithm_kwargs.learning_rate:
-    type: uniform_float
-    lower: 1e-5
-    upper: 1e-3
-    log: true
-  algorithm_kwargs.batch_size:
-    type: categorical
-    choices: [8192, 16384, 32768]
-  algorithm_kwargs.n_gradient_steps:
-    type: uniform_int
-    lower: 1
-    upper: 20
-    log: false
-  algorithm_kwargs.gamma:
-    type: uniform_float
-    lower: 0.9
-    upper: 0.9999
-    log: false
-  algorithm_kwargs.ppo_clip:
-    type: uniform_float
-    lower: 0.1
-    upper: 0.3
-    log: false
-  algorithm_kwargs.value_loss_coef:
-    type: uniform_float
-    lower: 0.1
-    upper: 1.0
-    log: false
-  algorithm_kwargs.entropy_coef:
-    type: uniform_float
-    lower: 0.0
-    upper: 0.1
-    log: false
-  algorithm_kwargs.max_grad_norm:
-    type: uniform_float
-    lower: 0.1
-    upper: 1.0
-    log: false
diff --git a/mighty/configs/search_space/sac_rs.yaml b/mighty/configs/search_space/sac_rs.yaml
deleted file mode 100644
index fdaa3d87..00000000
--- a/mighty/configs/search_space/sac_rs.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-hyperparameters:
-  algorithm_kwargs.learning_rate:
-    type: uniform_float
-    lower: 0.000001
-    upper: 0.01
-    log: true
-  algorithm_kwargs.batch_size:
-    type: categorical
-    choices: [32, 64, 128, 256]
\ No newline at end of file
diff --git a/mighty/configs/sweep_ppo_pbt.yaml b/mighty/configs/sweep_ppo_pbt.yaml
deleted file mode 100644
index 3aba687f..00000000
--- a/mighty/configs/sweep_ppo_pbt.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-defaults:
-  - _self_
-  - /cluster: local
-  - algorithm: ppo
-  - environment: gymnasium/pendulum
-  - search_space: ppo_rs
-  - override hydra/job_logging: colorlog
-  - override hydra/hydra_logging: colorlog
-  - override hydra/help: mighty_help
-  - override hydra/sweeper: HyperPBT # use Hypersweeper’s RandomSearch
-
-runner: standard
-debug: false
-seed: 0
-output_dir: sweep_pbt
-wandb_project: null
-tensorboard_file: null
-experiment_name: mighty_experiment
-
-algorithm_kwargs: {}
-
-# Training
-eval_every_n_steps: 1e4 # After how many steps to evaluate.
-n_episodes_eval: 10
-checkpoint: null # Path to load model checkpoint
-save_model_every_n_steps: 5e5
-
-hydra:
-  sweeper:
-    budget: 100000
-    budget_variable: 100000
-    loading_variable: load
-    saving_variable: save
-    sweeper_kwargs:
-      optimizer_kwargs:
-        population_size: 10
-        config_interval: 1e4
-      checkpoint_tf: true
-      load_tf: true
-    search_space: ${search_space}
-  run:
-    dir: ${output_dir}/${experiment_name}_${seed}
-  sweep:
-    dir: ${output_dir}/${experiment_name}_${seed}
\ No newline at end of file
diff --git a/mighty/configs/sweep_rs.yaml b/mighty/configs/sweep_rs.yaml
deleted file mode 100644
index 650c3545..00000000
--- a/mighty/configs/sweep_rs.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-defaults:
-  - _self_
-  - /cluster: local
-  - algorithm: ppo
-  - environment: gymnasium/pendulum
-  - search_space: ppo_rs
-  - override hydra/job_logging: colorlog
-  - override hydra/hydra_logging: colorlog
-  - override hydra/help: mighty_help
-  - override hydra/sweeper: HyperRS # use Hypersweeper’s RandomSearch
-
-runner: standard
-debug: false
-seed: 0
-output_dir: sweep_rs
-wandb_project: null
-tensorboard_file: null
-experiment_name: dqn_sweep
-
-algorithm_kwargs: {}
-
-# Training
-eval_every_n_steps: 1e4 # After how many steps to evaluate.
-n_episodes_eval: 10
-checkpoint: null # Path to load model checkpoint
-save_model_every_n_steps: 5e5
-
-hydra:
-  sweeper:
-    n_trials: 10
-    sweeper_kwargs:
-      max_parallelization: 0.8
-      max_budget: 100000
-    search_space: ${search_space}
-  run:
-    dir: ${output_dir}/${experiment_name}_${seed}
-  sweep:
-    dir: ${output_dir}/${experiment_name}_${seed}
\ No newline at end of file
diff --git a/mighty/mighty_agents/base_agent.py b/mighty/mighty_agents/base_agent.py
index 790a7c74..70cc1c89 100644
--- a/mighty/mighty_agents/base_agent.py
+++ b/mighty/mighty_agents/base_agent.py
@@ -13,7 +13,7 @@
 import pandas as pd
 import torch
 import wandb
-from omegaconf import DictConfig
+from omegaconf import DictConfig, OmegaConf
 from rich import print
 from rich.layout import Layout
 from rich.live import Live
@@ -323,6 +323,10 @@ def initialize_agent(self) -> None:
         if isinstance(self.buffer_class, type) and issubclass(
             self.buffer_class, PrioritizedReplay
         ):
+            if isinstance(self.buffer_kwargs, DictConfig):
+                self.buffer_kwargs = OmegaConf.to_container(
+                    self.buffer_kwargs, resolve=True
+                )
             # 1) Get observation-space shape
             try:
                 obs_space = self.env.single_observation_space
diff --git a/mighty/mighty_utils/wrappers.py b/mighty/mighty_utils/wrappers.py
index f8bc0747..70b93ed3 100644
--- a/mighty/mighty_utils/wrappers.py
+++ b/mighty/mighty_utils/wrappers.py
@@ -106,19 +106,21 @@ def __init__(self, env):
         """
         super().__init__(env)
 
-        self.n_actions = len(self.env.single_action_space.nvec)
-        self.single_action_space = gym.spaces.Discrete(
-            np.prod(self.env.single_action_space.nvec)
-        )
+        self.n_actions = len(self.env.action_space.nvec)
+
         self.action_mapper = {}
         for idx, prod_idx in zip(
-            range(np.prod(self.env.single_action_space.nvec)),
+            range(np.prod(self.env.action_space.nvec)),
             itertools.product(
-                *[np.arange(val) for val in self.env.single_action_space.nvec]
+                *[np.arange(val) for val in self.env.action_space.nvec]
             ),
         ):
             self.action_mapper[idx] = prod_idx
 
+        self.action_space = gym.spaces.Discrete(
+            int(np.prod(self.env.action_space.nvec))
+        )
+
     def step(self, action):
         """Maps discrete action value to array."""
         action = [self.action_mapper[a] for a in action]