diff --git a/mighty/configs/environment/dacbench/function_approximation_benchmark.yaml b/mighty/configs/environment/dacbench/function_approximation_benchmark.yaml
deleted file mode 100644
index 36087074..00000000
--- a/mighty/configs/environment/dacbench/function_approximation_benchmark.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-# @package _global_
-
-num_steps: 1e5
-env: FunctionApproximationBenchmark
-env_kwargs: {benchmark: true, dimension: 1}
-env_wrappers: []
-num_envs: 16
\ No newline at end of file
diff --git a/mighty/configs/environment/pufferlib_ocean/memory.yaml b/mighty/configs/environment/pufferlib_ocean/memory.yaml
deleted file mode 100644
index 3c6ff1fd..00000000
--- a/mighty/configs/environment/pufferlib_ocean/memory.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-# @package _global_
-
-num_steps: 50_000
-env: pufferlib.ocean.memory
-env_kwargs: {}
-env_wrappers: []
-num_envs: 1
\ No newline at end of file
diff --git a/mighty/configs/environment/pufferlib_ocean/password.yaml b/mighty/configs/environment/pufferlib_ocean/password.yaml
index 7c36a6c6..2dafd95e 100644
--- a/mighty/configs/environment/pufferlib_ocean/password.yaml
+++ b/mighty/configs/environment/pufferlib_ocean/password.yaml
@@ -4,4 +4,4 @@ num_steps: 50_000
 env: pufferlib.ocean.password
 env_kwargs: {}
 env_wrappers: []
-num_envs: 1
\ No newline at end of file
+num_envs: 64
\ No newline at end of file
diff --git a/mighty/configs/environment/pufferlib_ocean/squared.yaml b/mighty/configs/environment/pufferlib_ocean/squared.yaml
index 10abb6cb..7da47bad 100644
--- a/mighty/configs/environment/pufferlib_ocean/squared.yaml
+++ b/mighty/configs/environment/pufferlib_ocean/squared.yaml
@@ -3,5 +3,5 @@
 num_steps: 50_000
 env: pufferlib.ocean.squared
 env_kwargs: {}
-env_wrappers: [mighty.utils.wrappers.FlattenVecObs]
-num_envs: 1
\ No newline at end of file
+env_wrappers: [mighty.mighty_utils.wrappers.FlattenVecObs]
+num_envs: 64
\ No newline at end of file
diff --git a/mighty/configs/environment/pufferlib_ocean/stochastic.yaml b/mighty/configs/environment/pufferlib_ocean/stochastic.yaml
index d032ccce..4bb8008d 100644
--- a/mighty/configs/environment/pufferlib_ocean/stochastic.yaml
+++ b/mighty/configs/environment/pufferlib_ocean/stochastic.yaml
@@ -4,4 +4,4 @@ num_steps: 50_000
 env: pufferlib.ocean.stochastic
 env_kwargs: {}
 env_wrappers: []
-num_envs: 1
\ No newline at end of file
+num_envs: 64
\ No newline at end of file
diff --git a/mighty/configs/exploration/ez_greedy.yaml b/mighty/configs/exploration/ez_greedy.yaml
index 2e61df6b..45df0c10 100644
--- a/mighty/configs/exploration/ez_greedy.yaml
+++ b/mighty/configs/exploration/ez_greedy.yaml
@@ -1,3 +1,4 @@
 # @package _global_
 algorithm_kwargs:
-  policy_class: mighty.mighty_exploration.EZGreedy
\ No newline at end of file
+  policy_class: mighty.mighty_exploration.EZGreedy
+  policy_kwargs: null
\ No newline at end of file
diff --git a/mighty/configs/ppo_smac.yaml b/mighty/configs/ppo_smac.yaml
deleted file mode 100644
index 40da7c69..00000000
--- a/mighty/configs/ppo_smac.yaml
+++ /dev/null
@@ -1,51 +0,0 @@
-defaults:
-  - _self_
-  - /cluster: local
-  - algorithm: ppo_mujoco
-  - environment: gymnasium/pendulum
-  - search_space: ppo_rs
-  - override hydra/job_logging: colorlog
-  - override hydra/hydra_logging: colorlog
-  - override hydra/help: mighty_help
-  - override hydra/sweeper: HyperSMAC # use Hypersweeper’s RandomSearch
-
-runner: standard
-debug: false
-seed: 0
-output_dir: sweep_smac
-wandb_project: null
-tensorboard_file: null
-experiment_name: ppo_smac
-
-budget: 200000 # Budget for the hyperparameter search
-
-algorithm_kwargs: {}
-
-# Training
-eval_every_n_steps: 1e4 # After how many steps to evaluate.
-n_episodes_eval: 10
-checkpoint: null # Path to load model checkpoint
-save_model_every_n_steps: 5e5
-
-hydra:
-  sweeper:
-    n_trials: 10
-    budget_variable: budget
-    sweeper_kwargs:
-      seeds: [0]
-      optimizer_kwargs:
-        smac_facade:
-          _target_: smac.facade.blackbox_facade.BlackBoxFacade
-          _partial_: true
-        logging_level: 20 # 10 DEBUG, 20 INFO
-        scenario:
-          seed: 42
-          n_trials: ${hydra.sweeper.n_trials}
-          deterministic: true
-          n_workers: 4
-          output_directory: ${hydra.sweep.dir}
-    search_space: ${search_space}
-  run:
-    dir: ${output_dir}/${experiment_name}_${seed}
-  sweep:
-    dir: ${output_dir}/${experiment_name}_${seed}
diff --git a/mighty/configs/sac_smac.yaml b/mighty/configs/sac_smac.yaml
deleted file mode 100644
index 613efd26..00000000
--- a/mighty/configs/sac_smac.yaml
+++ /dev/null
@@ -1,51 +0,0 @@
-defaults:
-  - _self_
-  - /cluster: local
-  - algorithm: sac_mujoco
-  - environment: gymnasium/pendulum
-  - search_space: sac_rs
-  - override hydra/job_logging: colorlog
-  - override hydra/hydra_logging: colorlog
-  - override hydra/help: mighty_help
-  - override hydra/sweeper: HyperSMAC # use Hypersweeper’s RandomSearch
-
-runner: standard
-debug: false
-seed: 0
-output_dir: sweep_smac
-wandb_project: null
-tensorboard_file: null
-experiment_name: ppo_smac
-
-budget: 200000 # Budget for the hyperparameter search
-
-algorithm_kwargs: {}
-
-# Training
-eval_every_n_steps: 1e4 # After how many steps to evaluate.
-n_episodes_eval: 10
-checkpoint: null # Path to load model checkpoint
-save_model_every_n_steps: 5e5
-
-hydra:
-  sweeper:
-    n_trials: 10
-    budget_variable: budget
-    sweeper_kwargs:
-      seeds: [0]
-      optimizer_kwargs:
-        smac_facade:
-          _target_: smac.facade.blackbox_facade.BlackBoxFacade
-          _partial_: true
-        logging_level: 20 # 10 DEBUG, 20 INFO
-        scenario:
-          seed: 42
-          n_trials: ${hydra.sweeper.n_trials}
-          deterministic: true
-          n_workers: 4
-          output_directory: ${hydra.sweep.dir}
-    search_space: ${search_space}
-  run:
-    dir: ${output_dir}/${experiment_name}_${seed}
-  sweep:
-    dir: ${output_dir}/${experiment_name}_${seed}
diff --git a/mighty/configs/search_space/dqn_rs.yaml b/mighty/configs/search_space/dqn_rs.yaml
deleted file mode 100644
index 2a910e72..00000000
--- a/mighty/configs/search_space/dqn_rs.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-hyperparameters:
-  algorithm_kwargs.learning_rate:
-    type: uniform_float
-    upper: 0.1
-    lower: 1.0e-06
-    default: 0.0003
-    log: true
-  algorithm_kwargs.gamma:
-    type: uniform_float
-    lower: 0.9
-    upper: 0.9999
-    log: false
-  algorithm_kwargs.batch_size:
-    type: categorical
-    choices: [32, 64, 128, 256]
\ No newline at end of file
diff --git a/mighty/configs/search_space/dqn_template.yaml b/mighty/configs/search_space/dqn_template.yaml
deleted file mode 100644
index 51d23767..00000000
--- a/mighty/configs/search_space/dqn_template.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-# @package hydra.sweeper.search_space
-hyperparameters:
-  algorithm_kwargs.n_units:
-    type: ordinal
-    sequence: [4,8,16,32,64,128,256,512]
-  algorithm_kwargs.soft_update_weight:
-    type: uniform_float
-    lower: 0
-    upper: 1
-    default_value: 1
-
diff --git a/mighty/configs/search_space/ppo_rs.yaml b/mighty/configs/search_space/ppo_rs.yaml
deleted file mode 100644
index 9ae950a9..00000000
--- a/mighty/configs/search_space/ppo_rs.yaml
+++ /dev/null
@@ -1,41 +0,0 @@
-# configs/search_space/ppo_rs.yaml
-hyperparameters:
-  # match the keys under algorithm_kwargs in your PPO config
-  algorithm_kwargs.learning_rate:
-    type: uniform_float
-    lower: 1e-5
-    upper: 1e-3
-    log: true
-  algorithm_kwargs.batch_size:
-    type: categorical
-    choices: [8192, 16384, 32768]
-  algorithm_kwargs.n_gradient_steps:
-    type: uniform_int
-    lower: 1
-    upper: 20
-    log: false
-  algorithm_kwargs.gamma:
-    type: uniform_float
-    lower: 0.9
-    upper: 0.9999
-    log: false
-  algorithm_kwargs.ppo_clip:
-    type: uniform_float
-    lower: 0.1
-    upper: 0.3
-    log: false
-  algorithm_kwargs.value_loss_coef:
-    type: uniform_float
-    lower: 0.1
-    upper: 1.0
-    log: false
-  algorithm_kwargs.entropy_coef:
-    type: uniform_float
-    lower: 0.0
-    upper: 0.1
-    log: false
-  algorithm_kwargs.max_grad_norm:
-    type: uniform_float
-    lower: 0.1
-    upper: 1.0
-    log: false
diff --git a/mighty/configs/search_space/sac_rs.yaml b/mighty/configs/search_space/sac_rs.yaml
deleted file mode 100644
index fdaa3d87..00000000
--- a/mighty/configs/search_space/sac_rs.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-hyperparameters:
-  algorithm_kwargs.learning_rate:
-    type: uniform_float
-    lower: 0.000001
-    upper: 0.01
-    log: true
-  algorithm_kwargs.batch_size:
-    type: categorical
-    choices: [32, 64, 128, 256]
\ No newline at end of file
diff --git a/mighty/configs/sweep_ppo_pbt.yaml b/mighty/configs/sweep_ppo_pbt.yaml
deleted file mode 100644
index 3aba687f..00000000
--- a/mighty/configs/sweep_ppo_pbt.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-defaults:
-  - _self_
-  - /cluster: local
-  - algorithm: ppo
-  - environment: gymnasium/pendulum
-  - search_space: ppo_rs
-  - override hydra/job_logging: colorlog
-  - override hydra/hydra_logging: colorlog
-  - override hydra/help: mighty_help
-  - override hydra/sweeper: HyperPBT # use Hypersweeper’s RandomSearch
-
-runner: standard
-debug: false
-seed: 0
-output_dir: sweep_pbt
-wandb_project: null
-tensorboard_file: null
-experiment_name: mighty_experiment
-
-algorithm_kwargs: {}
-
-# Training
-eval_every_n_steps: 1e4 # After how many steps to evaluate.
-n_episodes_eval: 10
-checkpoint: null # Path to load model checkpoint
-save_model_every_n_steps: 5e5
-
-hydra:
-  sweeper:
-    budget: 100000
-    budget_variable: 100000
-    loading_variable: load
-    saving_variable: save
-    sweeper_kwargs:
-      optimizer_kwargs:
-        population_size: 10
-        config_interval: 1e4
-      checkpoint_tf: true
-      load_tf: true
-    search_space: ${search_space}
-  run:
-    dir: ${output_dir}/${experiment_name}_${seed}
-  sweep:
-    dir: ${output_dir}/${experiment_name}_${seed}
\ No newline at end of file
diff --git a/mighty/configs/sweep_rs.yaml b/mighty/configs/sweep_rs.yaml
deleted file mode 100644
index 650c3545..00000000
--- a/mighty/configs/sweep_rs.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-defaults:
-  - _self_
-  - /cluster: local
-  - algorithm: ppo
-  - environment: gymnasium/pendulum
-  - search_space: ppo_rs
-  - override hydra/job_logging: colorlog
-  - override hydra/hydra_logging: colorlog
-  - override hydra/help: mighty_help
-  - override hydra/sweeper: HyperRS # use Hypersweeper’s RandomSearch
-
-runner: standard
-debug: false
-seed: 0
-output_dir: sweep_rs
-wandb_project: null
-tensorboard_file: null
-experiment_name: dqn_sweep
-
-algorithm_kwargs: {}
-
-# Training
-eval_every_n_steps: 1e4 # After how many steps to evaluate.
-n_episodes_eval: 10
-checkpoint: null # Path to load model checkpoint
-save_model_every_n_steps: 5e5
-
-hydra:
-  sweeper:
-    n_trials: 10
-    sweeper_kwargs:
-      max_parallelization: 0.8
-      max_budget: 100000
-    search_space: ${search_space}
-  run:
-    dir: ${output_dir}/${experiment_name}_${seed}
-  sweep:
-    dir: ${output_dir}/${experiment_name}_${seed}
\ No newline at end of file
diff --git a/mighty/mighty_agents/base_agent.py b/mighty/mighty_agents/base_agent.py
index 790a7c74..70cc1c89 100644
--- a/mighty/mighty_agents/base_agent.py
+++ b/mighty/mighty_agents/base_agent.py
@@ -13,7 +13,7 @@
 import pandas as pd
 import torch
 import wandb
-from omegaconf import DictConfig
+from omegaconf import DictConfig, OmegaConf
 from rich import print
 from rich.layout import Layout
 from rich.live import Live
@@ -323,6 +323,10 @@ def initialize_agent(self) -> None:
         if isinstance(self.buffer_class, type) and issubclass(
             self.buffer_class, PrioritizedReplay
         ):
+            if isinstance(self.buffer_kwargs, DictConfig):
+                self.buffer_kwargs = OmegaConf.to_container(
+                    self.buffer_kwargs, resolve=True
+                )
             # 1) Get observation-space shape
             try:
                 obs_space = self.env.single_observation_space
diff --git a/mighty/mighty_utils/wrappers.py b/mighty/mighty_utils/wrappers.py
index f8bc0747..70b93ed3 100644
--- a/mighty/mighty_utils/wrappers.py
+++ b/mighty/mighty_utils/wrappers.py
@@ -106,19 +106,21 @@ def __init__(self, env):
         """
         super().__init__(env)
 
-        self.n_actions = len(self.env.single_action_space.nvec)
-        self.single_action_space = gym.spaces.Discrete(
-            np.prod(self.env.single_action_space.nvec)
-        )
+        self.n_actions = len(self.env.action_space.nvec)
+
         self.action_mapper = {}
         for idx, prod_idx in zip(
-            range(np.prod(self.env.single_action_space.nvec)),
+            range(np.prod(self.env.action_space.nvec)),
             itertools.product(
-                *[np.arange(val) for val in self.env.single_action_space.nvec]
+                *[np.arange(val) for val in self.env.action_space.nvec]
             ),
         ):
             self.action_mapper[idx] = prod_idx
 
+        self.action_space = gym.spaces.Discrete(
+            int(np.prod(self.env.action_space.nvec))
+        )
+
     def step(self, action):
         """Maps discrete action value to array."""
         action = [self.action_mapper[a] for a in action]