RL-1470: Fix tests

(Issue RL-1470 - Fix seeding in AC runners)
enlite-ai · Nov 30, 2023 · c62708a
1 parent 8e7fbf7
commit c62708a
Show file tree

Hide file tree

Showing 5 changed files with 7 additions and 6 deletions.
diff --git a/docs/source/best_practices_and_tutorials/code_snippets/plain_python_training_low_level.py b/docs/source/best_practices_and_tutorials/code_snippets/plain_python_training_low_level.py
@@ -174,7 +174,8 @@ def train(n_epochs):
             n_episodes=1,
             model_selection=None,
             deterministic=True
-        )
+        ),
+        n_training_seeds=100
     )
 
     # Distributed Environments

diff --git a/maze/conf/algorithm/sac.yaml b/maze/conf/algorithm/sac.yaml
@@ -70,9 +70,6 @@ epoch_length: 100
 # Number of steps used for early stopping
 patience: 50
 
-# Number of seeds to be generated and used to seed the environment except when passing a list of explicit seeds"""
-n_training_seeds: 100
-
 # Rollout evaluator (used for best model selection)
 rollout_evaluator:
   _target_: maze.train.trainers.common.evaluators.rollout_evaluator.RolloutEvaluator

diff --git a/maze/test/train/impala/test_impala_multi_step.py b/maze/test/train/impala/test_impala_multi_step.py
@@ -56,7 +56,8 @@ def _algorithm_config():
         actors_batch_size=5,
         critic_burn_in_epochs=0,
         rollout_evaluator=RolloutEvaluator(eval_env=eval_env, n_episodes=1,
-                                           model_selection=None, deterministic=True)
+                                           model_selection=None, deterministic=True),
+        n_training_seeds=10,
     )
 
 

diff --git a/tutorials/examples/a2c_gym_cartpole.py b/tutorials/examples/a2c_gym_cartpole.py
@@ -54,7 +54,8 @@ def main(n_epochs: int) -> None:
         entropy_coef=0.0,
         max_grad_norm=0.0,
         device="cpu",
-        rollout_evaluator=RolloutEvaluator(eval_env=eval_env, n_episodes=1, model_selection=None, deterministic=True)
+        rollout_evaluator=RolloutEvaluator(eval_env=eval_env, n_episodes=1, model_selection=None, deterministic=True),
+        n_training_seeds=100,
     )
 
     # initialize actor critic model

diff --git a/tutorials/examples/a2c_gym_cartpole_default_nets.py b/tutorials/examples/a2c_gym_cartpole_default_nets.py
@@ -94,6 +94,7 @@ def main(n_epochs: int, rnn_steps: int) -> None:
         entropy_coef=0.0,
         max_grad_norm=0.0,
         device="cpu",
+        n_training_seeds=100,
         rollout_evaluator=RolloutEvaluator(eval_env=eval_env, n_episodes=1,
                                            model_selection=None, deterministic=True)
     )