diff --git a/ml-agents/mlagents/trainers/components/bc/model.py b/ml-agents/mlagents/trainers/components/bc/model.py
index c68fd5742f..7f57a1ec7e 100644
--- a/ml-agents/mlagents/trainers/components/bc/model.py
+++ b/ml-agents/mlagents/trainers/components/bc/model.py
@@ -74,7 +74,7 @@ def create_loss(self, learning_rate: float, anneal_steps: int) -> None:
                 power=1.0,
             )
         else:
-            self.annealed_learning_rate = learning_rate
+            self.annealed_learning_rate = tf.Variable(learning_rate)
 
         optimizer = tf.train.AdamOptimizer(learning_rate=self.annealed_learning_rate)
         self.update_batch = optimizer.minimize(self.loss)
diff --git a/ml-agents/mlagents/trainers/tests/test_bcmodule.py b/ml-agents/mlagents/trainers/tests/test_bcmodule.py
index 24f7b7efb7..3a26fd9f56 100644
--- a/ml-agents/mlagents/trainers/tests/test_bcmodule.py
+++ b/ml-agents/mlagents/trainers/tests/test_bcmodule.py
@@ -138,6 +138,26 @@ def test_bcmodule_update(mock_env, trainer_config):
     env.close()
 
 
+# Test with constant pretraining learning rate
+@pytest.mark.parametrize(
+    "trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
+)
+@mock.patch("mlagents.envs.environment.UnityEnvironment")
+def test_bcmodule_constant_lr_update(mock_env, trainer_config):
+    mock_brain = mb.create_mock_3dball_brain()
+    trainer_config["pretraining"]["steps"] = 0
+    env, policy = create_policy_with_bc_mock(
+        mock_env, mock_brain, trainer_config, False, "test.demo"
+    )
+    stats = policy.bc_module.update()
+    for _, item in stats.items():
+        assert isinstance(item, np.float32)
+    old_learning_rate = policy.bc_module.current_lr
+
+    stats = policy.bc_module.update()
+    assert old_learning_rate == policy.bc_module.current_lr
+
+
 # Test with RNN
 @pytest.mark.parametrize(
     "trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
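
Note on the model.py change: the constant learning rate is presumably wrapped in tf.Variable so that the BC module can read the current learning rate back out of the graph after each update (as the new test's policy.bc_module.current_lr check suggests); sess.run() can fetch a Variable or tensor but not a plain Python float. The following standalone sketch (not part of the PR, names are illustrative) shows the distinction, assuming TensorFlow 1.x-style graph mode via tf.compat.v1 to match the tf.train.AdamOptimizer usage in the diff.

# Standalone sketch, not part of the PR.
# Assumes TF 1.x-style graph execution via tf.compat.v1.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

learning_rate = 3e-4  # hypothetical constant pretraining learning rate

# A plain Python float cannot be fetched from the session:
#   sess.run(learning_rate)  # TypeError: Fetch argument 0.0003 has invalid type ...

# Wrapped in a Variable it is fetchable, just like the tensor returned by
# tf.train.polynomial_decay in the anneal_steps > 0 branch, so the same
# update/fetch code path works for both the annealed and constant cases.
annealed_learning_rate = tf.Variable(learning_rate)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(annealed_learning_rate))  # 0.0003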