diff --git a/docs/Migrating.md b/docs/Migrating.md index 593d29f5a9..47330981c2 100644 --- a/docs/Migrating.md +++ b/docs/Migrating.md @@ -14,7 +14,21 @@ double-check that the versions are in the same. The versions can be found in # Migrating -## Migrating from Release 3 to latest +## Migrating from Release 7 to latest + +### Important changes +- Some trainer files were moved. If you were using the `TrainerFactory` class, it was moved to +the `trainers/trainer` folder. +- The `components` folder containing `bc` and `reward_signals` code was moved to the `trainers/tf` +folder + +### Steps to Migrate +- Replace calls to `from mlagents.trainers.trainer_util import TrainerFactory` to `from mlagents.trainers.trainer import TrainerFactory` +- Replace calls to `from mlagents.trainers.trainer_util import handle_existing_directories` to `from mlagents.trainers.directory_utils import validate_existing_directories` +- Replace `mlagents.trainers.components` with `mlagents.trainers.tf.components` in your import statements. + + +## Migrating from Release 3 to Release 7 ### Important changes - The Parameter Randomization feature has been merged with the Curriculum feature. It is now possible to specify a sampler diff --git a/ml-agents/mlagents/trainers/directory_utils.py b/ml-agents/mlagents/trainers/directory_utils.py new file mode 100644 index 0000000000..0e728ddc9d --- /dev/null +++ b/ml-agents/mlagents/trainers/directory_utils.py @@ -0,0 +1,42 @@ +import os +from mlagents.trainers.exception import UnityTrainerException + + +def validate_existing_directories( + output_path: str, resume: bool, force: bool, init_path: str = None +) -> None: + """ + Validates that if the run_id model exists, we do not overwrite it unless --force is specified. + Throws an exception if resume isn't specified and run_id exists. Throws an exception + if --resume is specified and run-id was not found. + :param model_path: The model path specified. + :param summary_path: The summary path to be used. + :param resume: Whether or not the --resume flag was passed. + :param force: Whether or not the --force flag was passed. + """ + + output_path_exists = os.path.isdir(output_path) + + if output_path_exists: + if not resume and not force: + raise UnityTrainerException( + "Previous data from this run ID was found. " + "Either specify a new run ID, use --resume to resume this run, " + "or use the --force parameter to overwrite existing data." + ) + else: + if resume: + raise UnityTrainerException( + "Previous data from this run ID was not found. " + "Train a new run by removing the --resume flag." + ) + + # Verify init path if specified. + if init_path is not None: + if not os.path.isdir(init_path): + raise UnityTrainerException( + "Could not initialize from {}. 
" + "Make sure models have already been saved with that run ID.".format( + init_path + ) + ) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 88edfc49cc..8a5e37a796 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -12,7 +12,8 @@ from mlagents import tf_utils from mlagents.trainers.trainer_controller import TrainerController from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager -from mlagents.trainers.trainer_util import TrainerFactory, handle_existing_directories +from mlagents.trainers.trainer import TrainerFactory +from mlagents.trainers.directory_utils import validate_existing_directories from mlagents.trainers.stats import ( TensorboardWriter, StatsReporter, @@ -75,7 +76,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: run_logs_dir = os.path.join(write_path, "run_logs") port: Optional[int] = env_settings.base_port # Check if directory exists - handle_existing_directories( + validate_existing_directories( write_path, checkpoint_settings.resume, checkpoint_settings.force, diff --git a/ml-agents/mlagents/trainers/optimizer/tf_optimizer.py b/ml-agents/mlagents/trainers/optimizer/tf_optimizer.py index 8a01384515..eb0533ba19 100644 --- a/ml-agents/mlagents/trainers/optimizer/tf_optimizer.py +++ b/ml-agents/mlagents/trainers/optimizer/tf_optimizer.py @@ -6,11 +6,11 @@ from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.optimizer import Optimizer from mlagents.trainers.trajectory import SplitObservations -from mlagents.trainers.components.reward_signals.reward_signal_factory import ( +from mlagents.trainers.tf.components.reward_signals.reward_signal_factory import ( create_reward_signal, ) from mlagents.trainers.settings import TrainerSettings, RewardSignalType -from mlagents.trainers.components.bc.module import BCModule +from mlagents.trainers.tf.components.bc.module import BCModule class TFOptimizer(Optimizer): # pylint: disable=W0223 diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index 0ba6ee9de6..c3777fc55a 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -16,7 +16,7 @@ from mlagents.trainers.trajectory import Trajectory from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers from mlagents.trainers.settings import TrainerSettings, PPOSettings, FrameworkType -from mlagents.trainers.components.reward_signals import RewardSignal +from mlagents.trainers.tf.components.reward_signals import RewardSignal from mlagents import torch_utils if torch_utils.is_available(): diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py index 220f6205d6..055d0a98b1 100644 --- a/ml-agents/mlagents/trainers/sac/trainer.py +++ b/ml-agents/mlagents/trainers/sac/trainer.py @@ -19,7 +19,7 @@ from mlagents.trainers.trajectory import Trajectory, SplitObservations from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers from mlagents.trainers.settings import TrainerSettings, SACSettings, FrameworkType -from mlagents.trainers.components.reward_signals import RewardSignal +from mlagents.trainers.tf.components.reward_signals import RewardSignal from mlagents import torch_utils if torch_utils.is_available(): diff --git a/ml-agents/mlagents/trainers/tests/check_env_trains.py b/ml-agents/mlagents/trainers/tests/check_env_trains.py index 48f8c3e279..2fadd3be31 100644 --- 
a/ml-agents/mlagents/trainers/tests/check_env_trains.py +++ b/ml-agents/mlagents/trainers/tests/check_env_trains.py @@ -3,7 +3,7 @@ import numpy as np from typing import Dict from mlagents.trainers.trainer_controller import TrainerController -from mlagents.trainers.trainer_util import TrainerFactory +from mlagents.trainers.trainer import TrainerFactory from mlagents.trainers.simple_env_manager import SimpleEnvManager from mlagents.trainers.stats import StatsReporter, StatsWriter, StatsSummary from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager diff --git a/ml-agents/mlagents/trainers/tests/tensorflow/test_bcmodule.py b/ml-agents/mlagents/trainers/tests/tensorflow/test_bcmodule.py index c20e8bda5c..1eceefd7d9 100644 --- a/ml-agents/mlagents/trainers/tests/tensorflow/test_bcmodule.py +++ b/ml-agents/mlagents/trainers/tests/tensorflow/test_bcmodule.py @@ -4,7 +4,7 @@ import numpy as np from mlagents.trainers.policy.tf_policy import TFPolicy -from mlagents.trainers.components.bc.module import BCModule +from mlagents.trainers.tf.components.bc.module import BCModule from mlagents.trainers.settings import ( TrainerSettings, BehavioralCloningSettings, diff --git a/ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py b/ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py index 4f11c34903..6eb1f8d3b0 100644 --- a/ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py +++ b/ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py @@ -11,7 +11,7 @@ RecordEnvironment, ) from mlagents.trainers.trainer_controller import TrainerController -from mlagents.trainers.trainer_util import TrainerFactory +from mlagents.trainers.trainer import TrainerFactory from mlagents.trainers.simple_env_manager import SimpleEnvManager from mlagents.trainers.demo_loader import write_demo from mlagents.trainers.stats import StatsReporter, StatsWriter, StatsSummary diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index 0df04893eb..81e1e5847d 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -47,7 +47,7 @@ def basic_options(extra_args=None): @patch("mlagents.trainers.learn.write_timing_tree") @patch("mlagents.trainers.learn.write_run_options") -@patch("mlagents.trainers.learn.handle_existing_directories") +@patch("mlagents.trainers.learn.validate_existing_directories") @patch("mlagents.trainers.learn.TrainerFactory") @patch("mlagents.trainers.learn.SubprocessEnvManager") @patch("mlagents.trainers.learn.create_environment_factory") diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py index 8461bd5c31..1ff0fe7a8c 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py @@ -49,7 +49,6 @@ def trainer_controller_with_start_learning_mocks(basic_trainer_controller): trainer_mock.write_tensorboard_text = MagicMock() tc = basic_trainer_controller - tc.initialize_trainers = MagicMock() tc.trainers = {"testbrain": trainer_mock} tc.advance = MagicMock() tc.trainers["testbrain"].get_step = 0 diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_util.py b/ml-agents/mlagents/trainers/tests/test_trainer_util.py index 8398105ea1..62c7be9107 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_util.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_util.py @@ -3,13 
+3,14 @@ import os from unittest.mock import patch -from mlagents.trainers import trainer_util +from mlagents.trainers.trainer import TrainerFactory from mlagents.trainers.cli_utils import load_config, _load_config from mlagents.trainers.ppo.trainer import PPOTrainer from mlagents.trainers.exception import TrainerConfigError, UnityTrainerException from mlagents.trainers.settings import RunOptions from mlagents.trainers.tests.dummy_config import ppo_dummy_config from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager +from mlagents.trainers.directory_utils import validate_existing_directories @pytest.fixture @@ -49,7 +50,7 @@ def mock_constructor( assert artifact_path == os.path.join(output_path, brain_name) with patch.object(PPOTrainer, "__init__", mock_constructor): - trainer_factory = trainer_util.TrainerFactory( + trainer_factory = TrainerFactory( trainer_config=base_config, output_path=output_path, train_model=train_model, @@ -71,7 +72,7 @@ def test_handles_no_config_provided(): brain_name = "testbrain" no_default_config = RunOptions().behaviors - trainer_factory = trainer_util.TrainerFactory( + trainer_factory = TrainerFactory( trainer_config=no_default_config, output_path="output_path", train_model=True, @@ -112,25 +113,25 @@ def test_load_config_invalid_yaml(): def test_existing_directories(tmp_path): output_path = os.path.join(tmp_path, "runid") # Test fresh new unused path - should do nothing. - trainer_util.handle_existing_directories(output_path, False, False) + validate_existing_directories(output_path, False, False) # Test resume with fresh path - should throw an exception. with pytest.raises(UnityTrainerException): - trainer_util.handle_existing_directories(output_path, True, False) + validate_existing_directories(output_path, True, False) # make a directory os.mkdir(output_path) # Test try to train w.o. force, should complain with pytest.raises(UnityTrainerException): - trainer_util.handle_existing_directories(output_path, False, False) + validate_existing_directories(output_path, False, False) # Test try to train w/ resume - should work - trainer_util.handle_existing_directories(output_path, True, False) + validate_existing_directories(output_path, True, False) # Test try to train w/ force - should work - trainer_util.handle_existing_directories(output_path, False, True) + validate_existing_directories(output_path, False, True) # Test initialize option init_path = os.path.join(tmp_path, "runid2") with pytest.raises(UnityTrainerException): - trainer_util.handle_existing_directories(output_path, False, True, init_path) + validate_existing_directories(output_path, False, True, init_path) os.mkdir(init_path) # Should pass since the directory exists now. 
- trainer_util.handle_existing_directories(output_path, False, True, init_path) + validate_existing_directories(output_path, False, True, init_path) diff --git a/ml-agents/mlagents/trainers/components/__init__.py b/ml-agents/mlagents/trainers/tf/components/__init__.py similarity index 100% rename from ml-agents/mlagents/trainers/components/__init__.py rename to ml-agents/mlagents/trainers/tf/components/__init__.py diff --git a/ml-agents/mlagents/trainers/components/bc/__init__.py b/ml-agents/mlagents/trainers/tf/components/bc/__init__.py similarity index 100% rename from ml-agents/mlagents/trainers/components/bc/__init__.py rename to ml-agents/mlagents/trainers/tf/components/bc/__init__.py diff --git a/ml-agents/mlagents/trainers/components/bc/model.py b/ml-agents/mlagents/trainers/tf/components/bc/model.py similarity index 100% rename from ml-agents/mlagents/trainers/components/bc/model.py rename to ml-agents/mlagents/trainers/tf/components/bc/model.py diff --git a/ml-agents/mlagents/trainers/components/bc/module.py b/ml-agents/mlagents/trainers/tf/components/bc/module.py similarity index 100% rename from ml-agents/mlagents/trainers/components/bc/module.py rename to ml-agents/mlagents/trainers/tf/components/bc/module.py diff --git a/ml-agents/mlagents/trainers/components/reward_signals/__init__.py b/ml-agents/mlagents/trainers/tf/components/reward_signals/__init__.py similarity index 100% rename from ml-agents/mlagents/trainers/components/reward_signals/__init__.py rename to ml-agents/mlagents/trainers/tf/components/reward_signals/__init__.py diff --git a/ml-agents/mlagents/trainers/components/reward_signals/curiosity/__init__.py b/ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/__init__.py similarity index 100% rename from ml-agents/mlagents/trainers/components/reward_signals/curiosity/__init__.py rename to ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/__init__.py diff --git a/ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py b/ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/model.py similarity index 100% rename from ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py rename to ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/model.py diff --git a/ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py b/ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/signal.py similarity index 95% rename from ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py rename to ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/signal.py index a18c48a393..63b5453ba1 100644 --- a/ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py +++ b/ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/signal.py @@ -2,8 +2,13 @@ import numpy as np from mlagents.tf_utils import tf -from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult -from mlagents.trainers.components.reward_signals.curiosity.model import CuriosityModel +from mlagents.trainers.tf.components.reward_signals import ( + RewardSignal, + RewardSignalResult, +) +from mlagents.trainers.tf.components.reward_signals.curiosity.model import ( + CuriosityModel, +) from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.buffer import AgentBuffer from mlagents.trainers.settings import CuriositySettings diff --git a/ml-agents/mlagents/trainers/components/reward_signals/extrinsic/__init__.py 
b/ml-agents/mlagents/trainers/tf/components/reward_signals/extrinsic/__init__.py similarity index 100% rename from ml-agents/mlagents/trainers/components/reward_signals/extrinsic/__init__.py rename to ml-agents/mlagents/trainers/tf/components/reward_signals/extrinsic/__init__.py diff --git a/ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py b/ml-agents/mlagents/trainers/tf/components/reward_signals/extrinsic/signal.py similarity index 76% rename from ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py rename to ml-agents/mlagents/trainers/tf/components/reward_signals/extrinsic/signal.py index cbe2c1d4f5..f975c36919 100644 --- a/ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py +++ b/ml-agents/mlagents/trainers/tf/components/reward_signals/extrinsic/signal.py @@ -1,6 +1,9 @@ import numpy as np -from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult +from mlagents.trainers.tf.components.reward_signals import ( + RewardSignal, + RewardSignalResult, +) from mlagents.trainers.buffer import AgentBuffer diff --git a/ml-agents/mlagents/trainers/components/reward_signals/gail/__init__.py b/ml-agents/mlagents/trainers/tf/components/reward_signals/gail/__init__.py similarity index 100% rename from ml-agents/mlagents/trainers/components/reward_signals/gail/__init__.py rename to ml-agents/mlagents/trainers/tf/components/reward_signals/gail/__init__.py diff --git a/ml-agents/mlagents/trainers/components/reward_signals/gail/model.py b/ml-agents/mlagents/trainers/tf/components/reward_signals/gail/model.py similarity index 100% rename from ml-agents/mlagents/trainers/components/reward_signals/gail/model.py rename to ml-agents/mlagents/trainers/tf/components/reward_signals/gail/model.py diff --git a/ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py b/ml-agents/mlagents/trainers/tf/components/reward_signals/gail/signal.py similarity index 96% rename from ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py rename to ml-agents/mlagents/trainers/tf/components/reward_signals/gail/signal.py index a61a177512..baeec49ac6 100644 --- a/ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py +++ b/ml-agents/mlagents/trainers/tf/components/reward_signals/gail/signal.py @@ -2,9 +2,12 @@ import numpy as np from mlagents.tf_utils import tf -from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult +from mlagents.trainers.tf.components.reward_signals import ( + RewardSignal, + RewardSignalResult, +) from mlagents.trainers.policy.tf_policy import TFPolicy -from .model import GAILModel +from mlagents.trainers.tf.components.reward_signals.gail.model import GAILModel from mlagents.trainers.demo_loader import demo_to_buffer from mlagents.trainers.buffer import AgentBuffer from mlagents.trainers.settings import GAILSettings diff --git a/ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py b/ml-agents/mlagents/trainers/tf/components/reward_signals/reward_signal_factory.py similarity index 78% rename from ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py rename to ml-agents/mlagents/trainers/tf/components/reward_signals/reward_signal_factory.py index bb29eaa10b..02fa63a9bc 100644 --- a/ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py +++ b/ml-agents/mlagents/trainers/tf/components/reward_signals/reward_signal_factory.py @@ -1,11 +1,11 @@ from typing import Dict, Type 
from mlagents.trainers.exception import UnityTrainerException -from mlagents.trainers.components.reward_signals import RewardSignal -from mlagents.trainers.components.reward_signals.extrinsic.signal import ( +from mlagents.trainers.tf.components.reward_signals import RewardSignal +from mlagents.trainers.tf.components.reward_signals.extrinsic.signal import ( ExtrinsicRewardSignal, ) -from mlagents.trainers.components.reward_signals.gail.signal import GAILRewardSignal -from mlagents.trainers.components.reward_signals.curiosity.signal import ( +from mlagents.trainers.tf.components.reward_signals.gail.signal import GAILRewardSignal +from mlagents.trainers.tf.components.reward_signals.curiosity.signal import ( CuriosityRewardSignal, ) from mlagents.trainers.policy.tf_policy import TFPolicy diff --git a/ml-agents/mlagents/trainers/trainer/__init__.py b/ml-agents/mlagents/trainers/trainer/__init__.py index dab1508ebd..620008bdb1 100644 --- a/ml-agents/mlagents/trainers/trainer/__init__.py +++ b/ml-agents/mlagents/trainers/trainer/__init__.py @@ -1 +1,2 @@ from mlagents.trainers.trainer.trainer import Trainer # noqa +from mlagents.trainers.trainer.trainer_factory import TrainerFactory # noqa diff --git a/ml-agents/mlagents/trainers/trainer/rl_trainer.py b/ml-agents/mlagents/trainers/trainer/rl_trainer.py index da8c172615..ad311bf7b1 100644 --- a/ml-agents/mlagents/trainers/trainer/rl_trainer.py +++ b/ml-agents/mlagents/trainers/trainer/rl_trainer.py @@ -13,7 +13,10 @@ from mlagents.trainers.optimizer import Optimizer from mlagents.trainers.buffer import AgentBuffer from mlagents.trainers.trainer import Trainer -from mlagents.trainers.components.reward_signals import RewardSignalResult, RewardSignal +from mlagents.trainers.tf.components.reward_signals import ( + RewardSignalResult, + RewardSignal, +) from mlagents_envs.timers import hierarchical_timer from mlagents_envs.base_env import BehaviorSpec from mlagents.trainers.policy.policy import Policy diff --git a/ml-agents/mlagents/trainers/trainer/trainer_factory.py b/ml-agents/mlagents/trainers/trainer/trainer_factory.py new file mode 100644 index 0000000000..daea1ccd0c --- /dev/null +++ b/ml-agents/mlagents/trainers/trainer/trainer_factory.py @@ -0,0 +1,156 @@ +import os +from typing import Dict + +from mlagents_envs.logging_util import get_logger +from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager +from mlagents.trainers.exception import TrainerConfigError +from mlagents.trainers.trainer import Trainer +from mlagents.trainers.ppo.trainer import PPOTrainer +from mlagents.trainers.sac.trainer import SACTrainer +from mlagents.trainers.ghost.trainer import GhostTrainer +from mlagents.trainers.ghost.controller import GhostController +from mlagents.trainers.settings import TrainerSettings, TrainerType, FrameworkType + + +logger = get_logger(__name__) + + +class TrainerFactory: + def __init__( + self, + trainer_config: Dict[str, TrainerSettings], + output_path: str, + train_model: bool, + load_model: bool, + seed: int, + param_manager: EnvironmentParameterManager, + init_path: str = None, + multi_gpu: bool = False, + force_torch: bool = False, + ): + """ + The TrainerFactory generates the Trainers based on the configuration passed as + input. + :param trainer_config: A dictionary from behavior name to TrainerSettings + :param output_path: The path to the directory where the artifacts generated by + the trainer will be saved. 
+ :param train_model: If True, the Trainers will train the model and if False, + only perform inference. + :param load_model: If True, the Trainer will load neural networks weights from + the previous run. + :param seed: The seed of the Trainers. Dictates how the neural networks will be + initialized. + :param param_manager: The EnvironmentParameterManager that will dictate when/if + the EnvironmentParameters must change. + :param init_path: Path from which to load model. + :param multi_gpu: If True, multi-gpu will be used. (currently not available) + :param force_torch: If True, the Trainers will all use the PyTorch framework + instead of the TensorFlow framework. + """ + self.trainer_config = trainer_config + self.output_path = output_path + self.init_path = init_path + self.train_model = train_model + self.load_model = load_model + self.seed = seed + self.param_manager = param_manager + self.multi_gpu = multi_gpu + self.ghost_controller = GhostController() + self._force_torch = force_torch + + def generate(self, behavior_name: str) -> Trainer: + if behavior_name not in self.trainer_config.keys(): + logger.warning( + f"Behavior name {behavior_name} does not match any behaviors specified" + f"in the trainer configuration file: {sorted(self.trainer_config.keys())}" + ) + trainer_settings = self.trainer_config[behavior_name] + if self._force_torch: + trainer_settings.framework = FrameworkType.PYTORCH + return TrainerFactory._initialize_trainer( + trainer_settings, + behavior_name, + self.output_path, + self.train_model, + self.load_model, + self.ghost_controller, + self.seed, + self.param_manager, + self.init_path, + self.multi_gpu, + ) + + @staticmethod + def _initialize_trainer( + trainer_settings: TrainerSettings, + brain_name: str, + output_path: str, + train_model: bool, + load_model: bool, + ghost_controller: GhostController, + seed: int, + param_manager: EnvironmentParameterManager, + init_path: str = None, + multi_gpu: bool = False, + ) -> Trainer: + """ + Initializes a trainer given a provided trainer configuration and brain parameters, as well as + some general training session options. + + :param trainer_settings: Original trainer configuration loaded from YAML + :param brain_name: Name of the brain to be associated with trainer + :param output_path: Path to save the model and summary statistics + :param keep_checkpoints: How many model checkpoints to keep + :param train_model: Whether to train the model (vs. run inference) + :param load_model: Whether to load the model or randomly initialize + :param ghost_controller: The object that coordinates ghost trainers + :param seed: The random seed to use + :param param_manager: EnvironmentParameterManager, used to determine a reward buffer length for PPOTrainer + :param init_path: Path from which to load model, if different from model_path. 
+ :return: + """ + trainer_artifact_path = os.path.join(output_path, brain_name) + if init_path is not None: + trainer_settings.init_path = os.path.join(init_path, brain_name) + + min_lesson_length = param_manager.get_minimum_reward_buffer_size(brain_name) + + trainer: Trainer = None # type: ignore # will be set to one of these, or raise + trainer_type = trainer_settings.trainer_type + + if trainer_type == TrainerType.PPO: + trainer = PPOTrainer( + brain_name, + min_lesson_length, + trainer_settings, + train_model, + load_model, + seed, + trainer_artifact_path, + ) + elif trainer_type == TrainerType.SAC: + trainer = SACTrainer( + brain_name, + min_lesson_length, + trainer_settings, + train_model, + load_model, + seed, + trainer_artifact_path, + ) + else: + raise TrainerConfigError( + f'The trainer config contains an unknown trainer type "{trainer_type}" for brain {brain_name}' + ) + + if trainer_settings.self_play is not None: + trainer = GhostTrainer( + trainer, + brain_name, + ghost_controller, + min_lesson_length, + trainer_settings, + train_model, + trainer_artifact_path, + ) + return trainer diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index 3be4a6b43b..95ae70ccca 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -25,7 +25,7 @@ ) from mlagents.trainers.trainer import Trainer from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager -from mlagents.trainers.trainer_util import TrainerFactory +from mlagents.trainers.trainer import TrainerFactory from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers from mlagents.trainers.agent_processor import AgentManager from mlagents.tf_utils.globals import get_rank diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py deleted file mode 100644 index 08f87488be..0000000000 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ /dev/null @@ -1,197 +0,0 @@ -import os -from typing import Dict - -from mlagents_envs.logging_util import get_logger -from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager -from mlagents.trainers.exception import TrainerConfigError -from mlagents.trainers.trainer import Trainer -from mlagents.trainers.exception import UnityTrainerException -from mlagents.trainers.ppo.trainer import PPOTrainer -from mlagents.trainers.sac.trainer import SACTrainer -from mlagents.trainers.ghost.trainer import GhostTrainer -from mlagents.trainers.ghost.controller import GhostController -from mlagents.trainers.settings import TrainerSettings, TrainerType, FrameworkType - - -logger = get_logger(__name__) - - -class TrainerFactory: - def __init__( - self, - trainer_config: Dict[str, TrainerSettings], - output_path: str, - train_model: bool, - load_model: bool, - seed: int, - param_manager: EnvironmentParameterManager, - init_path: str = None, - multi_gpu: bool = False, - force_torch: bool = False, - ): - """ - The TrainerFactory generates the Trainers based on the configuration passed as - input. - :param trainer_config: A dictionary from behavior name to TrainerSettings - :param output_path: The path to the directory where the artifacts generated by - the trainer will be saved. - :param train_model: If True, the Trainers will train the model and if False, - only perform inference. - :param load_model: If True, the Trainer will load neural networks weights from - the previous run. 
- :param seed: The seed of the Trainers. Dictates how the neural networks will be - initialized. - :param param_manager: The EnvironmentParameterManager that will dictate when/if - the EnvironmentParameters must change. - :param init_path: Path from which to load model. - :param multi_gpu: If True, multi-gpu will be used. (currently not available) - :param force_torch: If True, the Trainers will all use the PyTorch framework - instead of the TensorFlow framework. - """ - self.trainer_config = trainer_config - self.output_path = output_path - self.init_path = init_path - self.train_model = train_model - self.load_model = load_model - self.seed = seed - self.param_manager = param_manager - self.multi_gpu = multi_gpu - self.ghost_controller = GhostController() - self._force_torch = force_torch - - def generate(self, behavior_name: str) -> Trainer: - if behavior_name not in self.trainer_config.keys(): - logger.warning( - f"Behavior name {behavior_name} does not match any behaviors specified" - f"in the trainer configuration file: {sorted(self.trainer_config.keys())}" - ) - trainer_settings = self.trainer_config[behavior_name] - if self._force_torch: - trainer_settings.framework = FrameworkType.PYTORCH - return initialize_trainer( - trainer_settings, - behavior_name, - self.output_path, - self.train_model, - self.load_model, - self.ghost_controller, - self.seed, - self.param_manager, - self.init_path, - self.multi_gpu, - ) - - -def initialize_trainer( - trainer_settings: TrainerSettings, - brain_name: str, - output_path: str, - train_model: bool, - load_model: bool, - ghost_controller: GhostController, - seed: int, - param_manager: EnvironmentParameterManager, - init_path: str = None, - multi_gpu: bool = False, -) -> Trainer: - """ - Initializes a trainer given a provided trainer configuration and brain parameters, as well as - some general training session options. - - :param trainer_settings: Original trainer configuration loaded from YAML - :param brain_name: Name of the brain to be associated with trainer - :param output_path: Path to save the model and summary statistics - :param keep_checkpoints: How many model checkpoints to keep - :param train_model: Whether to train the model (vs. run inference) - :param load_model: Whether to load the model or randomly initialize - :param ghost_controller: The object that coordinates ghost trainers - :param seed: The random seed to use - :param param_manager: EnvironmentParameterManager, used to determine a reward buffer length for PPOTrainer - :param init_path: Path from which to load model, if different from model_path. 
- :return: - """ - trainer_artifact_path = os.path.join(output_path, brain_name) - if init_path is not None: - trainer_settings.init_path = os.path.join(init_path, brain_name) - - min_lesson_length = param_manager.get_minimum_reward_buffer_size(brain_name) - - trainer: Trainer = None # type: ignore # will be set to one of these, or raise - trainer_type = trainer_settings.trainer_type - - if trainer_type == TrainerType.PPO: - trainer = PPOTrainer( - brain_name, - min_lesson_length, - trainer_settings, - train_model, - load_model, - seed, - trainer_artifact_path, - ) - elif trainer_type == TrainerType.SAC: - trainer = SACTrainer( - brain_name, - min_lesson_length, - trainer_settings, - train_model, - load_model, - seed, - trainer_artifact_path, - ) - else: - raise TrainerConfigError( - f'The trainer config contains an unknown trainer type "{trainer_type}" for brain {brain_name}' - ) - - if trainer_settings.self_play is not None: - trainer = GhostTrainer( - trainer, - brain_name, - ghost_controller, - min_lesson_length, - trainer_settings, - train_model, - trainer_artifact_path, - ) - return trainer - - -def handle_existing_directories( - output_path: str, resume: bool, force: bool, init_path: str = None -) -> None: - """ - Validates that if the run_id model exists, we do not overwrite it unless --force is specified. - Throws an exception if resume isn't specified and run_id exists. Throws an exception - if --resume is specified and run-id was not found. - :param model_path: The model path specified. - :param summary_path: The summary path to be used. - :param resume: Whether or not the --resume flag was passed. - :param force: Whether or not the --force flag was passed. - """ - - output_path_exists = os.path.isdir(output_path) - - if output_path_exists: - if not resume and not force: - raise UnityTrainerException( - "Previous data from this run ID was found. " - "Either specify a new run ID, use --resume to resume this run, " - "or use the --force parameter to overwrite existing data." - ) - else: - if resume: - raise UnityTrainerException( - "Previous data from this run ID was not found. " - "Train a new run by removing the --resume flag." - ) - - # Verify init path if specified. - if init_path is not None: - if not os.path.isdir(init_path): - raise UnityTrainerException( - "Could not initialize from {}. " - "Make sure models have already been saved with that run ID.".format( - init_path - ) - )
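
Below is a minimal, hypothetical sketch (not part of the diff above) of what a downstream script touched by this refactor looks like after applying the migration steps from docs/Migrating.md: `TrainerFactory` is now imported from `mlagents.trainers.trainer`, `handle_existing_directories` is replaced by `validate_existing_directories` from `mlagents.trainers.directory_utils`, and `mlagents.trainers.components` becomes `mlagents.trainers.tf.components`. The run directory path and flag values are placeholders.

# Hypothetical downstream usage, updated to the new module layout.
from mlagents.trainers.trainer import TrainerFactory  # was: mlagents.trainers.trainer_util
from mlagents.trainers.directory_utils import (
    validate_existing_directories,  # was: trainer_util.handle_existing_directories
)
from mlagents.trainers.tf.components.bc.module import BCModule  # was: mlagents.trainers.components.bc.module

# Same semantics as the old helper: raises UnityTrainerException when the run
# directory already exists without --resume/--force, or when --resume is passed
# for a run ID that does not exist yet. "results/my_run_id" is a placeholder.
validate_existing_directories("results/my_run_id", resume=False, force=True)

Call sites that only consumed these symbols need no behavioral changes; the rename in this diff is purely a relocation of `TrainerFactory` and the directory-validation helper, as the learn.py and test hunks above show.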