From 99cb5253a20af958fef937af36bce550e10a4516 Mon Sep 17 00:00:00 2001
From: Ruo-Ping Dong
Date: Tue, 25 Aug 2020 15:39:48 -0700
Subject: [PATCH] brain_name to behavior_name

---
 .../mlagents/trainers/model_saver/model_saver.py    | 10 +++++-----
 .../mlagents/trainers/model_saver/tf_model_saver.py | 10 +++++-----
 .../trainers/model_saver/torch_model_saver.py       |  8 ++++----
 ml-agents/mlagents/trainers/policy/torch_policy.py  |  4 ++--
 .../trainers/ppo/{optimizer.py => optimizer_tf.py}  |  2 +-
 ml-agents/mlagents/trainers/ppo/trainer.py          | 13 +++++++++----
 .../trainers/sac/{optimizer.py => optimizer_tf.py}  |  0
 ml-agents/mlagents/trainers/sac/trainer.py          | 13 +++++++++----
 ml-agents/mlagents/trainers/tests/test_ppo.py       |  2 +-
 .../mlagents/trainers/tests/test_reward_signals.py  |  4 ++--
 ml-agents/mlagents/trainers/tests/test_sac.py       |  2 +-
 ml-agents/mlagents/trainers/tests/test_saver.py     |  2 +-
 ml-agents/mlagents/trainers/tf/models.py            |  4 ++--
 .../mlagents/trainers/torch/model_serialization.py  |  1 -
 14 files changed, 42 insertions(+), 33 deletions(-)
 rename ml-agents/mlagents/trainers/ppo/{optimizer.py => optimizer_tf.py} (99%)
 rename ml-agents/mlagents/trainers/sac/{optimizer.py => optimizer_tf.py} (100%)

diff --git a/ml-agents/mlagents/trainers/model_saver/model_saver.py b/ml-agents/mlagents/trainers/model_saver/model_saver.py
index 5247e10df3..c88a358847 100644
--- a/ml-agents/mlagents/trainers/model_saver/model_saver.py
+++ b/ml-agents/mlagents/trainers/model_saver/model_saver.py
@@ -34,23 +34,23 @@ def _register_optimizer(self, optimizer):
         pass
 
     @abc.abstractmethod
-    def save_checkpoint(self, brain_name: str, step: int) -> str:
+    def save_checkpoint(self, behavior_name: str, step: int) -> str:
         """
         Checkpoints the policy on disk.
 
         :param checkpoint_path: filepath to write the checkpoint
-        :param brain_name: Brain name of brain to be trained
+        :param behavior_name: Behavior name of behavior to be trained
         """
         pass
 
     @abc.abstractmethod
-    def export(self, output_filepath: str, brain_name: str) -> None:
+    def export(self, output_filepath: str, behavior_name: str) -> None:
         """
-        Saves the serialized model, given a path and brain name.
+        Saves the serialized model, given a path and behavior name.
         This method will save the policy graph to the given filepath. The path
         should be provided without an extension as multiple serialized model formats
         may be generated as a result.
         :param output_filepath: path (without suffix) for the model file(s)
-        :param brain_name: Brain name of brain to be trained.
+        :param behavior_name: Behavior name of behavior to be trained.
""" pass diff --git a/ml-agents/mlagents/trainers/model_saver/tf_model_saver.py b/ml-agents/mlagents/trainers/model_saver/tf_model_saver.py index 76848759a3..0883ae5325 100644 --- a/ml-agents/mlagents/trainers/model_saver/tf_model_saver.py +++ b/ml-agents/mlagents/trainers/model_saver/tf_model_saver.py @@ -55,8 +55,8 @@ def _register_policy(self, policy: TFPolicy) -> None: with self.policy.graph.as_default(): self.tf_saver = tf.train.Saver(max_to_keep=self._keep_checkpoints) - def save_checkpoint(self, brain_name: str, step: int) -> str: - checkpoint_path = os.path.join(self.model_path, f"{brain_name}-{step}") + def save_checkpoint(self, behavior_name: str, step: int) -> str: + checkpoint_path = os.path.join(self.model_path, f"{behavior_name}-{step}") # Save the TF checkpoint and graph definition if self.graph: with self.graph.as_default(): @@ -66,16 +66,16 @@ def save_checkpoint(self, brain_name: str, step: int) -> str: self.graph, self.model_path, "raw_graph_def.pb", as_text=False ) # also save the policy so we have optimized model files for each checkpoint - self.export(checkpoint_path, brain_name) + self.export(checkpoint_path, behavior_name) return checkpoint_path - def export(self, output_filepath: str, brain_name: str) -> None: + def export(self, output_filepath: str, behavior_name: str) -> None: # save model if there is only one worker or # only on worker-0 if there are multiple workers if self.policy and self.policy.rank is not None and self.policy.rank != 0: return export_policy_model( - self.model_path, output_filepath, brain_name, self.graph, self.sess + self.model_path, output_filepath, behavior_name, self.graph, self.sess ) def initialize_or_load(self, policy: Optional[TFPolicy] = None) -> None: diff --git a/ml-agents/mlagents/trainers/model_saver/torch_model_saver.py b/ml-agents/mlagents/trainers/model_saver/torch_model_saver.py index 20f7f62394..762b858de1 100644 --- a/ml-agents/mlagents/trainers/model_saver/torch_model_saver.py +++ b/ml-agents/mlagents/trainers/model_saver/torch_model_saver.py @@ -45,19 +45,19 @@ def register(self, module: Union[TorchPolicy, TorchOptimizer]) -> None: self.policy = module self.exporter = ModelSerializer(self.policy) - def save_checkpoint(self, brain_name: str, step: int) -> str: + def save_checkpoint(self, behavior_name: str, step: int) -> str: if not os.path.exists(self.model_path): os.makedirs(self.model_path) - checkpoint_path = os.path.join(self.model_path, f"{brain_name}-{step}") + checkpoint_path = os.path.join(self.model_path, f"{behavior_name}-{step}") state_dict = { name: module.state_dict() for name, module in self.modules.items() } torch.save(state_dict, f"{checkpoint_path}.pt") torch.save(state_dict, os.path.join(self.model_path, "checkpoint.pt")) - self.export(checkpoint_path, brain_name) + self.export(checkpoint_path, behavior_name) return checkpoint_path - def export(self, output_filepath: str, brain_name: str) -> None: + def export(self, output_filepath: str, behavior_name: str) -> None: if self.exporter is not None: self.exporter.export_policy_model(output_filepath) diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py index 96083bc205..53c77ba32c 100644 --- a/ml-agents/mlagents/trainers/policy/torch_policy.py +++ b/ml-agents/mlagents/trainers/policy/torch_policy.py @@ -37,7 +37,7 @@ def __init__( also use a CNN to encode visual input prior to the MLP. Supports discrete and continuous action spaces, as well as recurrent networks. :param seed: Random seed. 
-        :param brain: Assigned BrainParameters object.
+        :param behavior_spec: Assigned BehaviorSpec object.
         :param trainer_settings: Defined training parameters.
         :param load: Whether a pre-trained model will be loaded or a new one created.
         :param tanh_squash: Whether to use a tanh function on the continuous output,
@@ -214,7 +214,7 @@ def get_action(
         """
         Decides actions given observations information, and takes them in environment.
         :param worker_id:
-        :param decision_requests: A dictionary of brain names and BrainInfo from environment.
+        :param decision_requests: A dictionary of behavior names and DecisionSteps from environment.
         :return: an ActionInfo containing action, memories, values and an object
         to be passed to add experiences
         """
diff --git a/ml-agents/mlagents/trainers/ppo/optimizer.py b/ml-agents/mlagents/trainers/ppo/optimizer_tf.py
similarity index 99%
rename from ml-agents/mlagents/trainers/ppo/optimizer.py
rename to ml-agents/mlagents/trainers/ppo/optimizer_tf.py
index e77ea21c5a..05ce4503c8 100644
--- a/ml-agents/mlagents/trainers/ppo/optimizer.py
+++ b/ml-agents/mlagents/trainers/ppo/optimizer_tf.py
@@ -177,7 +177,7 @@ def _create_dc_critic(
             name="old_probabilities",
         )
 
-        # Break old log probs into separate branches
+        # Break old log_probs into separate branches
         old_log_prob_branches = ModelUtils.break_into_branches(
             self.all_old_log_probs, self.policy.act_size
         )
diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
index e6dbd09bc0..9925d18b3a 100644
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -12,7 +12,7 @@
 from mlagents.trainers.trainer.rl_trainer import RLTrainer
 from mlagents.trainers.policy import Policy
 from mlagents.trainers.policy.tf_policy import TFPolicy
-from mlagents.trainers.ppo.optimizer import PPOOptimizer
+from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
 from mlagents.trainers.trajectory import Trajectory
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.settings import TrainerSettings, PPOSettings, FrameworkType
@@ -34,7 +34,7 @@ class PPOTrainer(RLTrainer):
 
     def __init__(
         self,
-        brain_name: str,
+        behavior_name: str,
         reward_buff_cap: int,
         trainer_settings: TrainerSettings,
         training: bool,
@@ -44,7 +44,7 @@ def __init__(
     ):
         """
         Responsible for collecting experiences and training PPO model.
-        :param brain_name: The name of the brain associated with trainer config
+        :param behavior_name: The name of the behavior associated with trainer config
         :param reward_buff_cap: Max reward history to track in the reward buffer
         :param trainer_settings: The parameters for the trainer.
         :param training: Whether the trainer is set for training.
@@ -53,7 +53,12 @@ def __init__(
         :param artifact_path: The directory within which to store artifacts from this trainer.
""" super().__init__( - brain_name, trainer_settings, training, load, artifact_path, reward_buff_cap + behavior_name, + trainer_settings, + training, + load, + artifact_path, + reward_buff_cap, ) self.hyperparameters: PPOSettings = cast( PPOSettings, self.trainer_settings.hyperparameters diff --git a/ml-agents/mlagents/trainers/sac/optimizer.py b/ml-agents/mlagents/trainers/sac/optimizer_tf.py similarity index 100% rename from ml-agents/mlagents/trainers/sac/optimizer.py rename to ml-agents/mlagents/trainers/sac/optimizer_tf.py diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py index b0e27846b0..b566d387f3 100644 --- a/ml-agents/mlagents/trainers/sac/trainer.py +++ b/ml-agents/mlagents/trainers/sac/trainer.py @@ -14,7 +14,7 @@ from mlagents_envs.base_env import BehaviorSpec from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.policy import Policy -from mlagents.trainers.sac.optimizer import SACOptimizer +from mlagents.trainers.sac.optimizer_tf import SACOptimizer from mlagents.trainers.trainer.rl_trainer import RLTrainer from mlagents.trainers.trajectory import Trajectory, SplitObservations from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers @@ -41,7 +41,7 @@ class SACTrainer(RLTrainer): def __init__( self, - brain_name: str, + behavior_name: str, reward_buff_cap: int, trainer_settings: TrainerSettings, training: bool, @@ -51,7 +51,7 @@ def __init__( ): """ Responsible for collecting experiences and training SAC model. - :param brain_name: The name of the brain associated with trainer config + :param behavior_name: The name of the behavior associated with trainer config :param reward_buff_cap: Max reward history to track in the reward buffer :param trainer_settings: The parameters for the trainer. :param training: Whether the trainer is set for training. @@ -60,7 +60,12 @@ def __init__( :param artifact_path: The directory within which to store artifacts from this trainer. 
""" super().__init__( - brain_name, trainer_settings, training, load, artifact_path, reward_buff_cap + behavior_name, + trainer_settings, + training, + load, + artifact_path, + reward_buff_cap, ) self.seed = seed diff --git a/ml-agents/mlagents/trainers/tests/test_ppo.py b/ml-agents/mlagents/trainers/tests/test_ppo.py index b88f9df3ac..9b12315499 100644 --- a/ml-agents/mlagents/trainers/tests/test_ppo.py +++ b/ml-agents/mlagents/trainers/tests/test_ppo.py @@ -9,7 +9,7 @@ from mlagents.trainers.trainer.rl_trainer import RLTrainer from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards -from mlagents.trainers.ppo.optimizer import PPOOptimizer +from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.agent_processor import AgentManagerQueue from mlagents.trainers.tests import mock_brain as mb diff --git a/ml-agents/mlagents/trainers/tests/test_reward_signals.py b/ml-agents/mlagents/trainers/tests/test_reward_signals.py index 4adc931a9e..5760f8fa96 100644 --- a/ml-agents/mlagents/trainers/tests/test_reward_signals.py +++ b/ml-agents/mlagents/trainers/tests/test_reward_signals.py @@ -3,8 +3,8 @@ import os import mlagents.trainers.tests.mock_brain as mb from mlagents.trainers.policy.tf_policy import TFPolicy -from mlagents.trainers.sac.optimizer import SACOptimizer -from mlagents.trainers.ppo.optimizer import PPOOptimizer +from mlagents.trainers.sac.optimizer_tf import SACOptimizer +from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG, SAC_CONFIG from mlagents.trainers.settings import ( GAILSettings, diff --git a/ml-agents/mlagents/trainers/tests/test_sac.py b/ml-agents/mlagents/trainers/tests/test_sac.py index 7d42bcdeba..7d76484ff4 100644 --- a/ml-agents/mlagents/trainers/tests/test_sac.py +++ b/ml-agents/mlagents/trainers/tests/test_sac.py @@ -7,7 +7,7 @@ from mlagents.trainers.trainer.rl_trainer import RLTrainer from mlagents.trainers.sac.trainer import SACTrainer -from mlagents.trainers.sac.optimizer import SACOptimizer +from mlagents.trainers.sac.optimizer_tf import SACOptimizer from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.agent_processor import AgentManagerQueue from mlagents.trainers.tests import mock_brain as mb diff --git a/ml-agents/mlagents/trainers/tests/test_saver.py b/ml-agents/mlagents/trainers/tests/test_saver.py index 1e9259fa7b..ca3016b09d 100644 --- a/ml-agents/mlagents/trainers/tests/test_saver.py +++ b/ml-agents/mlagents/trainers/tests/test_saver.py @@ -12,7 +12,7 @@ from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.tests import mock_brain as mb from mlagents.trainers.tests.test_nn_policy import create_policy_mock -from mlagents.trainers.ppo.optimizer import PPOOptimizer +from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer def test_register(tmp_path): diff --git a/ml-agents/mlagents/trainers/tf/models.py b/ml-agents/mlagents/trainers/tf/models.py index 7c5d0770ad..be7d4f8c10 100644 --- a/ml-agents/mlagents/trainers/tf/models.py +++ b/ml-agents/mlagents/trainers/tf/models.py @@ -510,8 +510,8 @@ def create_discrete_action_masking_layer( :param action_masks: The mask for the logits. 
         :param action_size: A list containing the number of possible actions for each branch
         :return: The action output dimension [batch_size, num_branches], the concatenated
-        normalized probs (after softmax)
-        and the concatenated normalized log probs
+        normalized probabilities (after softmax)
+        and the concatenated normalized log_probs
         """
         branch_masks = ModelUtils.break_into_branches(action_masks, action_size)
         raw_probs = [
diff --git a/ml-agents/mlagents/trainers/torch/model_serialization.py b/ml-agents/mlagents/trainers/torch/model_serialization.py
index 311af8d7fc..9d7716a318 100644
--- a/ml-agents/mlagents/trainers/torch/model_serialization.py
+++ b/ml-agents/mlagents/trainers/torch/model_serialization.py
@@ -54,7 +54,6 @@ def export_policy_model(self, output_filepath: str) -> None:
         Exports a Torch model for a Policy to .onnx format for Unity embedding.
         :param output_filepath: file path to output the model (without file suffix)
-        :param brain_name: Brain name of brain to be trained
         """
         if not os.path.exists(output_filepath):
             os.makedirs(output_filepath)
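
Reviewer note (illustration only, not part of the patch): the change is purely a rename; checkpointing and export are now keyed by behavior name rather than brain name, and the TF optimizers move to optimizer_tf.py. The sketch below shows how the renamed save_checkpoint/export interface reads after this patch. DummyModelSaver, the "results/3DBall" path, and the step value are hypothetical stand-ins for illustration; this is not the actual TFModelSaver or TorchModelSaver implementation.

import os


# Hypothetical stand-in that mirrors the renamed ModelSaver interface from this
# patch (save_checkpoint(behavior_name, step) and export(output_filepath, behavior_name)).
class DummyModelSaver:
    def __init__(self, model_path: str):
        self.model_path = model_path

    def save_checkpoint(self, behavior_name: str, step: int) -> str:
        # Same naming scheme as the patched savers: <model_path>/<behavior_name>-<step>
        os.makedirs(self.model_path, exist_ok=True)
        checkpoint_path = os.path.join(self.model_path, f"{behavior_name}-{step}")
        with open(f"{checkpoint_path}.pt", "wb") as f:
            f.write(b"")  # the real savers write the policy/optimizer state here
        # Each checkpoint is also exported, as in the patched save_checkpoint methods.
        self.export(checkpoint_path, behavior_name)
        return checkpoint_path

    def export(self, output_filepath: str, behavior_name: str) -> None:
        # The path is passed without a file suffix; the real savers emit the
        # serialized model files for Unity embedding.
        print(f"Exporting policy for {behavior_name} to {output_filepath}")


saver = DummyModelSaver("results/3DBall")
saver.save_checkpoint(behavior_name="3DBall", step=50000)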