From 9ee1cb34aa4a350dc7166270e4876c3f329774f5 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Wed, 19 Aug 2020 17:27:16 -0700 Subject: [PATCH 1/2] clean up docstrings create policies --- ml-agents/mlagents/trainers/ppo/trainer.py | 9 +++++---- ml-agents/mlagents/trainers/sac/trainer.py | 9 ++++++++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index c2a712a02f..fe1424ae3e 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -216,9 +216,10 @@ def create_tf_policy( create_graph: bool = False, ) -> TFPolicy: """ - Creates a PPO policy to trainers list of policies. + Creates a policy with a Tensorflow backend and PPO hyperparameters + :param parsed_behavior_id: :param behavior_spec: specifications for policy construction - :param create_graph: whether to create the graph when policy is constructed + :param create_graph: whether to create the tensorflow graph on construction :return policy """ policy = TFPolicy( @@ -234,9 +235,9 @@ def create_torch_policy( self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec ) -> TorchPolicy: """ - Creates a PPO policy to trainers list of policies. + Creates a policy with a PyTorch backend and PPO hyperparameters :param parsed_behavior_id: - :param brain_parameters: specifications for policy construction + :param behavior_spec: specifications for policy construction :return policy """ policy = TorchPolicy( diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py index 11cc6762c5..8fbf535a0f 100644 --- a/ml-agents/mlagents/trainers/sac/trainer.py +++ b/ml-agents/mlagents/trainers/sac/trainer.py @@ -233,6 +233,13 @@ def create_tf_policy( behavior_spec: BehaviorSpec, create_graph: bool = False, ) -> TFPolicy: + """ + Creates a policy with a Tensorflow backend and SAC hyperparameters + :param parsed_behavior_id: + :param behavior_spec: specifications for policy construction + :param create_graph: whether to create the tensorflow graph on construction + :return policy + """ policy = TFPolicy( self.seed, behavior_spec, @@ -248,7 +255,7 @@ def create_torch_policy( self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec ) -> TorchPolicy: """ - Creates a PPO policy to trainers list of policies. + Creates a policy with a PyTorch backend and SAC hyperparameters :param parsed_behavior_id: :param behavior_spec: specifications for policy construction :return policy From 7899905aa4b8a6e4c82739f320e1416713998537 Mon Sep 17 00:00:00 2001 From: Andrew Cohen Date: Thu, 20 Aug 2020 10:06:11 -0700 Subject: [PATCH 2/2] capitalize Tensorflow --- ml-agents/mlagents/trainers/ppo/trainer.py | 2 +- ml-agents/mlagents/trainers/sac/trainer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index fe1424ae3e..fbcfb56f1e 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -219,7 +219,7 @@ def create_tf_policy( Creates a policy with a Tensorflow backend and PPO hyperparameters :param parsed_behavior_id: :param behavior_spec: specifications for policy construction - :param create_graph: whether to create the tensorflow graph on construction + :param create_graph: whether to create the Tensorflow graph on construction :return policy """ policy = TFPolicy( diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py index 8fbf535a0f..6be7eb9524 100644 --- a/ml-agents/mlagents/trainers/sac/trainer.py +++ b/ml-agents/mlagents/trainers/sac/trainer.py @@ -237,7 +237,7 @@ def create_tf_policy( Creates a policy with a Tensorflow backend and SAC hyperparameters :param parsed_behavior_id: :param behavior_spec: specifications for policy construction - :param create_graph: whether to create the tensorflow graph on construction + :param create_graph: whether to create the Tensorflow graph on construction :return policy """ policy = TFPolicy(