Merged
2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -70,7 +70,7 @@ jobs:
. venv/bin/activate
mkdir test-reports
pip freeze > test-reports/pip_versions.txt
pytest -n 2 --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=test-reports/junit.xml -p no:warnings
pytest --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=test-reports/junit.xml -p no:warnings

- run:
name: Verify there are no hidden/missing metafiles.
4 changes: 2 additions & 2 deletions experiment_torch.py
@@ -98,8 +98,8 @@ def run_experiment(
evaluate_count = evaluate["TorchPolicy.evaluate"]["count"]
else:
if algo == "ppo":
update_total = update["TFPPOOptimizer.update"]["total"]
update_count = update["TFPPOOptimizer.update"]["count"]
update_total = update["PPOOptimizer.update"]["total"]
update_count = update["PPOOptimizer.update"]["count"]
else:
update_total = update["SACTrainer._update_policy"]["total"]
update_count = update["SACTrainer._update_policy"]["count"]
7 changes: 7 additions & 0 deletions ml-agents/mlagents/trainers/cli_utils.py
@@ -168,6 +168,13 @@ def _create_parser() -> argparse.ArgumentParser:
action=DetectDefaultStoreTrue,
help="Forces training using CPU only",
)
argparser.add_argument(
"--torch",
default=False,
action=DetectDefaultStoreTrue,
help="(Experimental) Use the PyTorch framework instead of TensorFlow. Install PyTorch "
Contributor: Is there a way to hide the help, or do we not care if the users see this?

@ervteng (Contributor, Author) Aug 5, 2020: Yes - argparse.SUPPRESS. I can hide it. Should we? I guess it depends on whether we want people to try it or not.

Contributor: Let's not hide it then
"before using this option",
)

eng_conf = argparser.add_argument_group(title="Engine Configuration")
eng_conf.add_argument(
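The thread above asks about hiding the new flag; passing argparse.SUPPRESS as the help value does exactly that. A minimal, self-contained sketch of the mechanism (the parser and flag below are hypothetical, not the ml-agents CLI):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--experimental-flag",   # hypothetical flag, not part of ml-agents
    default=False,
    action="store_true",
    help=argparse.SUPPRESS,  # the flag still parses, but is omitted from --help
)

args = parser.parse_args(["--experimental-flag"])
print(args.experimental_flag)  # True, even though --help never lists it
```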
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/ppo/optimizer_tf.py
@@ -9,7 +9,7 @@
from mlagents.trainers.settings import TrainerSettings, PPOSettings


class TFPPOOptimizer(TFOptimizer):
class PPOOptimizer(TFOptimizer):
def __init__(self, policy: TFPolicy, trainer_params: TrainerSettings):
"""
Takes a Policy and a Dict of trainer parameters and creates an Optimizer around the policy.
17 changes: 11 additions & 6 deletions ml-agents/mlagents/trainers/ppo/trainer.py
@@ -10,19 +10,25 @@
from mlagents_envs.logging_util import get_logger
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.policy import Policy
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
from mlagents.trainers.ppo.optimizer_tf import TFPPOOptimizer
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import (
TrainerSettings,
PPOSettings,
TestingConfiguration,
FrameworkType,
)

try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
TorchPPOOptimizer = None # type: ignore


logger = get_logger(__name__)

@@ -58,7 +64,6 @@ def __init__(
)
self.load = load
self.seed = seed
self.framework = "torch" if TestingConfiguration.use_torch else "tf"
if TestingConfiguration.max_steps > 0:
self.trainer_settings.max_steps = TestingConfiguration.max_steps
self.policy: Policy = None # type: ignore
@@ -254,12 +259,12 @@ def add_policy(
)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
if self.framework == "torch":
if self.framework == FrameworkType.PYTORCH:
self.optimizer = TorchPPOOptimizer( # type: ignore
self.policy, self.trainer_settings # type: ignore
) # type: ignore
else:
self.optimizer = TFPPOOptimizer( # type: ignore
self.optimizer = PPOOptimizer( # type: ignore
self.policy, self.trainer_settings # type: ignore
) # type: ignore
for _reward_signal in self.optimizer.reward_signals.keys():
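The try/except block added above is the pattern this PR uses to keep torch an optional dependency: the module still imports cleanly when PyTorch is absent, and the missing backend only becomes an error if it is actually selected (the guard in rl_trainer.py further down). A minimal sketch of the same idea, with illustrative names rather than the real trainer classes:

```python
try:
    import torch  # optional backend; may not be installed
except ModuleNotFoundError:
    torch = None  # type: ignore


def create_policy(framework: str) -> str:
    # Illustrative stand-in for the trainer's create_policy dispatch:
    # fail only when the torch backend is actually requested.
    if framework == "pytorch":
        if torch is None:
            raise RuntimeError(
                "PyTorch backend requested but the torch package is not installed."
            )
        return "torch-policy"  # placeholder for a real TorchPolicy
    return "tf-policy"  # placeholder for a real TFPolicy
```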
12 changes: 8 additions & 4 deletions ml-agents/mlagents/trainers/sac/trainer.py
@@ -18,10 +18,14 @@
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.trajectory import Trajectory, SplitObservations
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
from mlagents.trainers.settings import TrainerSettings, SACSettings
from mlagents.trainers.settings import TrainerSettings, SACSettings, FrameworkType

try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
TorchSACOptimizer = None # type: ignore

logger = get_logger(__name__)

@@ -353,7 +357,7 @@ def add_policy(
)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
if self.framework == "torch":
if self.framework == FrameworkType.PYTORCH:
self.optimizer = TorchSACOptimizer( # type: ignore
self.policy, self.trainer_settings # type: ignore
) # type: ignore
14 changes: 13 additions & 1 deletion ml-agents/mlagents/trainers/settings.py
@@ -524,6 +524,11 @@ def to_settings(self) -> type:
return _mapping[self]


class FrameworkType(Enum):
TENSORFLOW: str = "tensorflow"
PYTORCH: str = "pytorch"


@attr.s(auto_attribs=True)
class TrainerSettings(ExportableSettings):
trainer_type: TrainerType = TrainerType.PPO
@@ -546,6 +551,7 @@ def _set_default_hyperparameters(self):
threaded: bool = True
self_play: Optional[SelfPlaySettings] = None
behavioral_cloning: Optional[BehavioralCloningSettings] = None
framework: FrameworkType = FrameworkType.TENSORFLOW
Contributor: There are 2 ways to have a pytorch trainer then, with --torch or by adding framework: torch to the config file.

Contributor (Author): Yep, that's true with every CLI option - it goes into the YAML somehow. That way a run can be specified entirely from a YAML file - this is for cloud training and for reproducibility.

cattr.register_structure_hook(
Dict[RewardSignalType, RewardSignalSettings], RewardSignalSettings.structure
@@ -713,7 +719,13 @@ def from_argparse(args: argparse.Namespace) -> "RunOptions":
configured_dict["engine_settings"][key] = val
else: # Base options
configured_dict[key] = val
return RunOptions.from_dict(configured_dict)

# Apply --torch retroactively
final_runoptions = RunOptions.from_dict(configured_dict)
if "torch" in DetectDefault.non_default_args:
for trainer_set in final_runoptions.behaviors.values():
trainer_set.framework = FrameworkType.PYTORCH
return final_runoptions

@staticmethod
def from_dict(options_dict: Dict[str, Any]) -> "RunOptions":
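Per the conversation above, the PyTorch backend can be selected two equivalent ways: the --torch CLI flag (which from_argparse copies onto every behavior's settings) or a framework key in the trainer config. A hypothetical config sketch; the behavior name and surrounding keys are illustrative, and `pytorch` matches FrameworkType.PYTORCH's value defined above:

```yaml
behaviors:
  3DBall:                  # illustrative behavior name
    trainer_type: ppo
    framework: pytorch     # equivalent to passing --torch on the command line
    max_steps: 500000
```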
4 changes: 2 additions & 2 deletions ml-agents/mlagents/trainers/tests/test_ppo.py
@@ -8,7 +8,7 @@
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers

from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
from mlagents.trainers.ppo.optimizer_tf import TFPPOOptimizer
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.tests import mock_brain as mb
@@ -52,7 +52,7 @@ def _create_ppo_optimizer_ops_mock(dummy_config, use_rnn, use_discrete, use_visu
policy = TFPolicy(
0, mock_specs, trainer_settings, "test", False, create_tf_graph=False
)
optimizer = TFPPOOptimizer(policy, trainer_settings)
optimizer = PPOOptimizer(policy, trainer_settings)
return optimizer


4 changes: 2 additions & 2 deletions ml-agents/mlagents/trainers/tests/test_reward_signals.py
@@ -4,7 +4,7 @@
import mlagents.trainers.tests.mock_brain as mb
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.sac.optimizer import SACOptimizer
from mlagents.trainers.ppo.optimizer_tf import TFPPOOptimizer
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG, SAC_CONFIG
from mlagents.trainers.settings import (
GAILSettings,
@@ -75,7 +75,7 @@ def create_optimizer_mock(
if trainer_settings.trainer_type == TrainerType.SAC:
optimizer = SACOptimizer(policy, trainer_settings)
else:
optimizer = TFPPOOptimizer(policy, trainer_settings)
optimizer = PPOOptimizer(policy, trainer_settings)
return optimizer


5 changes: 4 additions & 1 deletion ml-agents/mlagents/trainers/tests/test_rl_trainer.py
@@ -25,7 +25,10 @@ def _update_policy(self):
def add_policy(self, mock_behavior_id, mock_policy):
self.policies[mock_behavior_id] = mock_policy

def create_policy(self):
def create_tf_policy(self):
return mock.Mock()

def create_torch_policy(self):
return mock.Mock()

def _process_trajectory(self, trajectory):
3 changes: 3 additions & 0 deletions ml-agents/mlagents/trainers/tests/torch/test_networks.py
@@ -40,6 +40,7 @@ def test_networkbody_vector():


def test_networkbody_lstm():
torch.manual_seed(0)
obs_size = 4
seq_len = 16
network_settings = NetworkSettings(
@@ -64,6 +65,7 @@ def test_networkbody_lstm():


def test_networkbody_visual():
torch.manual_seed(0)
vec_obs_size = 4
obs_size = (84, 84, 3)
network_settings = NetworkSettings()
@@ -89,6 +91,7 @@ def test_valuenetwork():


def test_valuenetwork():
torch.manual_seed(0)
obs_size = 4
num_outputs = 2
network_settings = NetworkSettings()
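The torch.manual_seed(0) calls added above pin PyTorch's global RNG so that weight initialization and sampling are identical on every run, removing flakiness from these network tests. A minimal sketch of the effect:

```python
import torch

torch.manual_seed(0)
a = torch.rand(3)

torch.manual_seed(0)
b = torch.rand(3)

assert torch.equal(a, b)  # same seed, same draws: the test is reproducible
```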
21 changes: 17 additions & 4 deletions ml-agents/mlagents/trainers/trainer/rl_trainer.py
@@ -19,13 +19,18 @@
from mlagents_envs.timers import hierarchical_timer
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.policy.policy import Policy
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.settings import TestingConfiguration
from mlagents.trainers.settings import TestingConfiguration, FrameworkType
from mlagents.trainers.stats import StatsPropertyType
from mlagents.trainers.exception import UnityTrainerException

try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
except ModuleNotFoundError:
TorchPolicy = None # type: ignore

RewardSignalResults = Dict[str, RewardSignalResult]

@@ -50,7 +55,8 @@ def __init__(self, *args, **kwargs):
self._stats_reporter.add_property(
StatsPropertyType.HYPERPARAMETERS, self.trainer_settings.as_dict()
)
self.framework = "torch" if TestingConfiguration.use_torch else "tf"
self.framework = self.trainer_settings.framework
logger.debug(f"Using framework {self.framework.value}")
if TestingConfiguration.max_steps > 0:
self.trainer_settings.max_steps = TestingConfiguration.max_steps
self._next_save_step = 0
@@ -99,7 +105,11 @@ def _is_ready_update(self):
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> Policy:
if self.framework == "torch":
if self.framework == FrameworkType.PYTORCH and TorchPolicy is None:
raise UnityTrainerException(
"To use the experimental PyTorch backend, install the PyTorch Python package first."
)
elif self.framework == FrameworkType.PYTORCH:
return self.create_torch_policy(parsed_behavior_id, behavior_spec)
else:
return self.create_tf_policy(parsed_behavior_id, behavior_spec)
@@ -165,6 +175,9 @@ def save_model(self) -> None:
logger.warning(
"Trainer has multiple policies, but default behavior only saves the first."
)
elif n_policies == 0:
logger.warning("Trainer has no policies, not saving anything.")
return
policy = list(self.policies.values())[0]
settings = SerializationSettings(policy.model_path, self.brain_name)
model_checkpoint = self._checkpoint()
3 changes: 3 additions & 0 deletions test_requirements.txt
@@ -3,5 +3,8 @@ pytest>4.0.0,<6.0.0
pytest-cov==2.6.1
pytest-xdist

# PyTorch tests are here for the time being, before they are used in the codebase.
torch>=1.5.0

# onnx doesn't currently have a wheel for 3.8
tf2onnx>=1.5.5;python_version<'3.8'