16 changes: 15 additions & 1 deletion docs/Migrating.md
@@ -14,7 +14,21 @@ double-check that the versions are in the same. The versions can be found in
 
 # Migrating
 
-## Migrating from Release 3 to latest
+## Migrating from Release 7 to latest
+
+### Important changes
+- Some trainer files were moved. If you were using the `TrainerFactory` class, it is now in
+  the `trainers/trainer` folder.
+- The `components` folder containing the `bc` and `reward_signals` code was moved to the
+  `trainers/tf` folder.
+
+### Steps to Migrate
+- Replace the import `from mlagents.trainers.trainer_util import TrainerFactory` with `from mlagents.trainers.trainer import TrainerFactory`.
+- Replace the import `from mlagents.trainers.trainer_util import handle_existing_directories` with `from mlagents.trainers.directory_utils import validate_existing_directories`.
+- Replace `mlagents.trainers.components` with `mlagents.trainers.tf.components` in your import statements (see the sketch after this diff).
+
+
+## Migrating from Release 3 to Release 7
 
 ### Important changes
 - The Parameter Randomization feature has been merged with the Curriculum feature. It is now possible to specify a sampler
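For example, a script that used all of the moved or renamed symbols would change as follows. This is a minimal before/after sketch of just the import lines listed in the steps above (the `RewardSignal` import stands in for any `mlagents.trainers.components` import):

```python
# Before (Release 7):
# from mlagents.trainers.trainer_util import TrainerFactory, handle_existing_directories
# from mlagents.trainers.components.reward_signals import RewardSignal

# After (latest):
from mlagents.trainers.trainer import TrainerFactory
from mlagents.trainers.directory_utils import validate_existing_directories
from mlagents.trainers.tf.components.reward_signals import RewardSignal
```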
42 changes: 42 additions & 0 deletions ml-agents/mlagents/trainers/directory_utils.py
@@ -0,0 +1,42 @@
+import os
+from mlagents.trainers.exception import UnityTrainerException
+
+
+def validate_existing_directories(
+    output_path: str, resume: bool, force: bool, init_path: str = None
+) -> None:
+    """
+    Validates that if the run_id model exists, we do not overwrite it unless --force is specified.
+    Throws an exception if the output path exists and neither --resume nor --force was passed.
+    Throws an exception if --resume was passed and the output path was not found.
+    :param output_path: The output path for this run's artifacts.
+    :param resume: Whether or not the --resume flag was passed.
+    :param force: Whether or not the --force flag was passed.
+    :param init_path: Optional path of a previous run to initialize from; must already exist.
+    """
+
+    output_path_exists = os.path.isdir(output_path)
+
+    if output_path_exists:
+        if not resume and not force:
+            raise UnityTrainerException(
+                "Previous data from this run ID was found. "
+                "Either specify a new run ID, use --resume to resume this run, "
+                "or use the --force parameter to overwrite existing data."
+            )
+    else:
+        if resume:
+            raise UnityTrainerException(
+                "Previous data from this run ID was not found. "
+                "Train a new run by removing the --resume flag."
+            )
+
+    # Verify init path if specified.
+    if init_path is not None:
+        if not os.path.isdir(init_path):
+            raise UnityTrainerException(
+                "Could not initialize from {}. "
+                "Make sure models have already been saved with that run ID.".format(
+                    init_path
+                )
+            )
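A quick usage sketch of the new helper (the `results/...` run paths are hypothetical), showing the contract it enforces; compare the real call site in `learn.py` below:

```python
from mlagents.trainers.directory_utils import validate_existing_directories

# Fresh output path, no flags: passes, since there is nothing to overwrite or resume.
validate_existing_directories("results/my_run", resume=False, force=False)

# Fresh output path with --resume would raise UnityTrainerException,
# since there is no previous run to resume:
# validate_existing_directories("results/my_run", resume=True, force=False)

# Existing output path with --force, initializing from another run.
# "results/other_run" must already exist, or this raises as well.
validate_existing_directories(
    "results/my_run", resume=False, force=True, init_path="results/other_run"
)
```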
5 changes: 3 additions & 2 deletions ml-agents/mlagents/trainers/learn.py
@@ -12,7 +12,8 @@
 from mlagents import tf_utils
 from mlagents.trainers.trainer_controller import TrainerController
 from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
-from mlagents.trainers.trainer_util import TrainerFactory, handle_existing_directories
+from mlagents.trainers.trainer import TrainerFactory
+from mlagents.trainers.directory_utils import validate_existing_directories
 from mlagents.trainers.stats import (
     TensorboardWriter,
     StatsReporter,
@@ -75,7 +76,7 @@ def run_training(run_seed: int, options: RunOptions) -> None:
     run_logs_dir = os.path.join(write_path, "run_logs")
     port: Optional[int] = env_settings.base_port
     # Check if directory exists
-    handle_existing_directories(
+    validate_existing_directories(
         write_path,
         checkpoint_settings.resume,
         checkpoint_settings.force,
4 changes: 2 additions & 2 deletions ml-agents/mlagents/trainers/optimizer/tf_optimizer.py
@@ -6,11 +6,11 @@
 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.optimizer import Optimizer
 from mlagents.trainers.trajectory import SplitObservations
-from mlagents.trainers.components.reward_signals.reward_signal_factory import (
+from mlagents.trainers.tf.components.reward_signals.reward_signal_factory import (
     create_reward_signal,
 )
 from mlagents.trainers.settings import TrainerSettings, RewardSignalType
-from mlagents.trainers.components.bc.module import BCModule
+from mlagents.trainers.tf.components.bc.module import BCModule
 
 
 class TFOptimizer(Optimizer):  # pylint: disable=W0223
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/ppo/trainer.py
@@ -16,7 +16,7 @@
 from mlagents.trainers.trajectory import Trajectory
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.settings import TrainerSettings, PPOSettings, FrameworkType
-from mlagents.trainers.components.reward_signals import RewardSignal
+from mlagents.trainers.tf.components.reward_signals import RewardSignal
 from mlagents import torch_utils
 
 if torch_utils.is_available():
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/sac/trainer.py
@@ -19,7 +19,7 @@
 from mlagents.trainers.trajectory import Trajectory, SplitObservations
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.settings import TrainerSettings, SACSettings, FrameworkType
-from mlagents.trainers.components.reward_signals import RewardSignal
+from mlagents.trainers.tf.components.reward_signals import RewardSignal
 from mlagents import torch_utils
 
 if torch_utils.is_available():
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/tests/check_env_trains.py
@@ -3,7 +3,7 @@
 import numpy as np
 from typing import Dict
 from mlagents.trainers.trainer_controller import TrainerController
-from mlagents.trainers.trainer_util import TrainerFactory
+from mlagents.trainers.trainer import TrainerFactory
 from mlagents.trainers.simple_env_manager import SimpleEnvManager
 from mlagents.trainers.stats import StatsReporter, StatsWriter, StatsSummary
 from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
@@ -4,7 +4,7 @@
 import numpy as np
 
 from mlagents.trainers.policy.tf_policy import TFPolicy
-from mlagents.trainers.components.bc.module import BCModule
+from mlagents.trainers.tf.components.bc.module import BCModule
 from mlagents.trainers.settings import (
     TrainerSettings,
     BehavioralCloningSettings,
@@ -11,7 +11,7 @@
     RecordEnvironment,
 )
 from mlagents.trainers.trainer_controller import TrainerController
-from mlagents.trainers.trainer_util import TrainerFactory
+from mlagents.trainers.trainer import TrainerFactory
 from mlagents.trainers.simple_env_manager import SimpleEnvManager
 from mlagents.trainers.demo_loader import write_demo
 from mlagents.trainers.stats import StatsReporter, StatsWriter, StatsSummary
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/tests/test_learn.py
@@ -47,7 +47,7 @@ def basic_options(extra_args=None):
 
 @patch("mlagents.trainers.learn.write_timing_tree")
 @patch("mlagents.trainers.learn.write_run_options")
-@patch("mlagents.trainers.learn.handle_existing_directories")
+@patch("mlagents.trainers.learn.validate_existing_directories")
 @patch("mlagents.trainers.learn.TrainerFactory")
 @patch("mlagents.trainers.learn.SubprocessEnvManager")
 @patch("mlagents.trainers.learn.create_environment_factory")
@@ -49,7 +49,6 @@ def trainer_controller_with_start_learning_mocks(basic_trainer_controller):
     trainer_mock.write_tensorboard_text = MagicMock()
 
     tc = basic_trainer_controller
-    tc.initialize_trainers = MagicMock()
     tc.trainers = {"testbrain": trainer_mock}
     tc.advance = MagicMock()
     tc.trainers["testbrain"].get_step = 0
21 changes: 11 additions & 10 deletions ml-agents/mlagents/trainers/tests/test_trainer_util.py
@@ -3,13 +3,14 @@
 import os
 from unittest.mock import patch
 
-from mlagents.trainers import trainer_util
+from mlagents.trainers.trainer import TrainerFactory
 from mlagents.trainers.cli_utils import load_config, _load_config
 from mlagents.trainers.ppo.trainer import PPOTrainer
 from mlagents.trainers.exception import TrainerConfigError, UnityTrainerException
 from mlagents.trainers.settings import RunOptions
 from mlagents.trainers.tests.dummy_config import ppo_dummy_config
 from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
+from mlagents.trainers.directory_utils import validate_existing_directories
 
 
 @pytest.fixture
@@ -49,7 +50,7 @@ def mock_constructor(
         assert artifact_path == os.path.join(output_path, brain_name)
 
     with patch.object(PPOTrainer, "__init__", mock_constructor):
-        trainer_factory = trainer_util.TrainerFactory(
+        trainer_factory = TrainerFactory(
             trainer_config=base_config,
             output_path=output_path,
             train_model=train_model,
@@ -71,7 +72,7 @@ def test_handles_no_config_provided():
     brain_name = "testbrain"
     no_default_config = RunOptions().behaviors
 
-    trainer_factory = trainer_util.TrainerFactory(
+    trainer_factory = TrainerFactory(
         trainer_config=no_default_config,
         output_path="output_path",
         train_model=True,
@@ -112,25 +113,25 @@ def test_load_config_invalid_yaml():
 def test_existing_directories(tmp_path):
     output_path = os.path.join(tmp_path, "runid")
     # Test fresh new unused path - should do nothing.
-    trainer_util.handle_existing_directories(output_path, False, False)
+    validate_existing_directories(output_path, False, False)
     # Test resume with fresh path - should throw an exception.
     with pytest.raises(UnityTrainerException):
-        trainer_util.handle_existing_directories(output_path, True, False)
+        validate_existing_directories(output_path, True, False)
 
     # make a directory
     os.mkdir(output_path)
     # Test try to train w.o. force, should complain
     with pytest.raises(UnityTrainerException):
-        trainer_util.handle_existing_directories(output_path, False, False)
+        validate_existing_directories(output_path, False, False)
     # Test try to train w/ resume - should work
-    trainer_util.handle_existing_directories(output_path, True, False)
+    validate_existing_directories(output_path, True, False)
     # Test try to train w/ force - should work
-    trainer_util.handle_existing_directories(output_path, False, True)
+    validate_existing_directories(output_path, False, True)
 
     # Test initialize option
     init_path = os.path.join(tmp_path, "runid2")
     with pytest.raises(UnityTrainerException):
-        trainer_util.handle_existing_directories(output_path, False, True, init_path)
+        validate_existing_directories(output_path, False, True, init_path)
     os.mkdir(init_path)
     # Should pass since the directory exists now.
-    trainer_util.handle_existing_directories(output_path, False, True, init_path)
+    validate_existing_directories(output_path, False, True, init_path)
@@ -2,8 +2,13 @@
 import numpy as np
 from mlagents.tf_utils import tf
 
-from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult
-from mlagents.trainers.components.reward_signals.curiosity.model import CuriosityModel
+from mlagents.trainers.tf.components.reward_signals import (
+    RewardSignal,
+    RewardSignalResult,
+)
+from mlagents.trainers.tf.components.reward_signals.curiosity.model import (
+    CuriosityModel,
+)
 from mlagents.trainers.policy.tf_policy import TFPolicy
 from mlagents.trainers.buffer import AgentBuffer
 from mlagents.trainers.settings import CuriositySettings
@@ -1,6 +1,9 @@
 import numpy as np
 
-from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult
+from mlagents.trainers.tf.components.reward_signals import (
+    RewardSignal,
+    RewardSignalResult,
+)
 from mlagents.trainers.buffer import AgentBuffer
 
 
@@ -2,9 +2,12 @@
 import numpy as np
 from mlagents.tf_utils import tf
 
-from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult
+from mlagents.trainers.tf.components.reward_signals import (
+    RewardSignal,
+    RewardSignalResult,
+)
 from mlagents.trainers.policy.tf_policy import TFPolicy
-from .model import GAILModel
+from mlagents.trainers.tf.components.reward_signals.gail.model import GAILModel
 from mlagents.trainers.demo_loader import demo_to_buffer
 from mlagents.trainers.buffer import AgentBuffer
 from mlagents.trainers.settings import GAILSettings
@@ -1,11 +1,11 @@
 from typing import Dict, Type
 from mlagents.trainers.exception import UnityTrainerException
-from mlagents.trainers.components.reward_signals import RewardSignal
-from mlagents.trainers.components.reward_signals.extrinsic.signal import (
+from mlagents.trainers.tf.components.reward_signals import RewardSignal
+from mlagents.trainers.tf.components.reward_signals.extrinsic.signal import (
     ExtrinsicRewardSignal,
 )
-from mlagents.trainers.components.reward_signals.gail.signal import GAILRewardSignal
-from mlagents.trainers.components.reward_signals.curiosity.signal import (
+from mlagents.trainers.tf.components.reward_signals.gail.signal import GAILRewardSignal
+from mlagents.trainers.tf.components.reward_signals.curiosity.signal import (
     CuriosityRewardSignal,
 )
 from mlagents.trainers.policy.tf_policy import TFPolicy
1 change: 1 addition & 0 deletions ml-agents/mlagents/trainers/trainer/__init__.py
@@ -1 +1,2 @@
 from mlagents.trainers.trainer.trainer import Trainer  # noqa
+from mlagents.trainers.trainer.trainer_factory import TrainerFactory  # noqa
Comment (Contributor): I'd rather remove this and keep the import explicit:

    from mlagents.trainers.trainer.trainer_factory import TrainerFactory

Comment (Contributor): Actually, why not just keep trainer_factory.py in the trainers directory?

Reply (Contributor, Author): I am trying to compartmentalize. The TrainerFactory creates a Trainer, so I think it makes sense to have it in the trainers.trainer folder.

The only reason I put the import inside the `__init__` is because mlagents.trainers.trainer.trainer_factory is 3 times trainer :)
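For reference, after this PR both import styles discussed above resolve to the same class; a small sketch of the two equivalent forms (the re-export comes from the `__init__.py` change in this diff):

```python
# Short form, via the package __init__ re-export added in this PR:
from mlagents.trainers.trainer import TrainerFactory

# Explicit form, as preferred in the review comment:
from mlagents.trainers.trainer.trainer_factory import TrainerFactory
```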

5 changes: 4 additions & 1 deletion ml-agents/mlagents/trainers/trainer/rl_trainer.py
@@ -13,7 +13,10 @@
 from mlagents.trainers.optimizer import Optimizer
 from mlagents.trainers.buffer import AgentBuffer
 from mlagents.trainers.trainer import Trainer
-from mlagents.trainers.components.reward_signals import RewardSignalResult, RewardSignal
+from mlagents.trainers.tf.components.reward_signals import (
+    RewardSignalResult,
+    RewardSignal,
+)
 from mlagents_envs.timers import hierarchical_timer
 from mlagents_envs.base_env import BehaviorSpec
 from mlagents.trainers.policy.policy import Policy