diff --git a/experiment_torch.py b/experiment_torch.py
deleted file mode 100644
index 6fe65c15d7..0000000000
--- a/experiment_torch.py
+++ /dev/null
@@ -1,248 +0,0 @@
-import json
-import os
-import torch
-from mlagents.tf_utils import tf
-import argparse
-from mlagents.trainers.learn import run_cli, parse_command_line
-from mlagents.trainers.settings import TestingConfiguration
-from mlagents.trainers.stats import StatsReporter
-from mlagents_envs.timers import _thread_timer_stacks
-
-
-def run_experiment(
-    name: str,
-    steps: int,
-    use_torch: bool,
-    algo: str,
-    num_torch_threads: int,
-    use_gpu: bool,
-    num_envs: int = 1,
-    config_name=None,
-):
-    TestingConfiguration.env_name = name
-    TestingConfiguration.max_steps = steps
-    TestingConfiguration.use_torch = use_torch
-    TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"
-    if use_gpu:
-        tf.device("/GPU:0")
-    else:
-        tf.device("/device:CPU:0")
-    if not torch.cuda.is_available() and use_gpu:
-        return (
-            name,
-            str(steps),
-            str(use_torch),
-            algo,
-            str(num_torch_threads),
-            str(num_envs),
-            str(use_gpu),
-            "na",
-            "na",
-            "na",
-            "na",
-            "na",
-            "na",
-            "na",
-        )
-    if config_name is None:
-        config_name = name
-    run_options = parse_command_line(
-        [f"config/{algo}/{config_name}.yaml", "--num-envs", f"{num_envs}"]
-    )
-    run_options.checkpoint_settings.run_id = (
-        f"{name}_test_" + str(steps) + "_" + ("torch" if use_torch else "tf")
-    )
-    run_options.checkpoint_settings.force = True
-    # run_options.env_settings.num_envs = num_envs
-    for trainer_settings in run_options.behaviors.values():
-        trainer_settings.threaded = False
-    timers_path = os.path.join(
-        "results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json"
-    )
-    if use_torch:
-        torch.set_num_threads(num_torch_threads)
-    run_cli(run_options)
-    StatsReporter.writers.clear()
-    StatsReporter.stats_dict.clear()
-    _thread_timer_stacks.clear()
-    with open(timers_path) as timers_json_file:
-        timers_json = json.load(timers_json_file)
-        total = timers_json["total"]
-        tc_advance = timers_json["children"]["TrainerController.start_learning"][
-            "children"
-        ]["TrainerController.advance"]
-        evaluate = timers_json["children"]["TrainerController.start_learning"][
-            "children"
-        ]["TrainerController.advance"]["children"]["env_step"]["children"][
-            "SubprocessEnvManager._take_step"
-        ][
-            "children"
-        ]
-        update = timers_json["children"]["TrainerController.start_learning"][
-            "children"
-        ]["TrainerController.advance"]["children"]["trainer_advance"]["children"][
-            "_update_policy"
-        ][
-            "children"
-        ]
-        tc_advance_total = tc_advance["total"]
-        tc_advance_count = tc_advance["count"]
-        if use_torch:
-            if algo == "ppo":
-                update_total = update["TorchPPOOptimizer.update"]["total"]
-                update_count = update["TorchPPOOptimizer.update"]["count"]
-            else:
-                update_total = update["SACTrainer._update_policy"]["total"]
-                update_count = update["SACTrainer._update_policy"]["count"]
-            evaluate_total = evaluate["TorchPolicy.evaluate"]["total"]
-            evaluate_count = evaluate["TorchPolicy.evaluate"]["count"]
-        else:
-            if algo == "ppo":
-                update_total = update["PPOOptimizer.update"]["total"]
-                update_count = update["PPOOptimizer.update"]["count"]
-            else:
-                update_total = update["SACTrainer._update_policy"]["total"]
-                update_count = update["SACTrainer._update_policy"]["count"]
-            evaluate_total = evaluate["NNPolicy.evaluate"]["total"]
-            evaluate_count = evaluate["NNPolicy.evaluate"]["count"]
-    # todo: do total / count
-    return (
-        name,
-        str(steps),
-        str(use_torch),
-        algo,
-        str(num_torch_threads),
-        str(num_envs),
-        str(use_gpu),
-        str(total),
-        str(tc_advance_total),
-        str(tc_advance_count),
-        str(update_total),
-        str(update_count),
-        str(evaluate_total),
-        str(evaluate_count),
-    )
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--steps", default=25000, type=int, help="The number of steps")
-    parser.add_argument("--num-envs", default=1, type=int, help="The number of envs")
-    parser.add_argument(
-        "--gpu", default=False, action="store_true", help="If true, will use the GPU"
-    )
-    parser.add_argument(
-        "--threads",
-        default=False,
-        action="store_true",
-        help="If true, will try both 1 and 8 threads for torch",
-    )
-    parser.add_argument(
-        "--ball",
-        default=False,
-        action="store_true",
-        help="If true, will only do 3dball",
-    )
-    parser.add_argument(
-        "--sac",
-        default=False,
-        action="store_true",
-        help="If true, will run sac instead of ppo",
-    )
-    args = parser.parse_args()
-
-    if args.gpu:
-        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
-    else:
-        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
-
-    algo = "ppo"
-    if args.sac:
-        algo = "sac"
-
-    envs_config_tuples = [
-        ("3DBall", "3DBall"),
-        ("GridWorld", "GridWorld"),
-        ("PushBlock", "PushBlock"),
-        ("CrawlerStaticTarget", "CrawlerStatic"),
-    ]
-    if algo == "ppo":
-        envs_config_tuples += [
-            ("Hallway", "Hallway"),
-            ("VisualHallway", "VisualHallway"),
-        ]
-    if args.ball:
-        envs_config_tuples = [("3DBall", "3DBall")]
-
-    labels = (
-        "name",
-        "steps",
-        "use_torch",
-        "algorithm",
-        "num_torch_threads",
-        "num_envs",
-        "use_gpu",
-        "total",
-        "tc_advance_total",
-        "tc_advance_count",
-        "update_total",
-        "update_count",
-        "evaluate_total",
-        "evaluate_count",
-    )
-
-    results = []
-    results.append(labels)
-    f = open(
-        f"result_data_steps_{args.steps}_algo_{algo}_envs_{args.num_envs}_gpu_{args.gpu}_thread_{args.threads}.txt",
-        "w",
-    )
-    f.write(" ".join(labels) + "\n")
-
-    for env_config in envs_config_tuples:
-        data = run_experiment(
-            name=env_config[0],
-            steps=args.steps,
-            use_torch=True,
-            algo=algo,
-            num_torch_threads=1,
-            use_gpu=args.gpu,
-            num_envs=args.num_envs,
-            config_name=env_config[1],
-        )
-        results.append(data)
-        f.write(" ".join(data) + "\n")
-
-        if args.threads:
-            data = run_experiment(
-                name=env_config[0],
-                steps=args.steps,
-                use_torch=True,
-                algo=algo,
-                num_torch_threads=8,
-                use_gpu=args.gpu,
-                num_envs=args.num_envs,
-                config_name=env_config[1],
-            )
-            results.append(data)
-            f.write(" ".join(data) + "\n")
-
-        data = run_experiment(
-            name=env_config[0],
-            steps=args.steps,
-            use_torch=False,
-            algo=algo,
-            num_torch_threads=1,
-            use_gpu=args.gpu,
-            num_envs=args.num_envs,
-            config_name=env_config[1],
-        )
-        results.append(data)
-        f.write(" ".join(data) + "\n")
-    for r in results:
-        print(*r)
-    f.close()
-
-
-if __name__ == "__main__":
-    main()
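The deleted script read timings out of timers.json by hand-indexing nested "children" dicts, and left a "todo: do total / count" unresolved. For anyone who still needs that measurement after this removal, here is a minimal standalone sketch of the same lookup plus the mean-per-call computation; it assumes only the timers.json layout visible above, and the results path is a hypothetical example, not a path guaranteed by this diff:

```python
import json
from typing import Any, Dict, List


def timer_node(timers: Dict[str, Any], path: List[str]) -> Dict[str, Any]:
    """Walk the nested "children" blocks of an ML-Agents timers.json dump."""
    node = timers
    for key in path:
        node = node["children"][key]
    return node


# Key path taken from the lookups in the deleted script above;
# the file path is an illustrative run_id, adjust to your run.
with open("results/3DBall_test_25000_torch/run_logs/timers.json") as fp:
    timers = json.load(fp)

advance = timer_node(
    timers, ["TrainerController.start_learning", "TrainerController.advance"]
)
# Resolves the script's "todo: do total / count": mean seconds per advance() call.
print(advance["total"] / advance["count"])
```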
diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
index 16d1931d99..7105a5093f 100644
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
@@ -21,7 +21,7 @@
 )
 from mlagents.trainers.cli_utils import parser
 from mlagents_envs.environment import UnityEnvironment
-from mlagents.trainers.settings import RunOptions, TestingConfiguration
+from mlagents.trainers.settings import RunOptions
 from mlagents.trainers.training_status import GlobalTrainingStatus
 from mlagents_envs.base_env import BaseEnv
 
@@ -35,8 +35,6 @@
 )
 from mlagents_envs import logging_util
 
-from mlagents_envs.registry import default_registry
-
 logger = logging_util.get_logger(__name__)
 
 TRAINING_STATUS_FILE_NAME = "training_status.json"
@@ -198,27 +196,16 @@ def create_unity_environment(
     ) -> UnityEnvironment:
         # Make sure that each environment gets a different seed
         env_seed = seed + worker_id
-        if TestingConfiguration.env_name == "":
-            return UnityEnvironment(
-                file_name=env_path,
-                worker_id=worker_id,
-                seed=env_seed,
-                no_graphics=no_graphics,
-                base_port=start_port,
-                additional_args=env_args,
-                side_channels=side_channels,
-                log_folder=log_folder,
-            )
-        else:
-            return default_registry[TestingConfiguration.env_name].make(
-                seed=env_seed,
-                no_graphics=no_graphics,
-                base_port=start_port,
-                worker_id=worker_id,
-                additional_args=env_args,
-                side_channels=side_channels,
-                log_folder=log_folder,
-            )
+        return UnityEnvironment(
+            file_name=env_path,
+            worker_id=worker_id,
+            seed=env_seed,
+            no_graphics=no_graphics,
+            base_port=start_port,
+            additional_args=env_args,
+            side_channels=side_channels,
+            log_folder=log_folder,
+        )
 
     return create_unity_environment
diff --git a/ml-agents/mlagents/trainers/policy/torch_policy.py b/ml-agents/mlagents/trainers/policy/torch_policy.py
index 5fa135f6a2..e88d7c066f 100644
--- a/ml-agents/mlagents/trainers/policy/torch_policy.py
+++ b/ml-agents/mlagents/trainers/policy/torch_policy.py
@@ -8,7 +8,7 @@
 from mlagents_envs.base_env import DecisionSteps, BehaviorSpec
 from mlagents_envs.timers import timed
 
-from mlagents.trainers.settings import TrainerSettings, TestingConfiguration
+from mlagents.trainers.settings import TrainerSettings
 from mlagents.trainers.trajectory import SplitObservations
 from mlagents.trainers.torch.networks import (
     SharedActorCritic,
@@ -57,10 +57,7 @@ def __init__(
         )  # could be much simpler if TorchPolicy is nn.Module
         self.grads = None
 
-        if TestingConfiguration.device != "cpu":
-            torch.set_default_tensor_type(torch.cuda.FloatTensor)
-        else:
-            torch.set_default_tensor_type(torch.FloatTensor)
+        torch.set_default_tensor_type(torch.FloatTensor)
 
         reward_signal_configs = trainer_settings.reward_signals
         reward_signal_names = [key.value for key, _ in reward_signal_configs.items()]
@@ -83,7 +80,7 @@ def __init__(
             tanh_squash=tanh_squash,
         )
 
-        self.actor_critic.to(TestingConfiguration.device)
+        self.actor_critic.to("cpu")
 
     def split_decision_step(self, decision_requests):
         vec_vis_obs = SplitObservations.from_observations(decision_requests.obs)
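The torch_policy.py hunk above replaces the global TestingConfiguration.device switch by pinning the policy to CPU. If GPU selection is reintroduced later, a runtime capability check is a safer default than a process-wide flag; a minimal sketch of that idea, where the helper name and usage are hypothetical and not part of this diff:

```python
import torch


def default_device(use_gpu: bool = False) -> torch.device:
    """Pick a device at call time instead of via a global configuration flag."""
    # Fall back to CPU when CUDA is requested but not actually available.
    if use_gpu and torch.cuda.is_available():
        return torch.device("cuda:0")
    return torch.device("cpu")


# Hypothetical usage mirroring TorchPolicy.__init__ above:
#     self.actor_critic.to(default_device())
```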
diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py
index c16bc3439d..67a596948e 100644
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
@@ -15,12 +15,7 @@
 from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
 from mlagents.trainers.trajectory import Trajectory
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
-from mlagents.trainers.settings import (
-    TrainerSettings,
-    PPOSettings,
-    TestingConfiguration,
-    FrameworkType,
-)
+from mlagents.trainers.settings import TrainerSettings, PPOSettings, FrameworkType
 from mlagents.trainers.components.reward_signals import RewardSignal
 
 try:
@@ -64,8 +59,6 @@ def __init__(
             PPOSettings, self.trainer_settings.hyperparameters
         )
         self.seed = seed
-        if TestingConfiguration.max_steps > 0:
-            self.trainer_settings.max_steps = TestingConfiguration.max_steps
         self.policy: Policy = None  # type: ignore
 
     def _process_trajectory(self, trajectory: Trajectory) -> None:
diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py
index 8a010606da..7054ce9be4 100644
--- a/ml-agents/mlagents/trainers/settings.py
+++ b/ml-agents/mlagents/trainers/settings.py
@@ -46,13 +46,6 @@ def defaultdict_to_dict(d: DefaultDict) -> Dict:
     return {key: cattr.unstructure(val) for key, val in d.items()}
 
 
-class TestingConfiguration:
-    use_torch = True
-    max_steps = 0
-    env_name = ""
-    device = "cpu"
-
-
 class SerializationSettings:
     convert_to_barracuda = True
     convert_to_onnx = True
diff --git a/ml-agents/mlagents/trainers/trainer/rl_trainer.py b/ml-agents/mlagents/trainers/trainer/rl_trainer.py
index 7ae4f08c21..27e30a88a7 100644
--- a/ml-agents/mlagents/trainers/trainer/rl_trainer.py
+++ b/ml-agents/mlagents/trainers/trainer/rl_trainer.py
@@ -21,19 +21,15 @@
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.agent_processor import AgentManagerQueue
 from mlagents.trainers.trajectory import Trajectory
-from mlagents.trainers.settings import (
-    TestingConfiguration,
-    TrainerSettings,
-    FrameworkType,
-)
+from mlagents.trainers.settings import TrainerSettings, FrameworkType
 from mlagents.trainers.stats import StatsPropertyType
 from mlagents.trainers.saver.saver import BaseSaver
-from mlagents.trainers.saver.torch_saver import TorchSaver
 from mlagents.trainers.saver.tf_saver import TFSaver
 from mlagents.trainers.exception import UnityTrainerException
 
 try:
     from mlagents.trainers.policy.torch_policy import TorchPolicy
+    from mlagents.trainers.saver.torch_saver import TorchSaver
 except ModuleNotFoundError:
     TorchPolicy = None  # type: ignore
 
@@ -63,8 +59,6 @@ def __init__(self, *args, **kwargs):
         self.framework = self.trainer_settings.framework
         logger.debug(f"Using framework {self.framework.value}")
 
-        if TestingConfiguration.max_steps > 0:
-            self.trainer_settings.max_steps = TestingConfiguration.max_steps
         self._next_save_step = 0
         self._next_summary_step = 0
         self.saver = self.create_saver(
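The rl_trainer.py hunk moves the TorchSaver import inside the existing try/except, so the module still imports when torch is not installed. A standalone sketch of that optional-dependency pattern follows; note the None fallback for TorchSaver is an assumption for completeness, since the diff above only shows the existing TorchPolicy fallback:

```python
# Optional-dependency import pattern, as used in rl_trainer.py above:
# torch-specific classes load lazily so the trainer works in TF-only installs.
try:
    from mlagents.trainers.policy.torch_policy import TorchPolicy
    from mlagents.trainers.saver.torch_saver import TorchSaver
except ModuleNotFoundError:
    # Assumed fallback, mirroring the TorchPolicy guard shown in the diff.
    TorchPolicy = None  # type: ignore
    TorchSaver = None  # type: ignore
```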