From 41db7c873039f154603ca5f77198e81aa6cfd5f3 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 3 Jun 2020 15:44:59 -0700 Subject: [PATCH 01/19] better errors for missing constants --- .../Runtime/Inference/BarracudaModelParamLoader.cs | 10 ++++++++++ com.unity.ml-agents/Runtime/Inference/TensorNames.cs | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs b/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs index 41efa93653..23cf0c908b 100644 --- a/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs +++ b/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs @@ -149,6 +149,16 @@ public static IEnumerable CheckModel(Model model, BrainParameters brainP return failedModelChecks; } + foreach (var constantName in TensorNames.RequiredConstants) + { + var tensor = model.GetTensorByName(constantName); + if (tensor == null) + { + failedModelChecks.Add($"Required constant \"{constantName}\" was not found in the model file."); + return failedModelChecks; + } + } + var modelApiVersion = (int)model.GetTensorByName(TensorNames.VersionNumber)[0]; var memorySize = (int)model.GetTensorByName(TensorNames.MemorySize)[0]; var isContinuousInt = (int)model.GetTensorByName(TensorNames.IsContinuousControl)[0]; diff --git a/com.unity.ml-agents/Runtime/Inference/TensorNames.cs b/com.unity.ml-agents/Runtime/Inference/TensorNames.cs index e4c8ea56fd..b0101fc0a5 100644 --- a/com.unity.ml-agents/Runtime/Inference/TensorNames.cs +++ b/com.unity.ml-agents/Runtime/Inference/TensorNames.cs @@ -25,5 +25,10 @@ internal static class TensorNames public const string IsContinuousControl = "is_continuous_control"; public const string ActionOutputShape = "action_output_shape"; public const string ActionOutput = "action"; + + public static readonly string[] RequiredConstants = + { + VersionNumber, MemorySize, IsContinuousControl, ActionOutputShape + }; } } From b1c0f241246f22499a8ac817e73ba34391e1011c Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Wed, 3 Jun 2020 16:39:12 -0700 Subject: [PATCH 02/19] run inference in yamato after training --- ml-agents/tests/yamato/training_int_tests.py | 49 ++++++++++++++++---- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index 1f17480399..f081ff29ab 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -3,6 +3,7 @@ import sys import subprocess import time +from typing import Any from .yamato_utils import ( get_base_path, @@ -16,7 +17,7 @@ ) -def run_training(python_version, csharp_version): +def run_training(python_version: str, csharp_version: str) -> bool: latest = "latest" run_id = int(time.time() * 1000.0) print( @@ -27,7 +28,7 @@ def run_training(python_version, csharp_version): if os.path.exists(nn_file_expected): # Should never happen - make sure nothing leftover from an old test. print("Artifacts from previous build found!") - sys.exit(1) + return False base_path = get_base_path() print(f"Running in base path {base_path}") @@ -50,8 +51,8 @@ def run_training(python_version, csharp_version): build_returncode = run_standalone_build(base_path) if build_returncode != 0: - print("Standalone build FAILED!") - sys.exit(build_returncode) + print(f"Standalone build FAILED! with return code {build_returncode}") + return False # Now rename the newly-built executable, and restore the old one os.rename(full_player_path, final_player_path) @@ -66,7 +67,7 @@ def run_training(python_version, csharp_version): # and reduce the batch_size and buffer_size enough to ensure an update step happens. yaml_out = "override.yaml" if python_version: - overrides = {"max_steps": 100, "batch_size": 10, "buffer_size": 10} + overrides: Any = {"max_steps": 100, "batch_size": 10, "buffer_size": 10} override_legacy_config_file( python_version, "config/trainer_config.yaml", yaml_out, **overrides ) @@ -77,9 +78,9 @@ def run_training(python_version, csharp_version): } override_config_file("config/ppo/3DBall.yaml", yaml_out, overrides) + env_path = os.path.join(get_base_output_path(), standalone_player_path) mla_learn_cmd = ( - f"mlagents-learn {yaml_out} --force --env=" - f"{os.path.join(get_base_output_path(), standalone_player_path)} " + f"mlagents-learn {yaml_out} --force --env={env_path} " f"--run-id={run_id} --no-graphics --env-args -logFile -" ) # noqa res = subprocess.run( @@ -88,10 +89,35 @@ def run_training(python_version, csharp_version): if res.returncode != 0 or not os.path.exists(nn_file_expected): print("mlagents-learn run FAILED!") - sys.exit(1) + return False + + if csharp_version is None and python_version is None: + inference_ok = run_inference(env_path, os.path.dirname(nn_file_expected)) + if not inference_ok: + return False print("mlagents-learn run SUCCEEDED!") - sys.exit(0) + return True + + +def run_inference(env_path: str, output_path: str) -> bool: + args = [ + env_path, + "-nographics", + "-logfile", + "-", + "--mlagents-override-model-directory", + output_path, + "--mlagents-quit-on-load-failure", + "--mlagents-quit-after-episodes", + "2", + ] + res = subprocess.run(args) + if res.returncode != 0: + print(res.stdout) + print("Error running inference!") + + return True def main(): @@ -101,7 +127,10 @@ def main(): args = parser.parse_args() try: - run_training(args.python, args.csharp) + ok = run_training(args.python, args.csharp) + if not ok: + sys.exit(1) + finally: # Cleanup - this gets executed even if we hit sys.exit() undo_git_checkout() From 75e45dd98cfa35f0e987b78f6247ea09ccce3a12 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Thu, 4 Jun 2020 09:29:03 -0700 Subject: [PATCH 03/19] add extension --- ml-agents/tests/yamato/training_int_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index f081ff29ab..d4c631d25a 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -78,7 +78,7 @@ def run_training(python_version: str, csharp_version: str) -> bool: } override_config_file("config/ppo/3DBall.yaml", yaml_out, overrides) - env_path = os.path.join(get_base_output_path(), standalone_player_path) + env_path = os.path.join(get_base_output_path(), standalone_player_path + ".app") mla_learn_cmd = ( f"mlagents-learn {yaml_out} --force --env={env_path} " f"--run-id={run_id} --no-graphics --env-args -logFile -" From 9b434da0c4f19659ec5a9aa071a8897a4d495f7c Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Thu, 4 Jun 2020 11:01:26 -0700 Subject: [PATCH 04/19] debug subprocess args --- ml-agents-envs/mlagents_envs/env_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ml-agents-envs/mlagents_envs/env_utils.py b/ml-agents-envs/mlagents_envs/env_utils.py index 7af5fbae55..776015847c 100644 --- a/ml-agents-envs/mlagents_envs/env_utils.py +++ b/ml-agents-envs/mlagents_envs/env_utils.py @@ -89,6 +89,7 @@ def launch_executable(file_name: str, args: List[str]) -> subprocess.Popen: get_logger(__name__).debug("This is the launch string {}".format(launch_string)) # Launch Unity environment subprocess_args = [launch_string] + args + print(subprocess_args) try: return subprocess.Popen( subprocess_args, From b729418f973b4cc2b31a27800bdf141063089a27 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Thu, 4 Jun 2020 11:40:36 -0700 Subject: [PATCH 05/19] fix exe path --- ml-agents-envs/mlagents_envs/env_utils.py | 2 +- ml-agents/tests/yamato/training_int_tests.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ml-agents-envs/mlagents_envs/env_utils.py b/ml-agents-envs/mlagents_envs/env_utils.py index 776015847c..915a80b9b6 100644 --- a/ml-agents-envs/mlagents_envs/env_utils.py +++ b/ml-agents-envs/mlagents_envs/env_utils.py @@ -89,7 +89,7 @@ def launch_executable(file_name: str, args: List[str]) -> subprocess.Popen: get_logger(__name__).debug("This is the launch string {}".format(launch_string)) # Launch Unity environment subprocess_args = [launch_string] + args - print(subprocess_args) + # print(subprocess_args) try: return subprocess.Popen( subprocess_args, diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index d4c631d25a..8d9e826eb2 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -78,7 +78,8 @@ def run_training(python_version: str, csharp_version: str) -> bool: } override_config_file("config/ppo/3DBall.yaml", yaml_out, overrides) - env_path = os.path.join(get_base_output_path(), standalone_player_path + ".app") + env_path = os.path.join(get_base_output_path(), standalone_player_path) + exe_path = env_path + ".app/Contents/MacOS/UnityEnvironment" mla_learn_cmd = ( f"mlagents-learn {yaml_out} --force --env={env_path} " f"--run-id={run_id} --no-graphics --env-args -logFile -" @@ -92,7 +93,7 @@ def run_training(python_version: str, csharp_version: str) -> bool: return False if csharp_version is None and python_version is None: - inference_ok = run_inference(env_path, os.path.dirname(nn_file_expected)) + inference_ok = run_inference(exe_path, os.path.dirname(nn_file_expected)) if not inference_ok: return False @@ -100,9 +101,9 @@ def run_training(python_version: str, csharp_version: str) -> bool: return True -def run_inference(env_path: str, output_path: str) -> bool: +def run_inference(exe_path: str, output_path: str) -> bool: args = [ - env_path, + exe_path, "-nographics", "-logfile", "-", From 44ec1165479292e0e7ebc6c8eef0c06c39529488 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Thu, 4 Jun 2020 16:20:08 -0700 Subject: [PATCH 06/19] search for executable --- ml-agents/tests/yamato/training_int_tests.py | 14 ++++++++++---- ml-agents/tests/yamato/yamato_utils.py | 17 +++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index 8d9e826eb2..e31e66a394 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -6,6 +6,7 @@ from typing import Any from .yamato_utils import ( + find_executables, get_base_path, get_base_output_path, run_standalone_build, @@ -78,8 +79,7 @@ def run_training(python_version: str, csharp_version: str) -> bool: } override_config_file("config/ppo/3DBall.yaml", yaml_out, overrides) - env_path = os.path.join(get_base_output_path(), standalone_player_path) - exe_path = env_path + ".app/Contents/MacOS/UnityEnvironment" + env_path = os.path.join(get_base_output_path(), standalone_player_path + ".app") mla_learn_cmd = ( f"mlagents-learn {yaml_out} --force --env={env_path} " f"--run-id={run_id} --no-graphics --env-args -logFile -" @@ -93,7 +93,7 @@ def run_training(python_version: str, csharp_version: str) -> bool: return False if csharp_version is None and python_version is None: - inference_ok = run_inference(exe_path, os.path.dirname(nn_file_expected)) + inference_ok = run_inference(env_path, os.path.dirname(nn_file_expected)) if not inference_ok: return False @@ -101,7 +101,13 @@ def run_training(python_version: str, csharp_version: str) -> bool: return True -def run_inference(exe_path: str, output_path: str) -> bool: +def run_inference(env_path: str, output_path: str) -> bool: + exes = find_executables(env_path) + if len(exes) != 0: + print(f"Can't determine the player executable in {env_path}. Found {exes}.") + return False + + exe_path = exes[0] args = [ exe_path, "-nographics", diff --git a/ml-agents/tests/yamato/yamato_utils.py b/ml-agents/tests/yamato/yamato_utils.py index e6c72aa427..f38434a11e 100644 --- a/ml-agents/tests/yamato/yamato_utils.py +++ b/ml-agents/tests/yamato/yamato_utils.py @@ -84,6 +84,23 @@ def run_standalone_build( return res.returncode +def find_executables(root_dir: str) -> List[str]: + """ + Try to find the player executable. This seems to vary between Unity versions. + """ + ignored_extension = frozenset([".dll", ".dylib", ".bundle"]) + exes = [] + for root, _, files in os.walk(root_dir): + for filename in files: + file_root, ext = os.path.splitext(filename) + if ext in ignored_extension: + continue + file_path = os.path.join(root, filename) + if os.access(file_path, os.X_OK): + exes.append(file_path) + return exes + + def init_venv( mlagents_python_version: str = None, extra_packages: Optional[List[str]] = None ) -> str: From aa40f456d04060241c0640fd1f27bf596e4f9f24 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Thu, 4 Jun 2020 16:37:28 -0700 Subject: [PATCH 07/19] fix dumb bug --- ml-agents/tests/yamato/training_int_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index e31e66a394..b287f0bd28 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -103,7 +103,7 @@ def run_training(python_version: str, csharp_version: str) -> bool: def run_inference(env_path: str, output_path: str) -> bool: exes = find_executables(env_path) - if len(exes) != 0: + if len(exes) != 1: print(f"Can't determine the player executable in {env_path}. Found {exes}.") return False From 18cc5e447a176c8329a3b4606c52e6141d9eb597 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Thu, 4 Jun 2020 16:57:04 -0700 Subject: [PATCH 08/19] -batchmode --- ml-agents/tests/yamato/training_int_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index b287f0bd28..de5649fd4c 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -111,6 +111,7 @@ def run_inference(env_path: str, output_path: str) -> bool: args = [ exe_path, "-nographics", + "-batchmode", "-logfile", "-", "--mlagents-override-model-directory", From d732061603b066e1039c7a56192dab84a63857d9 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Fri, 5 Jun 2020 10:29:27 -0700 Subject: [PATCH 09/19] fail if inference fails --- ml-agents/tests/yamato/training_int_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index de5649fd4c..8c3b9cd65a 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -124,6 +124,7 @@ def run_inference(env_path: str, output_path: str) -> bool: if res.returncode != 0: print(res.stdout) print("Error running inference!") + return False return True From 9e4e415bbd367480982436e1d63c2ac71e2a8dce Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Mon, 8 Jun 2020 14:14:24 -0700 Subject: [PATCH 10/19] install tf2onnx on yamato --- ml-agents/tests/yamato/training_int_tests.py | 5 ++++- ml-agents/tests/yamato/yamato_utils.py | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index 8c3b9cd65a..213ae92ca6 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -93,7 +93,10 @@ def run_training(python_version: str, csharp_version: str) -> bool: return False if csharp_version is None and python_version is None: - inference_ok = run_inference(env_path, os.path.dirname(nn_file_expected)) + # Use abs path so that loading doesn't get confused + inference_ok = run_inference( + env_path, os.path.abspath(os.path.dirname(nn_file_expected)) + ) if not inference_ok: return False diff --git a/ml-agents/tests/yamato/yamato_utils.py b/ml-agents/tests/yamato/yamato_utils.py index f38434a11e..61d5103501 100644 --- a/ml-agents/tests/yamato/yamato_utils.py +++ b/ml-agents/tests/yamato/yamato_utils.py @@ -122,6 +122,7 @@ def init_venv( "--upgrade setuptools", # TODO build these and publish to internal pypi "~/tensorflow_pkg/tensorflow-2.0.0-cp37-cp37m-macosx_10_14_x86_64.whl", + "tf2onnx>=1.6.1", ] if mlagents_python_version: # install from pypi From 947f93a2adc0e5360250b4300ceba4659e479497 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Mon, 8 Jun 2020 14:54:28 -0700 Subject: [PATCH 11/19] allow onnx for overrides (expect to fail now) --- .../SharedAssets/Scripts/ModelOverrider.cs | 11 +++++++-- ml-agents/tests/yamato/training_int_tests.py | 23 ++++++++++++------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs index 7455bf3165..96abccbf62 100644 --- a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs +++ b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs @@ -15,7 +15,7 @@ namespace Unity.MLAgentsExamples /// Utility class to allow the NNModel file for an agent to be overriden during inference. /// This is used internally to validate the file after training is done. /// The behavior name to override and file path are specified on the commandline, e.g. - /// player.exe --mlagents-override-model behavior1 /path/to/model1.nn --mlagents-override-model behavior2 /path/to/model2.nn + /// player.exe --mlagents-override-model-directory /path/to/models /// /// Additionally, a number of episodes to run can be specified; after this, the application will quit. /// Note this will only work with example scenes that have 1:1 Agent:Behaviors. More complicated scenes like WallJump @@ -25,6 +25,7 @@ public class ModelOverrider : MonoBehaviour { const string k_CommandLineModelOverrideFlag = "--mlagents-override-model"; const string k_CommandLineModelOverrideDirectoryFlag = "--mlagents-override-model-directory"; + const string k_CommandLineModelOverrideExtensionFlag = "--mlagents-override-model-extension"; const string k_CommandLineQuitAfterEpisodesFlag = "--mlagents-quit-after-episodes"; const string k_CommandLineQuitOnLoadFailure = "--mlagents-quit-on-load-failure"; @@ -36,6 +37,8 @@ public class ModelOverrider : MonoBehaviour string m_BehaviorNameOverrideDirectory; + string m_OverrideExtension = ".nn"; + // Cached loaded NNModels, with the behavior name as the key. Dictionary m_CachedModels = new Dictionary(); @@ -105,6 +108,10 @@ void GetAssetPathFromCommandLine() { m_BehaviorNameOverrideDirectory = args[i + 1].Trim(); } + else if (args[i] == k_CommandLineModelOverrideExtensionFlag && i < args.Length-1) + { + m_OverrideExtension = args[i + 1].Trim(); + } else if (args[i] == k_CommandLineQuitAfterEpisodesFlag && i < args.Length-1) { Int32.TryParse(args[i + 1], out maxEpisodes); @@ -181,7 +188,7 @@ public NNModel GetModelForBehaviorName(string behaviorName) } else if(!string.IsNullOrEmpty(m_BehaviorNameOverrideDirectory)) { - assetPath = Path.Combine(m_BehaviorNameOverrideDirectory, $"{behaviorName}.nn"); + assetPath = Path.Combine(m_BehaviorNameOverrideDirectory, $"{behaviorName}{m_OverrideExtension}"); } if (string.IsNullOrEmpty(assetPath)) diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index 213ae92ca6..636e632cd0 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -94,17 +94,18 @@ def run_training(python_version: str, csharp_version: str) -> bool: if csharp_version is None and python_version is None: # Use abs path so that loading doesn't get confused - inference_ok = run_inference( - env_path, os.path.abspath(os.path.dirname(nn_file_expected)) - ) - if not inference_ok: - return False + model_path = os.path.abspath(os.path.dirname(nn_file_expected)) + for extension in [".nn", ".onnx"]: + inference_ok = run_inference(env_path, model_path, extension) + if not inference_ok: + return False print("mlagents-learn run SUCCEEDED!") return True -def run_inference(env_path: str, output_path: str) -> bool: +def run_inference(env_path: str, output_path: str, model_extension: str) -> bool: + start_time = time.time() exes = find_executables(env_path) if len(exes) != 1: print(f"Can't determine the player executable in {env_path}. Found {exes}.") @@ -115,19 +116,25 @@ def run_inference(env_path: str, output_path: str) -> bool: exe_path, "-nographics", "-batchmode", - "-logfile", - "-", + # "-logfile", + # "-", "--mlagents-override-model-directory", output_path, "--mlagents-quit-on-load-failure", "--mlagents-quit-after-episodes", "2", + "--mlagents-override-model-directory", + model_extension, ] res = subprocess.run(args) + end_time = time.time() if res.returncode != 0: + print(" ".join(args)) print(res.stdout) print("Error running inference!") return False + else: + print(f"Inference succeeded! Took {end_time - start_time} seconds") return True From 094373e95b6d89b90e62bc565bf56ee1a98f9a73 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Mon, 8 Jun 2020 15:05:09 -0700 Subject: [PATCH 12/19] enable logs --- ml-agents/tests/yamato/training_int_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index 636e632cd0..3e096c6bd7 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -116,8 +116,8 @@ def run_inference(env_path: str, output_path: str, model_extension: str) -> bool exe_path, "-nographics", "-batchmode", - # "-logfile", - # "-", + "-logfile", + "-", "--mlagents-override-model-directory", output_path, "--mlagents-quit-on-load-failure", From 563b084d76f2a61e7cce44fb0dc33da574368831 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Mon, 8 Jun 2020 15:12:10 -0700 Subject: [PATCH 13/19] fix commandline arg --- ml-agents/tests/yamato/training_int_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index 3e096c6bd7..7631ef6b2a 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -123,7 +123,7 @@ def run_inference(env_path: str, output_path: str, model_extension: str) -> bool "--mlagents-quit-on-load-failure", "--mlagents-quit-after-episodes", "2", - "--mlagents-override-model-directory", + "--mlagents-override-model-extension", model_extension, ] res = subprocess.run(args) From 4c64e2382e235db7360e844e2f46fac4d6cb3cba Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Mon, 8 Jun 2020 15:35:40 -0700 Subject: [PATCH 14/19] catch exception from SetModel and exit --- .../SharedAssets/Scripts/ModelOverrider.cs | 36 ++++++++++++++----- ml-agents/tests/yamato/training_int_tests.py | 2 +- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs index 96abccbf62..395a316685 100644 --- a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs +++ b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs @@ -224,27 +224,47 @@ public NNModel GetModelForBehaviorName(string behaviorName) /// void OverrideModel() { + bool overrideOk = false; + string overrideError = null; + m_Agent.LazyInitialize(); var bp = m_Agent.GetComponent(); var behaviorName = bp.BehaviorName; var nnModel = GetModelForBehaviorName(behaviorName); - if (nnModel == null && m_QuitOnLoadFailure) + if (nnModel == null) { - Debug.Log( + overrideError = $"Didn't find a model for behaviorName {behaviorName}. Make " + $"sure the behaviorName is set correctly in the commandline " + - $"and that the model file exists" - ); + $"and that the model file exists"; + } + else + { + var modelName = nnModel != null ? nnModel.name : ""; + Debug.Log($"Overriding behavior {behaviorName} for agent with model {modelName}"); + try + { + m_Agent.SetModel(GetOverrideBehaviorName(behaviorName), nnModel); + overrideOk = true; + } + catch (Exception e) + { + overrideError = e.ToString(); + } + } + + if (!overrideOk && m_QuitOnLoadFailure) + { + if(!string.IsNullOrEmpty(overrideError)) + { + Debug.LogWarning(overrideError); + } Application.Quit(1); #if UNITY_EDITOR EditorApplication.isPlaying = false; #endif } - var modelName = nnModel != null ? nnModel.name : ""; - Debug.Log($"Overriding behavior {behaviorName} for agent with model {modelName}"); - // This might give a null model; that's better because we'll fall back to the Heuristic - m_Agent.SetModel(GetOverrideBehaviorName(behaviorName), nnModel); } } diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index 7631ef6b2a..5e26205111 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -122,7 +122,7 @@ def run_inference(env_path: str, output_path: str, model_extension: str) -> bool output_path, "--mlagents-quit-on-load-failure", "--mlagents-quit-after-episodes", - "2", + "1", "--mlagents-override-model-extension", model_extension, ] From a3041630924935121f2e146b8ae373304564d4e8 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Mon, 8 Jun 2020 15:50:11 -0700 Subject: [PATCH 15/19] cleanup error message --- .../ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs index 395a316685..4bac55738e 100644 --- a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs +++ b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs @@ -250,7 +250,7 @@ void OverrideModel() } catch (Exception e) { - overrideError = e.ToString(); + overrideError = $"Exception calling Agent.SetModel: {e}"; } } From 11b3817e2158e1cb54103fcf45e3d4e26571cdb2 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Mon, 8 Jun 2020 16:29:04 -0700 Subject: [PATCH 16/19] model artifacts, logs as artifacts, fix pip --- .yamato/training-int-tests.yml | 3 +++ ml-agents/tests/yamato/training_int_tests.py | 24 +++++++++++++++++--- ml-agents/tests/yamato/yamato_utils.py | 2 +- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/.yamato/training-int-tests.yml b/.yamato/training-int-tests.yml index c6301990fc..4cc9da938c 100644 --- a/.yamato/training-int-tests.yml +++ b/.yamato/training-int-tests.yml @@ -37,7 +37,10 @@ test_mac_training_int_{{ editor.version }}: logs: paths: - "artifacts/standalone_build.txt" + - "artifacts/inference.nn.txt" + - "artifacts/inference.onnx.txt" standalonebuild: paths: - "artifacts/testplayer*/**" + - "artifacts/models/**" {% endfor %} diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index 5e26205111..2bc071d118 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -1,5 +1,6 @@ import argparse import os +import shutil import sys import subprocess import time @@ -26,6 +27,9 @@ def run_training(python_version: str, csharp_version: str) -> bool: ) output_dir = "models" if python_version else "results" nn_file_expected = f"./{output_dir}/{run_id}/3DBall.nn" + onnx_file_expected = f"./{output_dir}/{run_id}/3DBall.onnx" + frozen_graph_file_expected = f"./{output_dir}/{run_id}/3DBall/frozen_graph_def.pb" + if os.path.exists(nn_file_expected): # Should never happen - make sure nothing leftover from an old test. print("Artifacts from previous build found!") @@ -88,14 +92,26 @@ def run_training(python_version: str, csharp_version: str) -> bool: f"source {venv_path}/bin/activate; {mla_learn_cmd}", shell=True ) - if res.returncode != 0 or not os.path.exists(nn_file_expected): + # Save models as artifacts (only if we're using latest python and C#) + if csharp_version is None and python_version is None: + model_artifacts_dir = os.path.join(get_base_output_path(), "models") + os.makedirs(model_artifacts_dir, exist_ok=True) + shutil.copy(nn_file_expected, model_artifacts_dir) + shutil.copy(onnx_file_expected, model_artifacts_dir) + shutil.copy(frozen_graph_file_expected, model_artifacts_dir) + + if ( + res.returncode != 0 + or not os.path.exists(nn_file_expected) + or not os.path.exists(onnx_file_expected) + ): print("mlagents-learn run FAILED!") return False if csharp_version is None and python_version is None: # Use abs path so that loading doesn't get confused model_path = os.path.abspath(os.path.dirname(nn_file_expected)) - for extension in [".nn", ".onnx"]: + for extension in [".onnx", ".nn"]: inference_ok = run_inference(env_path, model_path, extension) if not inference_ok: return False @@ -111,13 +127,15 @@ def run_inference(env_path: str, output_path: str, model_extension: str) -> bool print(f"Can't determine the player executable in {env_path}. Found {exes}.") return False + log_output_path = f"{get_base_output_path()}/inference{model_extension}.txt" + exe_path = exes[0] args = [ exe_path, "-nographics", "-batchmode", "-logfile", - "-", + log_output_path, "--mlagents-override-model-directory", output_path, "--mlagents-quit-on-load-failure", diff --git a/ml-agents/tests/yamato/yamato_utils.py b/ml-agents/tests/yamato/yamato_utils.py index 61d5103501..611dac7f17 100644 --- a/ml-agents/tests/yamato/yamato_utils.py +++ b/ml-agents/tests/yamato/yamato_utils.py @@ -122,7 +122,7 @@ def init_venv( "--upgrade setuptools", # TODO build these and publish to internal pypi "~/tensorflow_pkg/tensorflow-2.0.0-cp37-cp37m-macosx_10_14_x86_64.whl", - "tf2onnx>=1.6.1", + "tf2onnx==1.6.1", ] if mlagents_python_version: # install from pypi From d171bcae152a817aad747ca7a995ddc0dfb4845d Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Mon, 8 Jun 2020 18:07:23 -0700 Subject: [PATCH 17/19] don't run onnx --- .../Examples/SharedAssets/Scripts/ModelOverrider.cs | 9 +++++++++ ml-agents/tests/yamato/training_int_tests.py | 8 +++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs index 4bac55738e..d04a46221c 100644 --- a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs +++ b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs @@ -111,6 +111,15 @@ void GetAssetPathFromCommandLine() else if (args[i] == k_CommandLineModelOverrideExtensionFlag && i < args.Length-1) { m_OverrideExtension = args[i + 1].Trim(); + if (m_OverrideExtension.Equals(".onnx", StringComparison.OrdinalIgnoreCase)) + { + // Not supported yet - need to update the model loading code to support + Debug.LogError("ONNX loading not supported yet."); + Application.Quit(1); +#if UNITY_EDITOR + EditorApplication.isPlaying = false; +#endif + } } else if (args[i] == k_CommandLineQuitAfterEpisodesFlag && i < args.Length-1) { diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index 2bc071d118..f55787e4f2 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -111,7 +111,9 @@ def run_training(python_version: str, csharp_version: str) -> bool: if csharp_version is None and python_version is None: # Use abs path so that loading doesn't get confused model_path = os.path.abspath(os.path.dirname(nn_file_expected)) - for extension in [".onnx", ".nn"]: + # Onnx loading for overrides not currently supported, but this is + # where to add it in when it is. + for extension in [".nn"]: inference_ok = run_inference(env_path, model_path, extension) if not inference_ok: return False @@ -147,9 +149,9 @@ def run_inference(env_path: str, output_path: str, model_extension: str) -> bool res = subprocess.run(args) end_time = time.time() if res.returncode != 0: - print(" ".join(args)) - print(res.stdout) print("Error running inference!") + print("Command line: " + " ".join(args)) + subprocess.run(["cat", log_output_path]) return False else: print(f"Inference succeeded! Took {end_time - start_time} seconds") From f0f75128aa61e91ecaa847df0f2f26094c1b43bf Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Mon, 8 Jun 2020 18:26:54 -0700 Subject: [PATCH 18/19] cleanup and comment --- .../ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs | 2 ++ ml-agents-envs/mlagents_envs/env_utils.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs index d04a46221c..20110c8a4f 100644 --- a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs +++ b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs @@ -219,6 +219,8 @@ public NNModel GetModelForBehaviorName(string behaviorName) return null; } + // Note - this approach doesn't work for onnx files. Need to replace with + // the equivalent of ONNXModelImporter.OnImportAsset() var asset = ScriptableObject.CreateInstance(); asset.modelData = ScriptableObject.CreateInstance(); asset.modelData.Value = model; diff --git a/ml-agents-envs/mlagents_envs/env_utils.py b/ml-agents-envs/mlagents_envs/env_utils.py index 915a80b9b6..7af5fbae55 100644 --- a/ml-agents-envs/mlagents_envs/env_utils.py +++ b/ml-agents-envs/mlagents_envs/env_utils.py @@ -89,7 +89,6 @@ def launch_executable(file_name: str, args: List[str]) -> subprocess.Popen: get_logger(__name__).debug("This is the launch string {}".format(launch_string)) # Launch Unity environment subprocess_args = [launch_string] + args - # print(subprocess_args) try: return subprocess.Popen( subprocess_args, From d5f1e2aad75daa07000b7ef828d3e4ccb837c016 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Thu, 25 Jun 2020 13:47:34 -0700 Subject: [PATCH 19/19] update extension handling --- .../SharedAssets/Scripts/ModelOverrider.cs | 15 +++++++++------ ml-agents/tests/yamato/training_int_tests.py | 4 ++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs index 20110c8a4f..b8727ba956 100644 --- a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs +++ b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs @@ -23,6 +23,7 @@ namespace Unity.MLAgentsExamples /// public class ModelOverrider : MonoBehaviour { + HashSet k_SupportedExtensions = new HashSet{"nn", "onnx"}; const string k_CommandLineModelOverrideFlag = "--mlagents-override-model"; const string k_CommandLineModelOverrideDirectoryFlag = "--mlagents-override-model-directory"; const string k_CommandLineModelOverrideExtensionFlag = "--mlagents-override-model-extension"; @@ -37,7 +38,7 @@ public class ModelOverrider : MonoBehaviour string m_BehaviorNameOverrideDirectory; - string m_OverrideExtension = ".nn"; + string m_OverrideExtension = "nn"; // Cached loaded NNModels, with the behavior name as the key. Dictionary m_CachedModels = new Dictionary(); @@ -110,11 +111,13 @@ void GetAssetPathFromCommandLine() } else if (args[i] == k_CommandLineModelOverrideExtensionFlag && i < args.Length-1) { - m_OverrideExtension = args[i + 1].Trim(); - if (m_OverrideExtension.Equals(".onnx", StringComparison.OrdinalIgnoreCase)) + m_OverrideExtension = args[i + 1].Trim().ToLower(); + var isKnownExtension = k_SupportedExtensions.Contains(m_OverrideExtension); + // Not supported yet - need to update the model loading code to support + var isOnnx = m_OverrideExtension.Equals("onnx"); + if (!isKnownExtension || isOnnx) { - // Not supported yet - need to update the model loading code to support - Debug.LogError("ONNX loading not supported yet."); + Debug.LogError($"loading unsupported format: {m_OverrideExtension}"); Application.Quit(1); #if UNITY_EDITOR EditorApplication.isPlaying = false; @@ -197,7 +200,7 @@ public NNModel GetModelForBehaviorName(string behaviorName) } else if(!string.IsNullOrEmpty(m_BehaviorNameOverrideDirectory)) { - assetPath = Path.Combine(m_BehaviorNameOverrideDirectory, $"{behaviorName}{m_OverrideExtension}"); + assetPath = Path.Combine(m_BehaviorNameOverrideDirectory, $"{behaviorName}.{m_OverrideExtension}"); } if (string.IsNullOrEmpty(assetPath)) diff --git a/ml-agents/tests/yamato/training_int_tests.py b/ml-agents/tests/yamato/training_int_tests.py index f55787e4f2..e7c315012d 100644 --- a/ml-agents/tests/yamato/training_int_tests.py +++ b/ml-agents/tests/yamato/training_int_tests.py @@ -113,7 +113,7 @@ def run_training(python_version: str, csharp_version: str) -> bool: model_path = os.path.abspath(os.path.dirname(nn_file_expected)) # Onnx loading for overrides not currently supported, but this is # where to add it in when it is. - for extension in [".nn"]: + for extension in ["nn"]: inference_ok = run_inference(env_path, model_path, extension) if not inference_ok: return False @@ -129,7 +129,7 @@ def run_inference(env_path: str, output_path: str, model_extension: str) -> bool print(f"Can't determine the player executable in {env_path}. Found {exes}.") return False - log_output_path = f"{get_base_output_path()}/inference{model_extension}.txt" + log_output_path = f"{get_base_output_path()}/inference.{model_extension}.txt" exe_path = exes[0] args = [