diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f829d09..721fa02 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,11 @@ CHANGELOG ========= +2.3.4 +===== + +* feature: add capture_error flag to process.check_error and process.create and to all functions that runs process: modules.run, modules,run_module, and entry_point.run + 2.3.3 ===== diff --git a/setup.py b/setup.py index 57f5eb8..81fcc93 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ def read(file_name): setup( name='sagemaker_containers', - version='2.3.3', + version='2.3.4', description='Open source library for creating containers to run on Amazon SageMaker.', packages=packages, diff --git a/src/sagemaker_containers/_errors.py b/src/sagemaker_containers/_errors.py index c66fcee..430f4b3 100644 --- a/src/sagemaker_containers/_errors.py +++ b/src/sagemaker_containers/_errors.py @@ -14,6 +14,8 @@ import textwrap +import six + class ClientError(Exception): pass @@ -27,12 +29,20 @@ class _CalledProcessError(ClientError): cmd, return_code, output """ - def __init__(self, cmd, return_code=None): + def __init__(self, cmd, return_code=None, output=None): self.return_code = return_code self.cmd = cmd + self.output = output def __str__(self): - message = '%s:\nCommand "%s"' % (type(self).__name__, self.cmd) + if six.PY3 and self.output: + error_msg = '\n%s' % self.output.decode('latin1') + elif self.output: + error_msg = '\n%s' % self.output + else: + error_msg = '' + + message = '%s:\nCommand "%s"%s' % (type(self).__name__, self.cmd, error_msg) return message.strip() diff --git a/src/sagemaker_containers/_modules.py b/src/sagemaker_containers/_modules.py index 2fef0ae..0b220ff 100644 --- a/src/sagemaker_containers/_modules.py +++ b/src/sagemaker_containers/_modules.py @@ -91,10 +91,12 @@ def prepare(path, name): # type: (str, str) -> None _files.write_file(os.path.join(path, 'MANIFEST.in'), data) -def install(path): # type: (str) -> None +def install(path, capture_error=False): # type: (str, bool) -> None """Install a Python module in the executing Python environment. Args: path (str): Real path location of the Python module. + capture_error (bool): Default false. If True, the running process captures the + stderr, and appends it to the returned Exception message in case of errors. """ cmd = '%s -m pip install -U . ' % _process.python_executable() @@ -103,10 +105,11 @@ def install(path): # type: (str) -> None logger.info('Installing module with the following command:\n%s', cmd) - _process.check_error(shlex.split(cmd), _errors.InstallModuleError, cwd=path) + _process.check_error(shlex.split(cmd), _errors.InstallModuleError, cwd=path, capture_error=capture_error) -def run(module_name, args=None, env_vars=None, wait=True): # type: (str, list, dict, bool) -> Popen +def run(module_name, args=None, env_vars=None, wait=True, capture_error=False): + # type: (str, list, dict, bool, bool) -> Popen """Run Python module as a script. Search sys.path for the named module and execute its contents as the __main__ module. @@ -154,6 +157,8 @@ def run(module_name, args=None, env_vars=None, wait=True): # type: (str, list, module_name (str): module name in the same format required by python -m cli command. args (list): A list of program arguments. env_vars (dict): A map containing the environment variables to be written. + capture_error (bool): Default false. If True, the running process captures the + stderr, and appends it to the returned Exception message in case of errors. """ args = args or [] env_vars = env_vars or {} @@ -163,10 +168,10 @@ def run(module_name, args=None, env_vars=None, wait=True): # type: (str, list, _logging.log_script_invocation(cmd, env_vars) if wait: - return _process.check_error(cmd, _errors.ExecuteUserScriptError) + return _process.check_error(cmd, _errors.ExecuteUserScriptError, capture_error=capture_error) else: - return _process.create(cmd, _errors.ExecuteUserScriptError) + return _process.create(cmd, _errors.ExecuteUserScriptError, capture_error=capture_error) def import_module(uri, name=DEFAULT_MODULE_NAME, cache=None): # type: (str, str, bool) -> module @@ -195,8 +200,8 @@ def import_module(uri, name=DEFAULT_MODULE_NAME, cache=None): # type: (str, str six.reraise(_errors.ImportModuleError, _errors.ImportModuleError(e), sys.exc_info()[2]) -def run_module(uri, args, env_vars=None, name=DEFAULT_MODULE_NAME, cache=None, wait=True): - # type: (str, list, dict, str, bool, bool) -> Popen +def run_module(uri, args, env_vars=None, name=DEFAULT_MODULE_NAME, cache=None, wait=True, capture_error=False): + # type: (str, list, dict, str, bool, bool, bool) -> Popen """Download, prepare and executes a compressed tar file from S3 or provided directory as a module. SageMaker Python SDK saves the user provided scripts as compressed tar files in S3 @@ -222,7 +227,7 @@ def run_module(uri, args, env_vars=None, name=DEFAULT_MODULE_NAME, cache=None, w _env.write_env_vars(env_vars) - return run(name, args, env_vars, wait) + return run(name, args, env_vars, wait, capture_error) def _warning_cache_deprecation(cache): diff --git a/src/sagemaker_containers/_process.py b/src/sagemaker_containers/_process.py index ee9cbe2..f76ec03 100644 --- a/src/sagemaker_containers/_process.py +++ b/src/sagemaker_containers/_process.py @@ -21,19 +21,26 @@ from sagemaker_containers import _env -def create(cmd, error_class, cwd=None, **kwargs): +def create(cmd, error_class, cwd=None, capture_error=False, **kwargs): try: - return subprocess.Popen(cmd, env=os.environ, cwd=cwd or _env.code_dir, **kwargs) + stderr = subprocess.PIPE if capture_error else None + return subprocess.Popen(cmd, env=os.environ, cwd=cwd or _env.code_dir, stderr=stderr, **kwargs) except Exception as e: six.reraise(error_class, error_class(e), sys.exc_info()[2]) -def check_error(cmd, error_class, **kwargs): - process = create(cmd, error_class, **kwargs) - return_code = process.wait() +def check_error(cmd, error_class, capture_error=False, **kwargs): + process = create(cmd, error_class, capture_error=capture_error, **kwargs) + + if capture_error: + _, stderr = process.communicate() + return_code = process.poll() + else: + stderr = None + return_code = process.wait() if return_code: - raise error_class(return_code=return_code, cmd=' '.join(cmd)) + raise error_class(return_code=return_code, cmd=' '.join(cmd), output=stderr) return process diff --git a/src/sagemaker_containers/entry_point.py b/src/sagemaker_containers/entry_point.py index 0ebc8b6..bdf1d19 100644 --- a/src/sagemaker_containers/entry_point.py +++ b/src/sagemaker_containers/entry_point.py @@ -19,8 +19,8 @@ from sagemaker_containers import _env, _errors, _files, _logging, _modules, _process -def run(uri, user_entry_point, args, env_vars=None, wait=True): - # type: (str, str, list, dict, bool) -> subprocess.Popen +def run(uri, user_entry_point, args, env_vars=None, wait=True, capture_error=False): + # type: (str, str, list, dict, bool, bool) -> subprocess.Popen """Download, prepare and executes a compressed tar file from S3 or provided directory as an user entrypoint. Runs the user entry point, passing env_vars as environment variables and args as command arguments. @@ -59,20 +59,23 @@ def run(uri, user_entry_point, args, env_vars=None, wait=True): uri (str): the location of the module. wait (bool): If True, holds the process executing the user entry-point. If False, returns the process that is executing it. + capture_error (bool): Default false. If True, the running process captures the + stderr, and appends it to the returned Exception message in case of errors. + """ env_vars = env_vars or {} env_vars = env_vars.copy() _files.download_and_extract(uri, user_entry_point, _env.code_dir) - install(user_entry_point, _env.code_dir) + install(user_entry_point, _env.code_dir, capture_error) _env.write_env_vars(env_vars) - return _call(user_entry_point, args, env_vars, wait) + return _call(user_entry_point, args, env_vars, wait, capture_error) -def install(name, dst): +def install(name, dst, capture_error=False): """Install the user provided entry point to be executed as follow: - add the path to sys path - if the user entry point is a command, gives exec permissions to the script @@ -80,18 +83,21 @@ def install(name, dst): Args: name (str): name of the script or module. dst (str): path to directory with the script or module. + capture_error (bool): Default false. If True, the running process captures the + stderr, and appends it to the returned Exception message in case of errors. """ if dst not in sys.path: sys.path.insert(0, dst) entrypoint_type = entry_point_type(dst, name) if entrypoint_type is EntryPointType.PYTHON_PACKAGE: - _modules.install(dst) + _modules.install(dst, capture_error) if entrypoint_type is EntryPointType.COMMAND: os.chmod(os.path.join(dst, name), 511) -def _call(user_entry_point, args=None, env_vars=None, wait=True): # type: (str, list, dict, bool) -> Popen +def _call(user_entry_point, args=None, env_vars=None, wait=True, capture_error=False): + # type: (str, list, dict, bool, bool) -> Popen args = args or [] env_vars = env_vars or {} @@ -107,10 +113,10 @@ def _call(user_entry_point, args=None, env_vars=None, wait=True): # type: (str, _logging.log_script_invocation(cmd, env_vars) if wait: - return _process.check_error(cmd, _errors.ExecuteUserScriptError) + return _process.check_error(cmd, _errors.ExecuteUserScriptError, capture_error=capture_error) else: - return _process.create(cmd, _errors.ExecuteUserScriptError) + return _process.create(cmd, _errors.ExecuteUserScriptError, capture_error=capture_error) class EntryPointType(enum.Enum): diff --git a/test/functional/test_training_framework.py b/test/functional/test_training_framework.py index 5c81aaa..192a569 100644 --- a/test/functional/test_training_framework.py +++ b/test/functional/test_training_framework.py @@ -147,8 +147,9 @@ def framework_training_fn(): model.save(model_file) -@pytest.mark.parametrize('user_script', [USER_SCRIPT_WITH_SAVE, USER_SCRIPT_WITH_SAVE]) -def test_training_framework(user_script): +@pytest.mark.parametrize('user_script, capture_error', + [[USER_SCRIPT_WITH_SAVE, False], [USER_SCRIPT_WITH_SAVE, True]]) +def test_training_framework(user_script, capture_error): with pytest.raises(ImportError): importlib.import_module(modules.DEFAULT_MODULE_NAME) @@ -234,18 +235,19 @@ def test_trainer_report_failure(): assert 'No such file or directory' in message -def framework_training_with_script_mode_fn(): +def framework_training_with_script_mode_fn(capture_error): training_env = sagemaker_containers.training_env() entry_point.run(training_env.module_dir, training_env.user_entry_point, training_env.to_cmd_args(), - training_env.to_env_vars()) + training_env.to_env_vars(), capture_error=capture_error) -def framework_training_with_run_modules_fn(): +def framework_training_with_run_modules_fn(capture_error): training_env = sagemaker_containers.training_env() modules.run_module(training_env.module_dir, training_env.to_cmd_args(), - training_env.to_env_vars(), training_env.module_name) + training_env.to_env_vars(), training_env.module_name, + capture_error=capture_error) def test_parameter_server(): @@ -261,10 +263,10 @@ def test_parameter_server(): process.kill() -@pytest.mark.parametrize('user_script, training_fn', [ - [USER_MODE_SCRIPT, framework_training_with_script_mode_fn], - [USER_MODE_SCRIPT, framework_training_with_run_modules_fn]]) -def test_script_mode(user_script, training_fn): +@pytest.mark.parametrize('user_script, training_fn, capture_error', [ + [USER_MODE_SCRIPT, framework_training_with_script_mode_fn, True], + [USER_MODE_SCRIPT, framework_training_with_run_modules_fn, False]]) +def test_script_mode(user_script, training_fn, capture_error): channel = test.Channel.create(name='training') features = [1, 2, 3, 4] @@ -278,7 +280,7 @@ def test_script_mode(user_script, training_fn): test.prepare(user_module=module, hyperparameters=hyperparameters, channels=[channel]) - assert execute_an_wrap_exit(training_fn) == trainer.SUCCESS_CODE + assert execute_an_wrap_exit(training_fn, capture_error=capture_error) == trainer.SUCCESS_CODE model_path = os.path.join(env.model_dir, 'saved_model') @@ -290,10 +292,10 @@ def test_script_mode(user_script, training_fn): assert model.optimizer == 'SGD' -@pytest.mark.parametrize('user_script, training_fn', [ - [USER_MODE_SCRIPT, framework_training_with_script_mode_fn], - [USER_MODE_SCRIPT, framework_training_with_run_modules_fn]]) -def test_script_mode_local_directory(user_script, training_fn, tmpdir): +@pytest.mark.parametrize('user_script, training_fn, capture_error', [ + [USER_MODE_SCRIPT, framework_training_with_script_mode_fn, False], + [USER_MODE_SCRIPT, framework_training_with_run_modules_fn, True]]) +def test_script_mode_local_directory(user_script, training_fn, capture_error, tmpdir): channel = test.Channel.create(name='training') features = [1, 2, 3, 4] @@ -311,7 +313,7 @@ def test_script_mode_local_directory(user_script, training_fn, tmpdir): test.prepare(user_module=module, hyperparameters=hyperparameters, channels=[channel], local=True) - assert execute_an_wrap_exit(training_fn) == trainer.SUCCESS_CODE + assert execute_an_wrap_exit(training_fn, capture_error=capture_error) == trainer.SUCCESS_CODE model_path = os.path.join(env.model_dir, 'saved_model') @@ -329,10 +331,10 @@ def test_script_mode_local_directory(user_script, training_fn, tmpdir): """ -@pytest.mark.parametrize('training_fn', [ - framework_training_with_script_mode_fn, - framework_training_with_run_modules_fn]) -def test_script_mode_client_error(training_fn): +@pytest.mark.parametrize('training_fn, capture_error', [ + (framework_training_with_script_mode_fn, True), + (framework_training_with_run_modules_fn, False)]) +def test_script_mode_client_error(training_fn, capture_error): channel = test.Channel.create(name='training') module = test.UserModule(test.File(name='user_script.py', data=USER_MODE_SCRIPT_WITH_ERROR)) @@ -342,16 +344,18 @@ def test_script_mode_client_error(training_fn): test.prepare(user_module=module, hyperparameters=hyperparameters, channels=[channel]) with pytest.raises(errors.ExecuteUserScriptError) as e: - training_fn() + training_fn(capture_error) message = str(e.value) assert 'ExecuteUserScriptError' in message + if capture_error: + assert 'ZeroDivisionError' in message -@pytest.mark.parametrize('training_fn', [ - framework_training_with_script_mode_fn, - framework_training_with_run_modules_fn]) -def test_script_mode_client_import_error(training_fn): +@pytest.mark.parametrize('training_fn, capture_error', [ + [framework_training_with_script_mode_fn, True], + [framework_training_with_run_modules_fn, False]]) +def test_script_mode_client_import_error(training_fn, capture_error): channel = test.Channel.create(name='training') requirements_file = test.File('requirements.txt', '42/0') @@ -364,20 +368,24 @@ def test_script_mode_client_import_error(training_fn): test.prepare(user_module=module, hyperparameters=hyperparameters, channels=[channel]) with pytest.raises(errors.InstallModuleError) as e: - training_fn() + training_fn(capture_error) message = str(e.value) assert 'InstallModuleError:' in message + if capture_error: + assert "Invalid requirement: \'42/0\'" in message + assert "It looks like a path. File \'42/0\' does not exist." in message + def failure_message(): with open(os.path.join(env.output_dir, 'failure')) as f: return f.read() -def execute_an_wrap_exit(fn): +def execute_an_wrap_exit(fn, **kargs): try: - fn() + fn(**kargs) return trainer.SUCCESS_CODE except ValueError as e: return int(str(e)) diff --git a/test/unit/test_entry_point.py b/test/unit/test_entry_point.py index 1cf8172..f0c16b2 100644 --- a/test/unit/test_entry_point.py +++ b/test/unit/test_entry_point.py @@ -13,6 +13,7 @@ from __future__ import absolute_import import os +import subprocess import sys from mock import patch @@ -48,12 +49,15 @@ def test_install_module(check_error, entry_point_type_module): entry_point.install('python_module.py', path) cmd = [sys.executable, '-m', 'pip', 'install', '-U', '.'] - check_error.assert_called_with(cmd, _errors.InstallModuleError, cwd=path) + check_error.assert_called_with(cmd, _errors.InstallModuleError, + capture_error=False, cwd=path) with patch('os.path.exists', return_value=True): entry_point.install('python_module.py', path) - check_error.assert_called_with(cmd + ['-r', 'requirements.txt'], _errors.InstallModuleError, cwd=path) + check_error.assert_called_with(cmd + ['-r', 'requirements.txt'], + _errors.InstallModuleError, cwd=path, + capture_error=False) @patch('sagemaker_containers._process.check_error', autospec=True) @@ -86,18 +90,21 @@ def test_run_bash(log, popen, entry_point_type_script): entry_point._call('launcher.sh', ['--lr', '13']) cmd = ['/bin/sh', '-c', './launcher.sh --lr 13'] - popen.assert_called_with(cmd, cwd=_env.code_dir, env=os.environ) + popen.assert_called_with(cmd, cwd=_env.code_dir, env=os.environ, stderr=None) log.assert_called_with(cmd, {}) @patch('subprocess.Popen') @patch('sagemaker_containers._logging.log_script_invocation') def test_run_python(log, popen, entry_point_type_script): + popen().communicate.return_value = (None, 0) + with pytest.raises(_errors.ExecuteUserScriptError): - entry_point._call('launcher.py', ['--lr', '13']) + entry_point._call('launcher.py', ['--lr', '13'], capture_error=True) cmd = [sys.executable, 'launcher.py', '--lr', '13'] - popen.assert_called_with(cmd, cwd=_env.code_dir, env=os.environ) + popen.assert_called_with(cmd, cwd=_env.code_dir, env=os.environ, + stderr=subprocess.PIPE) log.assert_called_with(cmd, {}) @@ -108,7 +115,8 @@ def test_run_module(log, popen, entry_point_type_module): entry_point._call('module.py', ['--lr', '13']) cmd = [sys.executable, '-m', 'module', '--lr', '13'] - popen.assert_called_with(cmd, cwd=_env.code_dir, env=os.environ) + popen.assert_called_with(cmd, cwd=_env.code_dir, env=os.environ, + stderr=None) log.assert_called_with(cmd, {}) @@ -125,10 +133,10 @@ def test_run_error(): @patch('sagemaker_containers.entry_point._call') @patch('os.chmod') def test_run_module_wait(chmod, call, download_and_extract): - entry_point.run(uri='s3://url', user_entry_point='launcher.sh', args=['42']) + entry_point.run(uri='s3://url', user_entry_point='launcher.sh', args=['42'], capture_error=True) download_and_extract.assert_called_with('s3://url', 'launcher.sh', _env.code_dir) - call.assert_called_with('launcher.sh', ['42'], {}, True) + call.assert_called_with('launcher.sh', ['42'], {}, True, True) chmod.assert_called_with(os.path.join(_env.code_dir, 'launcher.sh'), 511) diff --git a/test/unit/test_errors.py b/test/unit/test_errors.py index bcb457c..8824ea6 100644 --- a/test/unit/test_errors.py +++ b/test/unit/test_errors.py @@ -25,3 +25,19 @@ def test_execute_user_script_error(): error = _errors.ExecuteUserScriptError(['python', '-m', '42'], return_code=42) assert str(error) == 'ExecuteUserScriptError:\nCommand "[\'python\', \'-m\', \'42\']"' + + +def test_install_module_error_with_output(): + error = _errors.InstallModuleError(['python', '-m', '42'], return_code=42, output=b'42') + + assert str(error) == """InstallModuleError: +Command "['python', '-m', '42']" +42""" + + +def test_execute_user_script_error_with_output(): + error = _errors.ExecuteUserScriptError(['python', '-m', '42'], return_code=42, output=b'42') + + assert str(error) == """ExecuteUserScriptError: +Command "['python', '-m', '42']" +42""" diff --git a/test/unit/test_modules.py b/test/unit/test_modules.py index 6e972e5..f3a869d 100644 --- a/test/unit/test_modules.py +++ b/test/unit/test_modules.py @@ -51,12 +51,14 @@ def test_install(check_error): _modules.install(path) cmd = [sys.executable, '-m', 'pip', 'install', '-U', '.'] - check_error.assert_called_with(cmd, _errors.InstallModuleError, cwd=path) + check_error.assert_called_with(cmd, _errors.InstallModuleError, cwd=path, capture_error=False) with patch('os.path.exists', return_value=True): _modules.install(path) - check_error.assert_called_with(cmd + ['-r', 'requirements.txt'], _errors.InstallModuleError, cwd=path) + check_error.assert_called_with( + cmd + ['-r', 'requirements.txt'], _errors.InstallModuleError, + capture_error=False, cwd=path) @patch('sagemaker_containers._process.check_error', autospec=True) @@ -129,12 +131,15 @@ def test_exists(import_module): @patch('sagemaker_containers.training_env', lambda: {}) -def test_run_error(): +@pytest.mark.parametrize('capture_error', [True, False]) +def test_run_error(capture_error): with pytest.raises(_errors.ExecuteUserScriptError) as e: - _modules.run('wrong module') + _modules.run('wrong module', capture_error=capture_error) message = str(e.value) assert 'ExecuteUserScriptError:' in message + if capture_error: + assert ' No module named wrong module' in message @patch('sagemaker_containers._process.python_executable') @@ -145,18 +150,20 @@ def test_run(log_script_invocation, check_error, executable): expected_cmd = [executable(), '-m', 'pytest', '--version'] log_script_invocation.assert_called_with(expected_cmd, {}) - check_error.assert_called_with(expected_cmd, _errors.ExecuteUserScriptError) + check_error.assert_called_with(expected_cmd, _errors.ExecuteUserScriptError, + capture_error=False) @patch('sagemaker_containers._process.python_executable') @patch('sagemaker_containers._process.create') @patch('sagemaker_containers._logging.log_script_invocation') def test_run_no_wait(log_script_invocation, create, executable): - _modules.run('pytest', ['--version'], {'PYPATH': '/opt/ml/code'}, wait=False) + _modules.run('pytest', ['--version'], {'PYPATH': '/opt/ml/code'}, wait=False, capture_error=True) expected_cmd = [executable(), '-m', 'pytest', '--version'] log_script_invocation.assert_called_with(expected_cmd, {'PYPATH': '/opt/ml/code'}) - create.assert_called_with(expected_cmd, _errors.ExecuteUserScriptError) + create.assert_called_with(expected_cmd, _errors.ExecuteUserScriptError, + capture_error=True) @pytest.mark.parametrize('wait, cache', [[True, False], [True, False]]) @@ -173,7 +180,7 @@ def test_run_module_wait(download_and_extract, write_env_vars, install, run, wai write_env_vars.assert_called_with({}) install.assert_called_with(_env.code_dir) - run.assert_called_with('default_user_module_name', ['42'], {}, True) + run.assert_called_with('default_user_module_name', ['42'], {}, True, False) @patch('sagemaker_containers._files.download_and_extract')