Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve error reporting for failing shell commands (and EasyBuild crashes) #4351

Merged
merged 12 commits into from
Oct 23, 2023
Merged
21 changes: 10 additions & 11 deletions easybuild/framework/easyblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
from easybuild.tools.hooks import MODULE_STEP, MODULE_WRITE, PACKAGE_STEP, PATCH_STEP, PERMISSIONS_STEP, POSTITER_STEP
from easybuild.tools.hooks import POSTPROC_STEP, PREPARE_STEP, READY_STEP, SANITYCHECK_STEP, SOURCE_STEP
from easybuild.tools.hooks import SINGLE_EXTENSION, TEST_STEP, TESTCASES_STEP, load_hooks, run_hook
from easybuild.tools.run import check_async_cmd, run_cmd
from easybuild.tools.run import RunShellCmdError, check_async_cmd, run_cmd
from easybuild.tools.jenkins import write_to_xml
from easybuild.tools.module_generator import ModuleGeneratorLua, ModuleGeneratorTcl, module_generator, dependencies_for
from easybuild.tools.module_naming_scheme.utilities import det_full_ec_version
Expand Down Expand Up @@ -4124,6 +4124,11 @@ def run_all_steps(self, run_test_cases):
start_time = datetime.now()
try:
self.run_step(step_name, step_methods)
except RunShellCmdError as err:
err.print()
ec_path = os.path.basename(self.cfg.path)
error_msg = f"shell command '{err.cmd_name} ...' failed in {step_name} step for {ec_path}"
raise EasyBuildError(error_msg)
finally:
if not self.dry_run:
step_duration = datetime.now() - start_time
Expand Down Expand Up @@ -4225,7 +4230,7 @@ def build_and_install_one(ecdict, init_env):
app.cfg['skip'] = skip

# build easyconfig
errormsg = '(no error)'
error_msg = '(no error)'
# timing info
start_time = time.time()
try:
Expand Down Expand Up @@ -4263,9 +4268,7 @@ def build_and_install_one(ecdict, init_env):
adjust_permissions(app.installdir, stat.S_IWUSR, add=False, recursive=True)

except EasyBuildError as err:
first_n = 300
errormsg = "build failed (first %d chars): %s" % (first_n, err.msg[:first_n])
_log.warning(errormsg)
error_msg = err.msg
result = False

ended = 'ended'
Expand Down Expand Up @@ -4387,11 +4390,7 @@ def ensure_writable_log_dir(log_dir):
# build failed
success = False
summary = 'FAILED'

build_dir = ''
if app.builddir:
build_dir = " (build directory: %s)" % (app.builddir)
succ = "unsuccessfully%s: %s" % (build_dir, errormsg)
succ = "unsuccessfully: " + error_msg

# cleanup logs
app.close_log()
Expand Down Expand Up @@ -4424,7 +4423,7 @@ def ensure_writable_log_dir(log_dir):

del app

return (success, application_log, errormsg)
return (success, application_log, error_msg)


def copy_easyblocks_for_reprod(easyblock_instances, reprod_dir):
Expand Down
28 changes: 18 additions & 10 deletions easybuild/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
from easybuild.tools.github import add_pr_labels, install_github_token, list_prs, merge_pr, new_branch_github, new_pr
from easybuild.tools.github import new_pr_from_branch
from easybuild.tools.github import sync_branch_with_develop, sync_pr_with_develop, update_branch, update_pr
from easybuild.tools.hooks import BUILD_AND_INSTALL_LOOP, PRE_PREF, POST_PREF, START, END, CANCEL, FAIL
from easybuild.tools.hooks import BUILD_AND_INSTALL_LOOP, PRE_PREF, POST_PREF, START, END, CANCEL, CRASH, FAIL
from easybuild.tools.hooks import load_hooks, run_hook
from easybuild.tools.modules import modules_tool
from easybuild.tools.options import opts_dict_to_eb_opts, set_up_configuration, use_color
Expand Down Expand Up @@ -149,11 +149,11 @@ def build_and_install_software(ecs, init_session_state, exit_on_failure=True):

# keep track of success/total count
if ec_res['success']:
test_msg = "Successfully built %s" % ec['spec']
test_msg = "Successfully installed %s" % ec['spec']
else:
test_msg = "Build of %s failed" % ec['spec']
test_msg = "Installation of %s failed" % os.path.basename(ec['spec'])
if 'err' in ec_res:
test_msg += " (err: %s)" % ec_res['err']
test_msg += ": %s" % ec_res['err']

# dump test report next to log file
test_report_txt = create_test_report(test_msg, [(ec, ec_res)], init_session_state)
Expand All @@ -169,8 +169,8 @@ def build_and_install_software(ecs, init_session_state, exit_on_failure=True):
adjust_permissions(parent_dir, stat.S_IWUSR, add=False, recursive=False)

if not ec_res['success'] and exit_on_failure:
if 'traceback' in ec_res:
raise EasyBuildError(ec_res['traceback'])
if not isinstance(ec_res['err'], EasyBuildError):
raise ec_res['err']
else:
raise EasyBuildError(test_msg)

Expand Down Expand Up @@ -747,14 +747,22 @@ def prepare_main(args=None, logfile=None, testing=None):
return init_session_state, eb_go, cfg_settings


if __name__ == "__main__":
init_session_state, eb_go, cfg_settings = prepare_main()
def main_with_hooks(args=None):
init_session_state, eb_go, cfg_settings = prepare_main(args=args)
hooks = load_hooks(eb_go.options.hooks)
try:
main(prepared_cfg_data=(init_session_state, eb_go, cfg_settings))
main(args=args, prepared_cfg_data=(init_session_state, eb_go, cfg_settings))
except EasyBuildError as err:
run_hook(FAIL, hooks, args=[err])
print_error(err.msg)
sys.exit(1)
except KeyboardInterrupt as err:
run_hook(CANCEL, hooks, args=[err])
print_error("Cancelled by user: %s" % err)
except Exception as err:
run_hook(CRASH, hooks, args=[err])
sys.stderr.write("EasyBuild crashed! Please consider reporting a bug, this should not happen...\n\n")
raise


if __name__ == "__main__":
main_with_hooks()
2 changes: 1 addition & 1 deletion easybuild/tools/build_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def nosupport(self, msg, ver):

def error(self, msg, *args, **kwargs):
"""Print error message and raise an EasyBuildError."""
ebmsg = "EasyBuild crashed with an error %s: " % self.caller_info()
ebmsg = "EasyBuild encountered an error %s: " % self.caller_info()
fancylogger.FancyLogger.error(self, ebmsg + msg, *args, **kwargs)

def devel(self, msg, *args, **kwargs):
Expand Down
2 changes: 2 additions & 0 deletions easybuild/tools/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
END = 'end'

CANCEL = 'cancel'
CRASH = 'crash'
FAIL = 'fail'

RUN_SHELL_CMD = 'run_shell_cmd'
Expand Down Expand Up @@ -107,6 +108,7 @@
POST_PREF + BUILD_AND_INSTALL_LOOP,
END,
CANCEL,
CRASH,
FAIL,
PRE_PREF + RUN_SHELL_CMD,
POST_PREF + RUN_SHELL_CMD,
Expand Down
107 changes: 95 additions & 12 deletions easybuild/tools/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"""
import contextlib
import functools
import inspect
import os
import re
import signal
Expand Down Expand Up @@ -75,7 +76,81 @@
]


RunResult = namedtuple('RunResult', ('cmd', 'exit_code', 'output', 'stderr', 'work_dir'))
RunShellCmdResult = namedtuple('RunShellCmdResult', ('cmd', 'exit_code', 'output', 'stderr', 'work_dir'))


class RunShellCmdError(BaseException):

def __init__(self, cmd, exit_code, work_dir, output, stderr, caller_info, *args, **kwargs):
"""Constructor for RunShellCmdError."""
self.cmd = cmd
self.cmd_name = cmd.split(' ')[0]
self.exit_code = exit_code
self.work_dir = work_dir
self.output = output
self.stderr = stderr
self.caller_info = caller_info

msg = f"Shell command '{self.cmd_name}' failed!"
super(RunShellCmdError, self).__init__(msg, *args, **kwargs)

def print(self):
"""
Report failed shell command for this RunShellCmdError instance
"""

def pad_4_spaces(msg):
return ' ' * 4 + msg

error_info = [
'',
"ERROR: Shell command failed!",
pad_4_spaces(f"full command -> {self.cmd}"),
pad_4_spaces(f"exit code -> {self.exit_code}"),
pad_4_spaces(f"working directory -> {self.work_dir}"),
]

tmpdir = tempfile.mkdtemp(prefix='shell-cmd-error-')
output_fp = os.path.join(tmpdir, f"{self.cmd_name}.out")
with open(output_fp, 'w') as fp:
fp.write(self.output or '')

if self.stderr is None:
error_info.append(pad_4_spaces(f"output (stdout + stderr) -> {output_fp}"))
else:
stderr_fp = os.path.join(tmpdir, f"{self.cmd_name}.err")
with open(stderr_fp, 'w') as fp:
fp.write(self.stderr)
error_info.extend([
pad_4_spaces(f"output (stdout) -> {output_fp}"),
pad_4_spaces(f"error/warnings (stderr) -> {stderr_fp}"),
])

caller_file_name, caller_line_nr, caller_function_name = self.caller_info
called_from_info = f"'{caller_function_name}' function in {caller_file_name} (line {caller_line_nr})"
error_info.extend([
pad_4_spaces(f"called from -> {called_from_info}"),
'',
])

sys.stderr.write('\n'.join(error_info) + '\n')


def raise_run_shell_cmd_error(cmd, exit_code, work_dir, output, stderr):
"""
Raise RunShellCmdError for failing shell command, after collecting additional caller info
"""

# figure out where failing command was run
# need to go 3 levels down:
# 1) this function
# 2) run_shell_cmd function
# 3) run_cmd_cache decorator
# 4) actual caller site
frameinfo = inspect.getouterframes(inspect.currentframe())[3]
caller_info = (frameinfo.filename, frameinfo.lineno, frameinfo.function)

raise RunShellCmdError(cmd, exit_code, work_dir, output, stderr, caller_info)


def run_cmd_cache(func):
Expand Down Expand Up @@ -178,7 +253,7 @@ def to_cmd_str(cmd):
msg += f" (in {work_dir})"
dry_run_msg(msg, silent=silent)

return RunResult(cmd=cmd_str, exit_code=0, output='', stderr=None, work_dir=work_dir)
return RunShellCmdResult(cmd=cmd_str, exit_code=0, output='', stderr=None, work_dir=work_dir)

start_time = datetime.now()
if not hidden:
Expand All @@ -204,14 +279,29 @@ def to_cmd_str(cmd):
_log.info("Command to run was changed by pre-%s hook: '%s' (was: '%s')", RUN_SHELL_CMD, cmd, old_cmd)

_log.info(f"Running command '{cmd_str}' in {work_dir}")
proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=stderr, check=fail_on_error,
proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=stderr, check=False,
cwd=work_dir, env=env, input=stdin, shell=shell, executable=executable)

# return output as a regular string rather than a byte sequence (and non-UTF-8 characters get stripped out)
output = proc.stdout.decode('utf-8', 'ignore')
stderr_output = proc.stderr.decode('utf-8', 'ignore') if split_stderr else None
stderr = proc.stderr.decode('utf-8', 'ignore') if split_stderr else None

res = RunShellCmdResult(cmd=cmd_str, exit_code=proc.returncode, output=output, stderr=stderr, work_dir=work_dir)

res = RunResult(cmd=cmd_str, exit_code=proc.returncode, output=output, stderr=stderr_output, work_dir=work_dir)
# always log command output
cmd_name = cmd_str.split(' ')[0]
if split_stderr:
_log.info(f"Output of '{cmd_name} ...' shell command (stdout only):\n{res.output}")
_log.info(f"Warnings and errors of '{cmd_name} ...' shell command (stderr only):\n{res.stderr}")
else:
_log.info(f"Output of '{cmd_name} ...' shell command (stdout + stderr):\n{res.output}")

if res.exit_code == 0:
_log.info(f"Shell command completed successfully (see output above): {cmd_str}")
else:
_log.warning(f"Shell command FAILED (exit code {res.exit_code}, see output above): {cmd_str}")
if fail_on_error:
raise_run_shell_cmd_error(res.cmd, res.exit_code, res.work_dir, output=res.output, stderr=res.stderr)

if with_hooks:
run_hook_kwargs = {
Expand All @@ -222,13 +312,6 @@ def to_cmd_str(cmd):
}
run_hook(RUN_SHELL_CMD, hooks, post_step_hook=True, args=[cmd], kwargs=run_hook_kwargs)

if split_stderr:
log_msg = f"Command '{cmd_str}' exited with exit code {res.exit_code}, "
log_msg += f"with stdout:\n{res.output}\nstderr:\n{res.stderr}"
else:
log_msg = f"Command '{cmd_str}' exited with exit code {res.exit_code} and output:\n{res.output}"
_log.info(log_msg)

if not hidden:
time_since_start = time_str_since(start_time)
trace_msg(f"command completed: exit {res.exit_code}, ran in {time_since_start}")
Expand Down
8 changes: 4 additions & 4 deletions test/framework/build_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ def test_easybuildlog(self):
r"fancyroot.test_easybuildlog \[WARNING\] :: Deprecated functionality.*onemorewarning.*",
r"fancyroot.test_easybuildlog \[WARNING\] :: Deprecated functionality.*lastwarning.*",
r"fancyroot.test_easybuildlog \[WARNING\] :: Deprecated functionality.*thisisnotprinted.*",
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): kaput",
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): err: msg: %s",
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): kaput",
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): err: msg: %s",
r"fancyroot.test_easybuildlog \[ERROR\] :: .*EasyBuild encountered an exception \(at .* in .*\): oops",
'',
])
Expand Down Expand Up @@ -168,7 +168,7 @@ def test_easybuildlog(self):
r"fancyroot.test_easybuildlog \[WARNING\] :: bleh",
r"fancyroot.test_easybuildlog \[INFO\] :: 4\+2 = 42",
r"fancyroot.test_easybuildlog \[DEBUG\] :: this is just a test",
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): foo baz baz",
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): foo baz baz",
'',
])
logtxt_regex = re.compile(r'^%s' % expected_logtxt, re.M)
Expand Down Expand Up @@ -223,7 +223,7 @@ def test_log_levels(self):
info_msg = r"%s \[INFO\] :: fyi" % prefix
warning_msg = r"%s \[WARNING\] :: this is a warning" % prefix
deprecated_msg = r"%s \[WARNING\] :: Deprecated functionality, .*: almost kaput; see .*" % prefix
error_msg = r"%s \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): kaput" % prefix
error_msg = r"%s \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): kaput" % prefix

expected_logtxt = '\n'.join([
error_msg,
Expand Down
14 changes: 13 additions & 1 deletion test/framework/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ def setUp(self):
'',
'def fail_hook(err):',
' print("EasyBuild FAIL: %s" % err)',
'',
'def crash_hook(err):',
' print("EasyBuild CRASHED, oh no! => %s" % err)',
])
write_file(self.test_hooks_pymod, test_hooks_pymod_txt)

Expand All @@ -97,8 +100,9 @@ def test_load_hooks(self):

hooks = load_hooks(self.test_hooks_pymod)

self.assertEqual(len(hooks), 8)
self.assertEqual(len(hooks), 9)
expected = [
'crash_hook',
'fail_hook',
'parse_hook',
'post_configure_hook',
Expand Down Expand Up @@ -140,6 +144,7 @@ def test_find_hook(self):
pre_single_extension_hook = [hooks[k] for k in hooks if k == 'pre_single_extension_hook'][0]
start_hook = [hooks[k] for k in hooks if k == 'start_hook'][0]
pre_run_shell_cmd_hook = [hooks[k] for k in hooks if k == 'pre_run_shell_cmd_hook'][0]
crash_hook = [hooks[k] for k in hooks if k == 'crash_hook'][0]
fail_hook = [hooks[k] for k in hooks if k == 'fail_hook'][0]
pre_build_and_install_loop_hook = [hooks[k] for k in hooks if k == 'pre_build_and_install_loop_hook'][0]

Expand Down Expand Up @@ -175,6 +180,10 @@ def test_find_hook(self):
self.assertEqual(find_hook('fail', hooks, pre_step_hook=True), None)
self.assertEqual(find_hook('fail', hooks, post_step_hook=True), None)

self.assertEqual(find_hook('crash', hooks), crash_hook)
self.assertEqual(find_hook('crash', hooks, pre_step_hook=True), None)
self.assertEqual(find_hook('crash', hooks, post_step_hook=True), None)

hook_name = 'build_and_install_loop'
self.assertEqual(find_hook(hook_name, hooks), None)
self.assertEqual(find_hook(hook_name, hooks, pre_step_hook=True), pre_build_and_install_loop_hook)
Expand Down Expand Up @@ -209,6 +218,7 @@ def run_hooks():
run_hook('single_extension', hooks, post_step_hook=True, args=[None])
run_hook('extensions', hooks, post_step_hook=True, args=[None])
run_hook('fail', hooks, args=[EasyBuildError('oops')])
run_hook('crash', hooks, args=[RuntimeError('boom!')])
stdout = self.get_stdout()
stderr = self.get_stderr()
self.mock_stdout(False)
Expand Down Expand Up @@ -244,6 +254,8 @@ def run_hooks():
"this is run before installing an extension",
"== Running fail hook...",
"EasyBuild FAIL: 'oops'",
"== Running crash hook...",
"EasyBuild CRASHED, oh no! => boom!",
]
expected_stdout = '\n'.join(expected_stdout_lines)

Expand Down
1 change: 1 addition & 0 deletions test/framework/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,7 @@ def test_avail_hooks(self):
" post_build_and_install_loop_hook",
" end_hook",
" cancel_hook",
" crash_hook",
" fail_hook",
" pre_run_shell_cmd_hook",
" post_run_shell_cmd_hook",
Expand Down