Skip to content

Commit

Permalink
Merge pull request #1787 from davidmarin/not-a-valid-jar
Browse files Browse the repository at this point in the history
handle "Not a valid JAR" error message (fixes #1771)
  • Loading branch information
David Marin committed May 29, 2018
2 parents 250dad2 + d208a63 commit a954048
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 0 deletions.
1 change: 1 addition & 0 deletions mrjob/examples/mr_jar_step_example.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright 2013 David Marin
# Copyright 2016 Yelp
# Copyright 2018 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
15 changes: 15 additions & 0 deletions mrjob/logs/step.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@
_JOB_PROGRESS_RE = re.compile(
r'^\s*map\s+(?P<map>\d+)%\s+reduce\s+(?P<reduce>\d+)%\s*$')

# If you specify a bad JAR, this one line (e.g.
# "Not a valid JAR: /home/hadoop/hadoop-examples.jar") is all you get,
# so it is matched on its own rather than as part of a larger error.
_NOT_A_VALID_JAR_RE = re.compile(r'^\s*Not a valid JAR:.*')

# YARN prints this (sometimes followed by a Java exception) when tasks fail
_TASK_ATTEMPT_FAILED_RE = re.compile(
Expand Down Expand Up @@ -316,6 +318,19 @@ def _parse_step_syslog_from_log4j_records(records, step_interpretation=None):
message=message,
)

# invalid JAR: record the whole "Not a valid JAR: ..." message as an error
m = _NOT_A_VALID_JAR_RE.match(message)
if m:
error = dict(
hadoop_error=dict(
message=message,
num_lines=record['num_lines'],
start_line=record['start_line'],
),
)
result.setdefault('errors', [])
result['errors'].append(error)

# task failure
m = _TASK_ATTEMPT_FAILED_RE.match(message)
if m:
Expand Down
21 changes: 21 additions & 0 deletions tests/logs/test_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,21 @@
progress=dict(map=100, message=' map 100% reduce 100%', reduce=100),
)

# The only log output available when the step is handed a bad JAR.
NOT_A_VALID_JAR_LOG_LINES = [
    'Not a valid JAR: /home/hadoop/hadoop-examples.jar',
]

# Expected parse of the lines above: a single error whose hadoop_error
# covers exactly that one line.
PARSED_NOT_A_VALID_JAR_LOG_LINES = {
    'errors': [
        {
            'hadoop_error': {
                'message': 'Not a valid JAR: /home/hadoop/hadoop-examples.jar',
                'start_line': 0,
                'num_lines': 1,
            },
        },
    ],
}

class ParseStepSyslogTestCase(TestCase):

Expand All @@ -114,6 +129,12 @@ def test_pre_yarn(self):
_parse_step_syslog(PRE_YARN_STEP_LOG_LINES),
PARSED_PRE_YARN_STEP_LOG_LINES)

def test_not_a_valid_jar(self):
    """A lone "Not a valid JAR" line parses to the expected error dict."""
    parsed = _parse_step_syslog(NOT_A_VALID_JAR_LOG_LINES)

    self.assertEqual(parsed, PARSED_NOT_A_VALID_JAR_LOG_LINES)


class InterpretHadoopJarCommandStderrTestCase(TestCase):

Expand Down

0 comments on commit a954048

Please sign in to comment.