-
Notifications
You must be signed in to change notification settings - Fork 589
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1711 from davidmarin/faster-log-parsing
- Loading branch information
Showing
15 changed files
with
447 additions
and
58 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
# Copyright 2017 Yelp | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
"""Print probable cause of error for a failed step. | ||
Currently this only works on EMR. | ||
Usage:: | ||
mrjob diagnose [opts] j-CLUSTERID | ||
Options:: | ||
-c CONF_PATHS, --conf-path CONF_PATHS | ||
Path to alternate mrjob.conf file to read from | ||
--no-conf Don't load mrjob.conf even if it's available | ||
--emr-endpoint EMR_ENDPOINT | ||
Force mrjob to connect to EMR on this endpoint (e.g. | ||
us-west-1.elasticmapreduce.amazonaws.com). Default is | ||
to infer this from region. | ||
-h, --help show this help message and exit | ||
-q, --quiet Don't print anything to stderr | ||
--region REGION GCE/AWS region to run Dataproc/EMR jobs in. | ||
--s3-endpoint S3_ENDPOINT | ||
Force mrjob to connect to S3 on this endpoint (e.g. s3 | ||
-us-west-1.amazonaws.com). You usually shouldn't set | ||
this; by default mrjob will choose the correct | ||
endpoint for each S3 bucket based on its location. | ||
--step-id STEP_ID ID of a particular failed step to diagnose | ||
-v, --verbose print more messages to stderr | ||
""" | ||
from argparse import ArgumentParser | ||
from logging import getLogger | ||
|
||
from mrjob.aws import _boto3_paginate | ||
from mrjob.emr import _EMR_SPARK_ARGS | ||
from mrjob.emr import EMRJobRunner | ||
from mrjob.job import MRJob | ||
from mrjob.logs.errors import _format_error | ||
from mrjob.options import _add_basic_args | ||
from mrjob.options import _add_runner_args | ||
from mrjob.options import _alphabetize_actions | ||
from mrjob.options import _filter_by_role | ||
|
||
log = getLogger(__name__) | ||
|
||
|
||
def main(cl_args=None):
    """Diagnose one step of an EMR cluster and log its probable error.

    Parses the command line, builds an :py:class:`EMRJobRunner` from every
    parsed option except ``quiet``, ``verbose`` and ``step_id``, picks the
    step to examine, and asks the runner to pick an error for that step.

    :param cl_args: list of command-line arguments; ``None`` is handed
                    straight to ``parse_args()``, which then reads
                    ``sys.argv``.

    :raises SystemExit: with status 1 when no step could be selected
                        (``_get_step()`` has already logged the reason).
    """
    options = _make_arg_parser().parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # these options steer this tool only; everything else goes to the runner
    non_runner_opts = ('quiet', 'verbose', 'step_id')
    runner_kwargs = {}
    for opt_name, opt_value in vars(options).items():
        if opt_name not in non_runner_opts:
            runner_kwargs[opt_name] = opt_value

    runner = EMRJobRunner(**runner_kwargs)
    emr_client = runner.make_emr_client()

    # pick step
    step = _get_step(emr_client, options.cluster_id, options.step_id)
    if not step:
        raise SystemExit(1)

    # a step requested by ID may not have failed; warn, but carry on
    state = step['Status']['State']
    if state != 'FAILED':
        log.warning('step %s has state %s, not FAILED' %
                    (step['Id'], state))

    # interpret logs
    log.info('Diagnosing step %s (%s)' % (step['Id'], step['Name']))

    log_interpretation = {'step_id': step['Id']}
    step_type = _infer_step_type(step)
    error = runner._pick_error(log_interpretation, step_type)

    # print error
    if not error:
        log.warning('No error detected')
        return

    log.error('Probable cause of failure:\n\n%s\n\n' %
              _format_error(error))
|
||
|
||
def _get_step(emr_client, cluster_id, step_id=None):
    """Return the first step listed for *cluster_id* that matches.

    With *step_id*, a step matches when its ID equals *step_id*; without
    it, the first step whose state is ``FAILED`` matches (see
    :py:func:`_step_matches`).

    :param emr_client: client passed through to ``_boto3_paginate()``
    :param cluster_id: ID of the cluster whose steps are listed
    :param step_id: optional ID of the specific step wanted

    :return: the matching step, or ``None`` (after logging an error) if
             no listed step matches
    """
    # just walk the listing in the order the API returns it, rather than
    # filtering by step ID or status; usually the step we want is the
    # last one run anyhow
    all_steps = _boto3_paginate(
        'Steps', emr_client, 'list_steps', ClusterId=cluster_id)

    # generator + next() stops consuming the listing at the first match
    match = next(
        (s for s in all_steps if _step_matches(s, step_id=step_id)),
        None)

    if match is not None:
        return match

    if step_id:
        log.error('step %s not found on cluster %s' %
                  (step_id, cluster_id))
    else:
        log.error('cluster %s has no failed steps' % cluster_id)

    return None
|
||
|
||
def _step_matches(step, step_id=None): | ||
if not step_id: | ||
return step['Status']['State'] == 'FAILED' | ||
else: | ||
return step['Id'] == step_id | ||
|
||
|
||
def _infer_step_type(step):
    """Guess the step type to use when parsing a step's logs.

    :param step: step description with the step's argument list at
                 ``step['Config']['Args']``

    :return: ``'spark'`` if ``_EMR_SPARK_ARGS`` appears as a contiguous
             run anywhere in the step's arguments, otherwise
             ``'streaming'``
    """
    args = list(step['Config']['Args'])
    spark_args = list(_EMR_SPARK_ARGS)
    window = len(spark_args)

    # all that matters for log parsing is picking out Spark steps
    # (doesn't matter if it's spark or spark_jar or spark_script)
    #
    # and of course we don't know the logging habits of jar steps,
    # so we might as well use streaming's logic
    #
    # slide over *every* offset in the step's args; bounding the scan by
    # the length of the marker instead would miss Spark steps that have
    # several other arguments ahead of it
    for start in range(len(args) - window + 1):
        if args[start:start + window] == spark_args:
            return 'spark'

    return 'streaming'
|
||
|
||
def _make_arg_parser():
    """Build the command-line parser for this tool.

    The parser takes the basic arguments added by ``_add_basic_args()``,
    the runner arguments for the EMR runner's options filtered to the
    ``'connect'`` role, a positional cluster ID, and an optional
    ``--step-id``.

    :return: the configured :py:class:`~argparse.ArgumentParser`
    """
    parser = ArgumentParser(
        usage='%(prog)s [opts] [--step-id STEP_ID] CLUSTER_ID',
        description=(
            'Get probable cause of failure for step on CLUSTER_ID.'
            ' By default we look at the last failed step'))

    _add_basic_args(parser)

    connect_opt_names = _filter_by_role(EMRJobRunner.OPT_NAMES, 'connect')
    _add_runner_args(parser, connect_opt_names)

    # positional: no option strings, just a destination
    parser.add_argument(
        dest='cluster_id', help='ID of cluster with failed step')
    parser.add_argument(
        '--step-id',
        dest='step_id',
        help='ID of a particular failed step to diagnose')

    _alphabetize_actions(parser)

    return parser
|
||
|
||
# Run the diagnose tool only when this module is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.