Skip to content

Commit

Permalink
Merge pull request #1692 from davidmarin/tool-docstrings
Browse files Browse the repository at this point in the history
update mrjob command docstrings for v0.6.0 (fixes #1646)
  • Loading branch information
David Marin committed Oct 23, 2017
2 parents a0fa7dd + b9a0c8e commit 4e5fc5a
Show file tree
Hide file tree
Showing 15 changed files with 172 additions and 122 deletions.
2 changes: 1 addition & 1 deletion docs/guides/writing-mrjobs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,7 @@ instance. Here's an example that sneaks a peek at :ref:`writing-cl-opts`::

def configure_args(self):
super(CommandLineProtocolJob, self).configure_args()
self.add_passthrough_arg(
self.add_passthru_arg(
'--output-format', default='raw', choices=['raw', 'json'],
help="Specify the output format of the job")

Expand Down
4 changes: 2 additions & 2 deletions mrjob/examples/contrib/mr_pegasos_svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ def __init__(self, *args, **kwargs):

def configure_args(self):
super(MRsvm, self).configure_args()
self.add_passthrough_arg(
self.add_passthru_arg(
'--iterations', dest='iterations', default=2, type=int,
help='T: number of iterations to run')
self.add_passthrough_arg(
self.add_passthru_arg(
'--batchsize', dest='batchsize', default=100, type=int,
help='k: number of data points in a batch')

Expand Down
2 changes: 1 addition & 1 deletion mrjob/examples/mr_grep.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class MRGrepJob(MRJob):
def configure_args(self):
super(MRGrepJob, self).configure_args()

self.add_passthrough_arg(
self.add_passthru_arg(
'-e', '--expression',
help=( 'Expression to search for. Required.'))

Expand Down
4 changes: 2 additions & 2 deletions mrjob/examples/mr_log_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@ class MRLogSampler(MRJob):

def configure_args(self):
super(MRLogSampler, self).configure_args()
self.add_passthrough_arg(
self.add_passthru_arg(
'--sample-size',
type=int,
help='Number of entries to sample.'
)
self.add_passthrough_arg(
self.add_passthru_arg(
'--expected-length',
type=int,
help=("Number of entries you expect in the log. If not specified,"
Expand Down
4 changes: 2 additions & 2 deletions mrjob/examples/mr_page_rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ class MRPageRank(MRJob):
def configure_args(self):
super(MRPageRank, self).configure_args()

self.add_passthrough_arg(
self.add_passthru_arg(
'--iterations', dest='iterations', default=10, type=int,
help='number of iterations to run')

self.add_passthrough_arg(
self.add_passthru_arg(
'--damping-factor', dest='damping_factor', default=0.85,
type=float,
help='probability a web surfer will continue clicking on links')
Expand Down
18 changes: 9 additions & 9 deletions mrjob/examples/mr_text_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,33 +156,33 @@ def configure_args(self):
"""Add command-line options specific to this script."""
super(MRTextClassifier, self).configure_args()

self.add_passthrough_arg(
self.add_passthru_arg(
'--min-df', dest='min_df', default=2, type=int,
help=('min number of documents an n-gram must appear in for us to'
' count it. Default: %default'))
self.add_passthrough_arg(
' count it. Default: %(default)s'))
self.add_passthru_arg(
'--max-df', dest='max_df', default=10000000, type=int,
help=('max number of documents an n-gram may appear in for us to'
' count it (this keeps reducers from running out of memory).'
' Default: %default'))
self.add_passthrough_arg(
' Default: %(default)s'))
self.add_passthru_arg(
'--max-ngram-size', dest='max_ngram_size',
default=DEFAULT_MAX_NGRAM_SIZE, type=int,
help='maximum phrase length to consider')
self.add_passthrough_arg(
self.add_passthru_arg(
'--stop-words', dest='stop_words',
default=', '.join(DEFAULT_STOP_WORDS),
help=("comma-separated list of words to ignore. For example, "
"--stop-words 'in, the' would cause 'hole in the wall' to be"
" parsed as ['hole', 'wall']. Default: %default"))
self.add_passthrough_arg(
" parsed as ['hole', 'wall']. Default: %(default)s"))
self.add_passthru_arg(
'--short-doc-threshold', dest='short_doc_threshold',
type=int, default=None,
help=('Normally, for each n-gram size, we take the average score'
' over all n-grams that appear. This allows us to penalize'
' short documents by using this threshold as the denominator'
' rather than the actual number of n-grams.'))
self.add_passthrough_arg(
self.add_passthru_arg(
'--no-test-set', dest='no_test_set',
action='store_true', default=False,
help=("Choose about half of the documents to be the testing set"
Expand Down
11 changes: 11 additions & 0 deletions mrjob/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -1419,7 +1419,18 @@ def _optparse_kwargs_to_argparse(**kwargs):
' opt groups')
kwargs.pop('opt_group')

# convert %default -> %(default)s
if kwargs.get('help'):
kwargs['help'] = kwargs['help'].replace('%default', '%(default)s')

# pretty much everything else is the same. if people want to pass argparse
# kwargs through the old optparse interface (e.g. *action* or *required*)
# more power to 'em.
return kwargs


def _alphabetize_actions(arg_parser):
"""Alphabetize arg parser actions for the sake of nicer help printouts."""
# based on https://stackoverflow.com/questions/12268602/sort-argparse-help-alphabetically # noqa
for g in arg_parser._action_groups:
g._group_actions.sort(key=lambda opt: opt.dest)
16 changes: 10 additions & 6 deletions mrjob/tools/emr/audit_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,21 @@
Options::
-h, --help show this help message and exit
-c CONF_PATHS, --conf-path=CONF_PATHS
-c CONF_PATHS, --conf-path CONF_PATHS
Path to alternate mrjob.conf file to read from
--no-conf Don't load mrjob.conf even if it's available
--emr-endpoint=EMR_ENDPOINT
--emr-endpoint EMR_ENDPOINT
Force mrjob to connect to EMR on this endpoint (e.g.
us-west-1.elasticmapreduce.amazonaws.com). Default is
to infer this from region.
--max-days-ago=MAX_DAYS_AGO
-h, --help show this help message and exit
--max-days-ago MAX_DAYS_AGO
Max number of days ago to look at jobs. By default, we
go back as far as EMR supports (currently about 2
months)
-q, --quiet Don't print anything to stderr
--region=REGION GCE/AWS region to run Dataproc/EMR jobs in.
--s3-endpoint=S3_ENDPOINT
--region REGION GCE/AWS region to run Dataproc/EMR jobs in.
--s3-endpoint S3_ENDPOINT
Force mrjob to connect to S3 on this endpoint (e.g. s3
-us-west-1.amazonaws.com). You usually shouldn't set
this; by default mrjob will choose the correct
Expand All @@ -60,6 +60,7 @@
from mrjob.job import MRJob
from mrjob.options import _add_basic_args
from mrjob.options import _add_runner_args
from mrjob.options import _alphabetize_actions
from mrjob.options import _filter_by_role
from mrjob.pool import _legacy_pool_hash_and_name
from mrjob.pool import _pool_hash_and_name
Expand All @@ -81,6 +82,7 @@
def main(args=None):
# parse command-line args
arg_parser = _make_arg_parser()

options = arg_parser.parse_args(args)

MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)
Expand Down Expand Up @@ -113,6 +115,8 @@ def _make_arg_parser():
arg_parser,
_filter_by_role(EMRJobRunner.OPT_NAMES, 'connect'))

_alphabetize_actions(arg_parser)

return arg_parser


Expand Down

0 comments on commit 4e5fc5a

Please sign in to comment.