Merge pull request #1692 from davidmarin/tool-docstrings

update mrjob command docstrings for v0.6.0 (fixes #1646)
Yelp · Oct 23, 2017 · 4e5fc5a
2 parents a0fa7dd + b9a0c8e
commit 4e5fc5a
Show file tree

Hide file tree

Showing 15 changed files with 172 additions and 122 deletions.
diff --git a/docs/guides/writing-mrjobs.rst b/docs/guides/writing-mrjobs.rst
@@ -501,7 +501,7 @@ instance. Here's an example that sneaks a peek at :ref:`writing-cl-opts`::
 
         def configure_args(self):
             super(CommandLineProtocolJob, self).configure_args()
-            self.add_passthrough_arg(
+            self.add_passthru_arg(
                 '--output-format', default='raw', choices=['raw', 'json'],
                 help="Specify the output format of the job")
 

diff --git a/mrjob/examples/contrib/mr_pegasos_svm.py b/mrjob/examples/contrib/mr_pegasos_svm.py
@@ -54,10 +54,10 @@ def __init__(self, *args, **kwargs):
 
     def configure_args(self):
         super(MRsvm, self).configure_args()
-        self.add_passthrough_arg(
+        self.add_passthru_arg(
             '--iterations', dest='iterations', default=2, type=int,
             help='T: number of iterations to run')
-        self.add_passthrough_arg(
+        self.add_passthru_arg(
             '--batchsize', dest='batchsize', default=100, type=int,
             help='k: number of data points in a batch')
 

diff --git a/mrjob/examples/mr_grep.py b/mrjob/examples/mr_grep.py
@@ -21,7 +21,7 @@ class MRGrepJob(MRJob):
     def configure_args(self):
         super(MRGrepJob, self).configure_args()
 
-        self.add_passthrough_arg(
+        self.add_passthru_arg(
             '-e', '--expression',
             help=( 'Expression to search for. Required.'))
 

diff --git a/mrjob/examples/mr_log_sampler.py b/mrjob/examples/mr_log_sampler.py
@@ -42,12 +42,12 @@ class MRLogSampler(MRJob):
 
     def configure_args(self):
         super(MRLogSampler, self).configure_args()
-        self.add_passthrough_arg(
+        self.add_passthru_arg(
             '--sample-size',
             type=int,
             help='Number of entries to sample.'
         )
-        self.add_passthrough_arg(
+        self.add_passthru_arg(
             '--expected-length',
             type=int,
             help=("Number of entries you expect in the log. If not specified,"

diff --git a/mrjob/examples/mr_page_rank.py b/mrjob/examples/mr_page_rank.py
@@ -51,11 +51,11 @@ class MRPageRank(MRJob):
     def configure_args(self):
         super(MRPageRank, self).configure_args()
 
-        self.add_passthrough_arg(
+        self.add_passthru_arg(
             '--iterations', dest='iterations', default=10, type=int,
             help='number of iterations to run')
 
-        self.add_passthrough_arg(
+        self.add_passthru_arg(
             '--damping-factor', dest='damping_factor', default=0.85,
             type=float,
             help='probability a web surfer will continue clicking on links')

diff --git a/mrjob/examples/mr_text_classifier.py b/mrjob/examples/mr_text_classifier.py
@@ -156,33 +156,33 @@ def configure_args(self):
         """Add command-line options specific to this script."""
         super(MRTextClassifier, self).configure_args()
 
-        self.add_passthrough_arg(
+        self.add_passthru_arg(
             '--min-df', dest='min_df', default=2, type=int,
             help=('min number of documents an n-gram must appear in for us to'
-                  ' count it. Default: %default'))
-        self.add_passthrough_arg(
+                  ' count it. Default: %(default)s'))
+        self.add_passthru_arg(
             '--max-df', dest='max_df', default=10000000, type=int,
             help=('max number of documents an n-gram may appear in for us to'
                   ' count it (this keeps reducers from running out of memory).'
-                  ' Default: %default'))
-        self.add_passthrough_arg(
+                  ' Default: %(default)s'))
+        self.add_passthru_arg(
             '--max-ngram-size', dest='max_ngram_size',
             default=DEFAULT_MAX_NGRAM_SIZE, type=int,
             help='maximum phrase length to consider')
-        self.add_passthrough_arg(
+        self.add_passthru_arg(
             '--stop-words', dest='stop_words',
             default=', '.join(DEFAULT_STOP_WORDS),
             help=("comma-separated list of words to ignore. For example, "
                   "--stop-words 'in, the' would cause 'hole in the wall' to be"
-                  " parsed as ['hole', 'wall']. Default: %default"))
-        self.add_passthrough_arg(
+                  " parsed as ['hole', 'wall']. Default: %(default)s"))
+        self.add_passthru_arg(
             '--short-doc-threshold', dest='short_doc_threshold',
             type=int, default=None,
             help=('Normally, for each n-gram size, we take the average score'
                   ' over all n-grams that appear. This allows us to penalize'
                   ' short documents by using this threshold as the denominator'
                   ' rather than the actual number of n-grams.'))
-        self.add_passthrough_arg(
+        self.add_passthru_arg(
             '--no-test-set', dest='no_test_set',
             action='store_true', default=False,
             help=("Choose about half of the documents to be the testing set"

diff --git a/mrjob/options.py b/mrjob/options.py
@@ -1419,7 +1419,18 @@ def _optparse_kwargs_to_argparse(**kwargs):
             ' opt groups')
         kwargs.pop('opt_group')
 
+    # convert %default -> %(default)s
+    if kwargs.get('help'):
+        kwargs['help'] = kwargs['help'].replace('%default', '%(default)s')
+
     # pretty much everything else is the same. if people want to pass argparse
     # kwargs through the old optparse interface (e.g. *action* or *required*)
     # more power to 'em.
     return kwargs
+
+
+def _alphabetize_actions(arg_parser):
+    """Alphabetize arg parser actions for the sake of nicer help printouts."""
+    # based on https://stackoverflow.com/questions/12268602/sort-argparse-help-alphabetically  # noqa
+    for g in arg_parser._action_groups:
+        g._group_actions.sort(key=lambda opt: opt.dest)
diff --git a/mrjob/tools/emr/audit_usage.py b/mrjob/tools/emr/audit_usage.py
@@ -20,21 +20,21 @@
 
 Options::
 
-  -h, --help            show this help message and exit
-  -c CONF_PATHS, --conf-path=CONF_PATHS
+  -c CONF_PATHS, --conf-path CONF_PATHS
                         Path to alternate mrjob.conf file to read from
   --no-conf             Don't load mrjob.conf even if it's available
-  --emr-endpoint=EMR_ENDPOINT
+  --emr-endpoint EMR_ENDPOINT
                         Force mrjob to connect to EMR on this endpoint (e.g.
                         us-west-1.elasticmapreduce.amazonaws.com). Default is
                         to infer this from region.
-  --max-days-ago=MAX_DAYS_AGO
+  -h, --help            show this help message and exit
+  --max-days-ago MAX_DAYS_AGO
                         Max number of days ago to look at jobs. By default, we
                         go back as far as EMR supports (currently about 2
                         months)
   -q, --quiet           Don't print anything to stderr
-  --region=REGION       GCE/AWS region to run Dataproc/EMR jobs in.
-  --s3-endpoint=S3_ENDPOINT
+  --region REGION       GCE/AWS region to run Dataproc/EMR jobs in.
+  --s3-endpoint S3_ENDPOINT
                         Force mrjob to connect to S3 on this endpoint (e.g. s3
                         -us-west-1.amazonaws.com). You usually shouldn't set
                         this; by default mrjob will choose the correct
@@ -60,6 +60,7 @@
 from mrjob.job import MRJob
 from mrjob.options import _add_basic_args
 from mrjob.options import _add_runner_args
+from mrjob.options import _alphabetize_actions
 from mrjob.options import _filter_by_role
 from mrjob.pool import _legacy_pool_hash_and_name
 from mrjob.pool import _pool_hash_and_name
@@ -81,6 +82,7 @@
 def main(args=None):
     # parse command-line args
     arg_parser = _make_arg_parser()
+
     options = arg_parser.parse_args(args)
 
     MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)
@@ -113,6 +115,8 @@ def _make_arg_parser():
         arg_parser,
         _filter_by_role(EMRJobRunner.OPT_NAMES, 'connect'))
 
+    _alphabetize_actions(arg_parser)
+
     return arg_parser