Skip to content

Commit

Permalink
Implement improved & consistent argument parsing (#785)
Browse files Browse the repository at this point in the history
* Extend ArgumentParser subclass
* Reorg
* Clean it up with argument groups
* Weirdness to support "import" subcommand
* add test to make sure help is printed when cmd is called without subcmd
* use new main methods for lca and sig
* More concise "sourmash --help" output
* Clean up usage statements
* Marked some subcommands as expert
* Add module docstrings, fix descriptors
Co-authored-by: Luiz Irber <luizirber@users.noreply.github.com>
Co-authored-by: C. Titus Brown <titus@idyll.org>
  • Loading branch information
standage authored and luizirber committed Jan 8, 2020
1 parent 6a2a14e commit 585108f
Show file tree
Hide file tree
Showing 54 changed files with 1,637 additions and 803 deletions.
3 changes: 3 additions & 0 deletions sourmash/__init__.py
Expand Up @@ -28,6 +28,9 @@
from . import sbtmh
from . import sbt_storage
from . import signature
from . import sig
from . import cli
from . import commands

from pkg_resources import get_distribution, DistributionNotFound

Expand Down
90 changes: 11 additions & 79 deletions sourmash/__main__.py
@@ -1,86 +1,18 @@
"""
sourmash command line.
"""
from __future__ import print_function
import sys
import argparse
import sourmash

from .logging import error, set_quiet

from .commands import (categorize, compare, compute, dump, import_csv,
gather, index, sbt_combine, search,
plot, watch, info, storage, migrate, multigather)
from .lca import main as lca_main
from .sig import main as sig_main
def main(arglist=None):
    """Parse the command line and dispatch to the selected command.

    The parsed ``cmd`` names a module under ``sourmash.cli``.  When the
    namespace also carries a ``subcmd`` attribute (command groups), the
    handler is looked up one level deeper, in the submodule of that name.
    The handler's ``main(args)`` is called and its return value is returned.

    Parameters
    ----------
    arglist : list of str, optional
        Arguments to parse; when None the parser falls back to ``sys.argv``.

    Raises
    ------
    SystemExit
        With status 1 (after printing help) when options were supplied but
        no command was selected.
    """
    parser = sourmash.cli.get_parser()
    args = parser.parse_args(arglist)

    # Subcommands are optional under Python 3's argparse, so an invocation
    # such as "sourmash -q" parses successfully with cmd=None.  Looking up
    # a None attribute name would raise TypeError; show the help instead.
    if getattr(args, 'cmd', None) is None:
        parser.print_help()
        raise SystemExit(1)

    target = getattr(sourmash.cli, args.cmd)
    if hasattr(args, 'subcmd'):
        target = getattr(target, args.subcmd)
    mainmethod = getattr(target, 'main')
    return mainmethod(args)

# Top-level help text of the legacy dispatcher: main() prints it and exits
# with status 1 when no command, or an unrecognized command, is given.
usage='''
sourmash <command> [<args>]
** Commands include:
compute <filenames> Compute MinHash signatures for sequences in files.
compare <filenames.sig> Compute similarity matrix for multiple signatures.
search <query> <against> Search a signature against a list of signatures.
plot <matrix> Plot a distance matrix made by 'compare'.
gather Search a metagenome signature for multiple
non-overlapping matches.
** Taxonomic classification utilities:
Run 'sourmash lca' for the taxonomic classification routines.
** Sequence Bloom Tree (SBT) utilities:
index Index a collection of signatures for fast searching.
sbt_combine Combine multiple SBTs into a new one.
categorize Identify best matches for many signatures using an SBT.
watch Classify a stream of sequences.
** Other commands:
info Display sourmash version and other information.
signature Sourmash signature manipulation utilities.
Use '-h' to get subcommand-specific help, e.g.
sourmash compute -h
** Documentation is available at https://sourmash.readthedocs.io/
'''


def main():
    """Legacy entry point: dispatch ``sys.argv[1]`` to a command function.

    Only the first command-line token is parsed here; everything after it
    is handed untouched to the selected command.  A missing or unknown
    command prints the usage text and exits with status 1.
    """
    set_quiet(False)

    dispatch = {
        'search': search,
        'compute': compute,
        'compare': compare,
        'plot': plot,
        'import_csv': import_csv,
        'dump': dump,
        'index': index,
        'categorize': categorize,
        'gather': gather,
        'watch': watch,
        'sbt_combine': sbt_combine,
        'info': info,
        'storage': storage,
        'lca': lca_main,
        'migrate': migrate,
        'multigather': multigather,
        'sig': sig_main,
        'signature': sig_main,
    }

    top_level = argparse.ArgumentParser(
        description='work with compressed biological sequence representations')
    top_level.add_argument('command', nargs='?')
    requested = top_level.parse_args(sys.argv[1:2]).command

    if not requested or requested not in dispatch:
        # Only a present-but-unknown command gets the error message.
        if requested:
            error('Unrecognized command')
        print(usage)
        sys.exit(1)

    handler = dispatch[requested]
    handler(sys.argv[2:])

if __name__ == '__main__':
main()
1 change: 1 addition & 0 deletions sourmash/cli/.gitignore
@@ -0,0 +1 @@
__pycache__/
125 changes: 125 additions & 0 deletions sourmash/cli/__init__.py
@@ -0,0 +1,125 @@
"""Define the top-level command line interface for sourmash
This module handles user input when sourmash is invoked from the command line.
A top-level parser is defined for the `sourmash` command, and subparsers are
defined for each subcommand. Some sourmash operations are grouped together
using the `sourmash <subcmd> <subsubcmd>` pattern, and these are organized in
their own CLI submodules, each with a dedicated directory.
"""

from argparse import ArgumentParser, RawDescriptionHelpFormatter, SUPPRESS
import os
import sys

import sourmash

from . import utils

# Commands
from . import categorize
from . import compare
from . import compute
from . import dump
from . import gather
from . import import_csv
from . import info
from . import index
from . import migrate
from . import multigather
from . import plot
from . import sbt_combine
from . import search
from . import watch

# Subcommand groups
from . import lca
from . import sig
from . import storage


class SourmashParser(ArgumentParser):
    """ArgumentParser that prints the sourmash citation and friendlier help.

    Compared with the stock parser it:

    * prints the citation banner (at most once per process) before help
      output and after a successful parse, unless ``--quiet`` was given or
      the parser was created with ``citation=False``;
    * prints help and exits with status 1 when invoked with no arguments,
      or when a command group is given without a subcommand;
    * maps the previous ``import`` subcommand name onto ``ingest``.
    """

    # Class-level flag so the banner is shown only once, no matter how many
    # parser instances (top-level parser and subparsers) ask for it.
    _citation_printed = False

    def __init__(self, citation=True, **kwargs):
        """Create the parser.

        ``citation`` controls whether parse_args() prints the citation
        banner; all other keyword arguments go to ArgumentParser.
        """
        super(SourmashParser, self).__init__(**kwargs)
        self.citation = citation

    @classmethod
    def print_citation(cls):
        """Print the version/citation banner unless it was already shown."""
        if cls._citation_printed:
            return
        from sourmash.logging import notify
        notify("\n== This is sourmash version {version}. ==", version=sourmash.VERSION)
        notify("== Please cite Brown and Irber (2016), doi:10.21105/joss.00027. ==\n")
        cls._citation_printed = True

    def _subparser_from_name(self, name):
        """Given a name, get the subparser instance registered with this parser.

        Returns None when ``name`` is None or no subparser has that name.
        """
        if name is None:
            return None
        for action in self._actions:
            registry = action.choices
            # Only subparser actions keep a name -> parser mapping.  Plain
            # ``choices=[...]`` sequences are skipped: indexing them by name
            # would raise TypeError.
            if isinstance(registry, dict) and name in registry:
                return registry[name]
        return None

    def print_help(self, file=None):
        """Print the citation banner, then the regular help message.

        ``file`` is forwarded unchanged so the signature stays compatible
        with ``ArgumentParser.print_help(file=None)``.
        """
        self.print_citation()
        super(SourmashParser, self).print_help(file)

    def parse_args(self, args=None, namespace=None):
        """Parse arguments, applying the sourmash-specific conventions.

        Raises SystemExit(1), after printing help, when there is nothing to
        parse or when a command group was selected without a subcommand.
        """
        # Mirror argparse's own fallback: an explicit list wins, otherwise
        # the process arguments after the program name are used.
        supplied = sys.argv[1:] if args is None else args
        if len(supplied) == 0:
            self.print_help()
            raise SystemExit(1)

        args = super(SourmashParser, self).parse_args(args=args, namespace=namespace)
        if self.citation and not getattr(args, 'quiet', False):
            self.print_citation()

        if 'subcmd' in args and args.subcmd is None:
            self._subparser_from_name(args.cmd).print_help()
            raise SystemExit(1)

        # BEGIN: dirty hacks to simultaneously support new and previous interface
        if hasattr(args, 'subcmd') and args.subcmd == 'import':
            args.subcmd = 'ingest'
        # END: dirty hacks to simultaneously support new and previous interface
        return args


def get_parser():
    """Build and return the top-level ``sourmash`` argument parser.

    Command names come from ``utils.command_list`` and the command-group
    names from the subdirectories of this package that pass
    ``utils.opfilter``.  Each one is expected to be an attribute of this
    module exposing a ``subparser(subparsers)`` registration function.  The
    parser description doubles as the usage text: one line per non-expert
    command (using the command module's docstring) followed by one entry
    per command group.
    """
    module_descs = {
        'lca': 'Taxonomic operations',
        'sig': 'Manipulate signature files',
        'storage': 'Operations on storage',
    }
    # Commands that are registered but left out of the short usage listing.
    expert = {'categorize', 'dump', 'import_csv', 'migrate', 'multigather', 'sbt_combine', 'watch'}

    this_module = sys.modules[__name__]
    clidir = os.path.dirname(__file__)
    basic_ops = utils.command_list(clidir)

    usage_parts = [' Basic operations\n']
    for op in basic_ops:
        if op in expert:
            continue
        # A command module without a docstring has __doc__ == None, which
        # the "{ds:s}" format spec rejects with TypeError; fall back to an
        # empty description so the CLI keeps working.
        docstring = getattr(this_module, op).__doc__ or ''
        helpstring = 'sourmash {op:s} --help'.format(op=op)
        usage_parts.append(' {hs:25s} {ds:s}\n'.format(hs=helpstring, ds=docstring))

    # Immediate subdirectories of the package directory are command groups.
    cmd_group_dirs = sorted(filter(utils.opfilter, next(os.walk(clidir))[1]))
    for dirpath in cmd_group_dirs:
        usage_parts.append('\n ' + module_descs[dirpath] + '\n')
        usage_parts.append(' sourmash {gd:s} --help\n'.format(gd=dirpath))
    usage = ''.join(usage_parts)

    desc = 'Compute, compare, manipulate, and analyze MinHash sketches of DNA sequences.\n\nUsage instructions:\n' + usage
    parser = SourmashParser(prog='sourmash', description=desc, formatter_class=RawDescriptionHelpFormatter, usage=SUPPRESS)
    parser._optionals.title = 'Options'
    parser.add_argument('-v', '--version', action='version', version='sourmash '+ sourmash.VERSION)
    parser.add_argument('-q', '--quiet', action='store_true', help='don\'t print citation information')
    sub = parser.add_subparsers(
        title='Instructions', dest='cmd', metavar='cmd', help=SUPPRESS,
    )
    for op in basic_ops + cmd_group_dirs:
        getattr(this_module, op).subparser(sub)
    # Help output lists argument groups in this order; reversing puts the
    # most recently created group ('Instructions') first.
    parser._action_groups.reverse()
    return parser
38 changes: 38 additions & 0 deletions sourmash/cli/categorize.py
@@ -0,0 +1,38 @@
"'sourmash categorize' - query an SBT for bes match, with many signatures."

import argparse

from sourmash.cli.utils import add_ksize_arg, add_moltype_args


def subparser(subparsers):
    """Register the ``categorize`` command and its arguments on *subparsers*."""
    parser = subparsers.add_parser('categorize')

    # Positional arguments.
    parser.add_argument('sbt_name', help='name of SBT to load')
    parser.add_argument('queries', nargs='+', help='list of signatures to categorize')

    # Options (registration order determines the order in --help).
    parser.add_argument('-q', '--quiet', action='store_true', help='suppress non-error output')
    add_ksize_arg(parser, 31)
    parser.add_argument(
        '--threshold',
        default=0.08,
        type=float,
        help='minimum threshold for reporting matches; default=0.08',
    )
    parser.add_argument('--traverse-directory', action="store_true")
    parser.add_argument(
        '--ignore-abundance',
        action='store_true',
        help='do NOT use k-mer abundances if present',
    )
    add_moltype_args(parser)

    # TODO: help messages in these
    parser.add_argument('--csv', type=argparse.FileType('at'))
    parser.add_argument('--load-csv', default=None)


def main(args):
    """Run ``sourmash.commands.categorize`` on the parsed *args*."""
    # Imported at call time rather than at module import time.
    import sourmash
    handler = sourmash.commands.categorize
    return handler(args)
43 changes: 43 additions & 0 deletions sourmash/cli/compare.py
@@ -0,0 +1,43 @@
"""compare genomes"""

from argparse import FileType

from sourmash.cli.utils import add_ksize_arg, add_moltype_args


def subparser(subparsers):
    """Register the ``compare`` command and its arguments on *subparsers*."""
    parser = subparsers.add_parser('compare')
    parser.add_argument('signatures', nargs='+', help='list of signatures to compare')
    parser.add_argument('-q', '--quiet', action='store_true', help='suppress non-error output')
    add_ksize_arg(parser)
    add_moltype_args(parser)

    # Remaining options as (flags, keyword arguments) pairs, registered in
    # the listed order.
    trailing_options = (
        (('-o', '--output'), dict(
            metavar='F',
            help='file to which output will be written; default is terminal (standard output)',
        )),
        (('--ignore-abundance',), dict(
            action='store_true',
            help='do NOT use k-mer abundances even if present',
        )),
        (('--traverse-directory',), dict(
            action='store_true',
            help='compare all signatures underneath directories',
        )),
        (('--csv',), dict(
            metavar='F',
            type=FileType('w'),
            help='write matrix to specified file in CSV format (with column headers)',
        )),
        (('-p', '--processes'), dict(
            metavar='N',
            type=int,
            default=None,
            help='Number of processes to use to calculate similarity',
        )),
    )
    for flags, options in trailing_options:
        parser.add_argument(*flags, **options)


def main(args):
    """Run ``sourmash.commands.compare`` on the parsed *args*."""
    # Imported at call time rather than at module import time.
    import sourmash
    handler = sourmash.commands.compare
    return handler(args)

0 comments on commit 585108f

Please sign in to comment.