Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement improved & consistent argument parsing (#785)
* Extend ArgumentParser subclass * Reorg * Clean it up with argument groups * Weirdness to support "import" subcommand * add test to make sure help is printed when cmd is called without subcmd * use new main methods for lca and sig * More concise "sourmash --help" output * Clean up usage statements * Marked some subcommands as expert * Add module docstrings, fix descriptors Co-authored-by: Luiz Irber <luizirber@users.noreply.github.com> Co-authored-by: C. Titus Brown <titus@idyll.org>
- Loading branch information
Showing
54 changed files
with
1,637 additions
and
803 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,86 +1,18 @@ | ||
""" | ||
sourmash command line. | ||
""" | ||
from __future__ import print_function | ||
import sys | ||
import argparse | ||
import sourmash | ||
|
||
from .logging import error, set_quiet | ||
|
||
from .commands import (categorize, compare, compute, dump, import_csv, | ||
gather, index, sbt_combine, search, | ||
plot, watch, info, storage, migrate, multigather) | ||
from .lca import main as lca_main | ||
from .sig import main as sig_main | ||
def main(arglist=None):
    """Parse command-line arguments and run the selected sourmash command.

    ``arglist`` is passed unchanged to the parser built by
    ``sourmash.cli.get_parser()``. The handler is the ``main`` attribute of
    ``sourmash.cli.<cmd>``, or of ``sourmash.cli.<cmd>.<subcmd>`` when the
    parsed namespace carries a ``subcmd`` attribute.

    Returns whatever the selected handler returns.
    """
    parsed = sourmash.cli.get_parser().parse_args(arglist)

    # Plain commands resolve directly under sourmash.cli; grouped commands
    # need one more attribute lookup inside their group.
    target = getattr(sourmash.cli, parsed.cmd)
    if hasattr(parsed, 'subcmd'):
        target = getattr(target, parsed.subcmd)
    return target.main(parsed)
|
||
usage=''' | ||
sourmash <command> [<args>] | ||
** Commands include: | ||
compute <filenames> Compute MinHash signatures for sequences in files. | ||
compare <filenames.sig> Compute similarity matrix for multiple signatures. | ||
search <query> <against> Search a signature against a list of signatures. | ||
plot <matrix> Plot a distance matrix made by 'compare'. | ||
gather Search a metagenome signature for multiple | ||
non-overlapping matches. | ||
** Taxonomic classification utilities: | ||
Run 'sourmash lca' for the taxonomic classification routines. | ||
** Sequence Bloom Tree (SBT) utilities: | ||
index Index a collection of signatures for fast searching. | ||
sbt_combine Combine multiple SBTs into a new one. | ||
categorize Identify best matches for many signatures using an SBT. | ||
watch Classify a stream of sequences. | ||
** Other commands: | ||
info Display sourmash version and other information. | ||
signature Sourmash signature manipulation utilities. | ||
Use '-h' to get subcommand-specific help, e.g. | ||
sourmash compute -h | ||
** Documentation is available at https://sourmash.readthedocs.io/ | ||
''' | ||
|
||
|
||
def main():
    """Dispatch ``sys.argv`` to the matching sourmash command function.

    Only the first command-line argument is parsed here, as the command
    name; everything after it is handed untouched to the command function.
    When the command is missing or unknown, the usage text is printed and
    the process exits with status 1.
    """
    set_quiet(False)

    # Command name -> callable that receives the remaining argument list.
    dispatch = {
        'search': search,
        'compute': compute,
        'compare': compare,
        'plot': plot,
        'import_csv': import_csv,
        'dump': dump,
        'index': index,
        'categorize': categorize,
        'gather': gather,
        'watch': watch,
        'sbt_combine': sbt_combine,
        'info': info,
        'storage': storage,
        'lca': lca_main,
        'migrate': migrate,
        'multigather': multigather,
        'sig': sig_main,
        'signature': sig_main,
    }

    top_parser = argparse.ArgumentParser(
        description='work with compressed biological sequence representations')
    top_parser.add_argument('command', nargs='?')
    name = top_parser.parse_args(sys.argv[1:2]).command

    if not name or name not in dispatch:
        # Only a non-empty but unknown name gets the extra error line.
        if name:
            error('Unrecognized command')
        print(usage)
        sys.exit(1)

    dispatch[name](sys.argv[2:])
|
||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__pycache__/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
"""Define the top-level command line interface for sourmash | ||
This module handles user input when sourmash is invoked from the command line. | ||
A top-level parser is defined for the `sourmash` command, and subparsers are | ||
defined for each subcommand. Some sourmash operations are grouped together | ||
using the `sourmash <subcmd> <subsubcmd>` pattern, and these are organized in | ||
their own CLI submodules, each with a dedicated directory. | ||
""" | ||
|
||
from argparse import ArgumentParser, RawDescriptionHelpFormatter, SUPPRESS | ||
import os | ||
import sys | ||
|
||
import sourmash | ||
|
||
from . import utils | ||
|
||
# Commands | ||
from . import categorize | ||
from . import compare | ||
from . import compute | ||
from . import dump | ||
from . import gather | ||
from . import import_csv | ||
from . import info | ||
from . import index | ||
from . import migrate | ||
from . import multigather | ||
from . import plot | ||
from . import sbt_combine | ||
from . import search | ||
from . import watch | ||
|
||
# Subcommand groups | ||
from . import lca | ||
from . import sig | ||
from . import storage | ||
|
||
|
||
class SourmashParser(ArgumentParser):
    """``ArgumentParser`` subclass adding the sourmash citation banner.

    Differences from the base class:

    * the citation banner is emitted (at most once) before help output and
      after successful parsing, unless ``citation`` is false or the parsed
      namespace has a true ``quiet`` attribute;
    * parsing an empty argument list prints help and exits with status 1;
    * a namespace whose ``subcmd`` is ``None`` prints the help of the
      selected command's subparser and exits with status 1;
    * the legacy sub-subcommand name ``import`` is rewritten to ``ingest``.
    """

    # Class-level flag shared by every instance, so the banner is emitted
    # only once even when several parsers print help or parse arguments.
    _citation_printed = False

    def __init__(self, citation=True, **kwargs):
        """Create the parser; ``citation`` enables the banner in parse_args."""
        super(SourmashParser, self).__init__(**kwargs)
        self.citation = citation

    @classmethod
    def print_citation(cls):
        """Emit the version/citation banner via ``notify``, once only."""
        if cls._citation_printed:
            return
        from sourmash.logging import notify
        notify("\n== This is sourmash version {version}. ==", version=sourmash.VERSION)
        notify("== Please cite Brown and Irber (2016), doi:10.21105/joss.00027. ==\n")
        cls._citation_printed = True

    def _subparser_from_name(self, name):
        """Given a name, get the subparser instance registered with this parser.

        Returns ``None`` when ``name`` is ``None`` or no subparser is
        registered under that name.
        """
        if name is None:
            return None
        for action in self._actions:
            choices = action.choices
            # Only subparser actions keep a name -> parser mapping. Ordinary
            # arguments may carry a list/tuple of choices, which must not be
            # indexed by name.
            if isinstance(choices, dict) and name in choices:
                return choices[name]
        return None

    def print_help(self, file=None):
        """Print the citation banner, then the regular help text.

        ``file`` is forwarded to ``ArgumentParser.print_help`` so callers can
        redirect the help text just as with the base class.
        """
        self.print_citation()
        super(SourmashParser, self).print_help(file)

    def parse_args(self, args=None, namespace=None):
        """Parse arguments, applying the sourmash-specific behaviour.

        Raises ``SystemExit(1)`` after printing help when no arguments were
        given, or when a command group was selected without a subcommand.
        """
        if (args is None and len(sys.argv) == 1) or (args is not None and len(args) == 0):
            self.print_help()
            raise SystemExit(1)
        args = super(SourmashParser, self).parse_args(args=args, namespace=namespace)
        if ('quiet' not in args or not args.quiet) and self.citation:
            self.print_citation()

        if 'subcmd' in args and args.subcmd is None:
            group_parser = self._subparser_from_name(getattr(args, 'cmd', None))
            if group_parser is None:
                # No matching subparser is registered: fall back to this
                # parser's own help instead of failing on None.
                group_parser = self
            group_parser.print_help()
            raise SystemExit(1)

        # BEGIN: dirty hacks to simultaneously support new and previous interface
        if hasattr(args, 'subcmd') and args.subcmd == 'import':
            args.subcmd = 'ingest'
        # END: dirty hacks to simultaneously support new and previous interface
        return args
|
||
|
||
def get_parser():
    """Build and return the top-level ``sourmash`` argument parser.

    The parser description doubles as the usage overview: one line per
    non-expert command found by ``utils.command_list`` (showing that
    module's docstring), followed by one entry per command-group directory
    under this package. Every command and group, expert ones included,
    registers its own subparser through its module-level ``subparser``
    function.
    """
    group_titles = {
        'lca': 'Taxonomic operations',
        'sig': 'Manipulate signature files',
        'storage': 'Operations on storage',
    }
    # Commands left out of the short usage overview (still registered below).
    expert_ops = {
        'categorize', 'dump', 'import_csv', 'migrate', 'multigather',
        'sbt_combine', 'watch',
    }

    this_module = sys.modules[__name__]
    cli_root = os.path.dirname(__file__)
    basic_ops = utils.command_list(cli_root)

    pieces = [' Basic operations\n']
    for name in basic_ops:
        if name in expert_ops:
            continue
        summary = getattr(this_module, name).__doc__
        invocation = 'sourmash {op:s} --help'.format(op=name)
        pieces.append(' {hs:25s} {ds:s}\n'.format(hs=invocation, ds=summary))

    # Immediate subdirectories of the CLI package, kept when opfilter accepts
    # them, are treated as command groups.
    subdirs = next(os.walk(cli_root))[1]
    group_names = sorted(entry for entry in subdirs if utils.opfilter(entry))
    for group in group_names:
        pieces.append('\n ' + group_titles[group] + '\n')
        pieces.append(' sourmash {gd:s} --help\n'.format(gd=group))

    description = (
        'Compute, compare, manipulate, and analyze MinHash sketches of DNA sequences.\n\nUsage instructions:\n'
        + ''.join(pieces)
    )
    parser = SourmashParser(
        prog='sourmash',
        description=description,
        formatter_class=RawDescriptionHelpFormatter,
        usage=SUPPRESS,
    )
    parser._optionals.title = 'Options'
    parser.add_argument(
        '-v', '--version', action='version',
        version='sourmash ' + sourmash.VERSION,
    )
    parser.add_argument(
        '-q', '--quiet', action='store_true',
        help="don't print citation information",
    )
    registry = parser.add_subparsers(
        title='Instructions', dest='cmd', metavar='cmd', help=SUPPRESS,
    )
    for name in basic_ops + group_names:
        getattr(this_module, name).subparser(registry)

    # Flip the stored order of the parser's argument groups.
    parser._action_groups.reverse()
    return parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
"'sourmash categorize' - query an SBT for best match, with many signatures." | ||
|
||
import argparse | ||
|
||
from sourmash.cli.utils import add_ksize_arg, add_moltype_args | ||
|
||
|
||
def subparser(subparsers):
    """Register the ``categorize`` command and its arguments on ``subparsers``."""
    parser = subparsers.add_parser('categorize')
    add = parser.add_argument

    add('sbt_name', help='name of SBT to load')
    add('queries', nargs='+', help='list of signatures to categorize')
    add('-q', '--quiet', action='store_true', help='suppress non-error output')
    add_ksize_arg(parser, 31)
    add(
        '--threshold', default=0.08, type=float,
        help='minimum threshold for reporting matches; default=0.08',
    )
    add('--traverse-directory', action="store_true")
    add(
        '--ignore-abundance', action='store_true',
        help='do NOT use k-mer abundances if present',
    )
    add_moltype_args(parser)

    # TODO: help messages in these
    add('--csv', type=argparse.FileType('at'))
    add('--load-csv', default=None)
|
||
|
||
def main(args):
    """Run ``sourmash.commands.categorize`` on ``args`` and return its result."""
    # Imported at call time rather than at module import, as in the original.
    import sourmash

    handler = sourmash.commands.categorize
    return handler(args)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
"""compare genomes""" | ||
|
||
from argparse import FileType | ||
|
||
from sourmash.cli.utils import add_ksize_arg, add_moltype_args | ||
|
||
|
||
def subparser(subparsers):
    """Register the ``compare`` command and its arguments on ``subparsers``."""
    parser = subparsers.add_parser('compare')
    add = parser.add_argument

    add('signatures', nargs='+', help='list of signatures to compare')
    add('-q', '--quiet', action='store_true', help='suppress non-error output')
    add_ksize_arg(parser)
    add_moltype_args(parser)
    add(
        '-o', '--output', metavar='F',
        help='file to which output will be written; default is terminal (standard output)',
    )
    add(
        '--ignore-abundance', action='store_true',
        help='do NOT use k-mer abundances even if present',
    )
    add(
        '--traverse-directory', action='store_true',
        help='compare all signatures underneath directories',
    )
    add(
        '--csv', metavar='F', type=FileType('w'),
        help='write matrix to specified file in CSV format (with column headers)',
    )
    add(
        '-p', '--processes', metavar='N', type=int, default=None,
        help='Number of processes to use to calculate similarity',
    )
|
||
|
||
def main(args):
    """Run ``sourmash.commands.compare`` on ``args`` and return its result."""
    # Imported at call time rather than at module import, as in the original.
    import sourmash

    handler = sourmash.commands.compare
    return handler(args)
Oops, something went wrong.