Skip to content

Commit

Permalink
Merge pull request #82 from dib-lab/refactor/cli
Browse files Browse the repository at this point in the history
Separate subparser stuff into CLI module
  • Loading branch information
standage committed May 31, 2017
2 parents 3375f8a + 9fad6f2 commit 3aa32e6
Show file tree
Hide file tree
Showing 21 changed files with 547 additions and 449 deletions.
1 change: 1 addition & 0 deletions .cloc.exclude
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
kevlar/_version.py
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,9 @@ doc:
cd docs && make html

# loc: run cloc over the kevlar sources.
# NOTE(review): this span is taken from a diff view. The first cloc line
# appears to be the removed single-invocation form and the echo/cloc pairs
# after it the per-section replacement (core, CLI, tests) -- confirm against
# the actual Makefile before relying on this.
loc:
cloc --exclude-list-file=<(echo kevlar/_version.py) kevlar/*.py kevlar/cli/*.py
@- echo "\n\n===== Core kevlar ====="
cloc --exclude-list-file=.cloc.exclude kevlar/*.py
@- echo "\n\n===== kevlar CLI ====="
cloc kevlar/cli/*.py
@- echo "\n\n===== kevlar tests ====="
cloc kevlar/tests/test_*.py
19 changes: 0 additions & 19 deletions kevlar/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

from __future__ import print_function
from collections import defaultdict, namedtuple
import argparse
import itertools
import sys
try:
Expand All @@ -22,24 +21,6 @@
from kevlar.seqio import load_reads_and_kmers


def subparser(subparsers):
    """Add the `assemble` subcommand parser to `subparsers`.

    `subparsers` must provide `add_parser()`, as the object returned by
    `argparse.ArgumentParser.add_subparsers()` does. Arguments are
    registered in the order they appear in the table below. Nothing is
    returned.
    """
    # (flags, keyword settings) for each argument, in registration order.
    arguments = (
        (('-d', '--debug'),
         dict(action='store_true', help='print debugging output')),
        (('-o', '--out'),
         dict(metavar='FILE',
              help='output file; default is terminal (stdout)')),
        (('--gml',),
         dict(metavar='FILE', help='write graph to .gml file')),
        (('-n', '--min-abund'),
         dict(type=int, metavar='N', default=2,
              help='discard interesting k-mers that occur fewer than N '
                   'times')),
        (('-x', '--max-abund'),
         dict(type=int, metavar='X', default=500,
              help='discard interesting k-mers that occur more than X '
                   'times')),
        (('augfastq',),
         dict(help='annotated reads in augmented Fastq format')),
    )
    assemble_parser = subparsers.add_parser('assemble')
    for flags, settings in arguments:
        assemble_parser.add_argument(*flags, **settings)


def merge_pair(pair):
"""
Assemble a pair of overlapping reads.
Expand Down
35 changes: 24 additions & 11 deletions kevlar/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,17 @@
# -----------------------------------------------------------------------------

import argparse
import kevlar
import sys

import kevlar
from . import dump
from . import count
from . import novel
from . import collect
from . import filter
from . import reaugment
from . import assemble
from . import mutate
from . import partition

mains = {
'dump': kevlar.dump.main,
Expand All @@ -25,6 +33,18 @@
'partition': kevlar.partition.main,
}

# Subcommand name -> function that registers that subcommand's argument
# parser. `parser()` calls each of these with its subparsers object.
subparser_funcs = dict((
    ('dump', dump.subparser),
    ('count', count.subparser),
    ('novel', novel.subparser),
    ('collect', collect.subparser),
    ('filter', filter.subparser),
    ('reaugment', reaugment.subparser),
    ('assemble', assemble.subparser),
    ('mutate', mutate.subparser),
    ('partition', partition.subparser),
))


def parser():
bubbletext = """
Expand All @@ -51,14 +71,7 @@ def parser():
'diagnostic messages, warnings, and errors')
subparsers = parser.add_subparsers(dest='cmd', metavar='cmd',
help='"' + subcommandstr + '"')
kevlar.dump.subparser(subparsers)
kevlar.count.subparser(subparsers)
kevlar.novel.subparser(subparsers)
kevlar.collect.subparser(subparsers)
kevlar.filter.subparser(subparsers)
kevlar.reaugment.subparser(subparsers)
kevlar.assemble.subparser(subparsers)
kevlar.mutate.subparser(subparsers)
kevlar.partition.subparser(subparsers)
for func in subparser_funcs.values():
func(subparsers)

return parser
26 changes: 26 additions & 0 deletions kevlar/cli/assemble.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env python
#
# -----------------------------------------------------------------------------
# Copyright (c) 2017 The Regents of the University of California
#
# This file is part of kevlar (http://github.com/dib-lab/kevlar) and is
# licensed under the MIT license: see LICENSE.
# -----------------------------------------------------------------------------


def subparser(subparsers):
    """Define the `kevlar assemble` command-line interface.

    A parser named `assemble` is created on `subparsers` (the object
    returned by `argparse.ArgumentParser.add_subparsers()`) and given five
    options followed by one positional argument. Nothing is returned.
    """
    cli = subparsers.add_parser('assemble')

    cli.add_argument(
        '-d', '--debug',
        action='store_true',
        help='print debugging output',
    )
    cli.add_argument(
        '-o', '--out',
        metavar='FILE',
        help='output file; default is terminal (stdout)',
    )
    cli.add_argument(
        '--gml',
        metavar='FILE',
        help='write graph to .gml file',
    )

    # Abundance window applied to the interesting k-mers.
    cli.add_argument(
        '-n', '--min-abund',
        type=int,
        default=2,
        metavar='N',
        help='discard interesting k-mers that occur fewer than N times',
    )
    cli.add_argument(
        '-x', '--max-abund',
        type=int,
        default=500,
        metavar='X',
        help='discard interesting k-mers that occur more than X times',
    )

    cli.add_argument(
        'augfastq',
        help='annotated reads in augmented Fastq format',
    )
51 changes: 51 additions & 0 deletions kevlar/cli/collect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env python
#
# -----------------------------------------------------------------------------
# Copyright (c) 2017 The Regents of the University of California
#
# This file is part of kevlar (http://github.com/dib-lab/kevlar) and is
# licensed under the MIT license: see LICENSE.
# -----------------------------------------------------------------------------

import argparse
import khmer
from khmer import khmer_args


def subparser(subparsers):
    """Define the `kevlar collect` command-line interface.

    A parser named `collect` is created on `subparsers` (the object
    returned by `argparse.ArgumentParser.add_subparsers()`) and populated
    with the options below, ending with the `novel_output` positional.
    Nothing is returned.
    """
    cli = subparsers.add_parser('collect')

    cli.add_argument(
        '-d', '--debug',
        action='store_true',
        help='print debugging output',
    )
    cli.add_argument(
        '-M', '--memory',
        type=khmer_args.memory_setting,
        default='1e6',
        metavar='MEM',
        help='memory to allocate for recalculating abundances of novel '
             'k-mers; default is 1M',
    )
    cli.add_argument(
        '--minabund',
        type=int,
        default=5,
        metavar='Y',
        help='minimum case abundance required to call a k-mer novel; used '
             'to filter out k-mers with inflated abundances; should equal '
             'the value of --case_min used in "kevlar novel"; default is 5',
    )
    cli.add_argument(
        '--max-fpr',
        type=float,
        default=0.001,
        metavar='FPR',
        help='terminate if the expected false positive rate is higher than '
             'the specified FPR; default is 0.001',
    )

    # Optional reference genome and the memory set aside for storing it.
    cli.add_argument(
        '--refr',
        type=str,
        default=None,
        metavar='FILE',
        help='reference genome in Fasta/Fastq format; any k-mers designated '
             'as "interesting" by "kevlar novel" are ignored if they are '
             'present in the reference genome',
    )
    cli.add_argument(
        '--refr-memory',
        type=khmer_args.memory_setting,
        default='1e6',
        metavar='MEM',
        help='memory to allocate for storing the reference genome; default '
             'is 1M',
    )

    cli.add_argument(
        '-k', '--ksize',
        type=int,
        default=31,
        metavar='K',
        help='k-mer size; default is 31',
    )
    cli.add_argument(
        '--ignore',
        nargs='+',
        metavar='KMER',
        help='ignore the specified k-mer(s)',
    )
    cli.add_argument(
        '-o', '--out',
        type=argparse.FileType('w'),
        metavar='OUT',
        help='output file; default is terminal (stdout)',
    )
    cli.add_argument(
        '--collapse',
        action='store_true',
        help='collapse linear paths contained in other linear paths',
    )
    cli.add_argument(
        'novel_output',
        nargs='+',
        help='one or more output files from the "kevlar novel" command',
    )
110 changes: 110 additions & 0 deletions kevlar/cli/count.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/usr/bin/env python
#
# -----------------------------------------------------------------------------
# Copyright (c) 2017 The Regents of the University of California
#
# This file is part of kevlar (http://github.com/dib-lab/kevlar) and is
# licensed under the MIT license: see LICENSE.
# -----------------------------------------------------------------------------

import argparse
import textwrap
import khmer
from khmer import khmer_args


def subparser(subparsers):
    """Define the `kevlar count` command-line interface.

    A parser named `count` is created on `subparsers` (the object returned
    by `argparse.ArgumentParser.add_subparsers()`). Its options are
    organized into four argument groups: case/control configuration,
    memory allocation, k-mer banding, and miscellaneous settings. Nothing
    is returned.
    """

    desc = "Compute k-mer abundances for the provided samples"
    epilog = """\
    Example::
    kevlar count --ksize 31 --memory 4G --mem-frac 0.25 \\
    --case proband.counttable proband-reads.fq.gz \\
    --control father.counttable father-r1.fq.gz father-r2.fq.gz \\
    --control mother.counttable mother-reads.fq.gz
    Example::
    kevlar count --ksize 25 --memory 500M \\
    --case case_x.ct case_x.fq \\
    --case case_y.ct case_y.fq \\
    --control control1.ct control1a.fq control1b.fq \\
    --control control2.ct control2a.fq control2b.fq"""
    epilog = textwrap.dedent(epilog)

    # The automatic -h/--help is disabled here and re-added by hand in the
    # "Miscellaneous settings" group at the bottom. The raw formatter keeps
    # the line breaks of the description and epilog as written.
    subparser = subparsers.add_parser(
        'count', description=desc, epilog=epilog, add_help=False,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    samp_args = subparser.add_argument_group('Case/control config')
    # nargs='+' with action='append' collects one list of values per
    # occurrence of the flag.
    samp_args.add_argument(
        '--case', metavar='F', nargs='+', required=True, action='append',
        help='one or more FASTA/FASTQ files containing reads from a case '
        'sample; can be declared multiple times corresponding to multiple '
        'case samples; see examples below'
    )
    samp_args.add_argument(
        '--control', metavar='F', nargs='+', action='append',
        help='one or more FASTA/FASTQ files containing reads from a control '
        'sample; can be declared multiple times corresponding to multiple '
        'control samples; see examples below'
    )
    samp_args.add_argument(
        '-x', '--ctrl-max', metavar='X', type=int, default=1,
        help='k-mers with abund > X in any control sample are uninteresting; '
        'default is X=1'
    )

    mem_desc = """\
    Specify how much memory to allocate for the sketch data structures
    used to store k-mer counts. The first control sample will be
    allocated the full amount of specified `--memory`, and all subsequent
    samples will be allocated a fraction thereof.
    """
    mem_desc = textwrap.dedent(mem_desc)
    memory_args = subparser.add_argument_group('Memory allocation', mem_desc)
    memory_args.add_argument(
        '-M', '--memory', default='1e6', metavar='MEM',
        type=khmer_args.memory_setting, help='total memory to allocate for '
        'the initial control sample; default is 1M'
    )
    memory_args.add_argument(
        '-f', '--mem-frac', type=float, default=0.1, metavar='F',
        help='fraction of the total memory to allocate to subsequent samples; '
        'default is 0.1'
    )
    memory_args.add_argument(
        '--max-fpr', type=float, default=0.2, metavar='FPR',
        help='terminate if the expected false positive rate for any sample is '
        'higher than the specified FPR; default is 0.2'
    )

    band_desc = """\
    If memory is a limiting factor, it is possible to get a linear
    decrease in memory consumption by running kevlar in "banded" mode.
    Splitting the hashed k-mer space into N bands and only considering k-mers
    from one band at a time reduces the memory consumption to approximately 1/N
    of the total memory required. This implements a scatter/gather approach in
    which `kevlar count` and/or `kevlar novel` is run N times, after which the
    results are combined using `kevlar filter`.
    """
    band_desc = textwrap.dedent(band_desc)
    band_args = subparser.add_argument_group('K-mer banding', band_desc)
    band_args.add_argument(
        '--num-bands', type=int, metavar='N', default=None,
        help='number of bands into which to divide the hashed k-mer space'
    )
    band_args.add_argument(
        '--band', type=int, metavar='I', default=None,
        help='a number between 1 and N (inclusive) indicating the band to be '
        'processed'
    )

    misc_args = subparser.add_argument_group('Miscellaneous settings')
    misc_args.add_argument('-h', '--help', action='help',
                           help='show this help message and exit')
    misc_args.add_argument('-k', '--ksize', type=int, default=31, metavar='K',
                           help='k-mer size; default is 31')
32 changes: 32 additions & 0 deletions kevlar/cli/dump.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env python
#
# -----------------------------------------------------------------------------
# Copyright (c) 2016 The Regents of the University of California
#
# This file is part of kevlar (http://github.com/dib-lab/kevlar) and is
# licensed under the MIT license: see LICENSE.
# -----------------------------------------------------------------------------

import khmer
from khmer import khmer_args


def subparser(subparsers):
    """Define the `kevlar dump` command-line interface.

    A parser named `dump` is created on `subparsers` (the object returned
    by `argparse.ArgumentParser.add_subparsers()`) with five options
    followed by the `refr` and `reads` positionals. Nothing is returned.
    """
    cli = subparsers.add_parser('dump')

    cli.add_argument(
        '--seqid',
        metavar='SEQ',
        help='dump reads not mapped to SEQ',
    )
    cli.add_argument(
        '--genomemask',
        metavar='FILE',
        help='dump reads with median k-mer abundance >= 1 in the specified '
             'genome; if both --seqid and --genomemask are declared, reads '
             'passing either filter will be kept',
    )
    # The default is a float rather than a string, so argparse stores it
    # as-is; `type` is only applied to string defaults and to values given
    # on the command line.
    cli.add_argument(
        '--maskmemory',
        type=khmer_args.memory_setting,
        default=2e9,
        metavar='SIZE',
        help='memory to be occupied by genome mask; default is 2G',
    )
    cli.add_argument(
        '--mask-k',
        type=int,
        default=31,
        metavar='K',
        help='k size for genome mask',
    )
    cli.add_argument(
        '--out',
        metavar='FILE',
        help='output file; default is terminal (stdout)',
    )

    cli.add_argument('refr', help='reference sequence in Fasta format')
    cli.add_argument('reads', help='read alignments in BAM format')

0 comments on commit 3aa32e6

Please sign in to comment.