Skip to content

Commit

Permalink
Refactor logging code (#308)
Browse files Browse the repository at this point in the history
This update introduces wide-sweeping changes to the code for printing logging messages, and adds a new global flag for printing logging messages to the terminal (stderr) and a logfile simultaneously. Closes #307.

Also, the `effcount` and `dump` modules have been removed.
  • Loading branch information
standage committed Dec 5, 2018
1 parent 015f412 commit 76a82fc
Show file tree
Hide file tree
Showing 47 changed files with 362 additions and 815 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,15 @@ This project adheres to [Semantic Versioning](http://semver.org/).
- A new Snakemake workflow for preprocessing BAM inputs for analysis with kevlar (see #305).
- A new Snakemake workflow for kevlar's standard processing procedure (see #306).

### Changed
- Added a new flag to print to the terminal (stderr) and a logfile simultaneously (see #308).

### Fixed
- Corrected a bug that reported the reference target sequence instead of the assembled contig sequence in the `CONTIG` attribute of indel calls in the VCF (see #304).

### Removed
- The `effcount` and `dump` modules have been disabled (see #308).


## [0.6.1] 2018-11-16

Expand Down
10 changes: 0 additions & 10 deletions docs/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,6 @@ kevlar simplex
:prog: kevlar
:path: simplex

kevlar dump
-----------

.. argparse::
:module: kevlar.cli.__init__
:func: parser
:nodefault:
:prog: kevlar
:path: dump

kevlar augment
----------------

Expand Down
14 changes: 12 additions & 2 deletions kevlar/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,12 @@
from kevlar.progress import ProgressIndicator

# Subcommands and command-line interface
from kevlar import dump
from kevlar import novel
from kevlar import filter
from kevlar import augment
from kevlar import mutate
from kevlar import assemble
from kevlar import count
from kevlar import effcount
from kevlar import partition
from kevlar import localize
from kevlar import call
Expand All @@ -70,6 +68,18 @@
del get_versions


# Destination for logging messages. None means "not configured", in which case
# plog() writes to the terminal (stderr).
logstream = None
# When true, plog() echoes messages to stderr in addition to `logstream`.
teelog = False


def plog(*args, **kwargs):
    """Print logging output.

    Positional and keyword arguments are forwarded to the built-in `print`
    (e.g. `sep`, `end`). The destination is controlled by the module-level
    `logstream` and `teelog` settings, so callers must not pass `file`.

    - If `logstream` is set, the message is written to it.
    - If `logstream` is not set, the message is written to stderr.
    - If `teelog` is true, the message is also echoed to stderr, unless
      `logstream` already is stderr.
    """
    if logstream is not None:
        print(*args, file=logstream, **kwargs)
    # Skip the tee echo when the log stream *is* stderr; otherwise every
    # message would appear twice on the terminal.
    if logstream is None or (teelog and logstream is not sys.stderr):
        print(*args, file=sys.stderr, **kwargs)


def open(filename, mode):
if mode not in ('r', 'w'):
raise ValueError('invalid mode "{}"'.format(mode))
Expand Down
11 changes: 4 additions & 7 deletions kevlar/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,20 @@
import kevlar


def main(args=None):
"""
Entry point for the kevlar CLI.
def main(arglist=None):
"""Entry point for the kevlar CLI.
Isolated as a method so that the CLI can be called by other Python code
(e.g. for testing), in which case the arguments are passed to the function.
If no arguments are passed to the function, parse them from the command
line.
"""
if args is None: # pragma: no cover
args = kevlar.cli.parser().parse_args()

args = kevlar.cli.parse_args(arglist)
if args.cmd is None: # pragma: no cover
kevlar.cli.parser().parse_args(['-h'])

assert args.cmd in kevlar.cli.mains
mainmethod = kevlar.cli.mains[args.cmd]
versionmessage = '[kevlar] running version {}'.format(kevlar.__version__)
print(versionmessage, file=args.logfile)
kevlar.plog(versionmessage)
mainmethod(args)
10 changes: 4 additions & 6 deletions kevlar/alac.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,12 @@
from kevlar.call import call
import khmer
import re
import sys


def alac(pstream, refrfile, threads=1, ksize=31, maxreads=10000, delta=50,
seedsize=31, maxdiff=None, inclpattern=None, exclpattern=None,
match=1, mismatch=2, gapopen=5, gapextend=0, min_ikmers=None,
maskfile=None, maskmem=1e6, maskmaxfpr=0.01, logstream=sys.stderr):
maskfile=None, maskmem=1e6, maskmaxfpr=0.01):
assembler = kevlar.assemble.assemble(pstream, maxreads=maxreads)
contigs_by_partition = defaultdict(list)
for partid, contig in assembler:
Expand All @@ -30,7 +29,6 @@ def alac(pstream, refrfile, threads=1, ksize=31, maxreads=10000, delta=50,
targeter = kevlar.localize.localize(
contigstream, refrfile, seedsize=seedsize, delta=delta,
maxdiff=maxdiff, inclpattern=inclpattern, exclpattern=exclpattern,
logstream=logstream
)
targets_by_partition = defaultdict(list)
for partid, gdna in targeter:
Expand All @@ -50,7 +48,7 @@ def alac(pstream, refrfile, threads=1, ksize=31, maxreads=10000, delta=50,
calls = sorted(calls, key=lambda c: (c.seqid, c.position))
if maskfile:
message = 'generating mask of variant-spanning k-mers'
print('[kevlar::alac]', message, file=logstream)
kevlar.plog('[kevlar::alac]', message)
numtables = 4
buckets = maskmem * khmer._buckets_per_byte['nodegraph'] / numtables
mask = khmer.Nodetable(ksize, buckets, numtables)
Expand All @@ -63,7 +61,7 @@ def alac(pstream, refrfile, threads=1, ksize=31, maxreads=10000, delta=50,
message = 'WARNING: mask FPR is {:.4f}'.format(fpr)
message += '; exceeds user-specified limit'
message += ' of {:.4f}'.format(maskmaxfpr)
print('[kevlar::alac]', message, file=logstream)
kevlar.plog('[kevlar::alac]', message)
mask.save(maskfile)
for call in calls:
yield call
Expand All @@ -83,7 +81,7 @@ def main(args):
exclpattern=args.exclude, match=args.match, mismatch=args.mismatch,
gapopen=args.open, gapextend=args.extend, min_ikmers=args.min_ikmers,
maskfile=args.gen_mask, maskmem=args.mask_mem,
maskmaxfpr=args.mask_max_fpr, logstream=args.logfile,
maskmaxfpr=args.mask_max_fpr,
)

writer = kevlar.vcf.VCFWriter(
Expand Down
15 changes: 6 additions & 9 deletions kevlar/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@

import kevlar
import re
import sys


def assemble_fml_asm(partition, logstream=sys.stderr):
def assemble_fml_asm(partition):
reads = list(partition)
assembler = kevlar.assembly.fml_asm(reads)
for n, contig in enumerate(assembler, 1):
Expand All @@ -21,23 +20,22 @@ def assemble_fml_asm(partition, logstream=sys.stderr):
yield next(kevlar.augment.augment(reads, [record]))


def assemble(partstream, maxreads=10000, logstream=sys.stderr):
def assemble(partstream, maxreads=10000):
n = 0
pn = 0
progress_indicator = kevlar.ProgressIndicator(
'[kevlar::assemble] skipping partition with {counter} reads',
interval=10, breaks=[100, 1000, 10000], usetimer=True,
logstream=logstream,
)
for partid, partition in partstream:
pn += 1
progress_indicator.update()
numreads = len(partition)
if numreads > maxreads: # pragma: no cover
message = 'skipping partition with {:d} reads'.format(numreads)
print('[kevlar::assemble] WARNING:', message, file=logstream)
kevlar.plog('[kevlar::assemble] WARNING:', message)
continue
for contig in assemble_fml_asm(partition, logstream=logstream):
for contig in assemble_fml_asm(partition):
n += 1
newname = 'contig{}'.format(n)
if partid is not None:
Expand All @@ -46,7 +44,7 @@ def assemble(partstream, maxreads=10000, logstream=sys.stderr):
yield partid, contig
message = 'processed {} partitions'.format(pn)
message += ' and assembled {} contigs'.format(n)
print('[kevlar::assemble]', message, file=logstream)
kevlar.plog('[kevlar::assemble]', message)


def main(args):
Expand All @@ -56,7 +54,6 @@ def main(args):
else:
pstream = kevlar.parse_partitioned_reads(readstream)
outstream = kevlar.open(args.out, 'w')
assembler = assemble(pstream, maxreads=args.max_reads,
logstream=args.logfile)
assembler = assemble(pstream, maxreads=args.max_reads)
for partid, contig in assembler:
kevlar.print_augmented_fastx(contig, outstream)
4 changes: 1 addition & 3 deletions kevlar/augment.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
# -----------------------------------------------------------------------------

import kevlar
import sys


def augment(augseqstream, nakedseqstream, collapsemates=False, upint=10000):
Expand All @@ -24,8 +23,7 @@ def augment(augseqstream, nakedseqstream, collapsemates=False, upint=10000):
mateseqs = dict()
for n, record in enumerate(augseqstream):
if n > 0 and n % upint == 0:
print('[kevlar::augment] processed', n, 'input reads',
file=sys.stderr)
kevlar.plog('[kevlar::augment] processed', n, 'input reads')
for ikmer in record.annotations:
seq = record.ikmerseq(ikmer)
ikmers[seq] = ikmer.abund
Expand Down
29 changes: 14 additions & 15 deletions kevlar/call.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
# -----------------------------------------------------------------------------

from collections import defaultdict
import sys
import kevlar
from kevlar.reference import bwa_align
from kevlar.varmap import VariantMapping
Expand Down Expand Up @@ -85,7 +84,7 @@ def dedup(callstream):

def prelim_call(targetlist, querylist, partid=None, match=1, mismatch=2,
gapopen=5, gapextend=0, ksize=31, refrfile=None, debug=False,
mindist=5, logstream=sys.stderr):
mindist=5):
"""Implement the `kevlar call` procedure as a generator function."""
for query in sorted(querylist, reverse=True, key=len):
alignments = list()
Expand All @@ -106,10 +105,12 @@ def prelim_call(targetlist, querylist, partid=None, match=1, mismatch=2,

for n, alignment in enumerate(aligns2report):
if debug:
print('DEBUG ', alignment.cutout.defline, ' vs ',
alignment.contig.name, '\n', str(alignment), sep='',
end='\n\n', file=logstream)
for varcall in alignment.call_variants(ksize, mindist, logstream):
kevlar.plog(
'DEBUG ', alignment.cutout.defline, ' vs ',
alignment.contig.name, '\n', str(alignment), sep='',
end='\n\n',
)
for varcall in alignment.call_variants(ksize, mindist):
if partid is not None:
varcall.annotate('PART', partid)
if alignment.matedist:
Expand All @@ -126,18 +127,17 @@ def call(*args, **kwargs):
yield call


def load_contigs(contigstream, logstream=sys.stderr):
message = 'loading contigs into memory by partition'
print('[kevlar::call]', message, file=logstream)
def load_contigs(contigstream):
kevlar.plog('[kevlar::call] Loading contigs into memory by partition')
contigs_by_partition = dict()
nparts = 0
ncontigs = 0
for partid, contiglist in contigstream:
nparts += 1
ncontigs += len(contiglist)
contigs_by_partition[partid] = contiglist
message = 'loaded {} contigs from {} partitions'.format(ncontigs, nparts)
print('[kevlar::call]', message, file=logstream)
message = 'Loaded {} contigs from {} partitions'.format(ncontigs, nparts)
kevlar.plog('[kevlar::call]', message)
return contigs_by_partition


Expand All @@ -161,13 +161,13 @@ def main(args):
mask = None
if args.gen_mask:
message = 'generating mask of variant-spanning k-mers'
print('[kevlar::call]', message, file=args.logfile)
kevlar.plog('[kevlar::call]', message)
ntables = 4
buckets = args.mask_mem * _buckets_per_byte['nodegraph'] / ntables
mask = khmer.Nodetable(args.ksize, buckets, ntables)
progress_indicator = kevlar.ProgressIndicator(
'[kevlar::call] processed contigs/gDNAs for {counter} partitions',
interval=10, breaks=[100, 1000, 10000], logstream=args.logfile,
interval=10, breaks=[100, 1000, 10000],
)
for partid, gdnas in gdnastream:
progress_indicator.update()
Expand All @@ -176,7 +176,6 @@ def main(args):
gdnas, contigs, partid, match=args.match, mismatch=args.mismatch,
gapopen=args.open, gapextend=args.extend, ksize=args.ksize,
refrfile=args.refr, debug=args.debug, mindist=5,
logstream=args.logfile
)
for varcall in caller:
if args.gen_mask:
Expand All @@ -190,5 +189,5 @@ def main(args):
message = 'WARNING: mask FPR is {:.4f}'.format(fpr)
message += '; exceeds user-specified limit'
message += ' of {:.4f}'.format(args.mask_max_fpr)
print('[kevlar::call]', message, file=args.logfile)
kevlar.plog('[kevlar::call]', message)
mask.save(args.gen_mask)
1 change: 0 additions & 1 deletion kevlar/cigar.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from collections import namedtuple
import kevlar
import re
import sys


AlignmentBlock = namedtuple('AlignmentBlock', 'length type target query')
Expand Down
20 changes: 12 additions & 8 deletions kevlar/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@
import argparse
import sys
import kevlar
from . import dump
from . import count
from . import effcount
from . import novel
from . import filter
from . import augment
Expand All @@ -30,9 +28,7 @@
from . import dist

mains = {
'dump': kevlar.dump.main,
'count': kevlar.count.main,
'effcount': kevlar.effcount.main,
'novel': kevlar.novel.main,
'filter': kevlar.filter.main,
'augment': kevlar.augment.main,
Expand All @@ -50,9 +46,7 @@
}

subparser_funcs = {
'dump': dump.subparser,
'count': count.subparser,
'effcount': effcount.subparser,
'novel': novel.subparser,
'filter': filter.subparser,
'augment': augment.subparser,
Expand Down Expand Up @@ -90,12 +84,22 @@ def parser():
parser._optionals.title = 'Global arguments'
parser.add_argument('-v', '--version', action='version',
version='kevlar v{}'.format(kevlar.__version__))
parser.add_argument('-l', '--logfile', metavar='F', default=sys.stderr,
type=argparse.FileType('w'), help='log file for '
parser.add_argument('-l', '--logfile', metavar='F', help='log file for '
'diagnostic messages, warnings, and errors')
parser.add_argument('--tee', action='store_true', help='write diagnostic '
'output to logfile AND terminal (stderr)')
subparsers = parser.add_subparsers(dest='cmd', metavar='cmd',
help='"' + subcommandstr + '"')
for func in subparser_funcs.values():
func(subparsers)

return parser


def parse_args(arglist=None):
    """Parse CLI arguments and configure kevlar's global logging settings.

    `arglist` is handed to `ArgumentParser.parse_args`; when it is None,
    argparse reads the arguments from the command line.

    Side effects: sets `kevlar.logstream` to stderr, or to a newly opened
    file when `--logfile` names something other than `-`, and sets
    `kevlar.teelog` from `--tee`.

    Returns the parsed argument namespace.
    """
    args = kevlar.cli.parser().parse_args(arglist)
    kevlar.logstream = sys.stderr
    if args.logfile and args.logfile != '-':
        kevlar.logstream = kevlar.open(args.logfile, 'w')
    # Tee mode is only meaningful when logging to a separate file; enabling it
    # while the log stream is stderr would print every message twice.
    kevlar.teelog = args.tee and kevlar.logstream is not sys.stderr
    return args

0 comments on commit 76a82fc

Please sign in to comment.