Skip to content

Commit

Permalink
Merge pull request #279 from dib-lab/cython/sequence
Browse files Browse the repository at this point in the history
Implement augfastx handling in Cython, fix `kevlar augment`
  • Loading branch information
standage committed Jul 16, 2018
2 parents ab27713 + 68d2a8f commit be2b55d
Show file tree
Hide file tree
Showing 34 changed files with 11,607 additions and 1,519 deletions.
10 changes: 3 additions & 7 deletions kevlar/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

# Core libraries
import builtins
from collections import namedtuple
from gzip import open as gzopen
from os import makedirs
from os.path import dirname
Expand All @@ -26,15 +25,14 @@

# Internal modules
from kevlar import seqio
from kevlar import overlap
from kevlar import sketch
from kevlar import reference
from kevlar import cigar
from kevlar import varmap
from kevlar import vcf
from kevlar.mutablestring import MutableString
from kevlar.readgraph import ReadGraph
from kevlar.seqio import parse_augmented_fastx, print_augmented_fastx
from kevlar.mutablestring import MutableString
from kevlar.sequence import parse_augmented_fastx, print_augmented_fastx
from kevlar.seqio import parse_partitioned_reads, parse_single_partition
from kevlar.timer import Timer

Expand All @@ -61,6 +59,7 @@
# C extensions
from kevlar.alignment import contig_align as align
import kevlar.assembly
import kevlar.sequence

from kevlar._version import get_versions
__version__ = get_versions()['version']
Expand Down Expand Up @@ -149,6 +148,3 @@ def vcf_header(outstream, version='4.2', source='kevlar', infoheader=False):
file=outstream)
print('#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO',
sep='\t', file=outstream)


KmerOfInterest = namedtuple('KmerOfInterest', 'sequence offset abund')
30 changes: 11 additions & 19 deletions kevlar/alac.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@
from time import sleep

import kevlar
from kevlar.assemble import assemble_greedy, assemble_fml_asm
from kevlar.assemble import assemble_fml_asm
from kevlar.localize import localize
from kevlar.call import call


def make_call_from_reads(queue, idx, calls, refrfile, ksize=31, delta=50,
seedsize=31, maxdiff=None, match=1, mismatch=2,
gapopen=5, gapextend=0, greedy=False, fallback=False,
min_ikmers=None, refrseqs=None, logstream=sys.stderr):
gapopen=5, gapextend=0, min_ikmers=None,
refrseqs=None, logstream=sys.stderr):
while True:
if queue.empty():
sleep(3)
Expand All @@ -38,18 +38,12 @@ def make_call_from_reads(queue, idx, calls, refrfile, ksize=31, delta=50,
print(message, file=sys.stderr)

# Assemble partitioned reads into contig(s)
assmblr = assemble_greedy if greedy else assemble_fml_asm
contigs = list(assmblr(reads, logstream=logstream))
if len(contigs) == 0 and assmblr == assemble_fml_asm and fallback:
message = 'WARNING: no contig assembled by fermi-lite'
if cc: # pragma: no cover
message += ' for partition={:s}'.format(cc)
message += '; attempting again with homegrown greedy assembler'
print('[kevlar::alac]', message, file=logstream)
contigs = list(assemble_greedy(reads, logstream=logstream))
contigs = list(assemble_fml_asm(reads, logstream=logstream))
if min_ikmers is not None:
# Apply min ikmer filter if it's set
contigs = [c for c in contigs if len(c.ikmers) >= min_ikmers]
contigs = [
c for c in contigs if len(c.annotations) >= min_ikmers
]
if len(contigs) == 0:
queue.task_done()
continue
Expand All @@ -76,8 +70,7 @@ def make_call_from_reads(queue, idx, calls, refrfile, ksize=31, delta=50,

def alac(pstream, refrfile, threads=1, ksize=31, bigpart=10000, delta=50,
seedsize=31, maxdiff=None, match=1, mismatch=2, gapopen=5,
gapextend=0, greedy=False, fallback=False, min_ikmers=None,
logstream=sys.stderr):
gapextend=0, min_ikmers=None, logstream=sys.stderr):
part_queue = Queue(maxsize=max(32, 12 * threads))

refrstream = kevlar.open(refrfile, 'r')
Expand All @@ -91,8 +84,8 @@ def alac(pstream, refrfile, threads=1, ksize=31, bigpart=10000, delta=50,
target=make_call_from_reads,
args=(
part_queue, idx, thread_calls, refrfile, ksize, delta,
seedsize, maxdiff, match, mismatch, gapopen, gapextend, greedy,
fallback, min_ikmers, refrseqs, logstream,
seedsize, maxdiff, match, mismatch, gapopen, gapextend,
min_ikmers, refrseqs, logstream,
)
)
worker.setDaemon(True)
Expand Down Expand Up @@ -125,8 +118,7 @@ def main(args):
pstream, args.refr, threads=args.threads, ksize=args.ksize,
bigpart=args.bigpart, delta=args.delta, seedsize=args.seed_size,
maxdiff=args.max_diff, match=args.match, mismatch=args.mismatch,
gapopen=args.open, gapextend=args.extend, greedy=args.greedy,
fallback=args.fallback, min_ikmers=args.min_ikmers,
gapopen=args.open, gapextend=args.extend, min_ikmers=args.min_ikmers,
logstream=args.logfile
)

Expand Down

0 comments on commit be2b55d

Please sign in to comment.