Skip to content

Commit

Permalink
Merge pull request #254 from dib-lab/varmap/align-str
Browse files Browse the repository at this point in the history
String repr of varmap, and convenient little debug flag for calling
  • Loading branch information
standage committed Apr 23, 2018
2 parents 0dbf973 + 91c2ff9 commit 9965925
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 24 deletions.
31 changes: 9 additions & 22 deletions kevlar/call.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,27 +83,9 @@ def dedup(callstream):


def prelim_call(targetlist, querylist, match=1, mismatch=2, gapopen=5,
gapextend=0, ksize=31, refrfile=None, mindist=5,
gapextend=0, ksize=31, refrfile=None, debug=False, mindist=5,
logstream=sys.stderr):
"""Implement the `kevlar call` procedure as a generator function.
Input is the following.
- an iterable containing one or more target sequences from the reference
genome, stored as khmer or screed sequence records
- an iterable containing one or more contigs assembled by kevlar, stored as
khmer or screed sequence records
- alignment match score (integer)
- alignment mismatch penalty (integer)
- alignment gap open penalty (integer)
- alignment gap extension penalty (integer)
- mates of interesting reads, in case these are needed to distinguish
between multiple best hist (filename)
- reference file to which mates of interesting reads, if any, will be
mapped to disambiguate multi-mapping contigs
The function yields tuples of target sequence name, query sequence name,
and alignment CIGAR string
"""
"""Implement the `kevlar call` procedure as a generator function."""
for query in sorted(querylist, reverse=True, key=len):
alignments = list()
for target in sorted(targetlist, key=lambda cutout: cutout.defline):
Expand All @@ -122,9 +104,14 @@ def prelim_call(targetlist, querylist, match=1, mismatch=2, gapopen=5,
aligns2report.sort(key=lambda aln: aln.matedist)

for n, alignment in enumerate(aligns2report):
if debug:
print('DEBUG ', alignment.cutout.defline, ' vs ',
alignment.contig.name, '\n', str(alignment), sep='',
end='\n\n', file=logstream)
for varcall in alignment.call_variants(ksize, mindist, logstream):
if alignment.matedist:
varcall.annotate('MD', '{:.2f}'.format(alignment.matedist))
avgdistance = '{:.2f}'.format(alignment.matedist)
varcall.annotate('MATEDIST', avgdistance)
yield varcall


Expand All @@ -146,7 +133,7 @@ def main(args):
caller = call(
targetseqs, queryseqs,
args.match, args.mismatch, args.open, args.extend,
args.ksize, args.refr
args.ksize, args.refr, args.debug, 5, args.logfile
)
for varcall in caller:
print(varcall.vcf, file=outstream)
2 changes: 2 additions & 0 deletions kevlar/cli/call.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ def subparser(subparsers):
misc_args = subparser.add_argument_group('Miscellaneous settings')
misc_args.add_argument('-h', '--help', action='help',
help='show this help message and exit')
misc_args.add_argument('-d', '--debug', action='store_true',
help='show debugging output')
misc_args.add_argument('--refr', metavar='FILE',
help='reference genome indexed for BWA search; if '
'provided, mates of interesting reads will be used '
Expand Down
15 changes: 15 additions & 0 deletions kevlar/tests/data/wasp-align.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
GAGACGTCTGCACCAGTGTCTATTAGAAAACATGCTCCTGTTTTCCTATCATTTACCATGAGGCGCAGGTTTTTCGTCGC
|||||||||||||||||||||||||| |||||||||||||||||||||||
------------------------------CATGCTCCTGTTTTCCTATCATTTACAATGAGGCGCAGGTTTTTCGTCGC

GTCGTCAACCGCCTCTATTTGAGACGCTTGCACTAGTTTTCCGAAGGCTTTCTTTTATCCGGGCCTGTCCACTTGCACGG
||||||||||||||||||||||||||||||||||||||||||||||||||||||| |||||||||||||||||||
GTCGTCAACCGCCTCTATTTGAGACGCTTGCACTAGTTTTCCGAAGGCTTTCTTT------GGCCTGTCCACTTGCACGG

GGATCTGCACTTCCACGATCGTTCGGCGAATTTCCAGTGGTAATAACATAGGCCGTCATCGTTTCGGTAAGTGCTTGACG
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
GGATCTGCACTTCCACGATCGTTCGGCGAATTTCCAGTGGTAATAACATAGGCCGTCATCGTTTCGGTAAGTGCTTGACG

ATCGGCTTCGACTTCGGTATCTTCCGCGGAAACTTCGGTATTTTCTTCTGCCGCG
|||||||||||||||||||||||||
ATCGGCTTCGACTTCGGTATCTTCC------------------------------
4 changes: 2 additions & 2 deletions kevlar/tests/test_alac.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,8 @@ def test_alac_inf_mate_dist(cc, numcalls, numdist, numinf):
caller = kevlar.alac.alac(partstream, refrfile, ksize=31, delta=50,
seedsize=51, fallback=True)
calls = list(caller)
withdist = [c for c in calls if c.attribute('MD') is not None]
infdist = [c for c in calls if c.attribute('MD') == 'inf']
withdist = [c for c in calls if c.attribute('MATEDIST') is not None]
infdist = [c for c in calls if c.attribute('MATEDIST') == 'inf']
print('DEBUG total withdist infdist', len(calls), len(withdist),
len(infdist), file=sys.stderr)
assert len(calls) == numcalls
Expand Down
12 changes: 12 additions & 0 deletions kevlar/tests/test_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,15 @@ def test_snv_dedup():
assert len(calls) == 1
assert calls[0].seqid == 'linkagegroup5'
assert calls[0].position == 8174 - 1


def test_debug_mode(capsys):
contig = data_file('wasp-pass.contig.augfasta')
gdna = data_file('wasp.gdna.fa')
arglist = ['call', '--debug', contig, gdna]
args = kevlar.cli.parser().parse_args(arglist)
kevlar.call.main(args)

out, err = capsys.readouterr()
alignstr = kevlar.open(data_file('wasp-align.txt'), 'r').read().strip()
assert alignstr in err
20 changes: 20 additions & 0 deletions kevlar/tests/test_varmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,3 +286,23 @@ def test_no_margin(query, target, refr, alt):
assert calls[0].filterstr == 'PASS'
assert calls[0]._refr == refr
assert calls[0]._alt == alt


def test_varmap_str():
contig = next(
kevlar.parse_augmented_fastx(
kevlar.open(data_file('wasp-pass.contig.augfasta'), 'r')
)
)
cutout = next(
kevlar.reference.load_refr_cutouts(
kevlar.open(data_file('wasp.gdna.fa'), 'r')
)
)
aln = VariantMapping(contig, cutout)
alignstr = kevlar.open(data_file('wasp-align.txt'), 'r').read().strip()

print(str(aln), file=sys.stderr)
print(alignstr, file=sys.stderr)

assert str(aln) == alignstr
22 changes: 22 additions & 0 deletions kevlar/varmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,28 @@ def __init__(self, contig, cutout, score=None, cigar=None, strand=1,
elif re.search(indelpattern, self.cigar):
self.vartype = 'indel'

def __str__(self):
fulltarget, fullquery = '', ''
for token in self.tok.blocks:
fulltarget += token.target if token.target else '-' * token.length
fullquery += token.query if token.query else '-' * token.length

fullmatch = list()
for t, q in zip(fulltarget, fullquery):
c = '|' if t == q else ' '
fullmatch.append(c)
fullmatch = ''.join(fullmatch)

outlines = list()
i = 0
while i < len(fulltarget):
outlines.append(fulltarget[i:i+80])
outlines.append(fullmatch[i:i+80])
outlines.append(fullquery[i:i+80])
outlines.append('')
i += 80
return '\n'.join(outlines).strip()

@property
def interval(self):
return self.cutout.interval
Expand Down

0 comments on commit 9965925

Please sign in to comment.