Skip to content

Commit

Permalink
Merge pull request #199 from dib-lab/fix/alignscore
Browse files Browse the repository at this point in the history
Address new alignment scoring/sorting/reporting strategies
  • Loading branch information
standage committed Feb 3, 2018
2 parents ae8cc63 + a6a948a commit ba54493
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 15 deletions.
47 changes: 33 additions & 14 deletions kevlar/call.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,19 @@ def align_both_strands(targetseq, queryseq, match=1, mismatch=2, gapopen=5,
return cigar, score, strand


def alignment_interpretable(cigar):
patterns = [
'^(\d+)([DI])(\d+)M(\d+)[DI]$',
'^(\d+)([DI])(\d+)M(\d+)[DI](\d+)M$',
'^(\d+)([DI])(\d+)M(\d+)([ID])(\d+)M(\d+)[DI]$',
'^(\d+)([DI])(\d+)M(\d+)([ID])(\d+)M(\d+)[DI](\d+)M$',
]
for pattern in patterns:
if re.search(pattern, cigar) is not None:
return True
return False


def call(targetlist, querylist, match=1, mismatch=2, gapopen=5, gapextend=0,
ksize=31):
"""
Expand All @@ -338,25 +351,31 @@ def call(targetlist, querylist, match=1, mismatch=2, gapopen=5, gapextend=0,
and alignment CIGAR string
"""
for query in sorted(querylist, reverse=True, key=len):
bestcigar = None
bestscore = None
besttarget = None
bestorientation = None
alignments = list()
for target in sorted(targetlist, key=lambda record: record.name):
cigar, score, strand = align_both_strands(
target.sequence, query.sequence, match, mismatch, gapopen,
gapextend
)
if bestscore is None or score > bestscore:
bestscore = score
bestcigar = cigar
besttarget = target
bestorientation = strand

if bestorientation == -1:
query.sequence = kevlar.revcom(query.sequence)
for varcall in make_call(besttarget, query, bestcigar, ksize):
yield varcall
alignments.append((target, cigar, score, strand))
alignments.sort(key=lambda a: a[2], reverse=True)
if len(alignments) == 1:
aligns2report = alignments
else:
scrtbl = [a for a in alignments if alignment_interpretable(a[1])]
if len(scrtbl) == 0:
finallist = alignments
else:
finallist = scrtbl
bestscore = finallist[0][2]
aligns2report = [a for a in finallist if a[2] == bestscore]

for alignment in aligns2report:
besttarget, bestcigar, bestscore, bestorientation = alignment
if bestorientation == -1:
query.sequence = kevlar.revcom(query.sequence)
for varcall in make_call(besttarget, query, bestcigar, ksize):
yield varcall


def main(args):
Expand Down
13 changes: 12 additions & 1 deletion kevlar/tests/test_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import sys
import khmer
import kevlar
from kevlar.call import call, make_call
from kevlar.call import call, make_call, alignment_interpretable
from kevlar.tests import data_file


Expand All @@ -28,6 +28,17 @@ def test_align():
assert kevlar.align(target, query) == ('10D91M69D79M20I', 155)


def test_alignment_interpretable():
assert alignment_interpretable('6M73I13M4I5M4D63M11I') is False
assert alignment_interpretable('29I17M7I3M24I36M10D7M11I14M23I4M') is False
assert alignment_interpretable('57I16M7I3M8D33M1I28M27I3M') is False

assert alignment_interpretable('25D100M25D') is True
assert alignment_interpretable('50I50M50I50M24D1M') is True
assert alignment_interpretable('47I127M30D1M') is True
assert alignment_interpretable('30D100M16D67M30D') is True


@pytest.mark.parametrize('ccid,varcall', [
('5', 'seq1:185752:30D'),
('7', 'seq1:226611:190D'),
Expand Down

0 comments on commit ba54493

Please sign in to comment.