Skip to content

Commit

Permalink
Merge pull request #237 from dib-lab/fix/nomargin
Browse files Browse the repository at this point in the history
Fix/nomargin
  • Loading branch information
standage committed Apr 3, 2018
2 parents b4db0d2 + 3168e87 commit 114b2ba
Show file tree
Hide file tree
Showing 9 changed files with 145 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).

### Fixed
- Incorrect handling of VCF `FILTER` field resolved (see #235).
- A bug causing some calls to be erroneously filtered (see #237).


## [0.4.0] 2018-03-29
Expand Down
2 changes: 2 additions & 0 deletions kevlar/tests/data/nomargin-gdna.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>seq1_115504410-115504607
GTGTGTGTGCTAGAAACACTTCTGCCCGGCGCTGTAATGGCGTGAATGAAGTTAAGGTCCCACTTCCATTTTCCTATGCCTATGCTTGGGAACCTGCCTGGTAAGAAGATCACTGCTCTGCCCTAGGAGGATCCTTATCTTTCTGTTGCAATATCAACTCCTGACAGACTGGAAGAAGTCCCTGAGTTCAGGGCTAA
3 changes: 3 additions & 0 deletions kevlar/tests/data/nomargin-indel-contigs.augfasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
>contig1
GTGTGTGTGCTAGAAACACTTCTGCCCGGCGCTGTAATGGCGTGAATGATAAGGTCCCACTTCCATTTTCCTATGCCTATGCTTGGGAACCTGCCTGGTAAGAAGATCACTGCTCTGCCCTAGGAGGATCCTTATCTTTCTGTT
TGGCGTGAATGATAAGGTCCCACTTCCATTT 6 0 0#
2 changes: 2 additions & 0 deletions kevlar/tests/data/nomargin-r-gdna.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>seq1_115504410-115504557
GTGTGTGTGCTAGAAACACTTCTGCCCGGCGCTGTAATGGCGTGAATGAAGTTAAGGTCCCACTTCCATTTTCCTATGCCTATGCTTGGGAACCTGCCTGGTAAGAAGATCACTGCTCTGCCCTAGGAGGATCCTTATCTTTCTGTT
3 changes: 3 additions & 0 deletions kevlar/tests/data/nomargin-r-indel-contigs.augfasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
>contig1
TAGAAACACTTCTGCCCGGCGCTGTAATGGCGTGAATGAAGTTAAGGTCCCACTTCCATTTTCCTATTATGCCTATGCTTGGGAACCTGCCTGGTAAGAAGATCACTGCTCTGCCCTAGGAGGATCCTTATCTTTCTGTT
CCATTTTCCTATTATGCCTATGCTTGGGAAC 9 0 0#
44 changes: 44 additions & 0 deletions kevlar/tests/data/nomargin-r-snv-contigs.augfasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
>contig1
AACAGAAAGATAAGGATCCTCCTAGGGCAGAGCAGTGATCTTCTTACCAGGCAGGTTCCCAAGCATAGGCATAGGAAAATGGAAGTGGGACCTTAACCTCATTCACGCCATTACAGCGCCGGGCAGAAGTGTTTCTA
GGCATAGGAAAATGGAAGTGGGACCTTAACC 7 0 1#
GCATAGGAAAATGGAAGTGGGACCTTAACCT 7 0 0#
CATAGGAAAATGGAAGTGGGACCTTAACCTC 7 1 0#
ATAGGAAAATGGAAGTGGGACCTTAACCTCA 7 0 0#
TAGGAAAATGGAAGTGGGACCTTAACCTCAT 7 0 0#
AGGAAAATGGAAGTGGGACCTTAACCTCATT 8 0 0#
GGAAAATGGAAGTGGGACCTTAACCTCATTC 8 0 1#
GAAAATGGAAGTGGGACCTTAACCTCATTCA 8 0 0#
AAAATGGAAGTGGGACCTTAACCTCATTCAC 8 0 0#
AAATGGAAGTGGGACCTTAACCTCATTCACG 8 0 0#
AATGGAAGTGGGACCTTAACCTCATTCACGC 8 0 0#
ATGGAAGTGGGACCTTAACCTCATTCACGCC 8 0 0#
TGGAAGTGGGACCTTAACCTCATTCACGCCA 8 0 0#
GGAAGTGGGACCTTAACCTCATTCACGCCAT 8 0 0#
GAAGTGGGACCTTAACCTCATTCACGCCATT 7 0 0#
AAGTGGGACCTTAACCTCATTCACGCCATTA 7 0 0#
AGTGGGACCTTAACCTCATTCACGCCATTAC 7 0 0#
GTGGGACCTTAACCTCATTCACGCCATTACA 7 0 0#
TGGGACCTTAACCTCATTCACGCCATTACAG 7 0 0#
GGGACCTTAACCTCATTCACGCCATTACAGC 6 0 0#
GGACCTTAACCTCATTCACGCCATTACAGCG 6 0 0#
GACCTTAACCTCATTCACGCCATTACAGCGC 6 0 0#
ACCTTAACCTCATTCACGCCATTACAGCGCC 6 0 0#
CCTTAACCTCATTCACGCCATTACAGCGCCG 6 0 0#
CTTAACCTCATTCACGCCATTACAGCGCCGG 8 1 1#
TTAACCTCATTCACGCCATTACAGCGCCGGG 8 0 1#
TAACCTCATTCACGCCATTACAGCGCCGGGC 8 0 0#
AACCTCATTCACGCCATTACAGCGCCGGGCA 8 0 0#
ACCTCATTCACGCCATTACAGCGCCGGGCAG 8 0 0#
CCTCATTCACGCCATTACAGCGCCGGGCAGA 8 1 0#
CTCATTCACGCCATTACAGCGCCGGGCAGAA 8 0 0#
#mateseq=ACAGTTACTTTTCAACATAATTCTCAGACTCCATACTGTTTATTTTATGTTGATAGACTCGCACCACACTTCTGCCATTTTAAAACCCATATTCTGGTCA#
#mateseq=ACTACGTGACTCATTTTTGACAATTCAGTTACATTCATTTCTGTGGGTATTTTTTGTTTATGTAATCATTGGATACATAACCTCTGGACCCTTTCCATCT#
#mateseq=AGCTTATTATATTCCTCAGTGGCAAAATATACATCACTCTTTTGAGACTGATAGGGAAAAAGGCCAACAGTTACTTTTCAACATAATTCTCAGACTCCAT#
#mateseq=AGGCAGTGCCCGCCTTGGGCTCCTGAGTAAGGACTACGTGACTCATTTTTGACAATTCAGTTACATTCATTTCTTTGGGTATTTTTTGTTTATGTAATCA#
#mateseq=CGCCTTGGGCTCCTGAGTAAGGACTACGTGACTCATTTTTGACAATTCAGTTACATTCATTTCTGTGGGTATTTTTTGTTTATGTAATCATTGGATACAT#
#mateseq=CGGCATTTGCTTTATAAACACATCCCACCGCAGGCCCGAAGGGGGAAGATGAGCTGGCTGAGGCAGTGCCCGCCTTGGGCTCCTGAGTAAGGACTACGTG#
#mateseq=GCCAACAGTTACTTTTCAACATAATTCTCAGACTCCATACTGTTTATTTTATGTTGATAGAATCGCACCACACTTCTGCCATTTTAAAACCCATATTCTG#
#mateseq=TCATTGGATACATAACCTCTGGACCCTTTCCATCTTCAAATGTATAGATTAAAATCTTATAAGTATTCCTTAAATTTGCGTGGGGGGCACATCTGAGAGA#
#mateseq=TGTTTATTTTATGTTGATAGAATCGCACCACACTTCTGCCATTTTAAAACCCATATTCTGGTCAGCAGTTTGGTATTATGTTTTGACCTCATCTTCGTCT#
#mateseq=TTATTTTATGTTGATAGAATCGCACCACACTTCTGCCATTTTAAAACCCATATTCTGGTCAGCAGTTTGGTATTATGTTTTGACCTCATCTTCGTCTGTT#
#mateseq=TTTTCAACATAATTCTCAGACTCCATACTGTTTATTTTATGTTGATAGAAACGCACCACACTTCTGCCATTTTAAAACCCATATTCTGGTCAGCAGTTTG#
44 changes: 44 additions & 0 deletions kevlar/tests/data/nomargin-snv-contigs.augfasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
>contig1
AACAGAAAGATAAGGATCCTCCTAGGGCAGAGCAGTGATCTTCTTACCAGGCAGGTTCCCAAGCATAGGCATAGGAAAATGGAAGTGGGACCTTAACCTCATTCACGCCATTACAGCGCCGGGCAGAAGTGTTTCTAGCACACACAC
GGCATAGGAAAATGGAAGTGGGACCTTAACC 7 0 1#
GCATAGGAAAATGGAAGTGGGACCTTAACCT 7 0 0#
CATAGGAAAATGGAAGTGGGACCTTAACCTC 7 1 0#
ATAGGAAAATGGAAGTGGGACCTTAACCTCA 7 0 0#
TAGGAAAATGGAAGTGGGACCTTAACCTCAT 7 0 0#
AGGAAAATGGAAGTGGGACCTTAACCTCATT 8 0 0#
GGAAAATGGAAGTGGGACCTTAACCTCATTC 8 0 1#
GAAAATGGAAGTGGGACCTTAACCTCATTCA 8 0 0#
AAAATGGAAGTGGGACCTTAACCTCATTCAC 8 0 0#
AAATGGAAGTGGGACCTTAACCTCATTCACG 8 0 0#
AATGGAAGTGGGACCTTAACCTCATTCACGC 8 0 0#
ATGGAAGTGGGACCTTAACCTCATTCACGCC 8 0 0#
TGGAAGTGGGACCTTAACCTCATTCACGCCA 8 0 0#
GGAAGTGGGACCTTAACCTCATTCACGCCAT 8 0 0#
GAAGTGGGACCTTAACCTCATTCACGCCATT 7 0 0#
AAGTGGGACCTTAACCTCATTCACGCCATTA 7 0 0#
AGTGGGACCTTAACCTCATTCACGCCATTAC 7 0 0#
GTGGGACCTTAACCTCATTCACGCCATTACA 7 0 0#
TGGGACCTTAACCTCATTCACGCCATTACAG 7 0 0#
GGGACCTTAACCTCATTCACGCCATTACAGC 6 0 0#
GGACCTTAACCTCATTCACGCCATTACAGCG 6 0 0#
GACCTTAACCTCATTCACGCCATTACAGCGC 6 0 0#
ACCTTAACCTCATTCACGCCATTACAGCGCC 6 0 0#
CCTTAACCTCATTCACGCCATTACAGCGCCG 6 0 0#
CTTAACCTCATTCACGCCATTACAGCGCCGG 8 1 1#
TTAACCTCATTCACGCCATTACAGCGCCGGG 8 0 1#
TAACCTCATTCACGCCATTACAGCGCCGGGC 8 0 0#
AACCTCATTCACGCCATTACAGCGCCGGGCA 8 0 0#
ACCTCATTCACGCCATTACAGCGCCGGGCAG 8 0 0#
CCTCATTCACGCCATTACAGCGCCGGGCAGA 8 1 0#
CTCATTCACGCCATTACAGCGCCGGGCAGAA 8 0 0#
#mateseq=ACAGTTACTTTTCAACATAATTCTCAGACTCCATACTGTTTATTTTATGTTGATAGACTCGCACCACACTTCTGCCATTTTAAAACCCATATTCTGGTCA#
#mateseq=ACTACGTGACTCATTTTTGACAATTCAGTTACATTCATTTCTGTGGGTATTTTTTGTTTATGTAATCATTGGATACATAACCTCTGGACCCTTTCCATCT#
#mateseq=AGCTTATTATATTCCTCAGTGGCAAAATATACATCACTCTTTTGAGACTGATAGGGAAAAAGGCCAACAGTTACTTTTCAACATAATTCTCAGACTCCAT#
#mateseq=AGGCAGTGCCCGCCTTGGGCTCCTGAGTAAGGACTACGTGACTCATTTTTGACAATTCAGTTACATTCATTTCTTTGGGTATTTTTTGTTTATGTAATCA#
#mateseq=CGCCTTGGGCTCCTGAGTAAGGACTACGTGACTCATTTTTGACAATTCAGTTACATTCATTTCTGTGGGTATTTTTTGTTTATGTAATCATTGGATACAT#
#mateseq=CGGCATTTGCTTTATAAACACATCCCACCGCAGGCCCGAAGGGGGAAGATGAGCTGGCTGAGGCAGTGCCCGCCTTGGGCTCCTGAGTAAGGACTACGTG#
#mateseq=GCCAACAGTTACTTTTCAACATAATTCTCAGACTCCATACTGTTTATTTTATGTTGATAGAATCGCACCACACTTCTGCCATTTTAAAACCCATATTCTG#
#mateseq=TCATTGGATACATAACCTCTGGACCCTTTCCATCTTCAAATGTATAGATTAAAATCTTATAAGTATTCCTTAAATTTGCGTGGGGGGCACATCTGAGAGA#
#mateseq=TGTTTATTTTATGTTGATAGAATCGCACCACACTTCTGCCATTTTAAAACCCATATTCTGGTCAGCAGTTTGGTATTATGTTTTGACCTCATCTTCGTCT#
#mateseq=TTATTTTATGTTGATAGAATCGCACCACACTTCTGCCATTTTAAAACCCATATTCTGGTCAGCAGTTTGGTATTATGTTTTGACCTCATCTTCGTCTGTT#
#mateseq=TTTTCAACATAATTCTCAGACTCCATACTGTTTATTTTATGTTGATAGAAACGCACCACACTTCTGCCATTTTAAAACCCATATTCTGGTCAGCAGTTTG#
25 changes: 25 additions & 0 deletions kevlar/tests/test_varmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,3 +261,28 @@ def test_passenger_screen():
assert len(calls) == 2
assert calls[0].filterstr == 'PASS'
assert calls[1].filterstr == 'PassengerVariant'


@pytest.mark.parametrize('query,target,refr,alt', [
    ('nomargin-snv-contigs.augfasta', 'nomargin-gdna.fa', 'A', 'G'),
    ('nomargin-indel-contigs.augfasta', 'nomargin-gdna.fa', 'AAGT', 'A'),
    ('nomargin-r-snv-contigs.augfasta', 'nomargin-r-gdna.fa', 'A', 'G'),
    ('nomargin-r-indel-contigs.augfasta', 'nomargin-r-gdna.fa', 'C', 'CTAT'),
])
def test_no_margin(query, target, refr, alt):
    """Each query/target pair must produce exactly one passing call.

    A VariantMapping is built from the first record of the `query`
    augmented sequence file and the first reference cutout loaded from
    `target`. The single call it yields must have filter status 'PASS'
    and carry the expected `refr` and `alt` alleles.
    """
    # Only the first record of each data file is consumed.
    contigstream = kevlar.open(data_file(query), 'r')
    contig = next(kevlar.parse_augmented_fastx(contigstream))
    gdnastream = kevlar.open(data_file(target), 'r')
    cutout = next(kevlar.reference.load_refr_cutouts(gdnastream))

    mapping = VariantMapping(contig, cutout)
    # NOTE(review): 31 is presumably the k-mer size (the annotated k-mers
    # in the test data are 31 bp long) -- confirm against call_variants.
    calls = list(mapping.call_variants(31))
    assert len(calls) == 1

    call = calls[0]
    assert call.filterstr == 'PASS'
    assert call._refr == refr
    assert call._alt == alt
33 changes: 21 additions & 12 deletions kevlar/varmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@


patterns = {
'^(\d+)([DI])(\d+)M(\d+)[DI]$': 'snv',
'^(\d+)([DI])(\d+)M(\d+)[DI](\d+)M$': 'snv',
'^(\d+)([DI])(\d+)M(\d+)([ID])(\d+)M(\d+)[DI]$': 'indel',
'^(\d+)([DI])(\d+)M(\d+)([ID])(\d+)M(\d+)[DI](\d+)M$': 'indel',
'^((\d+)([DI]))?(\d+)M((\d+)[DI])?$': ('snv', False),
'^((\d+)([DI]))?(\d+)M(\d+)[DI](\d+)M$': ('snv', True),
'^((\d+)([DI]))?(\d+)M(\d+)([ID])(\d+)M((\d+)[DI])?$': ('indel', False),
'^((\d+)([DI]))?(\d+)M(\d+)([ID])(\d+)M(\d+)[DI](\d+)M$': ('indel', True),
}


Expand All @@ -45,9 +45,14 @@ def __init__(self, contig, cutout, score=None, cigar=None, strand=1,
self.matedist = None
self.vartype = None
self.alnmatch = None
for pattern, vartype in patterns.items():
for pattern, (vartype, rightcheck) in patterns.items():
matchobj = re.match(pattern, cigar)
if matchobj:
if rightcheck:
idx = 6 if vartype == 'snv' else 9
rightmatchlen = int(matchobj.group(idx))
if rightmatchlen > 5:
continue
self.alnmatch = matchobj
self.vartype = vartype
break
Expand Down Expand Up @@ -86,37 +91,41 @@ def pos(self):
def offset(self):
if self.alnmatch is None:
return None
return int(self.alnmatch.group(1))
if self.alnmatch.group(1) is None:
return 0
return int(self.alnmatch.group(2))

@property
def targetshort(self):
if self.alnmatch is None:
return None
return self.alnmatch.group(2) == 'I'
if self.alnmatch.group(1) is None:
return False
return self.alnmatch.group(3) == 'I'

@property
def leftmatchlen(self):
if self.alnmatch is None or self.vartype != 'indel':
return None
return int(self.alnmatch.group(3))
return int(self.alnmatch.group(4))

@property
def indellength(self):
if self.alnmatch is None or self.vartype != 'indel':
return None
return int(self.alnmatch.group(4))
return int(self.alnmatch.group(5))

@property
def indeltype(self):
if self.alnmatch is None or self.vartype != 'indel':
return None
return self.alnmatch.group(5)
return self.alnmatch.group(6)

@property
def rightmatchlen(self):
if self.alnmatch is None or self.vartype != 'indel':
return None
return int(self.alnmatch.group(6))
return int(self.alnmatch.group(7))

def is_passenger(self, call):
if call.window is None:
Expand Down Expand Up @@ -192,7 +201,7 @@ def snv_variant(self, qseq, tseq, mismatches, offset, ksize):

def call_snv(self, ksize, mindist=5, logstream=sys.stderr):
"""Call SNVs from the given alignment."""
length = int(self.alnmatch.group(3))
length = int(self.alnmatch.group(4))
offset = self.offset
if self.targetshort:
gdnaoffset = 0
Expand Down

0 comments on commit 114b2ba

Please sign in to comment.