Skip to content

Commit

Permalink
Merge pull request #230 from dib-lab/feature/passenger-screen
Browse files Browse the repository at this point in the history
Screen for "passenger calls"
  • Loading branch information
standage committed Mar 27, 2018
2 parents 7d572e1 + f2f0cbf commit 81e2277
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 11 deletions.
2 changes: 1 addition & 1 deletion kevlar/call.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def call(targetlist, querylist, match=1, mismatch=2, gapopen=5,
for n, alignment in enumerate(aligns2report):
for varcall in alignment.call_variants(ksize, mindist, logstream):
if alignment.matedist:
varcall.info['MD'] = '{:.2f}'.format(alignment.matedist)
varcall.annotate('MD', '{:.2f}'.format(alignment.matedist))
if n > 0:
varcall.annotate('NC', 'matefail')
yield varcall
Expand Down
4 changes: 2 additions & 2 deletions kevlar/gentrio.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,14 +179,14 @@ def simulate_variant_genotypes(sequences, ninh=20, ndenovo=10,
for variant in mutator:
genotypes = pick_inheritance_genotypes(rng)
gtstring = ','.join(genotypes)
variant.info['GT'] = gtstring
variant.annotate('GT', gtstring)
yield variant

mut8r = generate_mutations(sequences, n=ndenovo, weights=weights, rng=rng)
for variant in mut8r:
genotypes = (rng.choice(['0/1', '1/0']), '0/0', '0/0')
gtstring = ','.join(genotypes)
variant.info['GT'] = gtstring
variant.annotate('GT', gtstring)
yield variant


Expand Down
5 changes: 2 additions & 3 deletions kevlar/tests/data/nodiff.contig.fa
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
>contig1
ATCTCCCGGAAGACCCTTACAAGCGAGCCAAGGTCCGATTTTGGGTAGATTTCGTCAACAAAAAGGTTACTGCCCCCTCT
TCCTCTTCCATGTCTGTTTTCTTAACCTAATCCCAGAACACTCTGCCTGTTCAAGCTCATGGAGTCTTAAGTGCAGGTTG
TGCCAATTATGCATACCATCGTAACCTTGAAAGGCGATGAGCAGAAGACGGCAGCGGACGAGTTCGGAGGGTATCTACAG
ATCTCCCGGAAGACCCTTACAAGCGAGCCAAGGTCCGATTTTGGGTAGATTTCGTCAACAAAAAGGTTACTGCCCCCTCTTCCTCTTCCATGTCTGTTTTCTTAACCTAATCCCAGAACACTCTGCCTGTTCAAGCTCATGGAGTCTTAAGTGCAGGTTGTGCCAATTATGCATACCATCGTAACCTTGAAAGGCGATGAGCAGAAGACGGCAGCGGACGAGTTCGGAGGGTATCTACAG
AAAAGGTTACTGCCCCCTCTTCCTCTTCCAT 5 0 1#
5 changes: 1 addition & 4 deletions kevlar/tests/data/nodiff.gdna.fa
Original file line number Diff line number Diff line change
@@ -1,5 +1,2 @@
>chr99_2899347-2899647
ACATCGATGATGTGTGGAAGGACCCTCCCCATCTCCCGGAAGACCCTTACAAGCGAGCCAAGGTCCGATTTTGGGTAGAT
TTCGTCAACAAAAAGGTTACTGCCCCCTCTTCCTCTTCCATGTCTGTTTTCTTAACCTAATCCCAGAACACTCTGCCTGT
TCAAGCTCATGGAGTCTTAAGTGCAGGTTGTGCCAATTATGCATACCATCGTAACCTTGAAAGGCGATGAGCAGAAGACG
GCAGCGGACGAGTTCGGAGGGTATCTACAGACCATGGAGGACGGCATCCGAGAGGAGCTG
ACATCGATGATGTGTGGAAGGACCCTCCCCATCTCCCGGAAGACCCTTACAAGCGAGCCAAGGTCCGATTTTGGGTAGATTTCGTCAACAAAAAGGTTACTGCCCCCTCTTCCTCTTCCATGTCTGTTTTCTTAACCTAATCCCAGAACACTCTGCCTGTTCAAGCTCATGGAGTCTTAAGTGCAGGTTGTGCCAATTATGCATACCATCGTAACCTTGAAAGGCGATGAGCAGAAGACGGCAGCGGACGAGTTCGGAGGGTATCTACAGACCATGGAGGACGGCATCCGAGAGGAGCTG
9 changes: 9 additions & 0 deletions kevlar/tests/data/wasp-pass.contig.augfasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
>contig1
CATGCTCCTGTTTTCCTATCATTTACAATGAGGCGCAGGTTTTTCGTCGCGTCGTCAACCGCCTCTATTTGAGACGCTTGCACTAGTTTTCCGAAGGCTTTCTTTGGCCTGTCCACTTGCACGGGGATCTGCACTTCCACGATCGTTCGGCGAATTTCCAGTGGTAATAACATAGGCCGTCATCGTTTCGGTAAGTGCTTGACGATCGGCTTCGACTTCGGTATCTTCC
TTGCACTAGTTTTCCGAAGGCTTTCTTTG 12 0 0#
CACTAGTTTTCCGAAGGCTTTCTTTGGCC 13 0 0#
TTCCGAAGGCTTTCTTTGGCCTGTCCACT 10 0 0#
TCCGAAGGCTTTCTTTGGCCTGTCCACTT 13 0 0#
GGCTTTCTTTGGCCTGTCCACTTGCACGG 9 0 0#
TTTGGCCTGTCCACTTGCACGGGGATCTG 57 1 0#
GGCCTGTCCACTTGCACGGGGATCTGCAC 11 0 0#
2 changes: 2 additions & 0 deletions kevlar/tests/data/wasp.gdna.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>chr42_9000-9295
GAGACGTCTGCACCAGTGTCTATTAGAAAACATGCTCCTGTTTTCCTATCATTTACCATGAGGCGCAGGTTTTTCGTCGCGTCGTCAACCGCCTCTATTTGAGACGCTTGCACTAGTTTTCCGAAGGCTTTCTTTTATCCGGGCCTGTCCACTTGCACGGGGATCTGCACTTCCACGATCGTTCGGCGAATTTCCAGTGGTAATAACATAGGCCGTCATCGTTTCGGTAAGTGCTTGACGATCGGCTTCGACTTCGGTATCTTCCGCGGAAACTTCGGTATTTTCTTCTGCCGCG
20 changes: 19 additions & 1 deletion kevlar/tests/test_varmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,4 +242,22 @@ def test_call_num_interesting_kmers():
aln = VariantMapping(contig, cutout)
calls = list(aln.call_variants(29))
assert len(calls) == 1
assert calls[0].info['IK'] == '1'
assert calls[0].attribute('IK') == '1'


def test_passenger_screen():
contig = next(
kevlar.parse_augmented_fastx(
kevlar.open(data_file('wasp-pass.contig.augfasta'), 'r')
)
)
cutout = next(
kevlar.reference.load_refr_cutouts(
kevlar.open(data_file('wasp.gdna.fa'), 'r')
)
)
aln = VariantMapping(contig, cutout)
calls = list(aln.call_variants(29))
assert len(calls) == 2
assert calls[0].attribute('NC') is None
assert calls[1].attribute('NC') == 'passenger'
24 changes: 24 additions & 0 deletions kevlar/varmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ def __init__(self, contig, cutout, score=None, cigar=None, strand=1,
def interval(self):
return self.cutout.interval

@property
def ikmers(self):
for kmer in self.contig.ikmers:
yield kmer.sequence
yield kevlar.revcom(kmer.sequence)

@property
def varseq(self):
assert self.strand in (-1, 1)
Expand Down Expand Up @@ -111,22 +117,40 @@ def rightmatchlen(self):
return None
return int(self.alnmatch.group(6))

def is_passenger(self, call):
if call.window is None:
return False
numikmers = sum([1 for k in self.ikmers if k in call.window])
return numikmers == 0

def call_variants(self, ksize, mindist=5, logstream=sys.stderr):
"""Attempt to call variants from this contig alignment.
If the alignment CIGAR matches a known pattern, the appropriate caller
is invoked (SNV or INDEL caller). If not, a "no call" is reported.
If an SNV call is within `mindist` base pairs of the end of the
alignment it is ignored. Set to `None` to disable this behavior.
Variant calls with no spanning interesting k-mers are designated as
"passenger calls" and discarded.
"""
if self.vartype == 'snv':
for call in self.call_snv(ksize, mindist, logstream=logstream):
if self.is_passenger(call):
call.annotate('NC', 'passenger')
yield call
elif self.vartype == 'indel':
caller = self.call_insertion
if self.indeltype == 'D':
caller = self.call_deletion
for call in caller(ksize):
if self.is_passenger(call):
call.annotate('NC', 'passenger')
yield call
for call in self.call_indel_snvs(ksize, mindist, logstream):
if self.is_passenger(call):
call.annotate('NC', 'passenger')
yield call
else:
nocall = Variant(
Expand Down

0 comments on commit 81e2277

Please sign in to comment.