Skip to content

Commit

Permalink
Merge pull request #225 from dib-lab/fix/ikmer-count
Browse files Browse the repository at this point in the history
Fix number of interesting k-mers reported for each call
  • Loading branch information
standage committed Mar 22, 2018
2 parents e562d88 + 5140bb5 commit 3a263ac
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 3 deletions.
19 changes: 16 additions & 3 deletions kevlar/call.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,16 @@ def __str__(self):
return '{:s}:{:d}:I->{:s}'.format(self._seqid, pos, insertion)


def n_ikmers_present(ikmers, window):
n = 0
for ikmer in ikmers:
if ikmer.sequence in window:
n += 1
elif kevlar.revcom(ikmer.sequence) in window:
n += 1
return n


def call_snv(aln, offset, length, ksize, mindist, logstream=sys.stderr):
targetshort = False
if offset < 0:
Expand Down Expand Up @@ -267,8 +277,9 @@ def call_snv(aln, offset, length, ksize, mindist, logstream=sys.stderr):
if not targetshort:
localcoord += offset
globalcoord = aln.cutout.local_to_global(localcoord)
nikmers = n_ikmers_present(aln.contig.ikmers, window)
snv = VariantSNV(aln.seqid, globalcoord, refr, alt, VW=window,
RW=refrwindow, IK=str(len(aln.contig.ikmers)))
RW=refrwindow, IK=str(nikmers))
snvs.append(snv)
return snvs

Expand Down Expand Up @@ -317,8 +328,9 @@ def call_deletion(aln, offset, ksize, leftmatch, indellength):
if not targetshort:
localcoord += offset
globalcoord = aln.cutout.local_to_global(localcoord)
nikmers = n_ikmers_present(aln.contig.ikmers, altwindow)
return [VariantIndel(aln.seqid, globalcoord - 1, refr, alt, VW=altwindow,
RW=refrwindow, IK=str(len(aln.contig.ikmers)))]
RW=refrwindow, IK=str(nikmers))]


def call_insertion(aln, offset, ksize, leftmatch, indellength):
Expand All @@ -339,8 +351,9 @@ def call_insertion(aln, offset, ksize, leftmatch, indellength):
if not targetshort:
localcoord += offset
globalcoord = aln.cutout.local_to_global(localcoord)
nikmers = n_ikmers_present(aln.contig.ikmers, altwindow)
return [VariantIndel(aln.seqid, globalcoord - 1, refr, alt, VW=altwindow,
RW=refrwindow, IK=str(len(aln.contig.ikmers)))]
RW=refrwindow, IK=str(nikmers))]


def align_mates(record, refrfile):
Expand Down
8 changes: 8 additions & 0 deletions kevlar/tests/data/iktest.contig.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
>contig1 *
GCTTACGCGCAAGCGCCCCCATTTCATTAGAGCAGGGTTAGTAAGTTCTGGTATAAATAATCAACGATGCCGAGTTT
TTACGCGCAAGCGCCCCCATTTCATTAGA 24 0 0#
TACGCGCAAGCGCCCCCATTTCATTAGAG 24 0 0#
ACGCGCAAGCGCCCCCATTTCATTAGAGC 20 0 0#
CGCGCAAGCGCCCCCATTTCATTAGAGCA 26 0 0#

GTTAGTAAGTTCTGGTATAAATAATCAAC 6 1 0#
2 changes: 2 additions & 0 deletions kevlar/tests/data/iktest.gdna.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>scaf1_3000-3117
TTCGTTGACGCGAGCGCCTTGCTTACGCGCAAGCGCCCCCATTTCATTAGAGCAGGGTGAGTAAGTTCTGGTATAAATAATCAACGATGCCGAGTTTCGTCCGTTATCGCGACGGAT
17 changes: 17 additions & 0 deletions kevlar/tests/test_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,3 +442,20 @@ def test_call_truncated_windows(query, target, vw, rw):
print('RW:', calls[0].refrwindow, file=sys.stderr)
assert calls[0].window == vw
assert calls[0].refrwindow == rw


def test_call_num_interesting_kmers():
contig = next(
kevlar.parse_augmented_fastx(
kevlar.open(data_file('iktest.contig.fa'), 'r')
)
)
cutout = next(
kevlar.reference.load_refr_cutouts(
kevlar.open(data_file('iktest.gdna.fa'), 'r')
)
)
aln = kevlar.call.align_both_strands(cutout, contig)
calls = list(aln.call_variants(29))
assert len(calls) == 1
assert calls[0].info['IK'] == '1'

0 comments on commit 3a263ac

Please sign in to comment.