Skip to content

Commit

Permalink
Merge pull request #250 from dib-lab/all-seed-matches
Browse files (browse the repository at this point in the history)
Reporting all seed matches
  • Loading branch information
standage committed Apr 20, 2018
2 parents 848ca20 + a35adc1 commit c01e378
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 16 deletions.
4 changes: 3 additions & 1 deletion kevlar/localize.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ def get_exact_matches(contigstream, bwaindexfile, seedsize=31):
(seqid, startpos) for each exact match found.
"""
kmers = unique_seed_string(contigstream, seedsize)
cmd = 'bwa mem -k {k} -T {k} {idx} -'.format(k=seedsize, idx=bwaindexfile)
cmd = 'bwa mem -k {k} -T {k} -a -c 5000 {idx} -'.format(
k=seedsize, idx=bwaindexfile
)
cmdargs = cmd.split(' ')
for seqid, pos in bwa_align(cmdargs, seqstring=kmers):
yield seqid, pos
Expand Down
25 changes: 12 additions & 13 deletions kevlar/tests/test_alac.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,11 @@ def test_alac_bigpart():
assert len(calls) == 3


@pytest.mark.parametrize('cc,numrawcalls', [
('26849', [3, 4, 5, 7]), # Assembly deterministic on OS X, but not Linux
('138713', [14]),
@pytest.mark.parametrize('cc,numrawcalls,numfiltcalls', [
('26849', 4, 2),
('138713', 14, 1),
])
def test_alac_inf_mate_dist(cc, numrawcalls):
def test_alac_inf_mate_dist(cc, numrawcalls, numfiltcalls):
readfile = data_file('inf-mate-dist/cc{}.augfastq.gz'.format(cc))
refrfile = data_file('inf-mate-dist/cc{}.genome.fa.gz'.format(cc))
readstream = kevlar.parse_augmented_fastx(kevlar.open(readfile, 'r'))
Expand All @@ -160,10 +160,9 @@ def test_alac_inf_mate_dist(cc, numrawcalls):
seedsize=51, fallback=True)
calls = list(caller)
print(*[c.vcf for c in calls], sep='\n', file=sys.stderr)
assert len(calls) in numrawcalls
assert len(calls) == numrawcalls
filtcalls = [c for c in calls if c.filterstr == 'PASS']
print(*[c.vcf for c in filtcalls], sep='\n', file=sys.stderr)
assert len(filtcalls) == 1
assert len(filtcalls) == numfiltcalls


def test_alac_no_mates():
Expand All @@ -175,16 +174,16 @@ def test_alac_no_mates():
seedsize=51, fallback=True)
calls = list(caller)
print(*[c.vcf for c in calls], sep='\n', file=sys.stderr)
assert len(calls) in [3, 4, 5, 7]
assert len(calls) == 4
filtcalls = [c for c in calls if c.filterstr == 'PASS']
assert len(filtcalls) == 2
assert len(filtcalls) == 3


@pytest.mark.parametrize('vcfposition,X,cigar', [
(40692, 10000, '30595D96M6I91M15137D'),
(40692, 1000, '37D96M6I91M44D'),
(40692, 0, '30595D96M6I91M132906D'),
(40692, None, '37D96M6I91M44D'),
(40692, 10000, '32713D96M6I91M15142D'),
(40692, 1000, '50D96M6I91M50D'),
(40692, 0, '32713D96M6I91M140025D'),
(40692, None, '50D96M6I91M50D'),
])
def test_alac_maxdiff(vcfposition, X, cigar):
pstream = kevlar.parse_partitioned_reads(
Expand Down
4 changes: 2 additions & 2 deletions kevlar/tests/test_localize.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,9 @@ def test_extract_regions_boundaries():
@pytest.mark.parametrize('X,numtargets', [
(100000, 1),
(10000, 5),
(1000, 23),
(1000, 33),
(0, 1),
(None, 23),
(None, 33),
])
def test_maxdiff(X, numtargets):
contigstream = kevlar.parse_augmented_fastx(
Expand Down

0 comments on commit c01e378

Please sign in to comment.