Skip to content

Commit

Permalink
Merge pull request #263 from dib-lab/fix/mate-labeling
Browse files Browse the repository at this point in the history
New strategy for using pairing info
  • Loading branch information
standage committed Jun 12, 2018
2 parents dc316ba + 82a2dfb commit 997e37a
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 15 deletions.
2 changes: 0 additions & 2 deletions kevlar/call.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,6 @@ def prelim_call(targetlist, querylist, match=1, mismatch=2, gapopen=5,
for varcall in alignment.call_variants(ksize, mindist, logstream):
if alignment.matedist:
varcall.annotate('MATEDIST', alignment.matedist)
if n > 0:
varcall.filter(vf.MateFail)
yield varcall


Expand Down
2 changes: 1 addition & 1 deletion kevlar/cli/dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def subparser(subparsers):
'abundance distribution to file `PNG`')
subparser.add_argument('--tsv', metavar='TSV', help='write k-mer '
'abundance distribution out to file formatted as '
'tab-separated values`')
'tab-separated values')
subparser.add_argument('--plot-xlim', metavar=('MIN', 'MAX'), type=int,
nargs=2, default=(0, 100), help='define the '
'minimum and maximum x values (k-mer abundance) '
Expand Down
36 changes: 26 additions & 10 deletions kevlar/simlike.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,16 +177,36 @@ def annotate_abundances(call, abundances, samplelabels):
call.format(sample, 'ALTABUND', abundstr)


def process_partition(partitionid, calls):
maxscore = max([c.attribute('LIKESCORE') for c in calls])
maxcalls = list()
for call in calls:
if call.attribute('LIKESCORE') == maxscore:
maxcalls.append(call)
else:
call.filter(kevlar.vcf.VariantFilter.PartitionScore)
for call in maxcalls:
call.annotate('CALLCLASS', partitionid)
if calls[0].attribute('MATEDIST') is not None:
matedists = set([c.attribute('MATEDIST') for c in calls])
if matedists == set([float('inf')]):
for call in calls:
call.filter(kevlar.vcf.VariantFilter.MateFail)


def simlike(variants, case, controls, refr, mu=30.0, sigma=8.0, epsilon=0.001,
casemin=5, samplelabels=None, logstream=sys.stderr):
calls_by_partition = defaultdict(list)
if samplelabels is None:
samplelabels = default_sample_labels(len(controls) + 1)
for call in variants:
if len(call.window) < case.ksize():
message = 'WARNING: stubbornly refusing to compute likelihood '
message += ' for variant-spanning window {:s}'.format(call.window)
message += ', shorter than k size {:s}'.format(case.ksize())
if call.window is None or len(call.window) < case.ksize():
message = 'WARNING: stubbornly refusing to compute likelihood for '
if call.window is None:
message += 'variant with no spanning window'
else:
message += 'variant-spanning window {:s}'.format(call.window)
message += ', shorter than k size {:d}'.format(case.ksize())
print(message, file=logstream)
call.annotate('LIKESCORE', float('-inf'))
calls_by_partition[call.attribute('PART')].append(call)
Expand All @@ -204,12 +224,8 @@ def simlike(variants, case, controls, refr, mu=30.0, sigma=8.0, epsilon=0.001,

allcalls = list()
for partition, calls in calls_by_partition.items():
scores = [c.attribute('LIKESCORE') for c in calls]
maxscore = max(scores)
for call, score in zip(calls, scores):
if score < maxscore:
call.filter(kevlar.vcf.VariantFilter.PartitionScore)
allcalls.append(call)
process_partition(partition, calls)
allcalls.extend(calls)

allcalls.sort(key=lambda c: c.attribute('LIKESCORE'), reverse=True)
for call in allcalls:
Expand Down
4 changes: 2 additions & 2 deletions kevlar/tests/test_alac.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,8 @@ def test_alac_matedist():
calls = list(caller)
assert len(calls) == 3
passed = [c for c in calls if c.filterstr == 'PASS']
assert len(passed) == 1
assert passed[0].position == 127541 - 1
assert len(passed) == 3
assert sorted([c.position for c in passed]) == [1475, 115377, 127540]


def test_alac_nomates():
Expand Down

0 comments on commit 997e37a

Please sign in to comment.