Skip to content

Commit

Permalink
New --min-like-score filter for simlike module (#343)
Browse files Browse the repository at this point in the history
This update allows users to adjust the threshold used to filter out low-scoring variant predictions. Closes #341.
  • Loading branch information
standage committed Jan 22, 2019
1 parent e899720 commit 519b595
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 4 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ This project adheres to [Semantic Versioning](http://semver.org/).
- A new Snakemake workflow for preprocessing BAM inputs for analysis with kevlar (see #305).
- A new Snakemake workflow for kevlar's standard processing procedure (see #306).
- New `unband` module to merge augmented Fastq files produced with a *k*-mer banding strategy (see #316).
- New `varfilter` module to filter out preliminary variant calls overlapping with problematic/unwanted loci or features (see #318).
- New `varfilter` module to filter out preliminary variant calls overlapping with problematic/unwanted loci or features (see #318, #342).
- New dependency: `intervaltree` package (see #318).
- A new `sandbox` directory with convenience scripts for development and analysis (see #335).
- A new `--min-like-score` filter for the `simlike` module (see #343).

### Changed
- Added a new flag to print to the terminal (stderr) and a logfile simultaneously (see #308).
Expand Down
6 changes: 6 additions & 0 deletions kevlar/cli/simlike.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ def subparser(subparsers):
'low abundance means < --case-min); by default, L=5; set L<=0 to '
'disable the filter'
)
filt_args.add_argument(
'--min-like-score', metavar='S', type=float, default=0.0,
help='filter out variant predictions with likelihood scores < S; by '
'default, S = 0.0, but it\'s often possible to improve specificity '
'without sacrificing sensitivity by raising S to, for example, 50.0'
)

misc_args = subparser.add_argument_group('Miscellaneous settings')
misc_args.add_argument('-h', '--help', action='help',
Expand Down
7 changes: 4 additions & 3 deletions kevlar/simlike.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,8 @@ def check_ctrl_abund_high(call, ctrlabundlists, ctrlmax, ctrlabundhigh):

def simlike(variants, case, controls, refr, mu=30.0, sigma=8.0, epsilon=0.001,
dynamic=True, casemin=6, ctrlmax=1, caseabundlow=5,
ctrlabundhigh=4, samplelabels=None, fastmode=False):
ctrlabundhigh=4, samplelabels=None, fastmode=False,
minlikescore=0.0):
calls_by_partition = defaultdict(list)
if samplelabels is None:
samplelabels = default_sample_labels(len(controls) + 1)
Expand Down Expand Up @@ -299,7 +300,7 @@ def simlike(variants, case, controls, refr, mu=30.0, sigma=8.0, epsilon=0.001,

allcalls.sort(key=lambda c: c.attribute('LIKESCORE'), reverse=True)
for call in allcalls:
if call.attribute('LIKESCORE') < 0.0:
if call.attribute('LIKESCORE') < minlikescore:
call.filter(kevlar.vcf.VariantFilter.LikelihoodFail)
yield call

Expand Down Expand Up @@ -335,7 +336,7 @@ def main(args):
epsilon=args.epsilon, dynamic=args.dynamic, casemin=args.case_min,
ctrlmax=args.ctrl_max, caseabundlow=args.case_abund_low,
ctrlabundhigh=args.ctrl_abund_high, samplelabels=args.sample_labels,
fastmode=args.fast_mode,
fastmode=args.fast_mode, minlikescore=args.min_like_score,
)
for call in calculator:
writer.write(call)
27 changes: 27 additions & 0 deletions kevlar/tests/test_simlike.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,3 +303,30 @@ def test_simlike_case_low_abund(casemin, abund, numfilt, caselowsketches):
print([c.filterstr for c in calls])
filtered = [c for c in calls if 'CaseAbundance' in c.filterstr]
assert len(filtered) == numfilt


def test_simlike_min_like_score(ctrlhighsketches):
kid, mom, dad, refr = ctrlhighsketches
vcfin = kevlar.open(data_file('ctrl-high-abund/cc57120.calls.vcf'), 'r')
prelimcalls = kevlar.vcf.VCFReader(vcfin)
scorer = kevlar.simlike.simlike(
prelimcalls, kid, [mom, dad], refr, samplelabels=['Kid', 'Mom', 'Dad'],
ctrlabundhigh=0, caseabundlow=0, minlikescore=0.0,
)
calls = list(scorer)
passing = [c for c in calls if c.filterstr == 'PASS']
notpassing = [c for c in calls if c.filterstr != 'PASS']
assert len(passing) == 1
assert len(notpassing) == 1

vcfin = kevlar.open(data_file('ctrl-high-abund/cc57120.calls.vcf'), 'r')
prelimcalls = kevlar.vcf.VCFReader(vcfin)
scorer = kevlar.simlike.simlike(
prelimcalls, kid, [mom, dad], refr, samplelabels=['Kid', 'Mom', 'Dad'],
ctrlabundhigh=0, caseabundlow=0, minlikescore=400.0,
)
calls = list(scorer)
passing = [c for c in calls if c.filterstr == 'PASS']
notpassing = [c for c in calls if c.filterstr != 'PASS']
assert len(passing) == 0
assert len(notpassing) == 2

0 comments on commit 519b595

Please sign in to comment.