diff --git a/HISTORY.md b/HISTORY.md index 7e86bc925..6c206ccf0 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,8 @@ ## 1.1.1 (in progress) +- tumor-only prioritization: do not apply LowPriority filter by default, instead + annotate with external databases. Use `tumoronly_germline_filter` to re-enable + previous behavior. - gemini: databases no longer created by default. Use `tools_on: [gemini]` or `tools_on: [gemini_orig]` to create a database. - vcfanno: run gemini and somatic annotations by default, producing annotated diff --git a/bcbio/variation/prioritize.py b/bcbio/variation/prioritize.py index 5e6fcc07e..fa46074d2 100644 --- a/bcbio/variation/prioritize.py +++ b/bcbio/variation/prioritize.py @@ -15,11 +15,11 @@ import csv import re -from bcbio import install, utils +from bcbio import utils from bcbio.distributed.transaction import file_transaction from bcbio.pipeline import datadict as dd from bcbio.provenance import do -from bcbio.variation import population, vcfanno, vcfutils +from bcbio.variation import population, vcfutils geneimpacts = utils.LazyImport("geneimpacts") cyvcf2 = utils.LazyImport("cyvcf2") @@ -33,7 +33,7 @@ def handle_vcf_calls(vcf_file, data, orig_items): ann_vcf = population.run_vcfanno(vcf_file, data) if ann_vcf: priority_file = _prep_priority_filter_vcfanno(ann_vcf, data) - return _apply_priority_filter(vcf_file, priority_file, data) + return _apply_priority_filter(ann_vcf, priority_file, data) # No data available for filtering, return original file else: return vcf_file @@ -45,14 +45,19 @@ def _apply_priority_filter(in_file, priority_file, data): if not utils.file_exists(out_file): with file_transaction(data, out_file) as tx_out_file: header = ('##INFO=') + 'Description="Somatic prioritization based on external annotations, ' + 'identify as likely germline">') header_file = "%s-repeatheader.txt" % utils.splitext_plus(tx_out_file)[0] with open(header_file, "w") as out_handle: out_handle.write(header) + if "tumoronly_germline_filter" in dd.get_tools_on(data): + filter_cmd = ("bcftools filter -m '+' -s 'LowPriority' " + """-e "EPR[*] != 'pass'" |""") + else: + filter_cmd = "" cmd = ("bcftools annotate -a {priority_file} -h {header_file} " "-c CHROM,FROM,TO,REF,ALT,INFO/EPR {in_file} | " - "bcftools filter -m '+' -s 'LowPriority' " - """-e "EPR[*] != 'pass'" | bgzip -c > {tx_out_file}""") + "{filter_cmd} bgzip -c > {tx_out_file}") do.run(cmd.format(**locals()), "Run external annotation based prioritization filtering") vcfutils.bgzip_and_index(out_file, data["config"]) return out_file @@ -145,12 +150,10 @@ def _calc_priority_filter(row, pops): - Pass high/medium impact variants not found in population databases - Pass variants found in COSMIC or Clinvar provided they don't have two - additional reasons to filter (low severity or found in multiple external populations) + additional reasons to filter (found in multiple external populations) """ filters = [] passes = [] - if row.get("impact_severity") in ["LOW"]: - filters.append("lowseverity") passes.extend(_find_known(row)) filters.extend(_known_populations(row, pops)) if len(filters) == 0 or (len(passes) > 0 and len(filters) < 2): diff --git a/docs/contents/configuration.rst b/docs/contents/configuration.rst index 3cd475ba1..1a269ed96 100644 --- a/docs/contents/configuration.rst +++ b/docs/contents/configuration.rst @@ -1083,6 +1083,10 @@ lists with multiple options: - ``qualimap_full`` runs Qualimap with full bam files but it may be slow. - ``damage_filter`` annotates low frequency somatic calls in INFO/DKFZBias for DNA damage artifacts using `DKFZBiasFilter `_. + - ``tumoronly_germline_filter`` applies a ``LowPriority`` filter to tumor-only calls + that match population germline databases. The default is to just apply a tag + ``EPR`` (external prioritization) that flags variants present in external + databases. Anything missing a ``pass`` here is a likely germline. - ``vqsr`` makes GATK try quality score recalibration for variant filtration, even for smaller sample sizes. - ``svplots`` adds additional coverage and summary plots for CNVkit and detected