diff --git a/HISTORY.md b/HISTORY.md
index 04240203b..5cac36cd8 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,6 +1,7 @@
## 1.0.5 (in progress)
- GATK4: fix option usage for gVCF creation with HaplotypeCaller
+- Add `tools_off: [lumpy-genotype]` option to skip Lumpy genotyping.
- CWL/WDL: use single file tarballs for complex collections of files like
aligner, RTG and snpEff indices.
diff --git a/bcbio/structural/lumpy.py b/bcbio/structural/lumpy.py
index 7746ab153..605463203 100644
--- a/bcbio/structural/lumpy.py
+++ b/bcbio/structural/lumpy.py
@@ -148,6 +148,8 @@ def run(items):
data["config"])
if "bnd-genotype" in dd.get_tools_on(data):
gt_vcf = _run_svtyper(sample_vcf, dd.get_align_bam(data), exclude_file, data)
+ elif "lumpy-genotype" in dd.get_tools_off(data):
+ gt_vcf = sample_vcf
else:
std_vcf, bnd_vcf = _split_breakends(sample_vcf, data)
std_gt_vcf = _run_svtyper(std_vcf, dd.get_align_bam(data), exclude_file, data)
@@ -223,7 +225,10 @@ def run_svtyper_prioritize(call):
"""Run svtyper on prioritized outputs, adding in typing for breakends skipped earlier.
"""
def _run(in_file, work_dir, data):
- return _run_svtyper(in_file, dd.get_align_bam(data), call.get("exclude_file"), data)
+ if "lumpy-genotype" in dd.get_tools_off(data):
+ return in_file
+ else:
+ return _run_svtyper(in_file, dd.get_align_bam(data), call.get("exclude_file"), data)
return _run
def _run_svtyper(in_file, full_bam, exclude_file, data):
diff --git a/docs/contents/configuration.rst b/docs/contents/configuration.rst
index 75eab443e..b847b5704 100644
--- a/docs/contents/configuration.rst
+++ b/docs/contents/configuration.rst
@@ -892,42 +892,55 @@ lists with multiple options:
- ``tools_off`` Specify third party tools to skip as part of analysis
pipeline. Enables turning off specific components of pipelines if not
- needed. ``gemini`` avoids creation of a `GEMINI database`_ of variants for downstream
- query during variant calling pipelines. Also skips vcfanno annotation unless
- turned on explicitly with ``vcfanno`` in :ref:`config-variant-annotation`.
- ``vardict_somatic_filter`` disables
- running a post calling filter for VarDict to remove variants found in normal
- samples. Without ``vardict_somatic_filter`` in paired analyses no soft
- filtering of germline variants is performed but all high quality variants pass.
- ``bwa-mem`` forces use of original ``bwa aln`` alignment. Without this,
- we use bwa mem with 70bp or longer reads. ``fastqc`` turns off quality
- control FastQC usage. ``pbgzip`` turns off use of bgzip with multiple threads.
- ``seqcluster`` turns off use of
- seqcluster tool in srnaseq pipeline. ``tumoronly-prioritization`` turns off
- attempted removal of germline variants from tumor only calls using external
- population data sources like ExAC and 1000 genomes.
- ``vqsr`` turns off variant quality score recalibration for all samples.
- ``upload_alignment`` turns off final upload of large alignment files.
-- ``tools_on`` Specify functionality to enable that is off by default.
- ``svplots`` adds additional coverage and summary plots for CNVkit and detected
- ensemble variants. ``qualimap`` runs `Qualimap
- `_ (qualimap uses downsampled files and
- numbers here are an estimation of 1e7 reads.). ``qualimap_full`` uses the full
- bam files but it may be slow. ``bwa-mem`` forces use of bwa mem even for
- samples with less than 70bp reads. ``bnd-genotype`` enables genotyping
- of breakends in Lumpy calls, which improves accuracy but can be slow. ``gvcf``
- forces gVCF output for callers that support it (GATK HaplotypeCaller,
- FreeBayes, Platypus). ``vqsr`` makes GATK try quality score recalibration
- for variant filtration, even for smaller sample sizes.
- ``vep_splicesite_annotations`` enables the use of the MaxEntScan and SpliceRegion plugin for VEP.
- Both optional plugins add extra splice site annotations.
- ``gemini_allvariants`` enables all variants to go into GEMINI, not only those
- that pass filters. ``vcf2db_expand`` decompresses and expands the genotype columns in
- the vcfanno prepared GEMINI databases, enabling standard SQL queries on
- genotypes and depths. ``damage_filter`` annotates low frequency somatic calls
- in INFO/DKFZBias for DNA damage artifacts using
- `DKFZBiasFilter `_.
- ``lumpy_usecnv`` uses input calls from CNVkit as prior evidence to Lumpy calling.
+ needed:
+
+ - ``gemini`` avoids creation of a `GEMINI database`_ of variants for
+ downstream query during variant calling pipelines. Also skips vcfanno
+ annotation unless turned on explicitly with ``vcfanno`` in
+ :ref:`config-variant-annotation`.
+ - ``vqsr`` turns off variant quality score recalibration for all samples.
+ - ``bwa-mem`` forces use of original ``bwa aln`` alignment. Without this, we
+ use bwa mem with 70bp or longer reads.
+ - ``fastqc`` turns off quality control FastQC usage.
+ - ``lumpy-genotype`` skips genotyping for Lumpy samples, which can be slow in
+ the case of many structural variants.
+ - ``seqcluster`` turns off use of seqcluster tool in srnaseq pipeline.
+ - ``tumoronly-prioritization`` turns off attempted removal of germline
+ variants from tumor only calls using external population data sources like
+ ExAC and 1000 genomes.
+ - ``vardict_somatic_filter`` disables running a post calling filter for
+ VarDict to remove variants found in normal samples. Without
+ ``vardict_somatic_filter`` in paired analyses no soft filtering of germline
+ variants is performed but all high quality variants pass.
+ - ``upload_alignment`` turns off final upload of large alignment files.
+ - ``pbgzip`` turns off use of bgzip with multiple threads.
+
+- ``tools_on`` Specify functionality to enable that is off by default:
+
+ - ``qualimap`` runs `Qualimap <http://qualimap.bioinfo.cipf.es/>`_ (qualimap
+ uses downsampled files, so numbers here are an estimation from 1e7 reads).
+ - ``qualimap_full`` runs Qualimap with full bam files but it may be slow.
+ - ``damage_filter`` annotates low frequency somatic calls in INFO/DKFZBias for
+ DNA damage artifacts using `DKFZBiasFilter <https://github.com/eilslabs/DKFZBiasFilter>`_.
+ - ``vqsr`` makes GATK try quality score recalibration for variant filtration,
+ even for smaller sample sizes.
+ - ``svplots`` adds additional coverage and summary plots for CNVkit and detected
+ ensemble variants.
+ - ``bwa-mem`` forces use of bwa mem even for samples with reads shorter
+ than 70bp.
+ - ``gvcf`` forces gVCF output for callers that support it (GATK
+ HaplotypeCaller, FreeBayes, Platypus).
+ - ``vep_splicesite_annotations`` enables the use of the MaxEntScan and
+ SpliceRegion plugin for VEP. Both optional plugins add extra splice site
+ annotations.
+ - ``gemini_allvariants`` enables all variants to go into GEMINI, not only
+ those that pass filters.
+ - ``vcf2db_expand`` decompresses and expands the genotype columns in the
+ vcfanno prepared GEMINI databases, enabling standard SQL queries on
+ genotypes and depths.
+ - ``bnd-genotype`` enables genotyping of breakends in Lumpy calls, which
+ improves accuracy but can be slow.
+ - ``lumpy_usecnv`` uses input calls from CNVkit as prior evidence for Lumpy calling.
.. _GEMINI database: https://github.com/arq5x/gemini