Skip to content

Commit

Permalink
Adding variant_calling step part to compute BAF (#209)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Sep 15, 2022
1 parent 3fdf28f commit 8f97a80
Show file tree
Hide file tree
Showing 5 changed files with 210 additions and 0 deletions.
19 changes: 19 additions & 0 deletions snappy_pipeline/workflows/variant_calling/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,22 @@ rule variant_calling_jannovar_statistics_report:
wf.get_log_file("jannovar_statistics", "run"),
wrapper:
wf.wrapper_path("jannovar/statistics")


# Generate B allele fraction files --------------------------------------------


rule variant_calling_baf_file_generation:
input:
**wf.get_input_files("baf_file_generation", "run"),
output:
**wf.get_output_files("baf_file_generation", "run"),
threads: wf.get_resource("baf_file_generation", "run", "threads")
resources:
time=wf.get_resource("baf_file_generation", "run", "time"),
memory=wf.get_resource("baf_file_generation", "run", "memory"),
partition=wf.get_resource("baf_file_generation", "run", "partition"),
log:
wf.get_log_file("baf_file_generation", "run"),
wrapper:
wf.wrapper_path("baf_file_generation")
120 changes: 120 additions & 0 deletions snappy_pipeline/workflows/variant_calling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,9 @@
DEFAULT_CONFIG = r"""
# Default configuration variant_calling
step_config:
baf_file_generation:
min_ad: 10
enabled: false
variant_calling:
path_ngs_mapping: ../ngs_mapping # REQUIRED
tools: ['gatk_ug']
Expand Down Expand Up @@ -1022,6 +1025,76 @@ def get_resource_usage(self, action):
)


class BafFileGenerationStepPart(BaseStepPart):
"""Base class for computing B allele fraction files.
One file is generated per sample in the output VCF files.
"""

#: Step name
name = "baf_file_generation"

#: Class available actions
actions = ("run",)

def __init__(self, parent):
super().__init__(parent)
self.base_path_out = (
"work/{mapper}.{var_caller}.{index_ngs_library}/report/baf/"
"{mapper}.{var_caller}.{index_ngs_library}.{donor_ngs_library}"
)
# Build shortcut from index library name to pedigree
self.index_ngs_library_to_pedigree = OrderedDict()
for sheet in self.parent.shortcut_sheets:
self.index_ngs_library_to_pedigree.update(sheet.index_ngs_library_to_pedigree)

@dictify
def get_input_files(self, action):
"""Return path to input files"""
# Validate action
self._validate_action(action)
# Return path to input VCF file
yield "vcf", (
"work/{mapper}.{var_caller}.{index_ngs_library}/out/"
"{mapper}.{var_caller}.{index_ngs_library}.vcf.gz"
)

@dictify
def get_output_files(self, action):
"""Return output files that all germline variant calling sub steps must return (VCF +
TBI file)
"""
# Validate action
self._validate_action(action)
ext_names = {"bw": ".bw", "bw_md5": ".bw.md5"}
for key, ext in ext_names.items():
yield key, self.base_path_out + ext

def get_log_file(self, action):
# Validate action
self._validate_action(action)
return (
"work/{mapper}.{var_caller}.{index_ngs_library}/log/baf/"
"{mapper}.{var_caller}.{index_ngs_library}.{donor_ngs_library}.log"
)

def get_resource_usage(self, action):
"""Get Resource Usage
:param action: Action (i.e., step) in the workflow, example: 'run'.
:type action: str
:return: Returns ResourceUsage for step.
"""
# Validate action
self._validate_action(action)
return ResourceUsage(
threads=1,
time="02:00:00",
memory="1024M",
)


class VariantCallingWorkflow(BaseStep):
"""Perform germline variant calling"""

Expand Down Expand Up @@ -1055,6 +1128,7 @@ def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir)
VarscanStepPart,
BcftoolsStatsStepPart,
JannovarStatisticsStepPart,
BafFileGenerationStepPart,
LinkOutStepPart,
)
)
Expand Down Expand Up @@ -1101,6 +1175,8 @@ def get_result_files(self):
)
# Yield report files
yield from self._yield_bcftools_report_files()
if self.w_config["step_config"]["baf_file_generation"]["enabled"]:
yield from self._yield_baf_file_generation_files()
yield from self._yield_jannovar_report_files()

def _yield_result_files(self, tpl, **kwargs):
Expand Down Expand Up @@ -1171,6 +1247,50 @@ def _yield_bcftools_report_files(self):
ext=["txt", "txt.md5"],
)

def _yield_baf_file_generation_files(self):
name_pattern = "{mapper}.{caller}.{index_library.name}"
tpl = (
"output/"
+ name_pattern
+ "/report/baf/"
+ name_pattern
+ ".{donor_library.name}.baf.{ext}"
)
for sheet in filter(is_not_background, self.shortcut_sheets):
for caller in self.config["tools"]:
if self.config[caller].get("pedigree_wise", True):
for pedigree in sheet.cohort.pedigrees:
if not pedigree.index:
msg = "INFO: pedigree without index (names: {})" # pragma: no cover
print(
msg.format( # pragma: no cover
list(sorted(d.name for d in pedigree.donors))
),
file=sys.stderr,
)
continue # pragma: no cover
elif not pedigree.index.dna_ngs_library: # pragma: no cover
msg = "INFO: pedigree index DNA NGS library (names: {})"
print(
msg.format( # pragma: no cover
list(sorted(d.name for d in pedigree.donors))
),
file=sys.stderr,
)
continue # pragma: no cover
for donor in pedigree.donors:
if donor.dna_ngs_library:
yield from expand(
tpl,
mapper=self.w_config["step_config"]["ngs_mapping"]["tools"][
"dna"
],
caller=[caller],
index_library=[pedigree.index.dna_ngs_library],
donor_library=[donor.dna_ngs_library],
ext=["bw", "bw.md5"],
)

def _yield_jannovar_report_files(self):
name_pattern = "{mapper}.{caller}.{index_library.name}"
tpl = "output/" + name_pattern + "/report/jannovar_statistics/" + name_pattern + ".{ext}"
Expand Down
7 changes: 7 additions & 0 deletions snappy_wrappers/wrappers/baf_file_generation/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
channels:
- conda-forge
- bioconda
dependencies:
- bcftools ==1.15.1
- htslib ==1.15.1
- ucsc-wigtobigwig
43 changes: 43 additions & 0 deletions snappy_wrappers/wrappers/baf_file_generation/wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-

from snakemake.shell import shell

shell(
r"""
# -----------------------------------------------------------------------------
# Redirect stderr to log file by default and enable printing executed commands
exec 2> >(tee -a "{snakemake.log.log}")
set -x
# -----------------------------------------------------------------------------
export TMPDIR=$(mktemp -d)
trap "rm -rf $TMPDIR" ERR EXIT
bcftools query \
-s {snakemake.wildcards.donor_ngs_library} \
-f '%CHROM\t%POS[\t%DP\t%AD]\n' \
{snakemake.input.vcf} \
| awk -F $'\t' 'BEGIN {{ OFS=FS; prev=0; }}
{{ if (prev != $1) {{
printf("variableStep chrom=%s\n", $1, span);
}} else {{
split($4, a, ",");
dp = $3;
rd = a[1];
printf("%s\t%f\n", old2, (dp - rd) / dp);
}}
old2=$2;
old3=$3;
old4=$4;
prev=$1;
}}' \
> $TMPDIR/tmp.wig
cut -f 1-2 {snakemake.config[static_data_config][reference][path]}.fai \
> $TMPDIR/chrom.sizes
wigToBigWig $TMPDIR/tmp.wig $TMPDIR/chrom.sizes {snakemake.output.bw}
pushd $(dirname {snakemake.output.bw})
md5sum $(basename {snakemake.output.bw}) >$(basename {snakemake.output.bw_md5})
"""
)
21 changes: 21 additions & 0 deletions tests/snappy_pipeline/workflows/test_workflows_variant_calling.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ def minimal_config():
path: /path/to/dbsnp.vcf.gz
step_config:
baf_file_generation:
enabled: true
ngs_mapping:
tools:
dna: ['bwa']
Expand Down Expand Up @@ -701,6 +703,7 @@ def test_variant_calling_workflow(variant_calling_workflow):
"""Tests simple functionality of the workflow."""
# Check created sub steps
expected = [
"baf_file_generation",
"bcftools",
"bcftools_stats",
"freebayes",
Expand Down Expand Up @@ -794,6 +797,24 @@ def test_variant_calling_workflow(variant_calling_workflow):
)
for ext in ("txt", "txt.md5")
]
tpl = (
"output/{mapper}.{var_caller}.P00{i}-N1-DNA1-WGS1/report/"
"baf/{mapper}.{var_caller}.P00{i}-N1-DNA1-WGS1.P00{t}-N1-DNA1-WGS1.baf.{ext}"
)
expected += [
tpl.format(mapper=mapper, var_caller=var_caller, i=i, t=t, ext=ext)
for i, t in ((1, 1), (1, 2), (1, 3), (4, 4), (4, 5), (4, 6))
for mapper in ("bwa",)
for var_caller in (
"bcftools",
"freebayes",
"gatk_hc",
"gatk_hc_gvcf",
"gatk_ug",
"platypus",
)
for ext in ("bw", "bw.md5")
]
tpl = "output/{mapper}.{var_caller}.whole_cohort/out/{mapper}.{var_caller}.whole_cohort.{ext}"
expected += [
tpl.format(mapper=mapper, var_caller=var_caller, ext=ext)
Expand Down

0 comments on commit 8f97a80

Please sign in to comment.