Skip to content

Commit

Permalink
WIP: unit tests for ParallelBaseWrapper ...
Browse files Browse the repository at this point in the history
  • Loading branch information
eudesbarbosa committed Feb 23, 2022
1 parent c199440 commit 07ba87d
Show file tree
Hide file tree
Showing 2 changed files with 193 additions and 1 deletion.
42 changes: 42 additions & 0 deletions tests/snappy_wrappers/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from collections import namedtuple
from unittest.mock import MagicMock

from pyfakefs import fake_filesystem
import pytest


@pytest.fixture
def fai_file_content():
    """Return FAI index file content for hs37d5, restricted to chromosome 1.

    Columns: name, length, offset, line bases, line width.
    """
    columns = ("1", "249250621", "52", "60", "61")
    return "\t".join(columns)


@pytest.fixture
def fake_fs():
    """Return a ``namedtuple`` bundling fake file-system objects.

    The bundle carries the fake file system itself, matching ``os`` and
    ``open`` replacements, and a mocked inter-process lock.
    """
    bundle_class = namedtuple("FakeFsBundle", "fs os open inter_process_lock")
    file_system = fake_filesystem.FakeFilesystem()
    return bundle_class(
        fs=file_system,
        os=fake_filesystem.FakeOsModule(file_system),
        open=fake_filesystem.FakeFileOpen(file_system),
        inter_process_lock=MagicMock(),
    )


@pytest.fixture
def somatic_variant_fake_fs(fake_fs, fai_file_content):
    """Return fake file system setup with files for the somatic variant calling workflow."""
    fs = fake_fs.fs
    # Work directory used by the wrappers.
    fs.makedirs("/work", exist_ok=True)
    # Reference FASTA and its FAI index (chromosome 1 of hs37d5 only).
    fs.create_file("/path/to/ref.fa", create_missing_dirs=True)
    fs.create_file("/path/to/ref.fa.fai", contents=fai_file_content)
    return fake_fs


def patch_module_fs(module_name, fake_fs, mocker):
    """Mock out file-system access in the module with the given name.

    Replaces the module's ``os`` module and its ``open`` built-in with the
    fake equivalents from ``fake_fs`` so tests never touch the real file
    system.

    :param module_name: Dotted name of the module to patch.
    :param fake_fs: Bundle with ``os`` and ``open`` fakes (see ``fake_fs``
        fixture).
    :param mocker: pytest-mock ``MockerFixture`` used to install the patches.
    """
    mocker.patch("{}.os".format(module_name), fake_fs.os)
    # ``create=True`` is required: modules resolve ``open`` from builtins, so
    # the attribute does not normally exist on the module object itself.
    mocker.patch("{}.open".format(module_name), fake_fs.open, create=True)
    # NOTE: the original patched ``{}.os`` twice; the redundant second patch
    # of the same target was removed.
152 changes: 151 additions & 1 deletion tests/snappy_wrappers/test_wrapper_parallel.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
import textwrap

import pytest
import ruamel.yaml as yaml
from snakemake.io import InputFiles, Log, OutputFiles, Params, Resources, Wildcards
from snakemake.script import Snakemake

from snappy_wrappers.genome_regions import GenomeRegion
from snappy_wrappers.wrapper_parallel import (
ParallelBaseWrapper,
ParallelSomaticVariantCallingBaseWrapper,
days,
gib,
hours,
Expand All @@ -12,13 +17,90 @@
minutes,
)

from .conftest import patch_module_fs


class FakeParallelMutect2Wrapper(ParallelSomaticVariantCallingBaseWrapper):
    """Fake parallel Mutect2 wrapper.

    Concrete stand-in used, among other things, to exercise the behavior of
    ``ParallelBaseWrapper`` in the tests below.
    """

    # Path to the inner (per-region) wrapper to run.
    inner_wrapper = "mutect2/run"
    # Name of the pipeline step this wrapper belongs to.
    step_name = "somatic_variant_calling"
    # Name of the wrapped tool.
    tool_name = "mutect2"


@pytest.fixture(scope="module")  # otherwise: performance issues
def minimal_config():
    """Return YAML parsing result for (germline) configuration."""
    # Keep the YAML text in a local first so the parse call stays readable.
    config_yaml = textwrap.dedent(
        r"""
        static_data_config:
          reference:
            path: /path/to/ref.fa
          cosmic:
            path: /path/to/cosmic.vcf.gz
          dbsnp:
            path: /path/to/dbsnp.vcf.gz
        step_config:
          ngs_mapping:
            tools:
              dna: ['bwa']
            compute_coverage_bed: true
            path_target_regions: /path/to/regions.bed
            bwa:
              path_index: /path/to/bwa/index.fa
          somatic_variant_calling:
            tools:
            - mutect2
            mutect2:
              panel_of_normals: '' # Set path to panel of normals vcf if required
              germline_resource: REQUIRED # Germline variants resource (same as panel of normals)
              common_variants: REQUIRED # Common germline variants for contamination estimation
              # Parallelization configuration
              drmaa_snippet: '' # value to pass in as additional DRMAA arguments
              num_cores: 2 # number of cores to use locally
              window_length: 50000000 # split input into windows of this size, each triggers a job
              num_jobs: 500 # number of windows to process in parallel
              use_drmaa: true # use DRMAA for parallel processing
              restart_times: 5 # number of times to re-launch jobs in case of failure
              max_jobs_per_second: 2 # throttling of job creation
              max_status_checks_per_second: 10 # throttling of status checks
              debug_trunc_tokens: 0 # truncation to first N tokens (0 for none)
              keep_tmpdir: never # keep temporary directory, {always, never, onerror}
              job_mult_memory: 1 # memory multiplier
              job_mult_time: 1 # running time multiplier
              merge_mult_memory: 1 # memory multiplier for merging
              merge_mult_time: 1 # running time multiplier for merging
              ignore_chroms: # patterns of chromosome names to ignore
              - NC_007605 # herpes virus
              - hs37d5 # GRCh37 decoy
              - chrEBV # Eppstein-Barr Virus
              - '*_decoy' # decoy contig
              - 'HLA-*' # HLA genes
              - 'GL000220.*' # Contig with problematic, repetitive DNA in GRCh37
        data_sets:
          first_batch:
            file: sheet.tsv
            search_patterns:
            - {'left': '*/*/*_R1.fastq.gz', 'right': '*/*/*_R2.fastq.gz'}
            search_paths: ['/path']
            type: matched_cancer
            naming_scheme: only_secondary_id
        """
    ).lstrip()
    return yaml.round_trip_load(config_yaml)


@pytest.fixture
def snakemake_obj():
def snakemake_obj(minimal_config):
"""Returns Snakemake object."""
# Define helper variables
rule_name = "somatic_variant_calling_mutect2_run"
threads = 2
bench_iteration = 2
scriptdir = "/work"
input_dict = {
"tumor_bai": "NGS_MAPPING/output/bwa.P001-T1-DNA1-WGS1/out/bwa.P001-T1-DNA1-WGS1.bam.bai",
Expand Down Expand Up @@ -55,19 +137,31 @@ def snakemake_obj():
params_ = Params(fromdict=params_dict)
log_ = Log(fromdict=log_dict)
wildcards_ = Wildcards(fromdict=wildcards_dict)
resources_ = Resources(fromdict={})

return Snakemake(
rulename=rule_name,
threads=threads,
bench_iteration=bench_iteration,
input_=input_,
output=output_,
log=log_,
params=params_,
wildcards=wildcards_,
config=minimal_config,
scriptdir=scriptdir,
resources=resources_,
)


@pytest.fixture
def fake_parallel_wrapper(snakemake_obj, somatic_variant_fake_fs, mocker):
    """Return ``FakeParallelMutect2Wrapper`` with the file system patched out."""
    # Route all file-system access of the module under test to the fake fs.
    module_under_test = "snappy_wrappers.wrapper_parallel"
    patch_module_fs(module_under_test, somatic_variant_fake_fs, mocker)
    return FakeParallelMutect2Wrapper(snakemake=snakemake_obj)


# Test isolated methods ----------------------------------------------------------------------------


Expand Down Expand Up @@ -168,3 +262,59 @@ def test_days():


# Test ParallelBaseWrapper -----------------------------------------------------------------------


def test_parallel_base_wrapper_get_fai_path(fake_parallel_wrapper):
"""Tests ParallelBaseWrapper.get_fai_path()"""
# Define expected - defined in `minimal_config`: static_data_config/reference/path
expected = "/path/to/ref.fa.fai"
# Get actual and assert
actual = fake_parallel_wrapper.get_fai_path()
assert actual == expected


def test_parallel_base_wrapper_get_all_log_files(fake_parallel_wrapper):
"""Tests ParallelBaseWrapper.get_all_log_files()"""
# Define expected - defined in snakemake.log
base_path = "/work/bwa.mutect2.P001-T1-DNA1-WGS1/log/bwa.mutect2.P001-T1-DNA1-WGS1"
expected = {
"conda_info": base_path + ".conda_info.txt",
"conda_info_md5": base_path + ".conda_info.txt.md5",
"conda_list": base_path + ".conda_list.txt",
"conda_list_md5": base_path + ".conda_list.txt.md5",
}
# Get actual and assert
actual = fake_parallel_wrapper.get_all_log_files()
assert actual == expected


def test_parallel_base_wrapper_get_regions(fake_parallel_wrapper):
    """Tests ParallelBaseWrapper.get_regions()"""
    # Chromosome 1 (length from the FAI fixture) split into ~50 Mbp windows
    # with a 10 kbp overlap on each side, clipped at the chromosome ends.
    boundaries = [
        (0, 50010000),
        (49990000, 100010000),
        (99990000, 150010000),
        (149990000, 200010000),
        (199990000, 249250621),
    ]
    expected = [GenomeRegion(chrom="1", begin=begin, end=end) for begin, end in boundaries]
    # Get actual and assert
    actual = fake_parallel_wrapper.get_regions()
    assert actual == expected


# def test_parallel_somatic_variant_calling_construct_parallel_rules(
# snakemake_obj, somatic_variant_fake_fs, mocker
# ):
# """Tests ParallelSomaticVariantCallingBaseWrapper.construct_parallel_rules()"""
#
# # Patch out file-system
# patch_module_fs("snappy_wrappers.wrapper_parallel", somatic_variant_fake_fs, mocker)
# svc_parallel = FakeParallelMutect2Wrapper(snakemake=snakemake_obj)
# actual = list(svc_parallel.construct_parallel_rules())
# print(actual)
# assert False

0 comments on commit 07ba87d

Please sign in to comment.