diff --git a/snappy_pipeline/workflows/targeted_seq_cnv_calling/__init__.py b/snappy_pipeline/workflows/targeted_seq_cnv_calling/__init__.py
index 24433c324..1fe0cf083 100644
--- a/snappy_pipeline/workflows/targeted_seq_cnv_calling/__init__.py
+++ b/snappy_pipeline/workflows/targeted_seq_cnv_calling/__init__.py
@@ -41,8 +41,7 @@
 - ``xhmm``
 
 """
-
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 import os
 import re
@@ -76,6 +75,10 @@
     # Set the max number of samples used in the CNV calling model.
     # If None is set, it will use all samples in the model. Recommended value: XXX
     model_max_size: null
+    # Set the offset for the max number of samples used in the CNV calling model.
+    # Example: if max is 100 and offset is 50, as the number of available samples grows, the max
+    # in the models will be: 100, 150, 200, 250, ...
+    model_max_size_offset: null
     # Path to the ngs_mapping step.
     path_ngs_mapping: ../ngs_mapping
@@ -597,16 +600,40 @@ def _get_input_files_annotate_gc(wildcards):
 
     @dictify
     def _get_input_files_filter_intervals(self, wildcards):
+        """Yield input files for rule ``targeted_seq_cnv_calling_gcnv_filter_intervals``.
+
+        Coverage files need to be generated for all samples in both CASE and COHORT mode.
+        Nevertheless, if the max number of samples per model is set, the number of coverage files
+        will be capped per library kit.
+
+        :param wildcards: Snakemake wildcards associated with rule.
+        :type wildcards: snakemake.io.Wildcards
+        """
+        # Initialise variables
+        library_counter_dict = defaultdict(lambda: 0)
+        max_model_size = self.get_adjusted_max_samples_in_model()
+
+        # Yield annotate gc files - from rule `targeted_seq_cnv_calling_gcnv_annotate_gc`
         yield from self._get_input_files_annotate_gc(wildcards).items()
         name_pattern = "gcnv_annotate_gc.{wildcards.library_kit}".format(wildcards=wildcards)
         ext = "tsv"
         yield ext, "work/{name_pattern}/out/{name_pattern}.{ext}".format(
             name_pattern=name_pattern, ext=ext
         )
+        # Yield coverage files - from rule `targeted_seq_cnv_calling_gcnv_coverage`
         key = "covs"
         covs = []
         for lib in sorted(self.index_ngs_library_to_donor):
             if self.ngs_library_to_kit.get(lib) == wildcards.library_kit:
+
+                # Update counter
+                library_counter_dict[wildcards.library_kit] += 1
+
+                # Skip library if the max number of samples in the model was reached
+                if max_model_size and library_counter_dict[wildcards.library_kit] > max_model_size:
+                    continue
+
+                # Prepare and append name patterns
                 name_pattern = "{mapper}.gcnv_coverage.{library_name}".format(
                     mapper=wildcards.mapper, library_name=lib
                 )
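The capping logic above boils down to counting libraries per kit and skipping the rest once the adjusted maximum is reached. A minimal standalone sketch of that behaviour outside Snakemake; `ngs_library_to_kit`, the cap value, and the paths are invented for illustration:

```python
from collections import defaultdict

# Invented sample data: six libraries on one kit, cap of four.
ngs_library_to_kit = {
    "P{:03d}-N1-DNA1-WGS1".format(i): "Agilent_SureSelect_Human_All_Exon_V6" for i in range(1, 7)
}
max_model_size = 4

library_counter = defaultdict(int)
covs = []
for lib in sorted(ngs_library_to_kit):
    kit = ngs_library_to_kit[lib]
    library_counter[kit] += 1
    if max_model_size and library_counter[kit] > max_model_size:
        continue  # cap for this kit reached, skip the remaining libraries
    covs.append("work/bwa.gcnv_coverage.{lib}/out/bwa.gcnv_coverage.{lib}.tsv".format(lib=lib))

assert len(covs) == max_model_size
```

Because `sorted()` fixes the iteration order, the same subset of libraries is selected on every invocation, which keeps the rule inputs stable between Snakemake runs.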
@@ -664,6 +691,11 @@ def _get_input_files_coverage(self, wildcards):
 
     @dictify
     def _get_input_files_call_cnvs_cohort_mode(self, wildcards):
+        """Yield input files for rule `targeted_seq_cnv_calling_gcnv_call_cnvs` in COHORT mode
+
+        :param wildcards: Snakemake wildcards associated with rule.
+        :type wildcards: snakemake.io.Wildcards
+        """
         path_pattern = (
             "work/{name_pattern}/out/{name_pattern}/temp_{{shard}}/scattered.interval_list"
         )
@@ -691,6 +723,40 @@ def _get_input_files_call_cnvs_cohort_mode(self, wildcards):
             name_pattern=path_pattern, ext="tsv"
         )
 
+    @dictify
+    def _get_input_files_call_cnvs_case_mode(self, wildcards):
+        """Yield input files for rule `targeted_seq_cnv_calling_gcnv_call_cnvs` in CASE mode
+
+        :param wildcards: Snakemake wildcards associated with rule.
+        :type wildcards: snakemake.io.Wildcards
+        """
+        # Initialise variables
+        tsv_ext = "tsv"
+        tsv_path_pattern = "{mapper}.gcnv_coverage.{library_name}"
+        ploidy_ext = "ploidy"
+        ploidy_path_pattern = "{mapper}.gcnv_contig_ploidy.{library_kit}"
+        scatter_path_pattern = (
+            "work/{name_pattern}/out/{name_pattern}/temp_{{shard}}/scattered.interval_list"
+        )
+
+        # Yield coverage tsv files for all libraries associated with the kit
+        for lib in sorted(self.index_ngs_library_to_donor):
+            if self.ngs_library_to_kit.get(lib) == wildcards.library_kit:
+                path_pattern = tsv_path_pattern.format(mapper=wildcards.mapper, library_name=lib)
+                yield tsv_ext, "work/{name_pattern}/out/{name_pattern}.{ext}".format(
+                    name_pattern=path_pattern, ext=tsv_ext
+                )
+
+        # Yield ploidy files
+        path_pattern = ploidy_path_pattern.format(**wildcards)
+        yield ploidy_ext, "work/{name_pattern}/out/{name_pattern}/.done".format(
+            name_pattern=path_pattern
+        )
+
+        # Yield scatter files
+        name_pattern = "{mapper}.gcnv_scatter_intervals.{library_kit}"
+        yield "interval_list_shard", scatter_path_pattern.format(name_pattern=name_pattern)
+
     @dictify
     def _get_input_files_post_germline_calls(self, wildcards, checkpoints):
         checkpoint = checkpoints.targeted_seq_cnv_calling_gcnv_scatter_intervals
@@ -814,6 +880,17 @@ def _get_output_files_call_cnvs_cohort_mode():
             )
         )
 
+    @staticmethod
+    @dictify
+    def _get_output_files_call_cnvs_case_mode():
+        ext = "done"
+        name_pattern_calls = "{mapper}.gcnv_call_cnvs.{library_kit}.{shard}"
+        yield ext, touch(
+            "work/{name_pattern}/out/{name_pattern}/{{library_name}}/.{ext}".format(
+                name_pattern=name_pattern_calls, ext=ext
+            )
+        )
+
     @staticmethod
     @dictify
     def _get_output_files_post_germline_calls():
@@ -870,10 +947,13 @@ def get_log_file(self, action):
             name_pattern = "{{mapper}}.gcnv_{action}.{{library_kit}}".format(action=action)
             return "work/{name_pattern}/log/{name_pattern}.log".format(name_pattern=name_pattern)
         elif action == "call_cnvs_cohort_mode":
-            name_pattern = "{{mapper}}.gcnv_call_cnvs.{{library_kit}}.{{shard}}".format(
-                action=action
-            )
+            name_pattern = "{mapper}.gcnv_call_cnvs.{library_kit}.{shard}"
             return "work/{name_pattern}/log/{name_pattern}.log".format(name_pattern=name_pattern)
+        elif action == "call_cnvs_case_mode":
+            name_pattern = "{mapper}.gcnv_call_cnvs.{library_kit}.{shard}"
+            return "work/{name_pattern}/log/{name_pattern}_{{library_name}}.log".format(
+                name_pattern=name_pattern
+            )
         else:
             name_pattern = "{{mapper}}.gcnv_{action}.{{library_name}}".format(action=action)
             return "work/{name_pattern}/log/{name_pattern}.log".format(name_pattern=name_pattern)
@@ -911,6 +991,7 @@ def get_cnv_model_result_files(self, _unused):
 
     def get_run_mode(self, wildcards):
         """Get run mode
+
         :param wildcards: Snakemake wildcards associated with rule.
         :type wildcards: snakemake.io.Wildcards
@@ -927,6 +1008,40 @@
         else:
             return "COHORT"
 
+    def get_adjusted_max_samples_in_model(self):
+        """Get adjusted max samples in model
+
+        Scenarios:
+        * If model_max_size is an integer and the offset is null: the model will have at most
+          model_max_size samples.
+        * If model_max_size and the offset are both integers: the model will be rebuilt whenever
+          the number of samples in the cohort reaches (model_max_size + n_adjust * offset),
+          where n_adjust is a non-negative integer:
+          n_adjust = floor((total samples - model_max_size) / offset)
+
+        :return: Returns max number of samples used to build gCNV model adjusted by the offset.
+        """
+        # Initialise variables
+        n_adjust = 0
+        max_size = self.config["model_max_size"]
+        offset = self.config["model_max_size_offset"]
+        offset = offset if offset else 0
+        _, _, kit_counts = self.parent.pick_kits_and_donors()  # get library kit counts
+        max_kit_count = max(kit_counts.values())
+
+        # Return None if no max size is defined
+        if not max_size:
+            return None
+
+        # Define adjust factor
+        if offset:
+            n_adjust = (int(max_kit_count) - int(max_size)) // int(offset)
+
+        # Calculate adjusted max
+        max_size_adjusted = int(max_size) + (n_adjust * int(offset))
+
+        return max_size_adjusted
+
     def update_cluster_config(self, cluster_config):
         """Update cluster configuration for gCNV CNV calling"""
         for action in self.actions:
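Stripped of the workflow plumbing, the adjustment reduces to a single formula. A standalone sketch (the function name `adjusted_max` is invented) together with the three scenarios that the tests further down exercise:

```python
def adjusted_max(max_size, offset, max_kit_count):
    """Invented stand-in for GcnvStepPart.get_adjusted_max_samples_in_model()."""
    if not max_size:
        return None  # no cap configured - use all samples
    offset = offset or 0
    n_adjust = (max_kit_count - max_size) // offset if offset else 0
    return max_size + n_adjust * offset


assert adjusted_max(None, None, 6) is None  # small cohort, no cap set
assert adjusted_max(100, None, 501) == 100  # cap without offset
assert adjusted_max(100, 50, 501) == 500  # 100 + floor((501 - 100) / 50) * 50
```

For the 501-sample trio cohort used in the tests, this gives 100 + floor((501 - 100) / 50) * 50 = 100 + 8 * 50 = 500.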
+ """ + # Initialise variables + n_adjust = 0 + max_size = self.config["model_max_size"] + offset = self.config["model_max_size_offset"] + offset = offset if offset else 0 + _, _, kit_counts = self.parent.pick_kits_and_donors() # get list of library kits counts + max_kit_count = max(kit_counts.values()) + + # Return null if not defined + if not max_size: + return None + + # Define adjust factor + if offset: + n_adjust = (int(max_kit_count) - int(max_size)) // int(offset) + + # Calculate adjusted max + max_size_adjusted = int(max_size) + (n_adjust * int(offset)) + + return max_size_adjusted + def update_cluster_config(self, cluster_config): """Update cluster configuration for gCNV CNV calling""" for action in self.actions: @@ -1066,7 +1181,29 @@ def _yield_result_files(self, tpl, donors, **kwargs): def check_config(self): """Check that the necessary configuration is available for the step""" + # Initialise variable + message = "Argument for {par} must be an integer, received: '{value}'." + + # Check if path to BAM files is available self.ensure_w_config( config_keys=("step_config", "targeted_seq_cnv_calling", "path_ngs_mapping"), msg="Path to NGS mapping not configured but required for targeted seq. CNV calling", ) + + # Check max model values + model_max_size = self.w_config["step_config"]["targeted_seq_cnv_calling"]["model_max_size"] + if model_max_size: + try: + int(model_max_size) + except ValueError: + message = message.format(par="model max size", value=model_max_size) + raise ValueError(message) + + # Check max model values offset + m_offset = self.w_config["step_config"]["targeted_seq_cnv_calling"]["model_max_size_offset"] + if m_offset: + try: + int(m_offset) + except ValueError: + message = message.format(par="model max size offset", value=m_offset) + raise ValueError(message) diff --git a/snappy_pipeline/workflows/targeted_seq_cnv_calling/gcnv_case_mode.rules b/snappy_pipeline/workflows/targeted_seq_cnv_calling/gcnv_case_mode.rules index bacb8354b..d420d5f45 100644 --- a/snappy_pipeline/workflows/targeted_seq_cnv_calling/gcnv_case_mode.rules +++ b/snappy_pipeline/workflows/targeted_seq_cnv_calling/gcnv_case_mode.rules @@ -2,5 +2,67 @@ # gCNV needs many rules, thus they are in their own file. -# Include common rule -include: "gcnv_common.rules" +def targeted_seq_cnv_calling_gcnv_post_germline_calls_input(wildcards): + # Poor mans currying. + return wf.get_input_files("gcnv", "post_germline_calls")(wildcards, checkpoints) + + +def get_gcnv_scatter_intervals_input(): + return wf.substep_getattr("gcnv", "get_cnv_model_result_files")(None) + + +checkpoint targeted_seq_cnv_calling_gcnv_scatter_intervals: + input: + gcnv_model_wf(get_gcnv_scatter_intervals_input()), + output: + directory(wf.get_output_files("gcnv", "scatter_intervals")), + log: + wf.get_log_file("gcnv", "scatter_intervals"), + wrapper: + wf.wrapper_path("gcnv/scatter_intervals") + + +rule targeted_seq_cnv_calling_gcnv_call_cnvs: + input: + unpack(wf.get_input_files("gcnv", "call_cnvs_case_mode")), + output: + **wf. get_output_files("gcnv","call_cnvs_case_mode"), + log: + wf.get_log_file("gcnv", "call_cnvs_case_mode"), + wrapper: + wf.wrapper_path("gcnv/call_cnvs_case_mode") + + +rule targeted_seq_cnv_calling_gcnv_post_germline_calls: + input: + unpack(targeted_seq_cnv_calling_gcnv_post_germline_calls_input), + output: + **wf. 
get_output_files("gcnv","post_germline_calls"), + log: + wf.get_log_file("gcnv", "post_germline_calls"), + wrapper: + wf.wrapper_path("gcnv/post_germline_calls") + + +rule targeted_seq_cnv_calling_gcnv_merge_cohort_vcfs: + input: + wf.get_input_files("gcnv", "merge_cohort_vcfs"), + output: + **wf. get_output_files("gcnv","merge_cohort_vcfs"), + log: + wf.get_log_file("gcnv", "merge_cohort_vcfs"), + wrapper: + wf.wrapper_path("gcnv/merge_cohort_vcfs") + + +rule targeted_seq_cnv_calling_gcnv_extract_ped: + input: + unpack(wf.get_input_files("gcnv", "extract_ped")), + output: + **( wf. get_output_files("gcnv","extract_ped")), + log: + wf.get_log_file("gcnv", "extract_ped"), + params: + ped_members=wf.substep_getattr("gcnv", "get_ped_members"), + wrapper: + wf.wrapper_path("gcnv/extract_ped") diff --git a/snappy_wrappers/wrappers/gcnv/call_cnvs_case_mode/environment.yaml b/snappy_wrappers/wrappers/gcnv/call_cnvs_case_mode/environment.yaml new file mode 120000 index 000000000..2e107ac86 --- /dev/null +++ b/snappy_wrappers/wrappers/gcnv/call_cnvs_case_mode/environment.yaml @@ -0,0 +1 @@ +../environment.yaml \ No newline at end of file diff --git a/snappy_wrappers/wrappers/gcnv/call_cnvs_case_mode/wrapper.py b/snappy_wrappers/wrappers/gcnv/call_cnvs_case_mode/wrapper.py new file mode 100644 index 000000000..e27555e90 --- /dev/null +++ b/snappy_wrappers/wrappers/gcnv/call_cnvs_case_mode/wrapper.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- + +from snakemake.shell import shell + +print("snakemake.input =", vars(snakemake.input)) + +shell( + r""" +set -x +set -euo pipefail + +export TMPDIR=$(mktemp -d) +trap "rm -rf $TMPDIR" ERR EXIT + +export MKL_NUM_THREADS=16 +export OMP_NUM_THREADS=16 +export THEANO_FLAGS="base_compiledir=$TMPDIR/theano_compile_dir" + +gatk GermlineCNVCaller \ + --run-mode CASE \ + --input {snakemake.input.tsv} \ + --contig-ploidy-calls $(dirname {snakemake.input.ploidy})/ploidy-calls \ + --model $(dirname {snakemake.input.interval_list_shard})/ploidy-model \ + --output $(dirname {snakemake.output.done}) \ + --output-prefix cnv_calls +""" +) diff --git a/tests/snappy_pipeline/workflows/test_workflows_targeted_seq_cnv_calling.py b/tests/snappy_pipeline/workflows/test_workflows_targeted_seq_cnv_calling.py index e5d2d47d1..be6e19774 100644 --- a/tests/snappy_pipeline/workflows/test_workflows_targeted_seq_cnv_calling.py +++ b/tests/snappy_pipeline/workflows/test_workflows_targeted_seq_cnv_calling.py @@ -136,6 +136,28 @@ def minimal_config_large_cohort(minimal_config): return minimal_config_adjusted +@pytest.fixture +def minimal_config_large_cohort_with_offset(minimal_config): + """Returns minimum configuration with `model_max_size_offset` set to 50.""" + minimal_config_adjusted = copy.deepcopy(minimal_config) + minimal_config_adjusted["data_sets"]["first_batch"]["file"] = "sheet_large_cohort_trio.tsv" + minimal_config_adjusted["step_config"]["targeted_seq_cnv_calling"]["model_max_size"] = 100 + minimal_config_adjusted["step_config"]["targeted_seq_cnv_calling"]["model_max_size_offset"] = 50 + return minimal_config_adjusted + + +@pytest.fixture +def minimal_config_failure(minimal_config): + """Returns minimum configuration with invalid argument for `model_max_size_offset`.""" + minimal_config_adjusted = copy.deepcopy(minimal_config) + minimal_config_adjusted["data_sets"]["first_batch"]["file"] = "sheet_large_cohort_trio.tsv" + minimal_config_adjusted["step_config"]["targeted_seq_cnv_calling"]["model_max_size"] = 100 + minimal_config_adjusted["step_config"]["targeted_seq_cnv_calling"][ 
+ "model_max_size_offset" + ] = "fifty" # should cause a failure in config check + return minimal_config_adjusted + + @pytest.fixture def targeted_seq_cnv_calling_workflow( dummy_workflow, @@ -192,6 +214,34 @@ def targeted_seq_cnv_calling_workflow_large_cohort( ) +@pytest.fixture +def targeted_seq_cnv_calling_workflow_large_cohort_with_offset( + dummy_workflow, + minimal_config_large_cohort_with_offset, + dummy_cluster_config, + config_lookup_paths, + work_dir, + config_paths, + germline_sheet_fake_fs2, + mocker, +): + """Return TargetedSeqCnvCallingWorkflow object pre-configured with germline sheet""" + # Patch out file-system related things in abstract (the crawling link in step is defined there) + patch_module_fs("snappy_pipeline.workflows.abstract", germline_sheet_fake_fs2, mocker) + # Update the "globals" attribute of the mock workflow (snakemake.workflow.Workflow) so we + # can obtain paths from the function as if we really had a NGSMappingPipelineStep here + dummy_workflow.globals = {"ngs_mapping": lambda x: "NGS_MAPPING/" + x} + # Construct the workflow object + return TargetedSeqCnvCallingWorkflow( + dummy_workflow, + minimal_config_large_cohort_with_offset, + dummy_cluster_config, + config_lookup_paths, + config_paths, + work_dir, + ) + + # Global tests ------------------------------------------------------------------------------------- @@ -207,12 +257,7 @@ def test_target_seq_cnv_calling_workflow_files(targeted_seq_cnv_calling_workflow pattern_out.format(i=i, tool=tool, ext=ext) for i in (1, 4) # only index: P001, P004 for tool in ("gcnv", "xhmm") - for ext in ( - "vcf.gz", - "vcf.gz.md5", - "vcf.gz.tbi", - "vcf.gz.tbi.md5", - ) + for ext in ("vcf.gz", "vcf.gz.md5", "vcf.gz.tbi", "vcf.gz.tbi.md5") ] expected = sorted(expected) # Get actual @@ -258,6 +303,34 @@ def test_pick_kits_and_donors( assert expected_kit_counts == kit_counts +def test_configure_checks( + dummy_workflow, + minimal_config_failure, + dummy_cluster_config, + config_lookup_paths, + work_dir, + config_paths, + germline_sheet_fake_fs2, + mocker, +): + """Tests TargetedSeqCnvCallingWorkflow.check_config() for invalid `model_max_size_offset`""" + # Patch out file-system related things in abstract (the crawling link in step is defined there) + patch_module_fs("snappy_pipeline.workflows.abstract", germline_sheet_fake_fs2, mocker) + # Update the "globals" attribute of the mock workflow (snakemake.workflow.Workflow) so we + # can obtain paths from the function as if we really had a NGSMappingPipelineStep here + dummy_workflow.globals = {"ngs_mapping": lambda x: "NGS_MAPPING/" + x} + # Construct the workflow object - should fail + with pytest.raises(ValueError): + TargetedSeqCnvCallingWorkflow( + dummy_workflow, + minimal_config_failure, + dummy_cluster_config, + config_lookup_paths, + config_paths, + work_dir, + ) + + # Global GcnvStepPart Tests ------------------------------------------------------------------------ @@ -311,6 +384,35 @@ def test_gcnv_get_cnv_model_result_files( assert actual == expected +def test_gcnv_get_adjusted_max_samples_in_model( + targeted_seq_cnv_calling_workflow, + targeted_seq_cnv_calling_workflow_large_cohort, + targeted_seq_cnv_calling_workflow_large_cohort_with_offset, +): + """Tests GcnvStepPart.get_adjusted_max_samples_in_model()""" + + # Test small cohort (6 samples) max_sample value is set to null + expected = None + actual = targeted_seq_cnv_calling_workflow.substep_getattr( + "gcnv", "get_adjusted_max_samples_in_model" + )() + assert actual == expected + + # Test large trio cohort 
@@ -413,8 +515,21 @@ def test_gcnv_annotate_gc_step_part_get_log_file(targeted_seq_cnv_calling_workfl
 # Tests for GcnvStepPart (filter_intervals) --------------------------------------------------------
 
 
-def test_gcnv_filter_intervals_step_part_get_input_files(targeted_seq_cnv_calling_workflow):
+def test_gcnv_filter_intervals_step_part_get_input_files(
+    targeted_seq_cnv_calling_workflow,
+    targeted_seq_cnv_calling_workflow_large_cohort,
+    targeted_seq_cnv_calling_workflow_large_cohort_with_offset,
+):
     """Tests GcnvStepPart._get_input_files_filter_intervals()"""
+    # Initialise variables
+    # Notes:
+    # - library kit defined in conftest: `germline_sheet_tsv`
+    # - mapper defined in `minimal_config`
+    wildcards = Wildcards(
+        fromdict={"mapper": "bwa", "library_kit": "Agilent_SureSelect_Human_All_Exon_V6"}
+    )
+
+    # Test small cohort - 6 samples
     # Define expected
     interval_list_out = (
         "work/gcnv_preprocess_intervals.Agilent_SureSelect_Human_All_Exon_V6/out/"
@@ -429,17 +544,35 @@
     )
     csv_list_out = [csv_pattern.format(i=i) for i in range(1, 7)]  # P001 - P006
     expected = {"interval_list": interval_list_out, "tsv": tsv_out, "covs": csv_list_out}
-    # Get actual. Notes:
-    # - library kit defined in conftest: `germline_sheet_tsv`
-    # - mapper defined in `minimal_config`
-    wildcards = Wildcards(
-        fromdict={"mapper": "bwa", "library_kit": "Agilent_SureSelect_Human_All_Exon_V6"}
-    )
+    # Get actual
     actual = targeted_seq_cnv_calling_workflow.get_input_files("gcnv", "filter_intervals")(
         wildcards
     )
     assert actual == expected
 
+    # Test larger cohort - 501 samples, max cohort size is 100
+    # Define expected
+    expected = 100
+    # Call and get actual
+    output = targeted_seq_cnv_calling_workflow_large_cohort.get_input_files(
+        "gcnv", "filter_intervals"
+    )(wildcards)
+    actual = len(output.get("covs"))
+    assert actual == expected, "Should return 100 coverage paths - max number of samples in model."
+
+    # Test larger cohort - 501 samples, max cohort size is 100, and offset is 50
+    # Define expected
+    expected = 500
+    # Call and get actual
+    output = targeted_seq_cnv_calling_workflow_large_cohort_with_offset.get_input_files(
+        "gcnv", "filter_intervals"
+    )(wildcards)
+    actual = len(output.get("covs"))
+    message = "Should return 500 coverage paths - max number of samples in model adjusted by offset."
+    assert actual == expected, message
+
 
 def test_gcnv_filter_intervals_step_part_get_output_files(targeted_seq_cnv_calling_workflow):
     """Tests GcnvStepPart._get_output_files_filter_intervals()"""
@@ -1027,10 +1160,7 @@ def test_xhmm_zscore_center_step_part_get_input_files(targeted_seq_cnv_calling_w
     expected = [base_out]
     # Get actual
     wildcards = Wildcards(
-        fromdict={
-            "mapper": "bwa",
-            "library_kit": "Agilent_SureSelect_Human_All_Exon_V6",
-        }
+        fromdict={"mapper": "bwa", "library_kit": "Agilent_SureSelect_Human_All_Exon_V6"}
     )
     actual = targeted_seq_cnv_calling_workflow.get_input_files("xhmm", "zscore_center")(wildcards)
     assert actual == expected
@@ -1085,10 +1215,7 @@ def test_xhmm_refilter_step_part_get_input_files(targeted_seq_cnv_calling_workfl
     }
     # Get actual
     wildcards = Wildcards(
-        fromdict={
-            "mapper": "bwa",
-            "library_kit": "Agilent_SureSelect_Human_All_Exon_V6",
-        }
+        fromdict={"mapper": "bwa", "library_kit": "Agilent_SureSelect_Human_All_Exon_V6"}
     )
     actual = targeted_seq_cnv_calling_workflow.get_input_files("xhmm", "refilter")(wildcards)
     assert actual == expected
@@ -1132,10 +1259,7 @@ def test_xhmm_discover_step_part_get_input_files(targeted_seq_cnv_calling_workfl
     expected = {"center_zscore": center_zscore_out, "refilter_original": refilter_original_out}
     # Get actual
     wildcards = Wildcards(
-        fromdict={
-            "mapper": "bwa",
-            "library_kit": "Agilent_SureSelect_Human_All_Exon_V6",
-        }
+        fromdict={"mapper": "bwa", "library_kit": "Agilent_SureSelect_Human_All_Exon_V6"}
     )
     actual = targeted_seq_cnv_calling_workflow.get_input_files("xhmm", "discover")(wildcards)
     assert actual == expected
@@ -1177,10 +1301,7 @@ def test_xhmm_genotype_step_part_get_input_files(targeted_seq_cnv_calling_workfl
     }
     # Get actual
     wildcards = Wildcards(
-        fromdict={
-            "mapper": "bwa",
-            "library_kit": "Agilent_SureSelect_Human_All_Exon_V6",
-        }
+        fromdict={"mapper": "bwa", "library_kit": "Agilent_SureSelect_Human_All_Exon_V6"}
    )
     actual = targeted_seq_cnv_calling_workflow.get_input_files("xhmm", "genotype")(wildcards)
     assert actual == expected