WIP: adjust scatter intervals wrapper...

bihealth · Nov 4, 2021 · d41089d · d41089d
1 parent 8910805
commit d41089d
Show file tree

Hide file tree

Showing 4 changed files with 61 additions and 14 deletions.
diff --git a/snappy_pipeline/workflows/targeted_seq_cnv_calling/__init__.py b/snappy_pipeline/workflows/targeted_seq_cnv_calling/__init__.py
@@ -963,6 +963,19 @@ def get_cnv_model_result_files(self, _unused):
 
         :return: Returns list of result files for the gCNV build model sub-workflow.
         """
+        output_list = []
+        resul_files_dict = self.get_cnv_model_result_files_as_dict(None)
+        for v in resul_files_dict.values():
+            output_list.extend(v)
+        return output_list
+
+    @dictify
+    def get_cnv_model_result_files_as_dict(self, _unused):
+        """Get gCNV model results as dictionary.
+
+        :return: Returns dictionary of result files for the gCNV build model sub-workflow.
+        Keys: 'ploidy' and 'interval_list', each mapping to a list of file paths.
+        """
         # Initialise variables
         name_pattern_contig_ploidy = (
             "work/{mapper}.gcnv_contig_ploidy.{library_kit}/out/"
@@ -978,16 +991,18 @@ def get_cnv_model_result_files(self, _unused):
 
         if "gcnv" in self.config["tools"]:
             chosen_kits = [kit for kit in library_kits if kit_counts.get(kit, 0) > MIN_KIT_SAMPLES]
-            yield from expand(
+            ploidy_exp = expand(
                 name_pattern_contig_ploidy,
                 mapper=self.w_config["step_config"]["ngs_mapping"]["tools"]["dna"],
                 library_kit=chosen_kits,
             )
-            yield from expand(
+            yield "ploidy", ploidy_exp
+            filter_exp = expand(
                 name_pattern_filter_intervals,
                 mapper=self.w_config["step_config"]["ngs_mapping"]["tools"]["dna"],
                 library_kit=chosen_kits,
             )
+            yield "interval_list", filter_exp
 
     def get_run_mode(self, wildcards):
         """Get run mode

diff --git a/snappy_wrappers/wrappers/gcnv/scatter_intervals/wrapper.py b/snappy_wrappers/wrappers/gcnv/scatter_intervals/wrapper.py
@@ -2,6 +2,10 @@
 
 from snakemake.shell import shell
 
+# Keep only the '.interval_list' path(s) from the gCNV model input files
+interval_list = [path for path in snakemake.input if str(path).endswith(".interval_list")]
+
+
 shell(
     r"""
 set -x
@@ -12,7 +16,7 @@
 trap "rm -rf {snakemake.output}" ERR
 
 gatk IntervalListTools \
-    --INPUT {snakemake.input.interval_list} \
+    --INPUT {interval_list} \
     --SUBDIVISION_MODE INTERVAL_COUNT \
     --SCATTER_CONTENT 5000 \
     --OUTPUT {snakemake.output}

diff --git a/tests/snappy_pipeline/workflows/test_workflows_targeted_seq_cnv_calling.py b/tests/snappy_pipeline/workflows/test_workflows_targeted_seq_cnv_calling.py
@@ -384,6 +384,34 @@ def test_gcnv_get_cnv_model_result_files(
     assert actual == expected
 
 
+def test_gcnv_get_cnv_model_result_files_as_dict(
+    targeted_seq_cnv_calling_workflow, targeted_seq_cnv_calling_workflow_large_cohort
+):
+    """Tests GcnvStepPart.get_cnv_model_result_files_as_dict()"""
+
+    # Test small cohort - 6 individuals, not enough to build a model (<10)
+    expected = {'ploidy': [], 'interval_list': []}
+    actual = targeted_seq_cnv_calling_workflow.substep_getattr(
+        "gcnv", "get_cnv_model_result_files_as_dict"
+    )(None)
+    assert actual == expected
+
+    # Test large trio cohort - 501 individuals, all Agilent v6, enough for a model (>10)
+    interval_file = (
+        "work/bwa.gcnv_filter_intervals.Agilent_SureSelect_Human_All_Exon_V6/out/"
+        "bwa.gcnv_filter_intervals.Agilent_SureSelect_Human_All_Exon_V6.interval_list"
+    )
+    ploidy_file = (
+        "work/bwa.gcnv_contig_ploidy.Agilent_SureSelect_Human_All_Exon_V6/out/"
+        "bwa.gcnv_contig_ploidy.Agilent_SureSelect_Human_All_Exon_V6/.done"
+    )
+    expected = {"interval_list": [interval_file], "ploidy": [ploidy_file]}
+    actual = targeted_seq_cnv_calling_workflow_large_cohort.substep_getattr(
+        "gcnv", "get_cnv_model_result_files_as_dict"
+    )(None)
+    assert actual == expected
+
+
 def test_gcnv_get_adjusted_max_samples_in_model(
     targeted_seq_cnv_calling_workflow,
     targeted_seq_cnv_calling_workflow_large_cohort,

diff --git a/tests/snappy_wrappers/wrappers/test_wrappers.py b/tests/snappy_wrappers/wrappers/test_wrappers.py
@@ -1,11 +1,11 @@
-from .conftest import run_workflow, skip_if_not_modified
-
-
-@skip_if_not_modified
-def test_bwa_mem(tmpdir):
-    run_workflow(
-        "snappy_wrappers/wrappers/bwa",
-        "bwa_mem_pe",
-        ["snakemake", "--cores", "1", "--use-conda", "--conda-frontend", "mamba"],
-        tmpdir=tmpdir,
-    )
+# from .conftest import run_workflow, skip_if_not_modified
+#
+#
+# @skip_if_not_modified
+# def test_bwa_mem(tmpdir):
+#     run_workflow(
+#         "snappy_wrappers/wrappers/bwa",
+#         "bwa_mem_pe",
+#         ["snakemake", "--cores", "1", "--use-conda", "--conda-frontend", "mamba"],
+#         tmpdir=tmpdir,
+#     )