Included more config validations.

bihealth · Dec 20, 2021 · b72be97 · b72be97
1 parent a5b4d57
commit b72be97
Show file tree

Hide file tree

Showing 3 changed files with 105 additions and 19 deletions.
diff --git a/snappy_pipeline/workflows/targeted_seq_cnv_calling/__init__.py b/snappy_pipeline/workflows/targeted_seq_cnv_calling/__init__.py
@@ -45,6 +45,7 @@
 from collections import OrderedDict, defaultdict
 from copy import deepcopy
 import glob
+import json
 import os
 import re
 
@@ -570,6 +571,9 @@ def validate_request(self):
         if len(path_to_models) == 0:
             return "cohort_mode"
         else:
+            # Validate configuration
+            self.validate_precomputed_model_paths_config(config=path_to_models)
+
             # Case 2: path to model provided -> analysis in CASE MODE using precomputed model
             if not all([model.get("model_pattern") == reserved_w for model in path_to_models]):
                 for model in path_to_models:
@@ -613,6 +617,56 @@ def validate_request(self):
                 return "case_mode_build"
         return None
 
+    def validate_precomputed_model_paths_config(self, config):
+        """Validate precomputed model config.
+
+        Evaluates if every entry has exactly the two keys below, both with string values:
+
+        precomputed_model_paths:
+          - library: "Agilent SureSelect Human All Exon V6"
+            model_pattern: /path/to/model_*
+            # OR model_pattern: "__build__"
+
+        :param config: Precomputed model configuration dictionary list.
+        :type config: list
+
+        :raises InvalidConfiguration: if configuration not as expected for
+        ``precomputed_model_paths`` list.
+        """
+        # Initialise variables
+        expected_keys = ("library", "model_pattern")
+        expected_format = (
+            '{\n    "library": "Agilent SureSelect Human All Exon V6"\n'
+            '    "model_pattern": "/path/to/model_*"\n'
+            '    # OR "model_pattern": "__build__"\n}\n'
+        )
+        # Test
+        for model in config:
+            # Test keys: right amount of keys and no unknown (e.g., misspelled) key
+            n_keys_pass = len(model) == len(expected_keys)
+            keys_pass = all(key in expected_keys for key in model)
+            # Test values: every single value must be a string
+            values_pass = all(isinstance(value, str) for value in model.values())
+            # Validate
+            if not (n_keys_pass and keys_pass and values_pass):
+                msg_tpl = (
+                    "Provided configuration not as expected...\n"
+                    "Expected:\n{e_}\nObserved:\n{o_}\n"
+                )
+                pretty_model = self._pretty_print_config(config=model)
+                raise InvalidConfiguration(msg_tpl.format(e_=expected_format, o_=pretty_model))
+
+    @staticmethod
+    def _pretty_print_config(config):
+        """Serialise a configuration entry as indented JSON text.
+
+        :param config: Configuration dictionary to be formatted.
+        :type config: OrderedDict
+
+        :return: JSON string with four-space indentation and keys kept in their given order.
+        """
+        return json.dumps(config, indent=4, sort_keys=False)
+
     def validate_model_requirements(self, library_kit, library_count_dict):
         """Validate build model requirements.
 
@@ -716,10 +770,14 @@ def get_params(self, action):
         return getattr(self, "_get_params_{}".format(action))
 
     def _get_params_coverage(self, wildcards):
-        """
+        """Get parameters for action 'coverage'.
+
+        :param wildcards: Snakemake wildcards associated with rule, namely: 'library_name'
+        (e.g., 'P001-N1-DNA1-WGS1').
+        :type wildcards: snakemake.io.Wildcards
 
-        :param wildcards:
-        :return:
+        :return: Dictionary with the kit registered for ``wildcards.library_name``. Example:
+        {'library_kit': 'Agilent_SureSelect_Human_All_Exon_V6'}
         """
         return {"library_kit": self.ngs_library_to_kit[wildcards.library_name]}
 
@@ -1251,13 +1309,12 @@ def get_log_file(action):
             "filter_intervals",
             "contig_ploidy",
             "contig_ploidy_case_mode",
-            "call_cnvs_case_mode",
             "scatter_intervals",
             "merge_cohort_vcfs",
         ):
             name_pattern = "{{mapper}}.gcnv_{action}.{{library_kit}}".format(action=action)
             return "work/{name_pattern}/log/{name_pattern}.log".format(name_pattern=name_pattern)
-        elif action == "call_cnvs_cohort_mode":
+        elif action.startswith("call_cnvs"):
             name_pattern = "{{mapper}}.gcnv_{action}.{{library_kit}}.{{shard}}".format(
                 action=action
             )

diff --git a/tests/snappy_pipeline/workflows/test_workflows_targeted_seq_cnv_calling.py b/tests/snappy_pipeline/workflows/test_workflows_targeted_seq_cnv_calling.py
@@ -1,14 +1,15 @@
 # -*- coding: utf-8 -*-
 """Tests for the targeted_seq_cnv_calling workflow module code"""
 
+from collections import OrderedDict
 import copy
 import textwrap
 
 import pytest
 import ruamel.yaml as yaml
 from snakemake.io import Wildcards
 
-from snappy_pipeline.base import UnsupportedActionException
+from snappy_pipeline.base import InvalidConfiguration, UnsupportedActionException
 from snappy_pipeline.workflows.targeted_seq_cnv_calling import TargetedSeqCnvCallingWorkflow
 
 from .common import get_expected_output_vcf_files_dict
@@ -541,6 +542,34 @@ def test_gcnv_validate_model_requirements(
     assert actual == expected
 
 
+# validate_precomputed_model_paths_config
+def test_gcnv_validate_precomputed_model_paths_config(targeted_seq_cnv_calling_workflow):
+    """Tests GcnvStepPart.validate_precomputed_model_paths_config()
+
+    Note: the regular input format is OrderedDict, but plain dictionaries are used here because
+    the order of the keys is not important for the validation.
+    """
+    # Initialise input
+    valid_dict = {"library": "library", "model_pattern": "/path/to/model_*"}
+    typo_dict = {"library_n": "library", "model_pattern": "/path/to/model_*"}
+    missing_key_dict = {"model_pattern": "/path/to/model_*"}
+    extra_key_dict = {"library": "library", "model_pattern": "/path/to/model_*", "extra": "x"}
+    validate = targeted_seq_cnv_calling_workflow.substep_getattr(
+        "gcnv", "validate_precomputed_model_paths_config"
+    )
+    # Sanity check: a well-formed entry must not raise
+    validate(config=[valid_dict])
+    # Test key typo
+    with pytest.raises(InvalidConfiguration):
+        validate(config=[valid_dict, typo_dict])
+    # Test key missing
+    with pytest.raises(InvalidConfiguration):
+        validate(config=[valid_dict, missing_key_dict])
+    # Test unexpected extra key
+    with pytest.raises(InvalidConfiguration):
+        validate(config=[valid_dict, extra_key_dict])
+
+
 def test_gcnv_validate_model_directory(fake_fs, mocker, targeted_seq_cnv_calling_workflow):
     """Tests GcnvStepPart.validate_model_directory()"""
     # Model required files
@@ -1049,8 +1078,8 @@ def test_gcnv_call_cnvs_case_mode_step_part_get_log_file(targeted_seq_cnv_callin
     """Tests GcnvStepPart.get_log_file for 'call_cnvs_case_mode' step"""
     # Define expected
     expected = (
-        "work/{mapper}.gcnv_call_cnvs_case_mode.{library_kit}/log/"
-        "{mapper}.gcnv_call_cnvs_case_mode.{library_kit}.log"
+        "work/{mapper}.gcnv_call_cnvs_case_mode.{library_kit}.{shard}/log/"
+        "{mapper}.gcnv_call_cnvs_case_mode.{library_kit}.{shard}.log"
     )
     # Get actual
     actual = targeted_seq_cnv_calling_workflow.get_log_file("gcnv", "call_cnvs_case_mode")

diff --git a/tests/snappy_wrappers/wrappers/test_wrappers.py b/tests/snappy_wrappers/wrappers/test_wrappers.py
@@ -1,11 +1,11 @@
-# from .conftest import run_workflow, skip_if_not_modified
-#
-#
-# @skip_if_not_modified
-# def test_bwa_mem(tmpdir):
-#     run_workflow(
-#         "snappy_wrappers/wrappers/bwa",
-#         "bwa_mem_pe",
-#         ["snakemake", "--cores", "1", "--use-conda", "--conda-frontend", "mamba"],
-#         tmpdir=tmpdir,
-#     )
+from .conftest import run_workflow, skip_if_not_modified
+
+
+@skip_if_not_modified
+def test_bwa_mem(tmpdir):
+    """Invoke ``run_workflow`` for the 'bwa_mem_pe' case of the bwa wrapper."""
+    # Snakemake command line handed to the helper: single core, conda with mamba frontend
+    command = ["snakemake", "--cores", "1", "--use-conda", "--conda-frontend", "mamba"]
+    wrapper_path = "snappy_wrappers/wrappers/bwa"
+    # Positional order and the ``tmpdir`` keyword are the same as in a direct call
+    run_workflow(wrapper_path, "bwa_mem_pe", command, tmpdir=tmpdir)