Skip to content

Commit

Permalink
Included more config validations.
Browse files Browse the repository at this point in the history
  • Loading branch information
eudesbarbosa committed Dec 20, 2021
1 parent a5b4d57 commit b72be97
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 19 deletions.
67 changes: 62 additions & 5 deletions snappy_pipeline/workflows/targeted_seq_cnv_calling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from collections import OrderedDict, defaultdict
from copy import deepcopy
import glob
import json
import os
import re

Expand Down Expand Up @@ -570,6 +571,9 @@ def validate_request(self):
if len(path_to_models) == 0:
return "cohort_mode"
else:
# Validate configuration
self.validate_precomputed_model_paths_config(config=path_to_models)

# Case 2: path to model provided -> analysis in CASE MODE using precomputed model
if not all([model.get("model_pattern") == reserved_w for model in path_to_models]):
for model in path_to_models:
Expand Down Expand Up @@ -613,6 +617,56 @@ def validate_request(self):
return "case_mode_build"
return None

def validate_precomputed_model_paths_config(self, config):
    """Validate precomputed model config.

    Evaluates if every entry of the provided configuration has the following format:

        precomputed_model_paths:
          - library: "Agilent SureSelect Human All Exon V6"
            model_pattern: /path/to/model_*
            # OR model_pattern: "__build__"

    An entry is accepted only if it is a dictionary with exactly the keys ``library`` and
    ``model_pattern`` and both values are strings.

    :param config: Precomputed model configuration dictionary list.
    :type config: list

    :raises InvalidConfiguration: if configuration not as expected for
    ``precomputed_model_paths`` list.
    """
    # Initialise variables
    expected_keys = ("library", "model_pattern")
    expected_format = (
        '{\n "library": "Agilent SureSelect Human All Exon V6"\n'
        ' "model_pattern": "/path/to/model_*"\n'
        ' # OR "model_pattern": "__build__"\n}\n'
    )
    msg_tpl = "Provided configuration not as expected...\nExpected:\n{e_}\nObserved:\n{o_}\n"

    for model in config:
        # Entries that are not dictionaries (e.g. a bare string) cannot carry the
        # expected keys; report them as invalid configuration instead of crashing.
        is_mapping = isinstance(model, dict)
        # Dictionary keys are unique, so comparing key sets checks both the number
        # of keys and their names in one step.
        keys_pass = is_mapping and set(model) == set(expected_keys)
        # Each value itself must be a string. The generator must yield the boolean
        # (not a one-element list, which would always be truthy).
        values_pass = is_mapping and all(isinstance(value, str) for value in model.values())
        if not (keys_pass and values_pass):
            pretty_model = self._pretty_print_config(config=model)
            raise InvalidConfiguration(msg_tpl.format(e_=expected_format, o_=pretty_model))

@staticmethod
def _pretty_print_config(config):
"""Pretty format configuration.
:param config: Configuration dictionary to be formatted.
:type config: OrderedDict
:return: Configuration as a nicely formatted string.
"""
return str(json.dumps(config, sort_keys=False, indent=4))

def validate_model_requirements(self, library_kit, library_count_dict):
"""Validate build model requirements.
Expand Down Expand Up @@ -716,10 +770,14 @@ def get_params(self, action):
return getattr(self, "_get_params_{}".format(action))

def _get_params_coverage(self, wildcards):
"""
"""Get coverage parameter.
:param wildcards: Snakemake wildcards associated with rule, namely: 'library_name'
(e.g., 'P001-N1-DNA1-WGS1').
:type wildcards: snakemake.io.Wildcards
:param wildcards:
:return:
:return: Returns dictionary with library kit name associated with library. Example:
{'library_kit': 'Agilent_SureSelect_Human_All_Exon_V6'}
"""
return {"library_kit": self.ngs_library_to_kit[wildcards.library_name]}

Expand Down Expand Up @@ -1251,13 +1309,12 @@ def get_log_file(action):
"filter_intervals",
"contig_ploidy",
"contig_ploidy_case_mode",
"call_cnvs_case_mode",
"scatter_intervals",
"merge_cohort_vcfs",
):
name_pattern = "{{mapper}}.gcnv_{action}.{{library_kit}}".format(action=action)
return "work/{name_pattern}/log/{name_pattern}.log".format(name_pattern=name_pattern)
elif action == "call_cnvs_cohort_mode":
elif action.startswith("call_cnvs"):
name_pattern = "{{mapper}}.gcnv_{action}.{{library_kit}}.{{shard}}".format(
action=action
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
# -*- coding: utf-8 -*-
"""Tests for the targeted_seq_cnv_calling workflow module code"""

from collections import OrderedDict
import copy
import textwrap

import pytest
import ruamel.yaml as yaml
from snakemake.io import Wildcards

from snappy_pipeline.base import UnsupportedActionException
from snappy_pipeline.base import InvalidConfiguration, UnsupportedActionException
from snappy_pipeline.workflows.targeted_seq_cnv_calling import TargetedSeqCnvCallingWorkflow

from .common import get_expected_output_vcf_files_dict
Expand Down Expand Up @@ -541,6 +542,34 @@ def test_gcnv_validate_model_requirements(
assert actual == expected


# validate_precomputed_model_paths_config
def test_gcnv_validate_precomputed_model_paths_config(targeted_seq_cnv_calling_workflow):
    """Tests GcnvStepPart.validate_precomputed_model_paths_config()

    Note: the entries used here are plain dictionaries; for this validation the order of
    the keys is not important.
    """
    # Entries under test: one well-formed, one with a misspelled key, one lacking a key
    good_entry = {"library": "library", "model_pattern": "/path/to/model_*"}
    misspelled_entry = {"library_n": "library", "model_pattern": "/path/to/model_*"}
    incomplete_entry = {"model_pattern": "/path/to/model_*"}

    def run_validation(entries):
        """Fetch the validator from the 'gcnv' sub step and apply it to ``entries``."""
        validator = targeted_seq_cnv_calling_workflow.substep_getattr(
            "gcnv", "validate_precomputed_model_paths_config"
        )
        validator(config=entries)

    # Sanity check: a valid entry alone must not raise
    run_validation([good_entry])

    # A valid entry followed by an entry with a key typo is rejected
    with pytest.raises(InvalidConfiguration):
        run_validation([good_entry, misspelled_entry])

    # A valid entry followed by an entry missing a key is rejected
    with pytest.raises(InvalidConfiguration):
        run_validation([good_entry, incomplete_entry])


def test_gcnv_validate_model_directory(fake_fs, mocker, targeted_seq_cnv_calling_workflow):
"""Tests GcnvStepPart.validate_model_directory()"""
# Model required files
Expand Down Expand Up @@ -1049,8 +1078,8 @@ def test_gcnv_call_cnvs_case_mode_step_part_get_log_file(targeted_seq_cnv_callin
"""Tests GcnvStepPart.get_log_file for 'call_cnvs_case_mode' step"""
# Define expected
expected = (
"work/{mapper}.gcnv_call_cnvs_case_mode.{library_kit}/log/"
"{mapper}.gcnv_call_cnvs_case_mode.{library_kit}.log"
"work/{mapper}.gcnv_call_cnvs_case_mode.{library_kit}.{shard}/log/"
"{mapper}.gcnv_call_cnvs_case_mode.{library_kit}.{shard}.log"
)
# Get actual
actual = targeted_seq_cnv_calling_workflow.get_log_file("gcnv", "call_cnvs_case_mode")
Expand Down
22 changes: 11 additions & 11 deletions tests/snappy_wrappers/wrappers/test_wrappers.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# from .conftest import run_workflow, skip_if_not_modified
#
#
# @skip_if_not_modified
# def test_bwa_mem(tmpdir):
# run_workflow(
# "snappy_wrappers/wrappers/bwa",
# "bwa_mem_pe",
# ["snakemake", "--cores", "1", "--use-conda", "--conda-frontend", "mamba"],
# tmpdir=tmpdir,
# )
from .conftest import run_workflow, skip_if_not_modified


@skip_if_not_modified
def test_bwa_mem(tmpdir):
    """Call ``run_workflow`` for the bwa wrapper with the ``bwa_mem_pe`` argument.

    :param tmpdir: Temporary directory handed through to ``run_workflow``.
    """
    wrapper_location = "snappy_wrappers/wrappers/bwa"
    workflow_label = "bwa_mem_pe"
    # Command line passed on unchanged: single core, conda environments via mamba.
    snakemake_cmd = ["snakemake", "--cores", "1", "--use-conda", "--conda-frontend", "mamba"]
    run_workflow(wrapper_location, workflow_label, snakemake_cmd, tmpdir=tmpdir)

0 comments on commit b72be97

Please sign in to comment.