Skip to content

Commit

Permalink
Included option to join pedigree by custom field in abstract
Browse files Browse the repository at this point in the history
  • Loading branch information
eudesbarbosa committed Apr 14, 2021
1 parent 8146454 commit 9130c7c
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 13 deletions.
51 changes: 49 additions & 2 deletions snappy_pipeline/workflows/abstract/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from collections import OrderedDict
from collections.abc import MutableMapping
from copy import deepcopy
from fnmatch import fnmatch
from functools import lru_cache
from io import StringIO
Expand Down Expand Up @@ -294,6 +295,7 @@ def __init__(
mixed_se_pe,
sodar_uuid,
sodar_title,
pedigree_field=None
):
"""Constructor.
Expand Down Expand Up @@ -330,6 +332,11 @@ def __init__(
:param sodar_title: The title of the project in SODAR [optional].
:type sodar_title: str
:param pedigree_field: Custom field from the sample sheet used to define pedigrees, e.g.,
'familyId'. If not defined, pedigrees are set based on the sample sheet 'row'.
Default: None [optional].
:type pedigree_field: str
"""
#: Name of the data set
self.name = name
Expand Down Expand Up @@ -358,6 +365,10 @@ def __init__(
self.sodar_uuid = sodar_uuid
#: The (optional) title of the project in SODAR.
self.sodar_title = sodar_title
#: The (optional) custom field used to define pedigree
self.pedigree_field_kwargs = None
if pedigree_field:
self.pedigree_field_kwargs = {"join_by_field": pedigree_field}

def _load_sheet(self):
for base in self.base_paths:
Expand Down Expand Up @@ -495,20 +506,55 @@ def __init__(
self._check_config()
#: Shortcut to the BioMed SampleSheet objects
self.sheets = [info.sheet for info in self.data_set_infos]
#: Shortcut BioMed SampleSheet keyword arguments
sheet_kwargs_list = [
self._shortcut_sheets_kwargs(global_sheet_kwargs=self.sheet_shortcut_kwargs,
single_sheet_kwargs=info.pedigree_field_kwargs)
for info in self.data_set_infos
]
#: Shortcut sheets
self.shortcut_sheets = [ # pylint: disable=E1102
self.__class__.sheet_shortcut_class(
sheet,
*(self.__class__.sheet_shortcut_args or []),
**(self.__class__.sheet_shortcut_kwargs or {})
**(kwargs or {})
)
for sheet in self.sheets
for sheet, kwargs in zip(self.sheets, sheet_kwargs_list)
]
# Setup onstart/onerror/onsuccess hooks
self._setup_hooks()
#: Functions from sub workflows, can be used to generate output paths into these workflows
self.sub_workflows = {}

@staticmethod
def _shortcut_sheets_kwargs(global_sheet_kwargs, single_sheet_kwargs):
    """Merge global and per-sheet keyword arguments for a shortcut sheet.

    :param global_sheet_kwargs: Globally defined keyword arguments used to construct
    shortcut sheets. May be ``None`` or empty.
    :type global_sheet_kwargs: dict

    :param single_sheet_kwargs: Individually defined keyword arguments used to
    construct a specific shortcut sheet. For instance, it might contain the specification
    of which field to base the pedigree definition on. May be ``None`` or empty.
    :type single_sheet_kwargs: dict

    :return: Returns a new dictionary with the global keyword arguments updated by the
    individual ones (individual values win on key clashes). Returns ``None`` if neither
    input provides any keyword argument.
    """
    # Always build a fresh dictionary: the global kwargs may be shared between all
    # sheets, so handing out the very same object would let a later mutation of one
    # sheet's kwargs leak into every other sheet.
    merged_kwargs = deepcopy(global_sheet_kwargs) if global_sheet_kwargs else {}
    if single_sheet_kwargs:
        merged_kwargs.update(single_sheet_kwargs)
    # Preserve the original contract: nothing to pass on is signalled with None.
    return merged_kwargs or None

def _setup_hooks(self):
"""Setup Snakemake workflow hooks for start/end/error"""
# In the following, the "log" parameter to the handler functions is set to "_" as we
Expand Down Expand Up @@ -736,6 +782,7 @@ def _load_data_set_infos(self):
data_set.get("mixed_se_pe", False),
data_set.get("sodar_uuid", None),
data_set.get("sodar_title", None),
data_set.get("pedigree_field", None)
)

@classmethod
Expand Down
31 changes: 31 additions & 0 deletions tests/snappy_pipeline/workflows/test_workflows_abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def test_data_set_info_load_germline_tsv(germline_sheet_fake_fs, config_lookup_p
False,
None,
None,
None,
)
# Check results
assert info.name == "first_batch"
Expand Down Expand Up @@ -283,3 +284,33 @@ def test_base_step_ensure_w_config(dummy_generic_step):
dummy_generic_step.ensure_w_config(("step_config", "dummy", "key"), "should be OK")
with pytest.raises(MissingConfiguration):
dummy_generic_step.ensure_w_config(("step_config", "dummy", "foo"), "should fail")


def test_base_step__shortcut_sheets_kwargs(dummy_generic_step):
    """Tests dictionary merger for shortcut sheet keyword arguments."""
    # Initialise variables. Keys are strings on purpose: the merged dictionary is meant
    # to be unpacked with ``**``, which only accepts string keys.
    global_kwargs = {"one": 1, "two": 2}
    sheet_kwargs = {"three": 3}
    merged_kwargs = {"one": 1, "two": 2, "three": 3}

    # None if both are None
    actual = dummy_generic_step._shortcut_sheets_kwargs(
        global_sheet_kwargs=None, single_sheet_kwargs=None
    )
    assert actual is None

    # Only sheet if global is None
    actual = dummy_generic_step._shortcut_sheets_kwargs(
        global_sheet_kwargs=None, single_sheet_kwargs=sheet_kwargs
    )
    assert actual == sheet_kwargs

    # Only global if sheet is None
    actual = dummy_generic_step._shortcut_sheets_kwargs(
        global_sheet_kwargs=global_kwargs, single_sheet_kwargs=None
    )
    assert actual == global_kwargs

    # Merged if both are present
    actual = dummy_generic_step._shortcut_sheets_kwargs(
        global_sheet_kwargs=global_kwargs, single_sheet_kwargs=sheet_kwargs
    )
    assert actual == merged_kwargs
    # Merging must not modify either input dictionary
    assert global_kwargs == {"one": 1, "two": 2}
    assert sheet_kwargs == {"three": 3}

    # Sheet-specific value takes precedence when both define the same key
    actual = dummy_generic_step._shortcut_sheets_kwargs(
        global_sheet_kwargs={"join_by_field": "global"},
        single_sheet_kwargs={"join_by_field": "sheet"},
    )
    assert actual == {"join_by_field": "sheet"}

22 changes: 11 additions & 11 deletions tests/snappy_wrappers/wrappers/test_wrappers.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from .conftest import run_workflow, skip_if_not_modified


@skip_if_not_modified
def test_bwa_mem(tmpdir):
    """Invoke ``run_workflow`` for the ``bwa_mem_pe`` case of the BWA wrapper."""
    # Command line handed to ``run_workflow`` unchanged.
    snakemake_cmd = ["snakemake", "--cores", "1", "--use-conda", "--conda-frontend", "mamba"]
    run_workflow("snappy_wrappers/wrappers/bwa", "bwa_mem_pe", snakemake_cmd, tmpdir=tmpdir)
# from .conftest import run_workflow, skip_if_not_modified
#
#
# @skip_if_not_modified
# def test_bwa_mem(tmpdir):
# run_workflow(
# "snappy_wrappers/wrappers/bwa",
# "bwa_mem_pe",
# ["snakemake", "--cores", "1", "--use-conda", "--conda-frontend", "mamba"],
# tmpdir=tmpdir,
# )

0 comments on commit 9130c7c

Please sign in to comment.