Skip to content

Commit

Permalink
Merge 12f33cb into b971088
Browse files Browse the repository at this point in the history
  • Loading branch information
ericblanc20 committed Apr 25, 2024
2 parents b971088 + 12f33cb commit 4f3a629
Show file tree
Hide file tree
Showing 8 changed files with 368 additions and 324 deletions.
31 changes: 15 additions & 16 deletions snappy_pipeline/workflows/somatic_variant_annotation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,8 @@ def params_function(wildcards):

def get_normal_lib_name(self, wildcards):
"""Return name of normal (non-cancer) library"""
pair = self.tumor_ngs_library_to_sample_pair[wildcards.tumor_library]
return pair.normal_sample.dna_ngs_library.name
pair = self.tumor_ngs_library_to_sample_pair.get(wildcards.tumor_library, None)
return pair.normal_sample.dna_ngs_library.name if pair else None


class JannovarAnnotateSomaticVcfStepPart(AnnotateSomaticVcfStepPart):
Expand Down Expand Up @@ -426,20 +426,19 @@ def _yield_result_files_matched(self, tpl, **kwargs):
Mutect.
"""
for sheet in filter(is_not_background, self.shortcut_sheets):
for sample_pair in sheet.all_sample_pairs:
if (
not sample_pair.tumor_sample.dna_ngs_library
or not sample_pair.normal_sample.dna_ngs_library
):
msg = (
"INFO: sample pair for cancer bio sample {} has is missing primary"
"normal or primary cancer NGS library"
)
print(msg.format(sample_pair.tumor_sample.name), file=sys.stderr)
continue
yield from expand(
tpl, tumor_library=[sample_pair.tumor_sample.dna_ngs_library], **kwargs
)
for bio_entity in sheet.sheet.bio_entities.values():
for bio_sample in bio_entity.bio_samples.values():
if not bio_sample.extra_infos.get("isTumor", False):
continue
for test_sample in bio_sample.test_samples.values():
extraction_type = test_sample.extra_infos.get("extractionType", "unknown")
if extraction_type.lower() != "dna":
if extraction_type == "unknown":
msg = "INFO: sample {} has missing extraction type, ignored"
print(msg.format(test_sample.name), file=sys.stderr)
continue
for ngs_library in test_sample.ngs_libraries.values():
yield from expand(tpl, tumor_library=[ngs_library], **kwargs)

def _yield_result_files_joint(self, tpl, **kwargs):
"""Build output paths from path template and extension list.
Expand Down
92 changes: 56 additions & 36 deletions snappy_pipeline/workflows/somatic_variant_calling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@
germline_resource: '' # Germline variants resource (same as panel of normals)
common_variants: '' # Common germline variants for contamination estimation
extra_arguments: [] # List additional Mutect2 arguments
# Each additional argument xust be in the form:
# Each additional argument must be in the form:
# "--<argument name> <argument value>"
# For example, to filter reads prior to calling & to
# add annotations to the output vcf:
Expand Down Expand Up @@ -387,32 +387,41 @@ def input_function(wildcards):
ngs_mapping = self.parent.sub_workflows["ngs_mapping"]
# Get names of primary libraries of the selected cancer bio sample and the
# corresponding primary normal sample
normal_base_path = (
"output/{mapper}.{normal_library}/out/{mapper}.{normal_library}".format(
normal_library=self.get_normal_lib_name(wildcards), **wildcards
)
)
tumor_base_path = (
"output/{mapper}.{tumor_library}/out/" "{mapper}.{tumor_library}"
).format(**wildcards)
return {
"normal_bam": ngs_mapping(normal_base_path + ".bam"),
"normal_bai": ngs_mapping(normal_base_path + ".bam.bai"),
input_files = {
"tumor_bam": ngs_mapping(tumor_base_path + ".bam"),
"tumor_bai": ngs_mapping(tumor_base_path + ".bam.bai"),
}

normal_library = self.get_normal_lib_name(wildcards)
if normal_library:
normal_base_path = (
"output/{mapper}.{normal_library}/out/{mapper}.{normal_library}".format(
normal_library=normal_library, **wildcards
)
)
input_files.update(
{
"normal_bam": ngs_mapping(normal_base_path + ".bam"),
"normal_bai": ngs_mapping(normal_base_path + ".bam.bai"),
}
)

return input_files

return input_function

def get_normal_lib_name(self, wildcards):
"""Return name of normal (non-cancer) library"""
pair = self.tumor_ngs_library_to_sample_pair[wildcards.tumor_library]
return pair.normal_sample.dna_ngs_library.name
pair = self.tumor_ngs_library_to_sample_pair.get(wildcards.tumor_library, None)
return pair.normal_sample.dna_ngs_library.name if pair else None

def get_tumor_lib_name(self, wildcards):
"""Return name of tumor library"""
pair = self.tumor_ngs_library_to_sample_pair[wildcards.tumor_library]
return pair.tumor_sample.dna_ngs_library.name
pair = self.tumor_ngs_library_to_sample_pair.get(wildcards.tumor_library, None)
return pair.tumor_sample.dna_ngs_library.name if pair else wildcards.tumor_library

def get_output_files(self, action):
"""Return output files that all somatic variant calling sub steps must
Expand Down Expand Up @@ -577,19 +586,30 @@ def _get_input_files_run(self, wildcards):
ngs_mapping = self.parent.sub_workflows["ngs_mapping"]
# Get names of primary libraries of the selected cancer bio sample and the
# corresponding primary normal sample
normal_base_path = "output/{mapper}.{normal_library}/out/{mapper}.{normal_library}".format(
normal_library=self.get_normal_lib_name(wildcards), **wildcards
)
tumor_base_path = (
"output/{mapper}.{tumor_library}/out/" "{mapper}.{tumor_library}"
).format(**wildcards)
return {
"normal_bam": ngs_mapping(normal_base_path + ".bam"),
"normal_bai": ngs_mapping(normal_base_path + ".bam.bai"),
input_files = {
"tumor_bam": ngs_mapping(tumor_base_path + ".bam"),
"tumor_bai": ngs_mapping(tumor_base_path + ".bam.bai"),
}

normal_library = self.get_normal_lib_name(wildcards)
if normal_library:
normal_base_path = (
"output/{mapper}.{normal_library}/out/{mapper}.{normal_library}".format(
normal_library=normal_library, **wildcards
)
)
input_files.update(
{
"normal_bam": ngs_mapping(normal_base_path + ".bam"),
"normal_bai": ngs_mapping(normal_base_path + ".bam.bai"),
}
)

return input_files

def _get_input_files_filter(self, wildcards):
"""Get input files for rule ``filter``.
Expand All @@ -609,9 +629,10 @@ def _get_input_files_filter(self, wildcards):
"stats": base_path + ".raw.vcf.stats",
"f1r2": base_path + ".raw.f1r2_tar.tar.gz",
}
if "contamination" in self.actions:
input_files["table"] = base_path + ".contamination.tbl"
input_files["segments"] = base_path + ".segments.tbl"
if self.get_normal_lib_name(wildcards):
if "contamination" in self.actions:
input_files["table"] = base_path + ".contamination.tbl"
input_files["segments"] = base_path + ".segments.tbl"
return input_files

def _get_input_files_pileup_normal(self, wildcards):
Expand Down Expand Up @@ -1196,20 +1217,19 @@ def _yield_result_files_matched(self, tpl, **kwargs):
Mutect.
"""
for sheet in filter(is_not_background, self.shortcut_sheets):
for sample_pair in sheet.all_sample_pairs:
if (
not sample_pair.tumor_sample.dna_ngs_library
or not sample_pair.normal_sample.dna_ngs_library
):
msg = (
"INFO: sample pair for cancer bio sample {} has is missing primary"
"normal or primary cancer NGS library"
)
print(msg.format(sample_pair.tumor_sample.name), file=sys.stderr)
continue
yield from expand(
tpl, tumor_library=[sample_pair.tumor_sample.dna_ngs_library], **kwargs
)
for bio_entity in sheet.sheet.bio_entities.values():
for bio_sample in bio_entity.bio_samples.values():
if not bio_sample.extra_infos.get("isTumor", False):
continue
for test_sample in bio_sample.test_samples.values():
extraction_type = test_sample.extra_infos.get("extractionType", "unknown")
if extraction_type.lower() != "dna":
if extraction_type == "unknown":
msg = "INFO: sample {} has missing extraction type, ignored"
print(msg.format(test_sample.name), file=sys.stderr)
continue
for ngs_library in test_sample.ngs_libraries.values():
yield from expand(tpl, tumor_library=[ngs_library], **kwargs)

def _yield_result_files_joint(self, tpl, **kwargs):
"""Build output paths from path template and extension list.
Expand Down

0 comments on commit 4f3a629

Please sign in to comment.