Skip to content

Commit

Permalink
Test all callers
Browse files Browse the repository at this point in the history
  • Loading branch information
ericblanc20 committed Oct 28, 2022
1 parent 0f3bb59 commit 63d3a72
Show file tree
Hide file tree
Showing 3 changed files with 206 additions and 98 deletions.
12 changes: 10 additions & 2 deletions snappy_pipeline/workflows/somatic_gene_fusion_calling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,22 @@
__author__ = "Manuel Holtgrewe <manuel.holtgrewe@bih-charite.de>"

#: Names of the gene fusion callers known to this step (kept in alphabetical order)
GENE_FUSION_CALLERS = (
    "arriba",
    "defuse",
    "fusioncatcher",
    "hera",
    "jaffa",
    "pizzly",
    "star_fusion",
)

#: Default configuration for the somatic_gene_fusion_calling step
DEFAULT_CONFIG = r"""
step_config:
somatic_gene_fusion_calling:
path_link_in: "" # OPTIONAL Override data set configuration search paths for FASTQ files
tools: ['fusioncatcher', 'jaffa', 'arriba']
tools: ['fusioncatcher', 'jaffa', 'arriba', 'defuse', 'hera', 'pizzly', 'star_fusion']
fusioncatcher:
data_dir: REQUIRED # REQUIRED
configuration: null # optional
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def minimal_config():
bwa:
path_index: /path/to/bwa/index.fasta
somatic_gene_fusion_calling:
tools: ['arriba', 'fusioncatcher']
tools: ['arriba', 'fusioncatcher', 'jaffa', 'star_fusion', 'defuse', 'hera', 'pizzly']
fusioncatcher:
data_dir: REQUIRED # REQUIRED
pizzly:
Expand Down Expand Up @@ -387,85 +387,101 @@ def test_arriba_step_part_get_resource_usage(somatic_gene_fusion_calling_workflo
def test_somatic_gene_fusion_calling_workflow(somatic_gene_fusion_calling_workflow):
    """Test simple functionality of the workflow.

    Checks that all gene fusion callers plus the ``link_in`` and ``link_out``
    helpers are registered as sub steps, and that ``get_result_files()`` yields
    exactly the expected set of output paths for both RNA libraries.
    """
    # Callers sharing the generic result layout; `arriba` is handled separately below.
    generic_callers = ("defuse", "fusioncatcher", "hera", "jaffa", "pizzly", "star_fusion")
    libraries = ("P001-T1-RNA1-mRNA_seq1", "P002-T2-RNA1-mRNA_seq1")

    # Check created sub steps
    expected = ["arriba", "link_in", "link_out"] + list(generic_callers)
    assert sorted(somatic_gene_fusion_calling_workflow.sub_steps.keys()) == sorted(expected)

    # Check result file construction
    expected = set()
    for library in libraries:
        # Expected for `arriba` - special case: explicit result and log files,
        # each of which has a `.md5` companion file.
        arriba_dir = "output/arriba.{library}".format(library=library)
        arriba_prefix = "arriba.{library}".format(library=library)
        expected.add("{}/out/.done".format(arriba_dir))
        arriba_files = [
            "out/{}.{}".format(arriba_prefix, ext)
            for ext in ("fusions.tsv", "discarded_fusions.tsv.gz")
        ]
        arriba_files += [
            "log/{}.{}".format(arriba_prefix, ext)
            for ext in ("log", "conda_list.txt", "conda_info.txt")
        ]
        arriba_files += [
            "log/{}".format(name)
            for name in ("Log.out", "Log.std.out", "Log.final.out", "SJ.out.tab")
        ]
        for rel_path in arriba_files:
            expected.add("{}/{}".format(arriba_dir, rel_path))
            expected.add("{}/{}.md5".format(arriba_dir, rel_path))

        # Expected for remaining callers: a `.done` marker and a single log file.
        for caller in generic_callers:
            caller_dir = "output/{caller}.{library}".format(caller=caller, library=library)
            expected.add("{}/out/.done".format(caller_dir))
            expected.add("{}/log/snakemake.gene_fusion_calling.log".format(caller_dir))

    actual = set(somatic_gene_fusion_calling_workflow.get_result_files())
    assert actual == expected
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def minimal_config():
path_index: /path/to/bwa/index.fasta
somatic_gene_fusion_calling:
path_link_in: /preprocess
tools: ['fusioncatcher', 'jaffa', 'arriba']
tools: ['arriba', 'fusioncatcher', 'jaffa', 'star_fusion', 'defuse', 'hera', 'pizzly']
fusioncatcher:
data_dir: REQUIRED # REQUIRED
pizzly:
Expand Down Expand Up @@ -385,20 +385,104 @@ def test_arriba_step_part_get_resource_usage(somatic_gene_fusion_calling_workflo
# Tests for SomaticGeneFusionCallingWorkflow -------------------------------------------------------


# def test_somatic_gene_fusion_calling_workflow(somatic_gene_fusion_calling_workflow):
# """Test simple functionality of the workflow"""
# # Check created sub steps
# expected = ["link_out", "mantis"]
# assert list(sorted(somatic_gene_fusion_calling_workflow.sub_steps.keys())) == expected
# # Check result file construction
# expected = [
# "output/mantis.bwa.P001-T1-DNA1-WGS1/out/mantis.bwa.P001-T1-DNA1-WGS1_results.txt",
# "output/mantis.bwa.P001-T1-DNA1-WGS1/out/mantis.bwa.P001-T1-DNA1-WGS1_results.txt.status",
# "output/mantis.bwa.P002-T1-DNA1-WGS1/out/mantis.bwa.P002-T1-DNA1-WGS1_results.txt",
# "output/mantis.bwa.P002-T1-DNA1-WGS1/out/mantis.bwa.P002-T1-DNA1-WGS1_results.txt.status",
# "output/mantis.bwa.P002-T2-DNA1-WGS1/out/mantis.bwa.P002-T2-DNA1-WGS1_results.txt",
# "output/mantis.bwa.P002-T2-DNA1-WGS1/out/mantis.bwa.P002-T2-DNA1-WGS1_results.txt.status",
# ]
# actual = set(somatic_gene_fusion_calling_workflow.get_result_files())
# expected = set(expected)
# assert actual == expected
def test_somatic_gene_fusion_calling_workflow(somatic_gene_fusion_calling_workflow):
    """Test simple functionality of the workflow.

    Checks that all gene fusion callers plus the ``link_in`` and ``link_out``
    helpers are registered as sub steps, and that ``get_result_files()`` yields
    exactly the expected set of output paths for both RNA libraries.
    """
    # Callers sharing the generic result layout; `arriba` is handled separately below.
    generic_callers = ("defuse", "fusioncatcher", "hera", "jaffa", "pizzly", "star_fusion")
    libraries = ("P001-T1-RNA1-mRNA_seq1", "P002-T2-RNA1-mRNA_seq1")

    # Check created sub steps
    expected = ["arriba", "link_in", "link_out"] + list(generic_callers)
    assert sorted(somatic_gene_fusion_calling_workflow.sub_steps.keys()) == sorted(expected)

    # Check result file construction
    expected = set()
    for library in libraries:
        # Expected for `arriba` - special case: explicit result and log files,
        # each of which has a `.md5` companion file.
        arriba_dir = "output/arriba.{library}".format(library=library)
        arriba_prefix = "arriba.{library}".format(library=library)
        expected.add("{}/out/.done".format(arriba_dir))
        arriba_files = [
            "out/{}.{}".format(arriba_prefix, ext)
            for ext in ("fusions.tsv", "discarded_fusions.tsv.gz")
        ]
        arriba_files += [
            "log/{}.{}".format(arriba_prefix, ext)
            for ext in ("log", "conda_list.txt", "conda_info.txt")
        ]
        arriba_files += [
            "log/{}".format(name)
            for name in ("Log.out", "Log.std.out", "Log.final.out", "SJ.out.tab")
        ]
        for rel_path in arriba_files:
            expected.add("{}/{}".format(arriba_dir, rel_path))
            expected.add("{}/{}.md5".format(arriba_dir, rel_path))

        # Expected for remaining callers: a `.done` marker and a single log file.
        for caller in generic_callers:
            caller_dir = "output/{caller}.{library}".format(caller=caller, library=library)
            expected.add("{}/out/.done".format(caller_dir))
            expected.add("{}/log/snakemake.gene_fusion_calling.log".format(caller_dir))

    actual = set(somatic_gene_fusion_calling_workflow.get_result_files())
    assert actual == expected

0 comments on commit 63d3a72

Please sign in to comment.