From 2e2f1225eda884f23ec6ebaed872c4d748e7c2a6 Mon Sep 17 00:00:00 2001 From: alsmith Date: Fri, 26 Apr 2024 17:38:39 +0100 Subject: [PATCH 1/5] Refactor HeatmapFiles class to include make_heatmaps property in design.py --- seqnado/design.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/seqnado/design.py b/seqnado/design.py index ce278c8b..541e4275 100644 --- a/seqnado/design.py +++ b/seqnado/design.py @@ -941,6 +941,7 @@ def files(self) -> List[str]: class HeatmapFiles(BaseModel): assay: Literal["ChIP", "ATAC", "RNA", "SNP"] + make_heatmaps: bool = False @property def heatmap_files(self) -> List[str]: @@ -952,7 +953,10 @@ def heatmap_files(self) -> List[str]: @computed_field @property def files(self) -> List[str]: - return self.heatmap_files + if self.make_heatmaps: + return self.heatmap_files + else: + return [] class HubFiles(BaseModel): From efa0f0c78cd44baad20f172c4e16d0dcf8acee5c Mon Sep 17 00:00:00 2001 From: alsmith Date: Fri, 26 Apr 2024 17:55:40 +0100 Subject: [PATCH 2/5] Refactor NonRNAOutput class in design.py to remove computed_field decorator --- seqnado/design.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/seqnado/design.py b/seqnado/design.py index 541e4275..5d1e10f1 100644 --- a/seqnado/design.py +++ b/seqnado/design.py @@ -1158,7 +1158,6 @@ def merged_peaks(self): prefix="seqnado_output/peaks/merged/", ) - @computed_field @property def peaks(self) -> List[str]: pcf_samples = PeakCallingFiles( @@ -1225,8 +1224,9 @@ def peaks(self): ip_sample_names = [ s for s in self.sample_names - if any([c not in s for c in self.control_names]) + if not any([c in s for c in self.control_names]) ] + pcf_samples = PeakCallingFiles( assay=self.assay, names=ip_sample_names, @@ -1250,7 +1250,6 @@ def spikeins(self): ) return sif.files - @computed_field @property def files(self) -> List[str]: files = [] From 65bc64ccddf4f038df5bfab60d9e4b408f33717c Mon Sep 17 00:00:00 2001 From: alsmith Date: Fri, 26 Apr 2024 18:31:15 +0100 Subject: [PATCH 3/5] fix: correct query function to provide both the ip and control if requested --- seqnado/design.py | 19 +++++++-- seqnado/workflow/rules/peak_call_chip.smk | 52 +++++++++-------------- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/seqnado/design.py b/seqnado/design.py index 5d1e10f1..774c5878 100644 --- a/seqnado/design.py +++ b/seqnado/design.py @@ -480,7 +480,7 @@ def controls_performed(self) -> List[str]: control.add(f.control_performed) return list(control) - def query(self, sample_name: str) -> FastqSetIP: + def query(self, sample_name: str, full_experiment: bool = False) -> Union[FastqSetIP, Dict[str, FastqSetIP]]: """ Extracts a pair of fastq files from the design. """ @@ -488,18 +488,31 @@ def query(self, sample_name: str) -> FastqSetIP: control_names = set( f.control_fullname for f in self.experiments if f.has_control ) + is_control = False + + experiment_files = dict() if sample_name in ip_names or sample_name in control_names: for experiment in self.experiments: if experiment.ip_set_fullname == sample_name: - return experiment.ip + experiment_files["ip"] = experiment.ip + experiment_files["control"] = experiment.control + elif ( experiment.has_control and experiment.control_fullname == sample_name ): - return experiment.control + is_control = True + experiment_files["ip"] = experiment.ip + experiment_files["control"] = experiment.control else: raise ValueError(f"Could not find sample with name {sample_name}") + + + if full_experiment: + return experiment_files + else: + return experiment_files["ip"] if not is_control else experiment_files["control"] @classmethod def from_fastq_files(cls, fq: List[Union[str, pathlib.Path]], **kwargs): diff --git a/seqnado/workflow/rules/peak_call_chip.smk b/seqnado/workflow/rules/peak_call_chip.smk index 4bdf62e6..f494e36b 100644 --- a/seqnado/workflow/rules/peak_call_chip.smk +++ b/seqnado/workflow/rules/peak_call_chip.smk @@ -25,37 +25,27 @@ def format_macs_options(wildcards, options): return options -def get_control_bam(wildcards): - exp = DESIGN.query(sample_name=f"{wildcards.sample}_{wildcards.treatment}") - if exp: - control = f"seqnado_output/aligned/{wildcards.sample}_{exp.ip_or_control_name}.bam" - else: - control = "UNDEFINED" - return control - - -def get_control_tag(wildcards): - exp = DESIGN.query(sample_name=f"{wildcards.sample}_{wildcards.treatment}") - if not exp: - control = "UNDEFINED" - else: - control = f"seqnado_output/tag_dirs/{wildcards.sample}_{exp.ip_or_control_name}" - return control - - -def get_control_bigwig(wildcards): - exp = DESIGN.query(sample_name=f"{wildcards.sample}_{wildcards.treatment}") - if not exp: - control = "UNDEFINED" - else: - control = f"seqnado_output/bigwigs/deeptools/unscaled/{wildcards.sample}_{exp.ip_or_control_name}.bigWig" - return control +def get_control_file(wildcards, file_type: Literal["bam", "tag", "bigwig"], allow_null=False): + exp = DESIGN.query(sample_name=f"{wildcards.sample}_{wildcards.treatment}", full_experiment=True) + + if not exp["control"] and not allow_null: # if control is not defined, return UNDEFINED. This is to prevent the rule from running + return "UNDEFINED" + elif not exp["control"] and allow_null: # if control is not defined, return empty list + return [] + + match file_type: + case "bam": + return f"seqnado_output/aligned/{exp['control'].name}.bam" + case "tag": + return f"seqnado_output/tag_dirs/{exp['control'].name}" + case "bigwig": + return f"seqnado_output/bigwigs/deeptools/unscaled/{exp['control'].name}.bigWig" rule macs2_with_input: input: treatment="seqnado_output/aligned/{sample}_{treatment}.bam", - control=get_control_bam, + control=lambda wc: get_control_file(wc, file_type="bam", allow_null=False), output: peaks="seqnado_output/peaks/macs/{sample}_{treatment}.bed", params: @@ -78,7 +68,7 @@ rule macs2_with_input: rule macs2_no_input: input: treatment="seqnado_output/aligned/{sample}_{treatment}.bam", - control=lambda wc: [] if get_control_bam(wc) == "UNDEFINED" else get_control_bam(wc), + control=lambda wc: get_control_file(wc, file_type="bam", allow_null=True), output: peaks="seqnado_output/peaks/macs/{sample}_{treatment}.bed", params: @@ -101,7 +91,7 @@ rule macs2_no_input: rule homer_with_input: input: treatment="seqnado_output/tag_dirs/{sample}_{treatment}", - control=get_control_tag, + control=lambda wc: get_control_file(wc, file_type="tag", allow_null=False), output: peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed", log: @@ -123,7 +113,7 @@ rule homer_with_input: rule homer_no_input: input: treatment="seqnado_output/tag_dirs/{sample}_{treatment}", - control=lambda wc: [] if get_control_tag(wc) == "UNDEFINED" else get_control_tag(wc), + control=lambda wc: get_control_file(wc, file_type="tag", allow_null=True), output: peaks="seqnado_output/peaks/homer/{sample}_{treatment}.bed", log: @@ -145,7 +135,7 @@ rule homer_no_input: rule lanceotron_with_input: input: treatment="seqnado_output/bigwigs/deeptools/unscaled/{sample}_{treatment}.bigWig", - control=get_control_bigwig, + control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=False), output: peaks="seqnado_output/peaks/lanceotron/{sample}_{treatment}.bed", log: @@ -170,7 +160,7 @@ rule lanceotron_with_input: rule lanceotron_no_input: input: treatment="seqnado_output/bigwigs/deeptools/unscaled/{sample}_{treatment}.bigWig", - control=lambda wc: [] if get_control_bigwig(wc) == "UNDEFINED" else get_control_bigwig(wc), + control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=False), output: peaks="seqnado_output/peaks/lanceotron/{sample}_{treatment}.bed", log: From 6036f76eceab3c3a7da5471a8142dbe7f55f24a8 Mon Sep 17 00:00:00 2001 From: alsmith Date: Fri, 26 Apr 2024 18:41:37 +0100 Subject: [PATCH 4/5] Refactor control lambda function in peak_call_chip.smk to allow null control files --- seqnado/workflow/rules/peak_call_chip.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqnado/workflow/rules/peak_call_chip.smk b/seqnado/workflow/rules/peak_call_chip.smk index f494e36b..652d021f 100644 --- a/seqnado/workflow/rules/peak_call_chip.smk +++ b/seqnado/workflow/rules/peak_call_chip.smk @@ -160,7 +160,7 @@ rule lanceotron_with_input: rule lanceotron_no_input: input: treatment="seqnado_output/bigwigs/deeptools/unscaled/{sample}_{treatment}.bigWig", - control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=False), + control=lambda wc: get_control_file(wc, file_type="bigwig", allow_null=True), output: peaks="seqnado_output/peaks/lanceotron/{sample}_{treatment}.bed", log: From 686825c54b83398436d5ca81023e8cc9defc52c4 Mon Sep 17 00:00:00 2001 From: alsmith Date: Fri, 26 Apr 2024 18:58:09 +0100 Subject: [PATCH 5/5] Refactor query function in DesignIP class to return IPExperiment object --- seqnado/design.py | 18 ++++++++---------- seqnado/workflow/rules/peak_call_chip.smk | 11 ++++++----- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/seqnado/design.py b/seqnado/design.py index 774c5878..4937b247 100644 --- a/seqnado/design.py +++ b/seqnado/design.py @@ -480,7 +480,7 @@ def controls_performed(self) -> List[str]: control.add(f.control_performed) return list(control) - def query(self, sample_name: str, full_experiment: bool = False) -> Union[FastqSetIP, Dict[str, FastqSetIP]]: + def query(self, sample_name: str, full_experiment: bool = False) -> Union[FastqSetIP, IPExperiment]: """ Extracts a pair of fastq files from the design. """ @@ -490,29 +490,27 @@ def query(self, sample_name: str, full_experiment: bool = False) -> Union[FastqS ) is_control = False - experiment_files = dict() - if sample_name in ip_names or sample_name in control_names: for experiment in self.experiments: if experiment.ip_set_fullname == sample_name: - experiment_files["ip"] = experiment.ip - experiment_files["control"] = experiment.control - + exp = experiment + break elif ( experiment.has_control and experiment.control_fullname == sample_name ): is_control = True - experiment_files["ip"] = experiment.ip - experiment_files["control"] = experiment.control + exp = experiment + break + else: raise ValueError(f"Could not find sample with name {sample_name}") if full_experiment: - return experiment_files + return exp else: - return experiment_files["ip"] if not is_control else experiment_files["control"] + return exp.ip if not is_control else exp.control @classmethod def from_fastq_files(cls, fq: List[Union[str, pathlib.Path]], **kwargs): diff --git a/seqnado/workflow/rules/peak_call_chip.smk b/seqnado/workflow/rules/peak_call_chip.smk index 652d021f..cea624fb 100644 --- a/seqnado/workflow/rules/peak_call_chip.smk +++ b/seqnado/workflow/rules/peak_call_chip.smk @@ -28,18 +28,19 @@ def format_macs_options(wildcards, options): def get_control_file(wildcards, file_type: Literal["bam", "tag", "bigwig"], allow_null=False): exp = DESIGN.query(sample_name=f"{wildcards.sample}_{wildcards.treatment}", full_experiment=True) - if not exp["control"] and not allow_null: # if control is not defined, return UNDEFINED. This is to prevent the rule from running + if not exp.has_control and not allow_null: # if control is not defined, return UNDEFINED. This is to prevent the rule from running return "UNDEFINED" - elif not exp["control"] and allow_null: # if control is not defined, return empty list + elif not exp.has_control and allow_null: # if control is not defined, return empty list return [] match file_type: case "bam": - return f"seqnado_output/aligned/{exp['control'].name}.bam" + fn = f"seqnado_output/aligned/{exp.control_fullname}.bam" case "tag": - return f"seqnado_output/tag_dirs/{exp['control'].name}" + fn = f"seqnado_output/tag_dirs/{exp.control_fullname}" case "bigwig": - return f"seqnado_output/bigwigs/deeptools/unscaled/{exp['control'].name}.bigWig" + fn = f"seqnado_output/bigwigs/deeptools/unscaled/{exp.control_fullname}.bigWig" + return fn rule macs2_with_input: