Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Reinstate snp pipeline #185

Merged
merged 21 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file modified .github/dependabot.yml
100644 → 100755
Empty file.
Empty file modified .github/workflows/build_docs.yml
100644 → 100755
Empty file.
Empty file modified .github/workflows/publish_pypi_release.yml
100644 → 100755
Empty file.
Empty file modified .github/workflows/test_pipelines.yml
100644 → 100755
Empty file.
Empty file modified .gitignore
100644 → 100755
Empty file.
Empty file modified .pre-commit-config.yaml
100644 → 100755
Empty file.
Empty file modified LICENSE
100644 → 100755
Empty file.
Empty file modified MANIFEST.in
100644 → 100755
Empty file.
Empty file modified README.md
100644 → 100755
Empty file.
Empty file modified conftest.py
100644 → 100755
Empty file.
Empty file modified docs/cluster_config.md
100644 → 100755
Empty file.
Empty file modified docs/faq.md
100644 → 100755
Empty file.
Empty file modified docs/index.md
100644 → 100755
Empty file.
Empty file modified docs/installation.md
100644 → 100755
Empty file.
Empty file modified docs/pipeline.md
100644 → 100755
Empty file.
Empty file modified environment.yml
100644 → 100755
Empty file.
Empty file modified environment_minimal.yml
100644 → 100755
Empty file.
Empty file modified mkdocs.yml
100644 → 100755
Empty file.
Empty file modified pyproject.toml
100644 → 100755
Empty file.
Empty file modified seqnado/__init__.py
100644 → 100755
Empty file.
Empty file modified seqnado/cli.py
100644 → 100755
Empty file.
104 changes: 84 additions & 20 deletions seqnado/config.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
template_dir = os.path.join(package_dir, "workflow/config")


# Helper Functions
def get_user_input(prompt, default=None, is_boolean=False, choices=None):
while True:
user_input = (
Expand Down Expand Up @@ -62,7 +61,7 @@ def setup_configuration(assay, genome, template_data):
if genome in genome_values:
genome_dict[genome] = {
"indices": genome_values[genome].get(
"bt2_indices" if assay in ["chip", "atac"] else "star_indices", ""
"star_indices" if assay in ["rna"] else "bt2_indices"
),
"chromosome_sizes": genome_values[genome].get("chromosome_sizes", ""),
"gtf": genome_values[genome].get("gtf", ""),
Expand Down Expand Up @@ -143,19 +142,26 @@ def setup_configuration(assay, genome, template_data):
)

# Make bigwigs
template_data["make_bigwigs"] = get_user_input(
"Do you want to make bigwigs? (yes/no)", default="no", is_boolean=True
)
if template_data["make_bigwigs"]:
template_data["pileup_method"] = get_user_input(
"Pileup method:", default="deeptools", choices=["deeptools", "homer"]
)
template_data["scale"] = get_user_input(
"Scale bigwigs? (yes/no)", default="no", is_boolean=True
)
template_data["make_heatmaps"] = get_user_input(
"Do you want to make heatmaps? (yes/no)", default="no", is_boolean=True
if assay not in ["snp"]:
template_data["make_bigwigs"] = get_user_input(
"Do you want to make bigwigs? (yes/no)", default="no", is_boolean=True
)
if template_data["make_bigwigs"]:
template_data["pileup_method"] = get_user_input(
"Pileup method:",
default="deeptools",
choices=["deeptools", "homer"],
)
template_data["scale"] = get_user_input(
"Scale bigwigs? (yes/no)", default="no", is_boolean=True
)
template_data["make_heatmaps"] = get_user_input(
"Do you want to make heatmaps? (yes/no)", default="no", is_boolean=True
)
else:
template_data["pileup_method"] = "False"
template_data["scale"] = "False"
template_data["make_heatmaps"] = "False"

# Call peaks
if assay in ["chip", "atac"]:
Expand Down Expand Up @@ -196,15 +202,47 @@ def setup_configuration(assay, genome, template_data):
else "False"
)

# SNP options
template_data["call_snps"] = (
get_user_input("Call SNPs? (yes/no)", default="no", is_boolean=True)
if assay == "snp"
else "False"
)
if assay == "snp" and template_data["call_snps"]:

template_data["snp_calling_method"] = get_user_input(
"SNP caller:",
default="bcftools",
choices=["bcftools", "deepvariant"],
)

template_data["fasta"] = get_user_input(
"Path to reference fasta:", default="path/to/reference.fasta"
)

template_data["fasta_index"] = get_user_input(
"Path to reference fasta index:", default="path/to/reference.fasta.fai"
)

template_data["snp_database"] = get_user_input(
"Path to SNP database:",
default="path/to/snp_database",
)
else:
template_data["snp_calling_method"] = "False"
template_data["fasta"] = "False"
template_data["fasta_index"] = "False"
template_data["snp_database"] = "False"

# Make UCSC hub
template_data["make_ucsc_hub"] = get_user_input(
"Do you want to make a UCSC hub? (yes/no)", default="no", is_boolean=True
)

template_data["UCSC_hub_directory"] = (
get_user_input("UCSC hub directory:", default="/path/to/ucsc_hub/")
get_user_input("UCSC hub directory:", default="seqnado_output/hub/")
if template_data["make_ucsc_hub"]
else "."
else "seqnado_output/hub/"
)
template_data["email"] = (
get_user_input("What is your email address?", default=f"{username}@example.com")
Expand All @@ -218,7 +256,13 @@ def setup_configuration(assay, genome, template_data):
)

template_data["options"] = (
TOOL_OPTIONS if assay in ["chip", "atac"] else TOOL_OPTIONS_RNA
TOOL_OPTIONS
if assay in ["chip", "atac"]
else (
TOOL_OPTIONS_RNA
if assay == "rna"
else TOOL_OPTIONS_SNP if assay == "snp" else ""
)
)


Expand Down Expand Up @@ -260,8 +304,8 @@ def setup_configuration(assay, genome, template_data):
stringency: stringent

heatmap:
options:
colormap: RdYlBu_r
options: -b 1000 -m 5000 -a 1000
colormap: RdYlBu_r
"""

TOOL_OPTIONS_RNA = """
Expand Down Expand Up @@ -295,8 +339,28 @@ def setup_configuration(assay, genome, template_data):
bamcoverage: -bs 1 --normalizeUsing CPM

heatmap:
options: -b 1000 -m 5000 -a 1000
colormap: RdYlBu_r
"""


TOOL_OPTIONS_SNP = """
trim_galore:
threads: 8
options: --2colour 20

bowtie2:
threads: 8
options:

picard:
threads: 8
options:
colormap: RdYlBu_r

bcftools:
threads: 16
options:

"""


Expand Down
Empty file modified seqnado/data/logo.txt
100644 → 100755
Empty file.
109 changes: 95 additions & 14 deletions seqnado/design.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -480,27 +480,45 @@ def controls_performed(self) -> List[str]:
control.add(f.control_performed)
return list(control)

def query(self, sample_name: str) -> FastqSetIP:
def query(
self, sample_name: str, full_experiment: bool = False
) -> Union[FastqSetIP, Dict[str, FastqSetIP]]:
"""
Extracts a pair of fastq files from the design.
"""
ip_names = set(f.ip_set_fullname for f in self.experiments)
control_names = set(
f.control_fullname for f in self.experiments if f.has_control
)
is_control = False

experiment_files = dict()

if sample_name in ip_names or sample_name in control_names:
for experiment in self.experiments:
if experiment.ip_set_fullname == sample_name:
return experiment.ip
experiment_files["ip"] = experiment.ip
experiment_files["control"] = experiment.control

elif (
experiment.has_control
and experiment.control_fullname == sample_name
):
return experiment.control
is_control = True
experiment_files["ip"] = experiment.ip
experiment_files["control"] = experiment.control
else:
raise ValueError(f"Could not find sample with name {sample_name}")

if full_experiment:
return experiment_files
else:
return (
experiment_files["ip"]
if not is_control
else experiment_files["control"]
)

@classmethod
def from_fastq_files(cls, fq: List[Union[str, pathlib.Path]], **kwargs):
"""
Expand Down Expand Up @@ -718,7 +736,7 @@ def from_design(
subset_value: Optional[List[str]] = None,
include_controls: bool = False,
):

if isinstance(design, Design):
df = (
design.to_dataframe()
Expand All @@ -745,13 +763,11 @@ def from_design(
)
df = pd.concat([df_ip, df_control])


if subset_value:
df = df.query(f"{subset_column} in {subset_value}")

samples = df.index.tolist()


reference_sample = reference_sample or df.index[0]

return cls(
Expand Down Expand Up @@ -863,7 +879,13 @@ class BigWigFiles(BaseModel):
assay: Literal["ChIP", "ATAC", "RNA", "SNP"]
names: List[str]
pileup_method: Union[
Literal["deeptools", "homer"], List[Literal["deeptools", "homer"]]
Literal["deeptools", "homer", False],
List[
Literal[
"deeptools",
"homer",
]
],
] = None
make_bigwigs: bool = False
scale_method: Optional[Literal["cpm", "rpkm", "spikein", "csaw", "merged"]] = None
Expand All @@ -875,7 +897,9 @@ def model_post_init(self, __context: Any) -> None:
self.pileup_method = [self.pileup_method]

if self.include_unscaled and not self.scale_method:
self.scale_method = ["unscaled",]
self.scale_method = [
"unscaled",
]
elif self.include_unscaled and self.scale_method:
self.scale_method = ["unscaled", self.scale_method]
else:
Expand Down Expand Up @@ -941,6 +965,7 @@ def files(self) -> List[str]:

class HeatmapFiles(BaseModel):
assay: Literal["ChIP", "ATAC", "RNA", "SNP"]
make_heatmaps: bool = False

@property
def heatmap_files(self) -> List[str]:
Expand All @@ -952,7 +977,10 @@ def heatmap_files(self) -> List[str]:
@computed_field
@property
def files(self) -> List[str]:
return self.heatmap_files
if self.make_heatmaps:
return self.heatmap_files
else:
return []


class HubFiles(BaseModel):
Expand Down Expand Up @@ -1006,9 +1034,11 @@ class Output(BaseModel):
sample_names: List[str]

make_bigwigs: bool = False
pileup_method: Optional[
Union[Literal["deeptools", "homer"], List[Literal["deeptools", "homer"]]]
pileup_method: Union[
Literal["deeptools", "homer", False],
List[Literal["deeptools", "homer"]],
] = None

scale_method: Optional[Literal["cpm", "rpkm", "spikein", "csaw"]] = None

make_heatmaps: bool = False
Expand Down Expand Up @@ -1154,7 +1184,6 @@ def merged_peaks(self):
prefix="seqnado_output/peaks/merged/",
)

@computed_field
@property
def peaks(self) -> List[str]:
pcf_samples = PeakCallingFiles(
Expand Down Expand Up @@ -1221,8 +1250,9 @@ def peaks(self):
ip_sample_names = [
s
for s in self.sample_names
if any([c not in s for c in self.control_names])
if not any([c in s for c in self.control_names])
]

pcf_samples = PeakCallingFiles(
assay=self.assay,
names=ip_sample_names,
Expand All @@ -1246,7 +1276,6 @@ def spikeins(self):
)
return sif.files

@computed_field
@property
def files(self) -> List[str]:
files = []
Expand All @@ -1270,3 +1299,55 @@ def files(self) -> List[str]:
files.extend(file_list)

return files


class SNPOutput(Output):
    """Collect the output file targets for the SNP assay.

    Extends ``Output`` with the SNP-specific switches and exposes, through
    ``files``, the flat list of paths built from the QC files, the variant
    call files and the design file.
    """

    # Restricts this model to the SNP assay.
    assay: Literal["SNP"]
    # When False, no variant-call targets are produced (see ``snp_files``).
    call_snps: bool = False
    sample_names: List[str]
    make_ucsc_hub: bool = False
    # One caller name, a list of caller names, or False/None when unset.
    # NOTE(review): defaults to None even though ``snp_files`` passes it
    # straight to ``expand`` when ``call_snps`` is True -- presumably the
    # config always sets a caller in that case; confirm against the caller.
    snp_calling_method: Optional[
        Union[
            Literal["bcftools", "deepvariant", False],
            List[Literal["bcftools", "deepvariant"]],
        ]
    ] = None

    @property
    def design(self):
        """Return the design file path wrapped in a single-element list."""
        return ["seqnado_output/design.csv"]

    @property
    def snp_files(self) -> List[str]:
        """Return the VCF targets for every sample/method combination.

        Returns an empty list when ``call_snps`` is False.
        """
        if self.call_snps:
            return expand(
                "seqnado_output/variant/{method}/{sample}.vcf.gz",
                sample=self.sample_names,
                method=self.snp_calling_method,
            )
        else:
            return []

    @computed_field
    @property
    def files(self) -> List[str]:
        """Return every target path for this run as one flat list of strings.

        Order: QC files first, then the variant call files, then the design
        file.
        """
        files = []
        # ``fastq_screen`` and ``library_complexity`` are not declared on this
        # class; presumably inherited from ``Output``.
        files.extend(
            QCFiles(
                assay=self.assay,
                fastq_screen=self.fastq_screen,
                library_complexity=self.library_complexity,
            ).files
        )

        # ``snp_files`` is already empty when ``call_snps`` is False, so this
        # loop is the only place the variant targets need to be added.
        # Appending ``self.snp_files`` again afterwards would insert the whole
        # list as one nested element and duplicate the targets.
        for file_list in (
            self.snp_files,
            self.design,
        ):
            if file_list:
                files.extend(file_list)

        return files
Empty file modified seqnado/helpers.py
100644 → 100755
Empty file.
6 changes: 5 additions & 1 deletion seqnado/workflow/config/config.yaml.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ remove_blacklist: "{{remove_blacklist}}"
blacklist: "{{blacklist}}"

remove_pcr_duplicates_method: "{{remove_pcr_duplicates_method}}"
library_complexity: "{{library_complexity}}"

shift_atac_reads: "{{shift_atac_reads}}"

Expand All @@ -44,6 +43,11 @@ salmon_index: "{{salmon_index}}"

run_deseq2: "{{run_deseq2}}"

call_snps: "{{call_snps}}"
snp_calling_method: "{{snp_calling_method}}"
fasta: "{{fasta}}"
fasta_index: "{{fasta_index}}"
snp_database: "{{snp_database}}"

make_ucsc_hub: "{{make_ucsc_hub}}"
ucsc_hub_details:
Expand Down
Empty file modified seqnado/workflow/envs/environment.yml
100644 → 100755
Empty file.
Empty file modified seqnado/workflow/envs/profiles/profile_singularity/config.yaml
100644 → 100755
Empty file.
Empty file.
Empty file modified seqnado/workflow/envs/profiles/profile_test/config.v8+.yaml
100644 → 100755
Empty file.
Empty file modified seqnado/workflow/rules/align.smk
100644 → 100755
Empty file.
Empty file modified seqnado/workflow/rules/align_rna.smk
100644 → 100755
Empty file.
Empty file modified seqnado/workflow/rules/alignment_counts.smk
100644 → 100755
Empty file.
Empty file modified seqnado/workflow/rules/alignment_post_processing.smk
100644 → 100755
Empty file.
Loading
Loading