From c341b3783e78831b7bfa276dd582b949d3631656 Mon Sep 17 00:00:00 2001
From: Shaun Jackman
Date: Mon, 22 Oct 2018 16:36:22 -0700
Subject: [PATCH] Add porechop

---
Reviewer notes (text between the three-dash separator and the diffstat is
commentary only and does not become part of the commit):

* Adds a `porechop` component in four places: a changelog entry, the
  `Porechop` class in reads_quality_control.py, the "porechop" key in
  engine.py mapped to `readsqc.Porechop`, and the new template
  flowcraft/generator/templates/porechop.nf.
* `Porechop.__init__` sets input_type and output_type to "fastq", appends
  a "raw_long_reads" entry to link_end and "long_reads" to link_start, and
  defines one parameter (`long_reads`, default "null") plus directives for
  4 cpus and the quay.io/biocontainers/porechop container.
* The template takes long reads from raw_long_reads_{{pid}} when that
  variable exists in the script binding, otherwise from
  Channel.fromPath(params.long_reads{{param_id}}). It re-emits the same
  (sample_id, fastq_pair) set into {{output_channel}} and emits
  ${sample_id}.fastq.gz into long_reads_{{pid}}; porechop's stdout is
  redirected to ${sample_id}.log.
* NOTE(review): the class docstring lists ``ptype``: pre_assembly, but the
  __init__ added here never assigns a ptype -- confirm where it is set.
* NOTE(review): `long_reads` defaults to the string "null"; confirm what
  Channel.fromPath receives when no raw_long_reads channel is connected
  and the parameter is left at its default.

 changelog.md                                  |  1 +
 .../components/reads_quality_control.py       | 34 +++++++++++++++++++
 flowcraft/generator/engine.py                 |  1 +
 flowcraft/generator/templates/porechop.nf     | 28 +++++++++++++++
 4 files changed, 64 insertions(+)
 create mode 100644 flowcraft/generator/templates/porechop.nf

diff --git a/changelog.md b/changelog.md
index 41ef21ae..00979308 100644
--- a/changelog.md
+++ b/changelog.md
@@ -22,6 +22,7 @@ resolution
 
 - Added component `abyss`.
 - Added component `bandage`.
+- Added component `porechop`.
 - Added component `unicycler`.
 
 ### Minor/Other changes
diff --git a/flowcraft/generator/components/reads_quality_control.py b/flowcraft/generator/components/reads_quality_control.py
index de0426c4..04b1aecd 100644
--- a/flowcraft/generator/components/reads_quality_control.py
+++ b/flowcraft/generator/components/reads_quality_control.py
@@ -433,3 +433,37 @@ def __init__(self, **kwargs):
         self.status_channels = [
             "downsample_fastq"
         ]
+
+class Porechop(Process):
+    """Porechop trims adapters from Oxford Nanopore reads.
+
+    This process is set with:
+
+        - ``input_type``: fastq
+        - ``output_type``: fastq
+        - ``ptype``: pre_assembly
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        self.input_type = "fastq"
+        self.output_type = "fastq"
+
+        self.link_end.append({"link": "raw_long_reads", "alias": "raw_long_reads"})
+        self.link_start.append("long_reads")
+
+        self.params = {
+            "long_reads": {
+                "default": "null",
+                "description": "FASTQ or FASTA file of long reads"
+            },
+        }
+
+        self.directives = {
+            "porechop": {
+                "cpus": 4,
+                "container": "quay.io/biocontainers/porechop",
+                "version": "0.2.3_seqan2.1.1--py36h2d50403_3"
+            }
+        }
diff --git a/flowcraft/generator/engine.py b/flowcraft/generator/engine.py
index 4f509d38..4edc3ede 100644
--- a/flowcraft/generator/engine.py
+++ b/flowcraft/generator/engine.py
@@ -88,6 +88,7 @@
     "momps": typing.Momps,
     "patho_typing": typing.PathoTyping,
     "pilon": ap.Pilon,
+    "porechop": readsqc.Porechop,
     "process_skesa": ap.ProcessSkesa,
     "process_spades": ap.ProcessSpades,
     "progressive_mauve":alignment.ProgressiveMauve,
diff --git a/flowcraft/generator/templates/porechop.nf b/flowcraft/generator/templates/porechop.nf
new file mode 100644
index 00000000..99f3507c
--- /dev/null
+++ b/flowcraft/generator/templates/porechop.nf
@@ -0,0 +1,28 @@
+// True when a raw_long_reads secondary channel is connected to this component.
+has_raw_long_reads_{{pid}} = binding.hasVariable('raw_long_reads_{{pid}}')
+
+process porechop_{{pid}} {
+    {% include "post.txt" ignore missing %}
+
+    publishDir "results/porechop_{{pid}}", pattern: "*.fastq.gz"
+    publishDir "reports/porechop_{{pid}}", pattern: "*.log"
+
+    tag { sample_id }
+
+    input:
+    set sample_id, file(fastq_pair) from {{input_channel}}
+    file raw_long_reads from has_raw_long_reads_{{pid}} ? raw_long_reads_{{pid}} :
+        Channel.fromPath(params.long_reads{{param_id}})
+
+    output:
+    set sample_id, file(fastq_pair) into {{output_channel}}
+    file "${sample_id}.fastq.gz" into long_reads_{{pid}}
+    {% with task_name="porechop" %}
+    {%- include "compiler_channels.txt" ignore missing -%}
+    {% endwith %}
+
+    script:
+    "time porechop -t $task.cpus --format fastq.gz -i ${raw_long_reads} -o ${sample_id}.fastq.gz >${sample_id}.log"
+}
+
+{{ forks }}