Skip to content

Commit

Permalink
Update simulated reads workflow to add fastp and cutadapt.
Browse files Browse the repository at this point in the history
  • Loading branch information
jdidion committed Mar 12, 2018
1 parent 35f4c37 commit a00d85a
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 10 deletions.
1 change: 1 addition & 0 deletions paper/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,4 @@ From the 'containers' directory, run:
* These papers do a nice job of benchmarking trimmers. Consider adding some more benchmarks.
* http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-16-S1-S2
* https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-015-0454-y
* Make machine info collection work on macOS (it currently reads /proc/cpuinfo and /proc/meminfo, which exist only on Linux)
4 changes: 2 additions & 2 deletions paper/containers/tools/tool-names.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ atropos (adapter) Atropos (adapter-match)
atropos (insert) Atropos (insert-match)
seqpurge SeqPurge
skewer Skewer
fastp fastp
cutadapt Cutadapt
fastp fastp
cutadapt Cutadapt
bwa BWA
bwameth bwa-meth
star STAR
Expand Down
52 changes: 44 additions & 8 deletions paper/workflow/simulated.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
* - Reduce params.batchSize
* - If you have all of the software installed locally, you can disable
* Docker/Singularity by commenting out the 'container' directives.
*
* Note: When running the 'local' profile, a fake .jobid file is generated in
* order to satisfy the output file requirement, even though the tasks that
* process .jobid files are not run. Hopefully a future version of Nextflow
* will support conditional output files.
*/

// An absolute path to the container image is required for Singularity but
Expand Down Expand Up @@ -131,6 +136,9 @@ process Atropos {
alignerArgs = "-O 7"
}
"""
if [ "${workflow.profile}" == "local" ]; then
touch .jobid
fi
cat /proc/cpuinfo /proc/meminfo > ${taskId}.machine_info.txt
/usr/bin/time -v -o ${taskId}.timing.txt atropos trim \
--op-order GACQW -T $task.cpus --batch-size $params.batchSize \
Expand Down Expand Up @@ -172,6 +180,9 @@ process Skewer {
script:
taskId = "skewer_${task.cpus}_${err}_q${qcut}"
"""
if [ "${workflow.profile}" == "local" ]; then
touch .jobid
fi
cat /proc/cpuinfo /proc/meminfo > ${taskId}.machine_info.txt
/usr/bin/time -v -o ${taskId}.timing.txt skewer \
-m pe -l $params.minLength -r 0.2 \
Expand Down Expand Up @@ -210,6 +221,9 @@ process SeqPurge {
script:
taskId = "seqpurge_${task.cpus}_${err}_q${qcut}"
"""
if [ "${workflow.profile}" == "local" ]; then
touch .jobid
fi
cat /proc/cpuinfo /proc/meminfo > ${taskId}.machine_info.txt
/usr/bin/time -v -o ${taskId}.timing.txt SeqPurge \
-in1 ${reads[0]} -in2 ${reads[1]} \
Expand Down Expand Up @@ -247,6 +261,9 @@ process AdapterRemoval {
script:
taskId = "adapterremoval_${task.cpus}_${err}_q${qcut}"
"""
if [ "${workflow.profile}" == "local" ]; then
touch .jobid
fi
cat /proc/cpuinfo /proc/meminfo > ${taskId}.machine_info.txt
/usr/bin/time -v -o ${taskId}.timing.txt AdapterRemoval \
--file1 ${reads[0]} --file2 ${reads[1]} \
Expand Down Expand Up @@ -283,13 +300,23 @@ process fastp {

script:
taskId = "fastp_${task.cpus}_${err}_q${qcut}"
cutArgs = null
if ( qcut > 0) {
cutArgs = "--cut_by_quality3 --cut_mean_quality $qcut"
}
else {
cutArgs = ""
}
"""
if [ "${workflow.profile}" == "local" ]; then
touch .jobid
fi
cat /proc/cpuinfo /proc/meminfo > ${taskId}.machine_info.txt
/usr/bin/time -v -o ${taskId}.timing.txt fastp \
-i ${reads[0]} -I ${reads[1]} -o ${taskId}.1.fq.gz -O ${taskId}.2.fq.gz
--adapter_sequence $params.adapter1 --adapter_sequence_r2 $params.adapter2
--thread $task.cpus --cut_by_quality3 --cut_mean_quality $qcut
--length_required $params.minLength --disable_quality_filtering
-i ${reads[0]} -I ${reads[1]} -o ${taskId}.1.fq.gz -O ${taskId}.2.fq.gz \
--adapter_sequence $params.adapter1 --adapter_sequence_r2 $params.adapter2 \
--thread $task.cpus $cutArgs \
--length_required $params.minLength --disable_quality_filtering \
> "${taskId}.report.txt"
"""
}
Expand Down Expand Up @@ -320,11 +347,14 @@ process Cutadapt {
script:
taskId = "cutadapt_${task.cpus}_${err}_q${qcut}"
"""
if [ "${workflow.profile}" == "local" ]; then
touch .jobid
fi
cat /proc/cpuinfo /proc/meminfo > ${taskId}.machine_info.txt
/usr/bin/time -v -o ${taskId}.timing.txt cutadapt \
-j $task.cpus -O 7 -q $qcut --trim-n \
-a $params.adapter1 -A $params.adapter2 -m $params.minLength \
-o ${taskId}.1.fq.gz -p ${taskId}.2.fq.gz ${reads[0]} ${reads[1]}
-o ${taskId}.1.fq.gz -p ${taskId}.2.fq.gz ${reads[0]} ${reads[1]} \
> "${taskId}.report.txt"
"""
}
Expand Down Expand Up @@ -520,7 +550,7 @@ process CreateMachineTable {

input:
val parsedRows from machineParsed.toList()

output:
file "machine_info.txt"

Expand Down Expand Up @@ -582,7 +612,10 @@ process ParseJob {

input:
set val(name), file(jobSummaryFile) from jobSummary


when:
workflow.profile == 'cluster'

output:
stdout jobParsed

Expand All @@ -604,7 +637,10 @@ process ShowJobMemoryUsage {

input:
val parsedJobs from jobParsed.toList()


when:
workflow.profile == 'cluster'

output:
file "job.mem.pickle"
file "job.mem.tex"
Expand Down

0 comments on commit a00d85a

Please sign in to comment.