Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,15 @@

## Changes in upcoming release (`dev` branch)

### Features

- Added a new `clearInput` parameter to components that change their input.
The aim of this option is to allow the controlled removal of temporary files,
which is particularly useful in very large workflows.

### Components changes

Updated images for components `mash_dist`, `mash_screen` and
- Updated images for components `mash_dist`, `mash_screen` and
`mapping_patlas`.

### New components
Expand All @@ -14,6 +20,8 @@ Updated images for components `mash_dist`, `mash_screen` and

- Added `--export-directives` option to `build` mode to export component's
directives in JSON format to standard output.
- Added more date information in `inspect` mode, including the year and the
locale of the executing system.

## 1.3.0

Expand Down
35 changes: 32 additions & 3 deletions flowcraft/generator/components/assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@ def __init__(self, **kwargs):
"If 'auto' the SPAdes k-mer lengths will be determined "
"from the maximum read length of each assembly. If "
"'default', SPAdes will use the default k-mer lengths. "
},
"clearInput": {
"default": "false",
"description":
"Permanently removes temporary input files. This option "
"is only useful to remove temporary files in large "
"workflows and prevents nextflow's resume functionality. "
"Use with caution."
}
}

Expand Down Expand Up @@ -81,6 +89,18 @@ def __init__(self, **kwargs):
"scratch": "true"
}}

self.params = {
"clearInput": {
"default": "false",
"description":
"Permanently removes temporary input files. This option "
"is only useful to remove temporary files in large "
"workflows and prevents nextflow's resume functionality. "
"Use with caution."
}
}


class ViralAssembly(Process):
"""
Process to assemble viral genomes, based on SPAdes and megahit
Expand All @@ -95,7 +115,8 @@ def __init__(self, **kwargs):

self.dependencies = ["integrity_coverage"]

self.status_channels = ["va_spades" , "va_megahit", "report_viral_assembly"]
self.status_channels = ["va_spades", "va_megahit",
"report_viral_assembly"]

self.link_end.append({"link": "SIDE_max_len", "alias": "SIDE_max_len"})

Expand All @@ -105,7 +126,7 @@ def __init__(self, **kwargs):
"container": "flowcraft/viral_assembly",
"version": "0.1-1",
"scratch": "true"
},"va_megahit": {
}, "va_megahit": {
"cpus": 4,
"memory": "{ 5.GB * task.attempt }",
"container": "flowcraft/viral_assembly",
Expand Down Expand Up @@ -146,5 +167,13 @@ def __init__(self, **kwargs):
"from the maximum read length of each assembly. If "
"'default', megahit will use the default k-mer lengths. "
"(default: $params.megahitKmers)"
},
"clearInput": {
"default": "false",
"description":
"Permanently removes temporary input files. This option "
"is only useful to remove temporary files in large "
"workflows and prevents nextflow's resume functionality. "
"Use with caution."
}
}
}
11 changes: 11 additions & 0 deletions flowcraft/generator/components/assembly_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,17 @@ def __init__(self, **kwargs):
self.link_end.append({"link": "SIDE_BpCoverage",
"alias": "SIDE_BpCoverage"})

self.params = {
"clearInput": {
"default": "false",
"description":
"Permanently removes temporary input files. This option "
"is only useful to remove temporary files in large "
"workflows and prevents nextflow's resume functionality. "
"Use with caution."
}
}

self.directives = {
"pilon": {
"cpus": 4,
Expand Down
20 changes: 19 additions & 1 deletion flowcraft/generator/components/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@ def __init__(self, **kwargs):
"default": "null",
"description": "Specifies the reference indexes to be provided "
"to bowtie2."
},
"clearInput": {
"default": "false",
"description":
"Permanently removes temporary input files. This option "
"is only useful to remove temporary files in large "
"workflows and prevents nextflow's resume functionality. "
"Use with caution."
}
}

Expand All @@ -55,8 +63,10 @@ def __init__(self, **kwargs):
"report_bowtie"
]


class Retrieve_mapped(Process):
"""Samtools process to to align short paired-end sequencing reads to long reference sequences
"""Samtools process to to align short paired-end sequencing reads to
long reference sequences

This process is set with:

Expand All @@ -74,6 +84,14 @@ def __init__(self, **kwargs):
self.output_type = "fastq"

self.params = {
"clearInput": {
"default": "false",
"description":
"Permanently removes temporary input files. This option "
"is only useful to remove temporary files in large "
"workflows and prevents nextflow's resume functionality. "
"Use with caution."
}
}

self.dependencies = ["bowtie"]
Expand Down
33 changes: 33 additions & 0 deletions flowcraft/generator/components/metagenomics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
except ImportError:
from flowcraft.generator.process import Process


class Kraken(Process):
"""kraken process template interface

Expand Down Expand Up @@ -80,6 +81,14 @@ def __init__(self, **kwargs):
"default": 0.9,
"description": "probability threshold for EM final classification."
"Default: 0.9"
},
"clearInput": {
"default": "false",
"description":
"Permanently removes temporary input files. This option "
"is only useful to remove temporary files in large "
"workflows and prevents nextflow's resume functionality. "
"Use with caution."
}
}

Expand Down Expand Up @@ -129,6 +138,14 @@ def __init__(self, **kwargs):
"from the maximum read length of each assembly. If "
"'default', megahit will use the default k-mer lengths. "
"(default: $params.megahitKmers)"
},
"clearInput": {
"default": "false",
"description":
"Permanently removes temporary input files. This option "
"is only useful to remove temporary files in large "
"workflows and prevents nextflow's resume functionality. "
"Use with caution."
}
}

Expand Down Expand Up @@ -173,6 +190,14 @@ def __init__(self, **kwargs):
"from the maximum read length of each assembly. If "
"'default', metaSPAdes will use the default k-mer lengths. "
"(default: $params.metaspadesKmers)"
},
"clearInput": {
"default": "false",
"description":
"Permanently removes temporary input files. This option "
"is only useful to remove temporary files in large "
"workflows and prevents nextflow's resume functionality. "
"Use with caution."
}
}

Expand Down Expand Up @@ -245,6 +270,14 @@ def __init__(self, **kwargs):
"default": "'/index_hg19/hg19'",
"description": "Specifies the reference indexes to be provided "
"to bowtie2."
},
"clearInput": {
"default": "false",
"description":
"Permanently removes temporary input files. This option "
"is only useful to remove temporary files in large "
"workflows and prevents nextflow's resume functionality. "
"Use with caution."
}
}

Expand Down
41 changes: 37 additions & 4 deletions flowcraft/generator/components/reads_quality_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,14 @@ def __init__(self, **kwargs):
"default": "55",
"description":
"Drop the read if it is below a specified length "
},
"clearInput": {
"default": "false",
"description":
"Permanently removes temporary input files. This option "
"is only useful to remove temporary files in large "
"workflows and prevents nextflow's resume functionality. "
"Use with caution."
}
}

Expand Down Expand Up @@ -292,6 +300,14 @@ def __init__(self, **kwargs):
"default": "55",
"description":
"Drop the read if it is below a specified length."
},
"clearInput": {
"default": "false",
"description":
"Permanently removes temporary input files. This option "
"is only useful to remove temporary files in large "
"workflows and prevents nextflow's resume functionality. "
"Use with caution."
}
}

Expand Down Expand Up @@ -334,9 +350,18 @@ def __init__(self, **kwargs):
"default": "'A 50%; T 50%; N 50%'",
"description":
"Pattern to filter the reads. Please separate parameter"
"values with a space and separate new parameter sets with semicolon (;)."
"Parameters are defined by two values: the pattern (any combination of the"
"letters ATCGN), and the number of repeats or percentage of occurence."
"values with a space and separate new parameter sets with"
" semicolon (;). Parameters are defined by two values: "
"the pattern (any combination of the letters ATCGN), and "
"the number of repeats or percentage of occurence."
},
"clearInput": {
"default": "false",
"description":
"Permanently removes temporary input files. This option "
"is only useful to remove temporary files in large "
"workflows and prevents nextflow's resume functionality. "
"Use with caution."
}
}

Expand Down Expand Up @@ -381,6 +406,14 @@ def __init__(self, **kwargs):
"description":
"Maximum estimated depth coverage allowed. FastQ with "
"higher estimated depth will be subsampled to this value."
},
"clearInput": {
"default": "false",
"description":
"Permanently removes temporary input files. This option "
"is only useful to remove temporary files in large "
"workflows and prevents nextflow's resume functionality. "
"Use with caution."
}
}

Expand All @@ -393,4 +426,4 @@ def __init__(self, **kwargs):

self.status_channels = [
"downsample_fastq"
]
]
11 changes: 11 additions & 0 deletions flowcraft/generator/components/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,17 @@ def __init__(self, **kwargs):

self.link_end.append({"link": "__fastq", "alias": "_LAST_fastq"})

self.params = {
"clearInput": {
"default": "false",
"description":
"Permanently removes temporary input files. This option "
"is only useful to remove temporary files in large "
"workflows and prevents nextflow's resume functionality. "
"Use with caution."
}
}

self.directives = {
"momps": {
"cpus": 3,
Expand Down
13 changes: 11 additions & 2 deletions flowcraft/generator/inspect.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
import os
import sys
import time
import curses
import signal
import locale
Expand Down Expand Up @@ -1378,6 +1379,13 @@ def _send_status_info(self, run_id):
general_details = self._prepare_general_details()
status_data = self._prepare_run_status_data()

# Add current year to start and stop dates
time_start = "{} {}".format(time.strftime("%Y"), self.time_start)
time_stop = "{} {}".format(time.strftime("%Y"), self.time_stop) \
if self.time_stop else "-"
# Get enconding for proper parsing of time
time_locale = locale.getlocale()[0]

status_json = {
"generalOverview": overview_data,
"generalDetails": general_details,
Expand All @@ -1386,8 +1394,9 @@ def _send_status_info(self, run_id):
"processInfo": self._convert_process_dict(),
"processTags": self.process_tags,
"runStatus": status_data,
"timeStart": str(self.time_start),
"timeStop": str(self.time_stop) if self.time_stop else "-",
"timeStart": time_start,
"timeStop": time_stop,
"timeLocale": time_locale,
"processes": list(self.processes)
}

Expand Down
20 changes: 19 additions & 1 deletion flowcraft/generator/templates/bowtie.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ if (params.index{{ param_id }} == null && params.reference{{ param_id }} == null
exit 1, "Provide only an index OR a reference fasta file."
}

clear = params.clearInput{{ param_id }} ? "true" : "false"
checkpointClear_{{ pid }} = Channel.value(clear)

if (params.reference{{ param_id }}){

Expand Down Expand Up @@ -61,7 +63,23 @@ process bowtie_{{ pid }} {

script:
"""
bowtie2 -x $index -1 ${fastq_pair[0]} -2 ${fastq_pair[1]} -p $task.cpus 1> ${sample_id}.bam 2> ${sample_id}_bowtie2.log
{
bowtie2 -x $index -1 ${fastq_pair[0]} -2 ${fastq_pair[1]} -p $task.cpus 1> ${sample_id}.bam 2> ${sample_id}_bowtie2.log

if [ "$clear" = "true" ];
then
work_regex=".*/work/.{2}/.{30}/.*"
file_source1=\$(readlink -f \$(pwd)/${fastq_pair[0]})
file_source2=\$(readlink -f \$(pwd)/${fastq_pair[1]})
if [[ "\$file_source1" =~ \$work_regex ]]; then
rm \$file_source1 \$file_source2
fi
fi

echo pass > .status
} || {
echo fail > .status
}
"""
}

Expand Down
2 changes: 1 addition & 1 deletion flowcraft/generator/templates/downsample_fastq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ IN_genome_size_{{ pid }} = Channel.value(params.genomeSize{{ param_id }})
IN_depth_{{ pid }} = Channel.value(params.depth{{ param_id }})
.map{it -> it.toString().isNumber() ? it : exit(1, "The depth parameter must be a number or a float. Provided value: '${params.depth{{ param_id }}}'")}

clear = params.clearAtCheckpoint ? "true" : "false"
clear = params.clearInput{{ param_id }} ? "true" : "false"
checkpointClear_{{ pid }} = Channel.value(clear)

process downsample_fastq_{{ pid }} {
Expand Down
Loading