Skip to content

Commit

Permalink
use FOFN for csvtk concat
Browse files Browse the repository at this point in the history
  • Loading branch information
rpetit3 committed Apr 3, 2024
1 parent b3998be commit 13cc192
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 52 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ description: A full list of Bactopia releases and a description of the changes.
### `Fixed`

- missing schema for clean-yer-reads
- use `--infile-list` with `csvtk concat` to support 10k+ inputs

### `Enhancements to OSS`

Expand Down
51 changes: 0 additions & 51 deletions lib/nf/functions.nf
Original file line number Diff line number Diff line change
Expand Up @@ -107,57 +107,6 @@ def _get_module_schemas(modules) {
return module_schemas
}

def get_resources(profile, max_memory, max_cpus) {
    /*
     * Build a map of resource limits derived from the requested maximums.
     *
     * profile    - accepted for interface compatibility; not used here
     * max_memory - stored unchanged as MAX_MEMORY; the text before the first
     *              space of its string form is stored as MAX_MEMORY_INT
     *              (note: this stays a String, it is not converted to a number)
     * max_cpus   - converted with toInteger() and stored as MAX_CPUS
     *
     * Returns a Map with the keys MAX_MEMORY, MAX_MEMORY_INT, MAX_CPUS,
     * MAX_CPUS_75, MAX_CPUS_50 and MAX_CPUS_1 (in that order).
     */
    def memory_value = max_memory.toString().split(" ")[0]
    def cpu_limit = max_cpus.toInteger()

    Map limits = [
        MAX_MEMORY     : max_memory,
        MAX_MEMORY_INT : memory_value,
        MAX_CPUS       : cpu_limit,
        // Rounded 75% and 50% shares of the CPU limit
        MAX_CPUS_75    : Math.round(cpu_limit * 0.75),
        MAX_CPUS_50    : Math.round(cpu_limit * 0.50),
        MAX_CPUS_1     : 1
    ]
    return limits
}

def _get_max_memory(requested) {
    /*
     * Cap the requested memory at what the system reports as available.
     *
     * The available amount is taken from SysHelper.getAvailMemory().toGiga(),
     * reduced to the text before the first space, parsed as a double and
     * floored to an integer.
     *
     * Returns `requested` unchanged when it fits; otherwise logs a warning
     * and returns the (integer) available amount.
     */
    def avail_text = SysHelper.getAvailMemory().toGiga().toString().split(" ")[0]
    def avail_amount = Math.floor(Double.parseDouble(avail_text)).toInteger()

    // Request fits within the available memory: nothing to adjust
    if (avail_amount >= requested.toInteger()) {
        return requested
    }

    log.warn "Maximum memory (${requested}) was adjusted to fit your system (${avail_amount})"
    return avail_amount
}

def _get_max_cpus(requested) {
    /*
     * Cap the requested CPU count at what the system reports as available.
     *
     * requested - the requested number of CPUs; it is coerced with
     *             toInteger() for the comparison only (matching
     *             _get_max_memory), so a string-valued request such as "8"
     *             is compared numerically.
     *
     * Returns `requested` unchanged when it fits; otherwise logs a warning
     * and returns the value of SysHelper.getAvailCpus().
     */
    def available = SysHelper.getAvailCpus()
    if (available < requested.toInteger()) {
        log.warn "Maximum CPUs (${requested}) was adjusted to fit your system (${available})"
        return available
    }

    return requested
}

def print_efficiency() {
    /*
     * Inform the user how a local Bactopia run will use resources.
     *
     * Only logs when workflow.profile is one of 'standard', 'docker' or
     * 'singularity'. Reads params.max_cpus and SysHelper.getAvailCpus();
     * returns nothing.
     */
    if (['standard', 'docker', 'singularity'].contains(workflow.profile)) {
        // This is a local run on a single machine
        // Declared with `def` so they stay local instead of leaking into the
        // script binding as globals.
        def available = SysHelper.getAvailCpus()
        def tasks = Math.round(available / params.max_cpus)
        log.info """
            Each task will use a maximum of ${params.max_cpus} CPUs out of the available ${available}
            CPUs. At most ${tasks} task(s) will be run at a time, this can affect
            the efficiency of Bactopia. You can use the '-qs' parameter to
            alter the number of tasks to run at a time (e.g. '-qs 2', means
            only 2 tasks or a maximum of ${2 * params.max_cpus} CPUs will be used at once)
            """.stripIndent()
        log.info ""
    }
}

def is_available_workflow(wf) {
if (params.available_workflows['bactopia'].contains(wf)) {
return true
Expand Down
6 changes: 5 additions & 1 deletion modules/nf-core/csvtk/concat/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ process CSVTK_CONCAT {
'quay.io/biocontainers/csvtk:0.27.2--h9ee0642_0' }"

input:
tuple val(meta), path(csv)
tuple val(meta), path(csv, stageAs: 'inputs/*')
val in_format
val out_format

Expand All @@ -32,13 +32,17 @@ process CSVTK_CONCAT {
def out_delimiter = out_format == "tsv" ? "--out-tabs" : (out_format == "csv" ? "" : "--out-delimiter '${out_format}'")
out_extension = out_format == "tsv" ? 'tsv' : 'csv'
"""
# Create a file of files for csvtk
ls inputs/ | awk '{ print "inputs/"\$1 }' > fofn.txt
csvtk \\
concat \\
$options.args \\
--num-cpus $task.cpus \\
${delimiter} \\
${out_delimiter} \\
--out-file ${prefix}.${out_extension} \\
--infile-list fofn.txt
$csv
cat <<-END_VERSIONS > versions.yml
Expand Down

0 comments on commit 13cc192

Please sign in to comment.