From 13cc192930b8c236146ede729fc2abb7ff77ab04 Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Wed, 3 Apr 2024 03:54:11 +0000 Subject: [PATCH] use FOFN for csvtk concat --- CHANGELOG.md | 1 + lib/nf/functions.nf | 51 ---------------------------- modules/nf-core/csvtk/concat/main.nf | 6 +++- 3 files changed, 6 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 122548c4..011689a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ description: A full list of Bactopia releases and a description of the changes. ### `Fixed` - missing schema for clean-yer-reads +- use `--infile-list` with `csvtk concat` to support 10k+ inputs ### `Enhancements to OSS` diff --git a/lib/nf/functions.nf b/lib/nf/functions.nf index c9c1dfbd..7aa33a7a 100644 --- a/lib/nf/functions.nf +++ b/lib/nf/functions.nf @@ -107,57 +107,6 @@ def _get_module_schemas(modules) { return module_schemas } -def get_resources(profile, max_memory, max_cpus) { - /* Adjust memory/cpu requests for standard profile only */ - def Map resources = [:] - resources.MAX_MEMORY = max_memory - resources.MAX_MEMORY_INT = resources.MAX_MEMORY.toString().split(" ")[0] - resources.MAX_CPUS = max_cpus.toInteger() - resources.MAX_CPUS_75 = Math.round(resources.MAX_CPUS * 0.75) - resources.MAX_CPUS_50 = Math.round(resources.MAX_CPUS * 0.50) - resources.MAX_CPUS_1 = 1 - return resources -} - -def _get_max_memory(requested) { - /* Get the maximum available memory for the given system */ - def available = Math.floor(Double.parseDouble(SysHelper.getAvailMemory().toGiga().toString().split(" ")[0])).toInteger() - if (available < requested.toInteger()) { - log.warn "Maximum memory (${requested}) was adjusted to fit your system (${available})" - return available - } - - return requested -} - -def _get_max_cpus(requested) { - /* Get the maximum available cpus for the given system */ - def available = SysHelper.getAvailCpus() - if (available < requested) { - log.warn "Maximum CPUs (${requested}) was adjusted to fit your system (${available})" - return available - } - - return requested -} - -def print_efficiency() { - /* Inform user how local bactiopia run will use resources */ - if (['standard', 'docker', 'singularity'].contains(workflow.profile)) { - // This is a local run on a single machine - available = SysHelper.getAvailCpus() - tasks = Math.round(available / params.max_cpus) - log.info """ - Each task will use a maximum of ${params.max_cpus} CPUs out of the available ${available} - CPUs. At most ${tasks} task(s) will be run at a time, this can affect - the efficiency of Bactopia. You can use the '-qs' parameter to - alter the number of tasks to run at a time (e.g. '-qs 2', means - only 2 tasks or a maximum of ${2 * params.max_cpus} CPUs will be used at once) - """.stripIndent() - log.info "" - } -} - def is_available_workflow(wf) { if (params.available_workflows['bactopia'].contains(wf)) { return true diff --git a/modules/nf-core/csvtk/concat/main.nf b/modules/nf-core/csvtk/concat/main.nf index 8f5996e1..a84cb14d 100644 --- a/modules/nf-core/csvtk/concat/main.nf +++ b/modules/nf-core/csvtk/concat/main.nf @@ -16,7 +16,7 @@ process CSVTK_CONCAT { 'quay.io/biocontainers/csvtk:0.27.2--h9ee0642_0' }" input: - tuple val(meta), path(csv) + tuple val(meta), path(csv, stageAs: 'inputs/*') val in_format val out_format @@ -32,6 +32,9 @@ process CSVTK_CONCAT { def out_delimiter = out_format == "tsv" ? "--out-tabs" : (out_format == "csv" ? "" : "--out-delimiter '${out_format}'") out_extension = out_format == "tsv" ? 'tsv' : 'csv' """ + # Create a file of files for csvtk + ls inputs/ | awk '{ print "inputs/"\$1 }' > fofn.txt + csvtk \\ concat \\ $options.args \\ @@ -39,6 +42,7 @@ process CSVTK_CONCAT { ${delimiter} \\ ${out_delimiter} \\ --out-file ${prefix}.${out_extension} \\ + --infile-list fofn.txt $csv cat <<-END_VERSIONS > versions.yml