From 67201d11304745c2a689dadc4b415ad25da9ef79 Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Fri, 7 Apr 2023 05:09:57 +0000 Subject: [PATCH] get more bactopia tools working --- conf/workflows.config | 2 +- lib/nf/bactopia.nf | 17 ++-- modules/nf-core/bracken/main.nf | 138 ++++++++++++++++++++++++++ modules/nf-core/bracken/meta.yml | 60 +++++++++++ modules/nf-core/bracken/params.config | 21 ++++ modules/nf-core/bracken/params.json | 117 ++++++++++++++++++++++ modules/nf-core/busco/main.nf | 50 +++++----- modules/nf-core/busco/params.config | 1 + modules/nf-core/busco/params.json | 2 +- modules/nf-core/kraken2/main.nf | 16 ++- modules/nf-core/rgi/main/main.nf | 7 +- subworkflows/local/agrvate/main.nf | 1 - subworkflows/local/bracken/main.nf | 16 +-- subworkflows/local/busco/main.nf | 7 +- subworkflows/local/kraken2/main.nf | 6 +- workflows/bactopia-tools.nf | 3 +- 16 files changed, 399 insertions(+), 65 deletions(-) create mode 100644 modules/nf-core/bracken/main.nf create mode 100644 modules/nf-core/bracken/meta.yml create mode 100644 modules/nf-core/bracken/params.config create mode 100644 modules/nf-core/bracken/params.json diff --git a/conf/workflows.config b/conf/workflows.config index 006009ba..f4cef6a8 100644 --- a/conf/workflows.config +++ b/conf/workflows.config @@ -172,7 +172,7 @@ params { 'bracken' { description = "Taxonomic classification ans species abundance estimation of sequence reads" ext = "fastq" - path = "modules/local/teton/kraken2_bracken" + path = "modules/nf-core/bracken" } 'busco' { description = "Assembly completeness based on evolutionarily informed expectations" diff --git a/lib/nf/bactopia.nf b/lib/nf/bactopia.nf index 5d526a82..69b87e69 100644 --- a/lib/nf/bactopia.nf +++ b/lib/nf/bactopia.nf @@ -167,25 +167,24 @@ def process_fofn(line, genome_size, species) { def process_accessions(line, genome_size, species) { /* Parse line and determine if single end or paired reads*/ def meta = [:] - accession = line[0] - if 
(accession.startsWith('GCF') || accession.startsWith('GCA')) { + if (line.accession.startsWith('GCF') || line.accession.startsWith('GCA')) { - meta.id = accession.split(/\./)[0] + meta.id = line.accession.split(/\./)[0] meta.runtype = "assembly_accession" meta.genome_size = genome_size meta.species = species return tuple(meta, [params.empty_r1], [params.empty_r2], file(params.empty_extra)) - } else if (accession.startsWith('DRX') || accession.startsWith('ERX') || accession.startsWith('SRX')) { - meta.id = accession - meta.runtype = line[1] == 'ont' ? "sra_accession_ont" : "sra_accession" + } else if (line.accession.startsWith('DRX') || line.accession.startsWith('ERX') || line.accession.startsWith('SRX')) { + meta.id = line.accession + meta.runtype = line.runtype == 'ont' ? "sra_accession_ont" : "sra_accession" // If genome_size is provided, use it, otherwise use the genome_size from the FOFN - meta.genome_size = genome_size > 0 ? genome_size : line[3] + meta.genome_size = genome_size > 0 ? genome_size : line.genome_size // If species is provided, use it, otherwise use the species from the FOFN - meta.species = species ? species : line[2] + meta.species = species ? species : line.species } else { - log.error("Invalid accession: ${accession} is not an accepted accession type. Accessions must be Assembly (GCF_*, GCA*) or Exeriment (DRX*, ERX*, SRX*) accessions. Please correct to continue.\n\nYou can use 'bactopia search' to convert BioProject, BioSample, or Run accessions into an Experiment accession.") + log.error("Invalid accession: ${line.accession} is not an accepted accession type. Accessions must be Assembly (GCF_*, GCA*) or Exeriment (DRX*, ERX*, SRX*) accessions. 
Please correct to continue.\n\nYou can use 'bactopia search' to convert BioProject, BioSample, or Run accessions into an Experiment accession.") exit 1 } return tuple(meta, [params.empty_r1], [params.empty_r2], file(params.empty_extra)) @@ -218,7 +217,7 @@ def create_input_channel(runtype, genome_size, species) { .map { row -> process_fofn(row, genome_size, species) } } else if (runtype == "is_accessions") { return Channel.fromPath( params.accessions ) - .splitCsv(strip: true, sep: '\t') + .splitCsv(header:true, strip: true, sep: '\t') .map { row -> process_accessions(row, genome_size, species) } } else if (runtype == "is_accession") { return Channel.fromList([process_accession(params.accession, genome_size, species)]) diff --git a/modules/nf-core/bracken/main.nf b/modules/nf-core/bracken/main.nf new file mode 100644 index 00000000..08dd7545 --- /dev/null +++ b/modules/nf-core/bracken/main.nf @@ -0,0 +1,138 @@ +// Import generic module functions +include { get_resources; initOptions; saveFiles } from '../../../lib/nf/functions' +RESOURCES = get_resources(workflow.profile, params.max_memory, params.max_cpus) +options = initOptions(params.containsKey("options") ? params.options : [:], 'bracken') +options.btype = options.btype ?: "tools" +conda_tools = "bioconda::bactopia-teton=1.0.0" +conda_name = conda_tools.replace("=", "-").replace(":", "-").replace(" ", "-") +conda_env = file("${params.condadir}/${conda_name}").exists() ? "${params.condadir}/${conda_name}" : conda_tools + +process BRACKEN { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? conda_env : null) + container "${ workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bactopia-teton:1.0.0--hdfd78af_0' : + 'quay.io/biocontainers/bactopia-teton:1.0.0--hdfd78af_0' }" + + input: + tuple val(meta), path(reads) + path db + + output: + tuple val(meta), path("${prefix}.bracken.tsv") , emit: tsv + tuple val(meta), path('*.classified*') , emit: classified + tuple val(meta), path('*.unclassified*') , emit: unclassified + tuple val(meta), path("${prefix}.kraken2.report.txt"), emit: kraken2_report + tuple val(meta), path("${prefix}.bracken.report.txt"), emit: bracken_report + tuple val(meta), path("*.abundances.txt") , emit: abundances + tuple val(meta), path("*.krona.html") , emit: krona, optional: true + path "*.{log,err}" , emit: logs, optional: true + path ".command.*" , emit: nf_logs + path "versions.yml", emit: versions + + script: + prefix = options.suffix ? "${options.suffix}" : "${meta.id}" + def paired = meta.single_end ? "" : "--paired" + classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq" + unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq" + def is_tarball = db.getName().endsWith(".tar.gz") ? 
true : false + def BRACKEN_VERSION = "2.7" + def KRAKENTOOLS_VERSION = "1.2" + """ + if [ "$is_tarball" == "true" ]; then + mkdir database + tar -xzf $db -C database + KRAKEN_DB=\$(find database/ -name "hash.k2d" | sed 's=hash.k2d==') + else + KRAKEN_DB=\$(find $db/ -name "hash.k2d" | sed 's=hash.k2d==') + fi + + kraken2 \\ + --db \$KRAKEN_DB \\ + --threads $task.cpus \\ + --unclassified-out $unclassified \\ + --classified-out $classified \\ + --report ${prefix}.kraken2.report.txt \\ + --gzip-compressed \\ + $paired \\ + $options.args \\ + $reads > kracken.out + + # Get read length + if [ "${params.bracken_read_length}" == "0" ]; then + OBS_READ_LENGTH=\$(zcat ${reads[0]} | fastq-scan -q | jq -r '.qc_stats.read_median') + echo \$OBS_READ_LENGTH + # Pre-built Bracken databases come with 50,75,100,150,200,250,300, split the difference + if [ "\$OBS_READ_LENGTH" -gt 275 ]; then + READ_LENGTH="300" + elif [ "\$OBS_READ_LENGTH" -gt 225 ]; then + READ_LENGTH="250" + elif [ "\$OBS_READ_LENGTH" -gt 175 ]; then + READ_LENGTH="200" + elif [ "\$OBS_READ_LENGTH" -gt 125 ]; then + READ_LENGTH="150" + elif [ "\$OBS_READ_LENGTH" -gt 85 ]; then + READ_LENGTH="100" + elif [ "\$OBS_READ_LENGTH" -gt 65 ]; then + READ_LENGTH="75" + else + READ_LENGTH="50" + fi + else + # use user defined read length + READ_LENGTH="${params.bracken_read_length}" + fi + + bracken \\ + $options.args2 \\ + -d \$KRAKEN_DB \\ + -r \$READ_LENGTH \\ + -i ${prefix}.kraken2.report.txt \\ + -w ${prefix}.bracken.report.txt \\ + -o bracken.temp + + # Sort bracken report by 'fraction_total_reads' (column 7) + head -n 1 bracken.temp > ${prefix}.bracken.abundances.txt + grep -v "fraction_total_reads\$" bracken.temp | sort -k 7 -rn >> ${prefix}.bracken.abundances.txt + + # Compress Kraken FASTQs + pigz -p $task.cpus *.fastq + + # Adjust bracken to include unclassified and produce summary + kraken-bracken-summary.py \\ + ${prefix} \\ + ${prefix}.kraken2.report.txt \\ + ${prefix}.bracken.report.txt \\ + 
${prefix}.bracken.abundances.txt + + # Create a Krona report from reports + if [ "${params.skip_krona}" == "false" ]; then + # Kraken2 + kreport2krona.py \\ + --report ${prefix}.kraken2.report.txt \\ + --output kraken2-krona.temp + ktImportText -o ${prefix}.kraken2.krona.html kraken2-krona.temp + + # Bracken + kreport2krona.py \\ + --report ${prefix}.bracken.report.txt \\ + --output bracken-krona.temp + ktImportText -o ${prefix}.bracken.krona.html bracken-krona.temp + rm *-krona.temp + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bracken: ${BRACKEN_VERSION} + fastq-scan: \$(echo \$(fastq-scan -v 2>&1) | sed 's/fastq-scan //') + jq: \$(echo \$(jq --version 2>&1) | sed 's/jq-//') + kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//') + krakentools: ${KRAKENTOOLS_VERSION} + krona: \$( echo \$(ktImportText 2>&1) | sed 's/^.*KronaTools //g; s/- ktImportText.*\$//g') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + python: \$(echo \$(python --version 2>&1) | sed 's/Python //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bracken/meta.yml b/modules/nf-core/bracken/meta.yml new file mode 100644 index 00000000..c3b363cf --- /dev/null +++ b/modules/nf-core/bracken/meta.yml @@ -0,0 +1,60 @@ +name: kraken2 +description: Classifies metagenomic sequence data +keywords: + - classify + - metagenomics + - fastq + - db +tools: + - kraken2: + description: | + Kraken2 is a taxonomic sequence classifier that assigns taxonomic labels to sequence reads + homepage: https://ccb.jhu.edu/software/kraken2/ + documentation: https://github.com/DerrickWood/kraken2/wiki/Manual + doi: 10.1186/s13059-019-1891-0 + licence: ['MIT'] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - db: + type: directory + description: Kraken2 database +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - classified: + type: file + description: | + Reads classified to belong to any of the taxa + on the Kraken2 database. + pattern: "*{fastq.gz}" + - unclassified: + type: file + description: | + Reads not classified to belong to any of the taxa + on the Kraken2 database. + pattern: "*{fastq.gz}" + - report: + type: file + description: | + Kraken2 report containing stats about classified + and not classifed reads. + pattern: "*.{report.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bracken/params.config b/modules/nf-core/bracken/params.config new file mode 100644 index 00000000..e5d53eeb --- /dev/null +++ b/modules/nf-core/bracken/params.config @@ -0,0 +1,21 @@ +/* +This file includes default parameter values. 
+*/ + +params { + // Kraken2 + kraken2_db = null + kraken2_quick_mode = false + kraken2_confidence = 0.0 + kraken2_minimum_base_quality = 0 + kraken2_use_mpa_style = false + kraken2_report_zero_counts = false + kraken2_report_minimizer_data = false + kraken2_use_names = false + kraken2_memory_mapping = false + kraken2_minimum_hit_groups = 2 + bracken_read_length = 0 + bracken_level = "S" + bracken_threshold = 0 + skip_krona = false +} diff --git a/modules/nf-core/bracken/params.json b/modules/nf-core/bracken/params.json new file mode 100644 index 00000000..f767e0c8 --- /dev/null +++ b/modules/nf-core/bracken/params.json @@ -0,0 +1,117 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/kraken2/params.json", + "title": "Kraken2 Module", + "description": "A module for taxonomic classification of sequence reads", + "type": "object", + "definitions": { + "kraken2_bracken_parameters": { + "title": "Kraken2 and Bracken Parameters", + "type": "object", + "description": "", + "default": "", + "fa_icon": "fas fa-exclamation-circle", + "properties": { + "kraken2_db": { + "type": "string", + "description": "The a single tarball or path to a Kraken2 formatted database", + "fa_icon": "fas fa-expand-arrows-alt", + "is_required": true + }, + "kraken2_quick_mode": { + "type": "boolean", + "default": false, + "description": "Quick operation (use first hit or hits)", + "fa_icon": "fas fa-expand-arrows-alt", + "hidden": true + }, + "kraken2_confidence": { + "type": "number", + "default": 0.0, + "description": "Confidence score threshold between 0 and 1", + "fa_icon": "fas fa-expand-arrows-alt" + }, + "kraken2_minimum_base_quality": { + "type": "integer", + "default": 0, + "description": "Minimum base quality used in classification", + "fa_icon": "fas fa-expand-arrows-alt", + "hidden": true + }, + "kraken2_use_mpa_style": { + "type": "boolean", + "default": false, + "description": "Format 
report output like Kraken 1's kraken-mpa-report", + "fa_icon": "fas fa-expand-arrows-alt" + }, + "kraken2_report_zero_counts": { + "type": "boolean", + "default": false, + "description": "Report counts for ALL taxa, even if counts are zero", + "fa_icon": "fas fa-expand-arrows-alt" + }, + "kraken2_report_minimizer_data": { + "type": "boolean", + "default": false, + "description": "Include minimizer and distinct minimizer count information in report", + "fa_icon": "fas fa-expand-arrows-alt", + "hidden": true + }, + "kraken2_use_names": { + "type": "boolean", + "default": false, + "description": "Print scientific names instead of just taxids", + "fa_icon": "fas fa-expand-arrows-alt", + "hidden": true + }, + "kraken2_memory_mapping": { + "type": "boolean", + "default": false, + "description": "Avoid loading database into RAM", + "fa_icon": "fas fa-expand-arrows-alt", + "hidden": true + }, + "kraken2_minimum_hit_groups": { + "type": "integer", + "default": 2, + "description": "Minimum number of hit groups needed to make a call", + "fa_icon": "fas fa-expand-arrows-alt", + "hidden": true + }, + "bracken_read_length": { + "type": "integer", + "default": 0, + "description": "Read length to get all classifications for (0 = determine at runtime)", + "fa_icon": "fas fa-expand-arrows-alt", + "hidden": true + }, + "bracken_level": { + "type": "string", + "default": "S", + "description": "Level to estimate abundance at", + "fa_icon": "fas fa-expand-arrows-alt", + "hidden": true + }, + "bracken_threshold": { + "type": "integer", + "default": 0, + "description": "Reads required PRIOR to abundance estimation to perform re-estimation", + "fa_icon": "fas fa-expand-arrows-alt", + "hidden": true + }, + "skip_krona": { + "type": "boolean", + "default": false, + "description": "Skip the creation of a Krona report", + "fa_icon": "fas fa-expand-arrows-alt", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/kraken2_bracken_parameters" + } + ] +} diff --git 
a/modules/nf-core/busco/main.nf b/modules/nf-core/busco/main.nf index 8c5becb5..48fc6edc 100644 --- a/modules/nf-core/busco/main.nf +++ b/modules/nf-core/busco/main.nf @@ -8,7 +8,7 @@ conda_name = conda_tools.replace("=", "-").replace(":", "-").replace(" ", "-" conda_env = file("${params.condadir}/${conda_name}").exists() ? "${params.condadir}/${conda_name}" : conda_tools process BUSCO { - tag "$lineage" + tag "$prefix - $lineage" label 'process_medium' conda (params.enable_conda ? conda_env : null) @@ -17,19 +17,30 @@ process BUSCO { 'quay.io/biocontainers/busco:5.4.6--pyhdfd78af_0' }" input: - tuple val(meta), path('tmp_input/*') - each lineage + tuple val(meta), path(fasta) output: - tuple val(meta), path("${lineage}/") , emit: results - tuple val(meta), path("${lineage}/${lineage}-summary.txt"), emit: tsv - path "*.{log,err}" , emit: logs, optional: true - path ".command.*" , emit: nf_logs - path "versions.yml" , emit: versions + tuple val(meta), path("results/*") , emit: results + tuple val(meta), path("results/${prefix}-summary.txt"), emit: tsv + path "*.{log,err}" , emit: logs, optional: true + path ".command.*" , emit: nf_logs + path "versions.yml" , emit: versions script: prefix = options.suffix ? "${options.suffix}" : "${meta.id}" + lineage = params.busco_lineage + def is_compressed = fasta.getName().endsWith(".gz") ? true : false + def fasta_name = fasta.getName().replace(".gz", "") """ + # Have to put FASTA in a directory to force batch mode in busco + mkdir tmp-fasta + if [ "$is_compressed" == "true" ]; then + gzip -c -d $fasta > tmp-fasta/$fasta_name + else + # Uncompressed input must be staged too, otherwise busco is given an empty directory + cp $fasta tmp-fasta/$fasta_name + fi + # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) # Check for container variable initialisation script and source it. 
if [ -f "/usr/local/env-activate.sh" ]; then @@ -47,32 +58,24 @@ process BUSCO { echo "New AUGUSTUS_CONFIG_PATH=\${AUGUSTUS_CONFIG_PATH}" fi - # Ensure the input is uncompressed - INPUT_SEQS=input_seqs - mkdir "\$INPUT_SEQS" - cd "\$INPUT_SEQS" - for FASTA in ../tmp_input/*; do - if [ "\${FASTA##*.}" == 'gz' ]; then - gzip -cdf "\$FASTA" > \$( basename "\$FASTA" .fna.gz ) - else - cp "\$FASTA" \$( basename "\$FASTA" .fna ) - fi - done - cd .. - busco \\ --cpu $task.cpus \\ - --in "\$INPUT_SEQS" \\ - --out $lineage \\ + --in tmp-fasta/ \\ + --out results \\ --lineage $lineage \\ --mode genome \\ --download_base_url=https://busco-data2.ezlab.org/v5/data \\ $options.args - mv ${lineage}/batch_summary.txt ${lineage}/${lineage}-summary.txt + # cleanup output directory structure + find results/ -name "*.log" | xargs -I {} mv {} ./ + find results/ -type d -path "*logs" | xargs -I {} rm -rf {} + mv results/batch_summary.txt results/${prefix}-summary.txt + mv results/${fasta_name}/* results/ + rm -rf results/${fasta_name} # Busco outputs additional trailing tabs, clean them up - sed -i 's/\t\t\t\$//' ${lineage}/${lineage}-summary.txt + sed -i 's/\t\t\t\$//' results/${prefix}-summary.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/busco/params.config b/modules/nf-core/busco/params.config index b4668021..b29a3950 100644 --- a/modules/nf-core/busco/params.config +++ b/modules/nf-core/busco/params.config @@ -13,4 +13,5 @@ params { augustus_parameters = "" augustus_species = "" augustus_long = false + run_name = params.busco_lineage } diff --git a/modules/nf-core/busco/params.json b/modules/nf-core/busco/params.json index 01f469fb..efd33cf9 100644 --- a/modules/nf-core/busco/params.json +++ b/modules/nf-core/busco/params.json @@ -15,7 +15,7 @@ "busco_lineage": { "type": "string", "default": "bacteria_odb10", - "description": "Specify the name of the BUSCO lineage to be used (can separate by comma)", + "description": "Specify the name of the 
BUSCO lineage to be used", "fa_icon": "fas fa-expand-arrows-alt" }, "busco_evalue": { diff --git a/modules/nf-core/kraken2/main.nf b/modules/nf-core/kraken2/main.nf index 39683a0e..6c8abe4f 100644 --- a/modules/nf-core/kraken2/main.nf +++ b/modules/nf-core/kraken2/main.nf @@ -1,10 +1,10 @@ -// Import generic module functions +// Import generic module functions include { get_resources; initOptions; saveFiles } from '../../../lib/nf/functions' RESOURCES = get_resources(workflow.profile, params.max_memory, params.max_cpus) options = initOptions(params.containsKey("options") ? params.options : [:], 'kraken2') options.btype = options.btype ?: "tools" -conda_tools = "bioconda::kraken2=2.1.2 conda-forge::pigz=2.6" +conda_tools = "bioconda::bactopia-teton=1.0.0" conda_name = conda_tools.replace("=", "-").replace(":", "-").replace(" ", "-") conda_env = file("${params.condadir}/${conda_name}").exists() ? "${params.condadir}/${conda_name}" : conda_tools @@ -14,8 +14,8 @@ process KRAKEN2 { conda (params.enable_conda ? conda_env : null) container "${ workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' : - 'quay.io/biocontainers/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' }" + 'https://depot.galaxyproject.org/singularity/bactopia-teton:1.0.0--hdfd78af_0' : + 'quay.io/biocontainers/bactopia-teton:1.0.0--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -32,8 +32,8 @@ process KRAKEN2 { script: prefix = options.suffix ? "${options.suffix}" : "${meta.id}" def paired = meta.single_end ? "" : "--paired" - def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq" - def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq" + classified = meta.single_end ? 
"${prefix}.classified.fastq" : "${prefix}.classified#.fastq" + unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq" def is_tarball = db.getName().endsWith(".tar.gz") ? true : false """ if [ "$is_tarball" == "true" ]; then @@ -55,10 +55,6 @@ process KRAKEN2 { $options.args \\ $reads > /dev/null - if [ "${params.skip_bracken}" == "false" ]; then - rm -rf database - fi - pigz -p $task.cpus *.fastq cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/rgi/main/main.nf b/modules/nf-core/rgi/main/main.nf index dc776b09..53c698be 100644 --- a/modules/nf-core/rgi/main/main.nf +++ b/modules/nf-core/rgi/main/main.nf @@ -20,7 +20,7 @@ process RGI_MAIN { tuple val(meta), path(fasta) output: - tuple val(meta), path("*.json"), emit: json + tuple val(meta), path("*.json"), emit: json, optional: true tuple val(meta), path("*.txt") , emit: tsv path "*.{log,err}" , emit: logs, optional: true path ".command.*" , emit: nf_logs @@ -38,6 +38,11 @@ process RGI_MAIN { --output_file $prefix \\ --input_sequence $fasta + # Remove empty json files + if grep "^{}\$" ${prefix}.json; then + rm ${prefix}.json + fi + cat <<-END_VERSIONS > versions.yml "${task.process}": rgi: \$(rgi main --version) diff --git a/subworkflows/local/agrvate/main.nf b/subworkflows/local/agrvate/main.nf index 93425a00..592fa61a 100644 --- a/subworkflows/local/agrvate/main.nf +++ b/subworkflows/local/agrvate/main.nf @@ -4,7 +4,6 @@ include { initOptions } from '../../../lib/nf/functions' options = initOptions(params.containsKey("options") ? params.options : [:], 'agrvate') options.args = params.typing_only ? 
'--typing_only' : '' -options.subdir = "${params.run_name}" include { AGRVATE as AGRVATE_MODULE } from '../../../modules/nf-core/agrvate/main' addParams( options: options ) include { CSVTK_CONCAT } from '../../../modules/nf-core/csvtk/concat/main' addParams( options: [args: '-C "$"', logs_subdir: 'agrvate-concat', process_name: params.merge_folder] ) diff --git a/subworkflows/local/bracken/main.nf b/subworkflows/local/bracken/main.nf index 509613f8..e148c3e7 100644 --- a/subworkflows/local/bracken/main.nf +++ b/subworkflows/local/bracken/main.nf @@ -31,15 +31,15 @@ workflow BRACKEN { main: ch_versions = Channel.empty() - KRAKEN2_BRACKEN_MODULE(reads, DATABASE) - ch_versions = ch_versions.mix(KRAKEN2_BRACKEN_MODULE.out.versions) + BRACKEN_MODULE(reads, DATABASE) + ch_versions = ch_versions.mix(BRACKEN_MODULE.out.versions) emit: - tsv = KRAKEN2_BRACKEN_MODULE.out.tsv - classified = KRAKEN2_BRACKEN_MODULE.out.classified - unclassified = KRAKEN2_BRACKEN_MODULE.out.unclassified - kraken2_report = KRAKEN2_BRACKEN_MODULE.out.kraken2_report - bracken_report = KRAKEN2_BRACKEN_MODULE.out.bracken_report - abundances = KRAKEN2_BRACKEN_MODULE.out.abundances + tsv = BRACKEN_MODULE.out.tsv + classified = BRACKEN_MODULE.out.classified + unclassified = BRACKEN_MODULE.out.unclassified + kraken2_report = BRACKEN_MODULE.out.kraken2_report + bracken_report = BRACKEN_MODULE.out.bracken_report + abundances = BRACKEN_MODULE.out.abundances versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/busco/main.nf b/subworkflows/local/busco/main.nf index 25468a46..3855b2f7 100644 --- a/subworkflows/local/busco/main.nf +++ b/subworkflows/local/busco/main.nf @@ -1,6 +1,6 @@ // // busco - Assembly completeness based on evolutionarily informed expectations -// +// include { initOptions } from '../../../lib/nf/functions' options = initOptions(params.containsKey("options") ? 
params.options : [:], 'busco') options.args = [ @@ -15,7 +15,7 @@ options.args = [ ].join(' ').replaceAll("\\s{2,}", " ").trim() include { BUSCO as BUSCO_MODULE } from '../../../modules/nf-core/busco/main' addParams( options: options ) -include { CSVTK_CONCAT } from '../../../modules/nf-core/csvtk/concat/main' addParams( options: [process_name: 'busco'] ) +include { CSVTK_CONCAT } from '../../../modules/nf-core/csvtk/concat/main' addParams( options: [logs_subdir: 'busco-concat', process_name: params.merge_folder] ) workflow BUSCO { take: @@ -23,9 +23,8 @@ workflow BUSCO { main: ch_versions = Channel.empty() - ch_lineages = Channel.from(params.busco_lineage.split(',')) - BUSCO_MODULE(fasta, ch_lineages) + BUSCO_MODULE(fasta) ch_versions = ch_versions.mix(BUSCO_MODULE.out.versions.first()) // Merge the results diff --git a/subworkflows/local/kraken2/main.nf b/subworkflows/local/kraken2/main.nf index 046df58a..858a2efc 100644 --- a/subworkflows/local/kraken2/main.nf +++ b/subworkflows/local/kraken2/main.nf @@ -1,5 +1,5 @@ // -// kraken2 - Taxonomic classification of sequence reads +// kraken2 - Taxonomic classification of sequence reads // include { initOptions } from '../../../lib/nf/functions' options = initOptions(params.containsKey("options") ? params.options : [:], 'kraken2') @@ -17,7 +17,7 @@ options.args = [ DATABASE = params.kraken2_db ? 
file(params.kraken2_db) : [] include { KRAKEN2 as KRAKEN2_MODULE } from '../../../modules/nf-core/kraken2/main' addParams( options: options ) - + workflow KRAKEN2 { take: reads // channel: [ val(meta), [ fasta ] ] @@ -31,6 +31,6 @@ workflow KRAKEN2 { emit: classified = KRAKEN2_MODULE.out.classified unclassified = KRAKEN2_MODULE.out.unclassified - report = KRAKEN2_MODULE.out.report + kraken2_report = KRAKEN2_MODULE.out.kraken2_report versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/bactopia-tools.nf b/workflows/bactopia-tools.nf index e14aa500..ec1bb44e 100644 --- a/workflows/bactopia-tools.nf +++ b/workflows/bactopia-tools.nf @@ -159,8 +159,7 @@ workflow BACTOPIATOOLS { BRACKEN(samples) ch_versions = ch_versions.mix(BRACKEN.out.versions) } else if (params.wf == 'busco') { - samples.collect{meta, fna -> fna}.map{ fna -> [[id: 'busco'], fna]}.set{ ch_merge_fna } - BUSCO(ch_merge_fna) + BUSCO(samples) ch_versions = ch_versions.mix(BUSCO.out.versions) } else if (params.wf == 'checkm') { CHECKM(samples)