From 8841acfcdb7b7c88b6cb6b127f869cedbe7ed1e3 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Sat, 17 Sep 2022 17:36:11 -0300 Subject: [PATCH] adding rule to intersect bed (#65) * when using bedtools intersect, keep only the highest intersection of annotation * Update merge_annotations.nf * updating versioning information * change modules to use named outputs for better readability * fixed optional emissions * fix awk print statement * Update CHANGELOG.md * Update CHANGELOG.md --- .zenodo.json | 2 +- markdown/CHANGELOG.md | 6 + modules/KOs/kegg-decoder.nf | 5 +- modules/KOs/kofamscan.nf | 4 +- modules/MGEs/digIS.nf | 7 +- modules/MGEs/draw_gis.nf | 5 +- modules/MGEs/iceberg.nf | 9 +- modules/MGEs/islandpath.nf | 3 +- modules/MGEs/plasmidfinder.nf | 4 +- modules/MGEs/platon.nf | 6 +- modules/assembly/flye.nf | 6 +- modules/assembly/unicycler.nf | 6 +- modules/generic/antismash.nf | 7 +- modules/generic/bakta.nf | 20 +- modules/generic/barrnap.nf | 6 +- modules/generic/compute_gc.nf | 3 +- modules/generic/custom_database.nf | 9 +- modules/generic/custom_database_report.nf | 2 +- modules/generic/gff2gbk.nf | 2 +- modules/generic/gff2sql.nf | 4 +- modules/generic/jbrowse.nf | 2 +- modules/generic/mash.nf | 5 +- modules/generic/merge_annotations.nf | 12 +- modules/generic/methylation.nf | 11 +- modules/generic/mlst.nf | 6 +- modules/generic/ncbi_protein.nf | 2 +- modules/generic/prokka.nf | 20 +- modules/generic/reports.nf | 2 +- modules/generic/sequenceserver.nf | 8 +- modules/prophages/phast.nf | 7 +- modules/prophages/phigaro.nf | 9 +- modules/prophages/phispy.nf | 6 +- modules/resistance/amrfinder.nf | 9 +- modules/resistance/argminer.nf | 5 +- modules/resistance/resfinder.nf | 11 +- modules/resistance/rgi_annotation.nf | 14 +- modules/virulence/vfdb.nf | 7 +- modules/virulence/victors.nf | 7 +- nextflow.config | 2 +- workflows/bacannot.nf | 359 +++++++++------------- 40 files changed, 277 insertions(+), 343 deletions(-) diff --git a/.zenodo.json b/.zenodo.json index 0944f2bf..77663d93 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -2,7 +2,7 @@ "description": "

The pipeline

\n\n

bacannot, is a customisable, easy to use, pipeline that uses state-of-the-art software for comprehensively annotating prokaryotic genomes having only Docker and Nextflow as dependencies. It is able to annotate and detect virulence and resistance genes, plasmids, secondary metabolites, genomic islands, prophages, ICEs, KO, and more, while providing nice an beautiful interactive documents for results exploration.

", "license": "other-open", "title": "fmalmeida/bacannot: A generic but comprehensive bacterial annotation pipeline", - "version": "v3.1.4", + "version": "v3.1.5", "upload_type": "software", "creators": [ { diff --git a/markdown/CHANGELOG.md b/markdown/CHANGELOG.md index 39b70fcd..eaeaa678 100644 --- a/markdown/CHANGELOG.md +++ b/markdown/CHANGELOG.md @@ -2,6 +2,12 @@ The tracking for changes started in v2.1 +## v3.1.5 [17-September-2022] + +* Fixes https://github.com/fmalmeida/bacannot/issues/64 reported by @fmalmeida, which highlights that the resfinder annotation were sometimes being duplicated because a single gene had intersection to more then one CDS regions, but, only one being "true". + - To solve such issue, intersections are now sorted by length, and only the first occurence (per gene) is kept. +* As reported in issue #39, named outputs are now used in modules for better readability. + ## v3.1.4 [13-September-2022] * Fixes https://github.com/fmalmeida/bacannot/issues/62 reported by @rujinlong, where Island-Path tool was failling because it was running on genbank files with no true CDS. This was hapenning because Bakta writes in the comments that the GBK has 0 CDS and, at first, the module was selecting GBK by checking if the CDS string was there. It has now been modified to also work with Bakta. diff --git a/modules/KOs/kegg-decoder.nf b/modules/KOs/kegg-decoder.nf index 56016bfd..fd5115f2 100644 --- a/modules/KOs/kegg-decoder.nf +++ b/modules/KOs/kegg-decoder.nf @@ -7,9 +7,8 @@ process KEGG_DECODER { tuple val(prefix), path('input_mapper.txt') output: - // Grab all outputs - path("*") // Get all files to input directory - tuple val(prefix), path("*.svg") // get svg + path("*") , emit: all // Get all files to input directory + tuple val(prefix), path("*.svg"), emit: results // get svg script: """ diff --git a/modules/KOs/kofamscan.nf b/modules/KOs/kofamscan.nf index 89a8f8ea..405f61c1 100644 --- a/modules/KOs/kofamscan.nf +++ b/modules/KOs/kofamscan.nf @@ -12,8 +12,8 @@ process KOFAMSCAN { output: // Grab all outputs - file("KOfamscan") - tuple val(prefix), file("KOfamscan/${prefix}_ko_forKEGGMapper.txt") + path("KOfamscan"), emit: all + tuple val(prefix), path("KOfamscan/${prefix}_ko_forKEGGMapper.txt"), emit: results script: """ diff --git a/modules/MGEs/digIS.nf b/modules/MGEs/digIS.nf index c999757d..ce5f6cb3 100644 --- a/modules/MGEs/digIS.nf +++ b/modules/MGEs/digIS.nf @@ -12,10 +12,9 @@ process DIGIS { tuple val(prefix), path(genome), path(genbank) output: - // Grab results - file("digIS") - tuple val(prefix), path("digIS/results/${prefix}.gff") - tuple val(prefix), path("${prefix}_IS.gff"), path("digIS/results/fastas/${prefix}_IS.fa"), path("digIS/results/fastas/${prefix}_IS.faa") + path("digIS") , emit: all + tuple val(prefix), path("digIS/results/${prefix}.gff"), emit: gff + tuple val(prefix), path("${prefix}_IS.gff"), path("digIS/results/fastas/${prefix}_IS.fa"), path("digIS/results/fastas/${prefix}_IS.faa"), emit: gff_and_sequences script: """ diff --git a/modules/MGEs/draw_gis.nf b/modules/MGEs/draw_gis.nf index aced2556..ff64cfd2 100644 --- a/modules/MGEs/draw_gis.nf +++ b/modules/MGEs/draw_gis.nf @@ -11,9 +11,8 @@ process DRAW_GIS { tuple val(prefix), file(gff), file(gis_bed) output: - // Outputs must be linked to each prefix (tag) - tuple val(prefix), file("plots") optional true - tuple val(prefix), file("teste.png") optional true + tuple val(prefix), file("plots") , emit: all optional true + tuple val(prefix), file("teste.png"), emit: example optional true script: """ diff --git a/modules/MGEs/iceberg.nf b/modules/MGEs/iceberg.nf index 491fd11b..4ce9a16d 100644 --- a/modules/MGEs/iceberg.nf +++ b/modules/MGEs/iceberg.nf @@ -9,11 +9,10 @@ process ICEBERG { file(bacannot_db) output: - // Outputs must be linked to each prefix (tag) - tuple val(prefix), file("${prefix}_iceberg_blastp_onGenes.summary.txt") - tuple val(prefix), file("${prefix}_iceberg_blastp_onGenes.txt") - tuple val(prefix), file("${prefix}_iceberg_blastn_onGenome.summary.txt") - file('*.txt') // Grab all + tuple val(prefix), path("${prefix}_iceberg_blastp_onGenes.summary.txt") , emit: genes_summary + tuple val(prefix), path("${prefix}_iceberg_blastp_onGenes.txt") , emit: results + tuple val(prefix), path("${prefix}_iceberg_blastn_onGenome.summary.txt"), emit: genome_summary + path('*.txt') , emit: all script: """ diff --git a/modules/MGEs/islandpath.nf b/modules/MGEs/islandpath.nf index 90ef5037..d7ded993 100644 --- a/modules/MGEs/islandpath.nf +++ b/modules/MGEs/islandpath.nf @@ -7,8 +7,7 @@ process ISLANDPATH { tuple val(prefix), file("annotation.gbk") output: - // Outputs must be linked to each prefix (tag) - tuple val(prefix), path("${prefix}_predicted_GIs.bed") + tuple val(prefix), path("${prefix}_predicted_GIs.bed"), emit: results script: """ diff --git a/modules/MGEs/plasmidfinder.nf b/modules/MGEs/plasmidfinder.nf index 0ddedbf8..318e93ea 100644 --- a/modules/MGEs/plasmidfinder.nf +++ b/modules/MGEs/plasmidfinder.nf @@ -11,8 +11,8 @@ process PLASMIDFINDER { file(bacannot_db) output: - tuple val(prefix), path("plasmidfinder") - tuple val(prefix), path("plasmidfinder/results_tab.tsv") + tuple val(prefix), path("plasmidfinder") , emit: all + tuple val(prefix), path("plasmidfinder/results_tab.tsv"), emit: results script: """ diff --git a/modules/MGEs/platon.nf b/modules/MGEs/platon.nf index 7cfe7d0c..40ddaa04 100644 --- a/modules/MGEs/platon.nf +++ b/modules/MGEs/platon.nf @@ -12,9 +12,9 @@ process PLATON { file(bacannot_db) output: - path("platon") - tuple val(prefix), path("platon/${prefix}.tsv") - path("platon_version.txt") + path("platon") , emit: all + tuple val(prefix), path("platon/${prefix}.tsv"), emit: results + path("platon_version.txt") , emit: version script: """ diff --git a/modules/assembly/flye.nf b/modules/assembly/flye.nf index b559fa0e..05e73580 100644 --- a/modules/assembly/flye.nf +++ b/modules/assembly/flye.nf @@ -11,10 +11,10 @@ process FLYE { tuple val(prefix), val(entrypoint), file(sread1), file(sread2), file(sreads), file(lreads), val(lr_type), file(fast5), val(assembly), val(resfinder_species) output: - file "flye_${prefix}" // Saves all files + path "flye_${prefix}", emit: all // Saves all files // Keep tuple structure to mixing channels - tuple val("${prefix}"), val("${entrypoint}"), val("${sread1}"), val("${sread2}"), val("${sreads}"), file("${lreads}"), val("${lr_type}"), file("${fast5}"), file("flye_${prefix}.fasta"), val("${resfinder_species}") - file('flye_version.txt') + tuple val("${prefix}"), val("${entrypoint}"), val("${sread1}"), val("${sread2}"), val("${sreads}"), path("${lreads}"), val("${lr_type}"), path("${fast5}"), path("flye_${prefix}.fasta"), val("${resfinder_species}"), emit: results + path('flye_version.txt'), emit: version script: lr = (lr_type == 'nanopore') ? '--nano-raw' : '--pacbio-raw' diff --git a/modules/assembly/unicycler.nf b/modules/assembly/unicycler.nf index 42f27927..75883bf5 100644 --- a/modules/assembly/unicycler.nf +++ b/modules/assembly/unicycler.nf @@ -11,10 +11,10 @@ process UNICYCLER { tuple val(prefix), val(entrypoint), file(sread1), file(sread2), file(sreads), file(lreads), val(lr_type), file(fast5), val(assembly), val(resfinder_species) output: - file "unicycler_${prefix}" // Save everything + path "unicycler_${prefix}", emit: all // Save everything // Keep tuple structure to mixing channels - tuple val("${prefix}"), val("${entrypoint}"), val("${sread1}"), val("${sread2}"), val("${sreads}"), file("${lreads}"), val("${lr_type}"), file("${fast5}"), file("unicycler_${prefix}.fasta"), val("${resfinder_species}") - file('unicycler_version.txt') + tuple val("${prefix}"), val("${entrypoint}"), val("${sread1}"), val("${sread2}"), val("${sreads}"), path("${lreads}"), val("${lr_type}"), path("${fast5}"), path("unicycler_${prefix}.fasta"), val("${resfinder_species}"), emit: results + path('unicycler_version.txt'), emit: version script: unpaired_param = (sreads.getName() != "input.3") ? "-s $sreads" : "" diff --git a/modules/generic/antismash.nf b/modules/generic/antismash.nf index c80db694..04627166 100644 --- a/modules/generic/antismash.nf +++ b/modules/generic/antismash.nf @@ -11,10 +11,9 @@ process ANTISMASH { file(bacannot_db) output: - // Grab results - tuple val(prefix), path("antiSMASH/regions.gff") - path("antiSMASH") - path("*_version.txt") + tuple val(prefix), path("antiSMASH/regions.gff"), emit: gff + path("antiSMASH") , emit: all + path("*_version.txt") , emit: version script: def gbk_suffix = (params.bakta_db) ? "gbff" : "gbk" diff --git a/modules/generic/bakta.nf b/modules/generic/bakta.nf index 9f48a42a..8d02e8b1 100644 --- a/modules/generic/bakta.nf +++ b/modules/generic/bakta.nf @@ -13,17 +13,17 @@ process BAKTA { output: // Grab all outputs - file "annotation" + path "annotation", emit: all // Outputs must be linked to each prefix (tag) - tuple val(prefix), file("annotation/${prefix}.gff3") // annotation in gff format - tuple val(prefix), file("annotation/${prefix}.gbff") // annotation in gbk format - tuple val(prefix), file("annotation/${prefix}.fna") // renamed genome - tuple val(prefix), file("annotation/${prefix}.faa") // gene aa sequences - tuple val(prefix), file("annotation/${prefix}.ffn") // gene nt sequences - tuple val(prefix), file("annotation/${prefix}.fna"), file("${lreads}"), file("${fast5}") // For methylation calling - tuple val(prefix), file("annotation/${prefix}.fna"), val("${resfinder_species}") // For resfinder - tuple val(prefix), file("annotation/${prefix}.txt") // bakta stats - file('bakta_version.txt') // Save bakta version + tuple val(prefix), path("annotation/${prefix}.gff3"), emit: gff // annotation in gff format + tuple val(prefix), path("annotation/${prefix}.gbff"), emit: gbk // annotation in gbk format + tuple val(prefix), path("annotation/${prefix}.fna") , emit: genome // renamed genome + tuple val(prefix), path("annotation/${prefix}.faa") , emit: proteins // gene aa sequences + tuple val(prefix), path("annotation/${prefix}.ffn") , emit: genes // gene nt sequences + tuple val(prefix), path("annotation/${prefix}.fna"), path("${lreads}"), path("${fast5}"), emit: genome_with_fast5 // For methylation calling + tuple val(prefix), path("annotation/${prefix}.fna"), val("${resfinder_species}"), emit: genome_with_species // For resfinder + tuple val(prefix), path("annotation/${prefix}.txt") , emit: summary // bakta stats + path('bakta_version.txt'), emit: version // Save bakta version script: """ diff --git a/modules/generic/barrnap.nf b/modules/generic/barrnap.nf index c95689c6..8d5c1cf4 100644 --- a/modules/generic/barrnap.nf +++ b/modules/generic/barrnap.nf @@ -10,9 +10,9 @@ process BARRNAP { tuple val(prefix), file(genome) output: - tuple val(prefix), path("${prefix}_rRNA.gff") - tuple val(prefix), path("${prefix}_rRNA.fa") - path('barrnap_version.txt') + tuple val(prefix), path("${prefix}_rRNA.gff"), emit: gff + tuple val(prefix), path("${prefix}_rRNA.fa") , emit: fasta + path('barrnap_version.txt') , emit: version script: """ diff --git a/modules/generic/compute_gc.nf b/modules/generic/compute_gc.nf index 45b3372d..031548bf 100644 --- a/modules/generic/compute_gc.nf +++ b/modules/generic/compute_gc.nf @@ -6,8 +6,7 @@ process COMPUTE_GC { tuple val(prefix), file(genome) output: - // Outputs must be linked to each prefix (tag) - tuple val(prefix), path("input_GC_500_bps.sorted.bedGraph"), path("input.sizes") + tuple val(prefix), path("input_GC_500_bps.sorted.bedGraph"), path("input.sizes"), emit: results script: """ diff --git a/modules/generic/custom_database.nf b/modules/generic/custom_database.nf index 19a1c209..4c205a85 100644 --- a/modules/generic/custom_database.nf +++ b/modules/generic/custom_database.nf @@ -8,11 +8,10 @@ process CUSTOM_DATABASE { each file(customDB) output: - // Outputs must be linked to each prefix (tag) - tuple val(prefix), val("${customDB.baseName}"), path("${prefix}_${customDB.baseName}*.summary.txt") - tuple val(prefix), path("${customDB.baseName}_custom_db.gff") - path('*.txt') // Grab all - path(customDB) + tuple val(prefix), val("${customDB.baseName}"), path("${prefix}_${customDB.baseName}*.summary.txt"), emit: summary + tuple val(prefix), path("${customDB.baseName}_custom_db.gff") , emit: gff + path('*.txt') , emit: all + path(customDB) , emit: db script: """ diff --git a/modules/generic/custom_database_report.nf b/modules/generic/custom_database_report.nf index 2946466a..83b71e0f 100644 --- a/modules/generic/custom_database_report.nf +++ b/modules/generic/custom_database_report.nf @@ -10,7 +10,7 @@ process CUSTOM_DATABASE_REPORT { tuple val(prefix), val(customDB), file(custom_blast), file(custom_gff) output: - file '*.html' + path '*.html', emit: results script: """ diff --git a/modules/generic/gff2gbk.nf b/modules/generic/gff2gbk.nf index b90d535f..df73cb82 100644 --- a/modules/generic/gff2gbk.nf +++ b/modules/generic/gff2gbk.nf @@ -7,7 +7,7 @@ process GFF2GBK { tuple val(prefix), file(gff), file(input) output: - file "*.genbank" + path "*.genbank", emit: results """ # Activate env diff --git a/modules/generic/gff2sql.nf b/modules/generic/gff2sql.nf index a0e13e62..536609af 100644 --- a/modules/generic/gff2sql.nf +++ b/modules/generic/gff2sql.nf @@ -10,8 +10,8 @@ process CREATE_SQL { tuple val(prefix), file(gff), file(genes_nt), file(genes_aa), file(genome), file("digIS.gff"), file("digIS.fa"), file("digIS.faa") output: - file "${prefix}.sqlite" - file "run_server.sh" + path "${prefix}.sqlite", emit: results + path "run_server.sh" , emit: script script: """ diff --git a/modules/generic/jbrowse.nf b/modules/generic/jbrowse.nf index dd8e30f0..ea2cc183 100644 --- a/modules/generic/jbrowse.nf +++ b/modules/generic/jbrowse.nf @@ -7,7 +7,7 @@ process JBROWSE { tuple val(prefix), file(merged_gff), file(draft), file("prokka_gff"), file(barrnap), file(gc_bedGraph), file(gc_chrSizes), file(resfinder_gff), file(phigaro), file(genomic_islands), file("methylation"), file("chr.sizes"), file(phispy_tsv), file(digIS_gff), file(antiSMASH), file(custom_annotations) output: - file "*" + path "*", emit: results script: """ diff --git a/modules/generic/mash.nf b/modules/generic/mash.nf index 21f11286..6264fdb6 100644 --- a/modules/generic/mash.nf +++ b/modules/generic/mash.nf @@ -10,9 +10,8 @@ process REFSEQ_MASHER { tuple val(prefix), path(genome) output: - // Grab results - tuple val(prefix), path("refseq_masher_results.txt") - path("*_version.txt") + tuple val(prefix), path("refseq_masher_results.txt"), emit: results + path("*_version.txt") , emit: version script: """ diff --git a/modules/generic/merge_annotations.nf b/modules/generic/merge_annotations.nf index 2210a8cc..1835dfbb 100644 --- a/modules/generic/merge_annotations.nf +++ b/modules/generic/merge_annotations.nf @@ -7,10 +7,10 @@ process MERGE_ANNOTATIONS { tuple val(prefix), file('prokka_gff'), file(kofamscan), file(vfdb), file(victors), file(amrfinder), file(resfinder), file(rgi), file(iceberg), file(phast), file('digis_gff'), file(custom_databases) output: - tuple val(prefix), path("${prefix}.gff") - tuple val(prefix), path("transposable_elements_digis.gff") - tuple val(prefix), path("custom_database_*.gff") optional true - path("*.gff") + tuple val(prefix), path("${prefix}.gff") , emit: gff + tuple val(prefix), path("transposable_elements_digis.gff"), emit: digis_gff + tuple val(prefix), path("custom_database_*.gff") , emit: customdb_gff optional true + path("*.gff") , emit: all script: """ @@ -81,7 +81,7 @@ process MERGE_ANNOTATIONS { #### Resfinder if [ ! \$(cat $resfinder | wc -l) -eq 0 ] then - bedtools intersect -a $resfinder -b ${prefix}.gff -wo > resfinder_intersected.txt ; + bedtools intersect -a $resfinder -b ${prefix}.gff -wo | sort -k19,19 -r | awk -F '\\t' '!seen[\$9]++' > resfinder_intersected.txt ; addBedtoolsIntersect.R -g ${prefix}.gff -t resfinder_intersected.txt --type Resistance --source Resfinder -o ${prefix}.gff ; grep "Resfinder" ${prefix}.gff > resistance_resfinder.gff ; rm -f resfinder_intersected.txt ; @@ -93,7 +93,7 @@ process MERGE_ANNOTATIONS { if [ ! \$(cat \$file | wc -l) -eq 0 ] then db=\${file%%_custom_db.gff} ; - bedtools intersect -a \${file} -b ${prefix}.gff -wo > bedtools_intersected.txt ; + bedtools intersect -a \${file} -b ${prefix}.gff -wo | sort -k19,19 -r | awk -F '\\t' '!seen[\$9]++' > bedtools_intersected.txt ; addBedtoolsIntersect.R -g ${prefix}.gff -t bedtools_intersected.txt --type "CDS" --source "\${db}" -o ${prefix}.gff ; grep "\${db}" ${prefix}.gff > custom_database_\${db}.gff ; rm -f bedtools_intersected.txt ; diff --git a/modules/generic/methylation.nf b/modules/generic/methylation.nf index b3f39115..26c614fa 100644 --- a/modules/generic/methylation.nf +++ b/modules/generic/methylation.nf @@ -10,12 +10,11 @@ process CALL_METHYLATION { tuple val(prefix), file(draft), file(reads), file(fast5) output: - // Grab all outputs - file "*_calls.tsv" optional true - file "*_frequency.tsv" optional true - tuple val(prefix), file("methylation_frequency.bedGraph") optional true - tuple val(prefix), file("chr.sizes") optional true - file('nanopolish_version.txt') + path "*_calls.tsv" , emit: results optional true + path "*_frequency.tsv" , emit: frequencies optional true + tuple val(prefix), path("methylation_frequency.bedGraph"), emit: bedgraph optional true + tuple val(prefix), path("chr.sizes") , emit: chr_sizes optional true + path('nanopolish_version.txt') , emit: version when: // When an entry does not exist, it is created as 'input' diff --git a/modules/generic/mlst.nf b/modules/generic/mlst.nf index 2cbd8ff4..ec4b16b4 100644 --- a/modules/generic/mlst.nf +++ b/modules/generic/mlst.nf @@ -11,9 +11,9 @@ process MLST { file(bacannot_db) output: - tuple val(prefix), path("${prefix}_mlst_analysis.txt") optional true - tuple val(prefix), path("${prefix}_novel_alleles.fasta") optional true - path('mlst_version.txt') + tuple val(prefix), path("${prefix}_mlst_analysis.txt") , emit: results optional true + tuple val(prefix), path("${prefix}_novel_alleles.fasta"), emit: alleles optional true + path('mlst_version.txt') , emit: version script: """ diff --git a/modules/generic/ncbi_protein.nf b/modules/generic/ncbi_protein.nf index 260a1be5..96cd132b 100644 --- a/modules/generic/ncbi_protein.nf +++ b/modules/generic/ncbi_protein.nf @@ -5,7 +5,7 @@ process GET_NCBI_PROTEIN { file(ncbi_accs) output: - path("ncbi_protein.faa") + path("ncbi_protein.faa"), emit: proteins script: """ diff --git a/modules/generic/prokka.nf b/modules/generic/prokka.nf index faa9417d..219c5974 100644 --- a/modules/generic/prokka.nf +++ b/modules/generic/prokka.nf @@ -13,17 +13,17 @@ process PROKKA { output: // Grab all outputs - path("annotation") + path("annotation"), emit: all // Outputs must be linked to each prefix (tag) - tuple val(prefix), path("annotation/${prefix}.gff") - tuple val(prefix), path("annotation/${prefix}.gbk") - tuple val(prefix), path("annotation/${prefix}.fna") - tuple val(prefix), path("annotation/${prefix}.faa") - tuple val(prefix), path("annotation/${prefix}.ffn") - tuple val(prefix), path("annotation/${prefix}.fna"), path("${lreads}"), path("${fast5}") - tuple val(prefix), path("annotation/${prefix}.fna"), val("${resfinder_species}") - tuple val(prefix), path("annotation/${prefix}.txt") - path('prokka_version.txt') + tuple val(prefix), path("annotation/${prefix}.gff"), emit: gff + tuple val(prefix), path("annotation/${prefix}.gbk"), emit: gbk + tuple val(prefix), path("annotation/${prefix}.fna"), emit: genome + tuple val(prefix), path("annotation/${prefix}.faa"), emit: proteins + tuple val(prefix), path("annotation/${prefix}.ffn"), emit: genes + tuple val(prefix), path("annotation/${prefix}.fna"), path("${lreads}"), path("${fast5}"), emit: genome_with_fast5 + tuple val(prefix), path("annotation/${prefix}.fna"), val("${resfinder_species}"), emit: genome_with_species + tuple val(prefix), path("annotation/${prefix}.txt"), emit: summary + path('prokka_version.txt'), emit: version script: kingdom = (params.prokka_kingdom) ? "--kingdom ${params.prokka_kingdom}" : '' diff --git a/modules/generic/reports.nf b/modules/generic/reports.nf index ae9e4451..29e2d418 100644 --- a/modules/generic/reports.nf +++ b/modules/generic/reports.nf @@ -7,7 +7,7 @@ process REPORT { tuple val(prefix), file('annotation_stats.tsv'), file(gff), file(barrnap), file(mlst), file(keggsvg), file(refseq_masher_txt), file(amrfinder), file(rgi), file(rgi_parsed), file(rgi_heatmap), file(argminer_out), file(resfinder_tab), file(resfinder_point), file(resfinder_phenotable), file(vfdb_blastn), file(victors_blastp), file(phigaro_txt), file(phispy_tsv), file(iceberg_blastp), file(iceberg_blastn), file(plasmids_tsv), file(platon_tsv), file(gi_image), file(phast_blastp), file(digIS) output: - file '*.html' + path '*.html', emit: results script: def generic_annotator = (params.bakta_db) ? "bakta" : "prokka" diff --git a/modules/generic/sequenceserver.nf b/modules/generic/sequenceserver.nf index 06e12e58..174f816a 100644 --- a/modules/generic/sequenceserver.nf +++ b/modules/generic/sequenceserver.nf @@ -8,10 +8,10 @@ process SEQUENCESERVER { tuple val(prefix), file(genome), file(genes), file(proteins) output: - file("*") - file("${genome}") - file("${genes}") - file("${proteins}") + path("*") , emit: all + path("${genome}") , emit: genome + path("${genes}") , emit: genes + path("${proteins}"), emit: proteins script: """ diff --git a/modules/prophages/phast.nf b/modules/prophages/phast.nf index e91fa39b..86bdc101 100644 --- a/modules/prophages/phast.nf +++ b/modules/prophages/phast.nf @@ -8,10 +8,9 @@ process PHAST { file(bacannot_db) output: - // Outputs must be linked to each prefix (tag) - tuple val(prefix), path("${prefix}_phast_blastp_onGenes.summary.txt") - tuple val(prefix), path("${prefix}_phast_blastp_onGenes.txt") - path('*.txt') + tuple val(prefix), path("${prefix}_phast_blastp_onGenes.summary.txt"), emit: summary + tuple val(prefix), path("${prefix}_phast_blastp_onGenes.txt") , emit: results + path('*.txt') , emit: all script: """ diff --git a/modules/prophages/phigaro.nf b/modules/prophages/phigaro.nf index 71a58d94..8585e9ad 100644 --- a/modules/prophages/phigaro.nf +++ b/modules/prophages/phigaro.nf @@ -11,11 +11,10 @@ process PHIGARO { file(bacannot_db) output: - // Outputs must be linked to each prefix (tag) - tuple val(prefix), path("${prefix}_phigaro.tsv") - tuple val(prefix), path("${prefix}_phigaro.bed") - tuple val(prefix), path("${prefix}_phigaro.html") optional true - path('phigaro_version.txt') + tuple val(prefix), path("${prefix}_phigaro.tsv") , emit: tsv + tuple val(prefix), path("${prefix}_phigaro.bed") , emit: bed + tuple val(prefix), path("${prefix}_phigaro.html"), emit: html optional true + path('phigaro_version.txt') , emit: version script: """ diff --git a/modules/prophages/phispy.nf b/modules/prophages/phispy.nf index 180c1045..8cc2dc81 100644 --- a/modules/prophages/phispy.nf +++ b/modules/prophages/phispy.nf @@ -11,9 +11,9 @@ process PHISPY { tuple val(prefix), file(input) output: - tuple val(prefix), path("PhiSpy") - tuple val(prefix), path("PhiSpy/prophage.tsv") - tuple val(prefix), path("phispy_version.txt") + tuple val(prefix), path("PhiSpy") , emit: all + tuple val(prefix), path("PhiSpy/prophage.tsv"), emit: results + tuple val(prefix), path("phispy_version.txt") , emit: version script: """ diff --git a/modules/resistance/amrfinder.nf b/modules/resistance/amrfinder.nf index 9d349ef2..d0525be1 100644 --- a/modules/resistance/amrfinder.nf +++ b/modules/resistance/amrfinder.nf @@ -11,11 +11,10 @@ process AMRFINDER { file(bacannot_db) output: - // Outputs must be linked to each prefix (tag) - tuple val(prefix), file("AMRFinder_resistance-only.tsv") - tuple val(prefix), file("AMRFinder_complete.tsv") - file("${prefix}_args.faa") - file("amrfinder_version.txt") + tuple val(prefix), path("AMRFinder_resistance-only.tsv"), emit: resistance_results + tuple val(prefix), path("AMRFinder_complete.tsv") , emit: complete_results + path("${prefix}_args.faa") , emit: proteins + path("amrfinder_version.txt") , emit: version script: resistance_minid = params.blast_resistance_minid / 100.00 diff --git a/modules/resistance/argminer.nf b/modules/resistance/argminer.nf index 884d8857..c0aba462 100644 --- a/modules/resistance/argminer.nf +++ b/modules/resistance/argminer.nf @@ -8,9 +8,8 @@ process ARGMINER { file(bacannot_db) output: - // Outputs must be linked to each prefix (tag) - tuple val(prefix), file("${prefix}_argminer_blastp_onGenes.summary.txt") - file('*.txt') // Grab summaries + tuple val(prefix), path("${prefix}_argminer_blastp_onGenes.summary.txt"), emit: summary + path('*.txt') , emit: all script: """ diff --git a/modules/resistance/resfinder.nf b/modules/resistance/resfinder.nf index 091c35fc..e244cc39 100644 --- a/modules/resistance/resfinder.nf +++ b/modules/resistance/resfinder.nf @@ -8,12 +8,11 @@ process RESFINDER { file(bacannot_db) output: - // Outputs must be linked to each prefix (tag) - tuple val(prefix), file("resfinder/ResFinder_results_tab.txt") - tuple val(prefix), file("resfinder/PointFinder_results.txt") - tuple val(prefix), file("resfinder/args_pheno_table.txt") - tuple val(prefix), file("resfinder/results_tab.gff") - file("resfinder/*") // Grab everything + tuple val(prefix), path("resfinder/ResFinder_results_tab.txt"), emit: results + tuple val(prefix), path("resfinder/PointFinder_results.txt") , emit: pointfinder_results + tuple val(prefix), path("resfinder/args_pheno_table.txt") , emit: pheno_table + tuple val(prefix), path("resfinder/results_tab.gff") , emit: gff + path("resfinder/*") , emit: all when: (resfinder_species && resfinder_species != "missing_resfinder") diff --git a/modules/resistance/rgi_annotation.nf b/modules/resistance/rgi_annotation.nf index d064903c..5588f53a 100644 --- a/modules/resistance/rgi_annotation.nf +++ b/modules/resistance/rgi_annotation.nf @@ -12,14 +12,12 @@ process CARD_RGI { path(bacannot_db) output: - // Grab all outputs - path "*RGI_${prefix}*" optional true - // Outputs must be linked to each prefix (tag) - tuple val(prefix), path("Parsed_RGI_${prefix}_hits.txt") optional true - tuple val(prefix), path("RGI_${prefix}.txt") optional true - tuple val(prefix), path("heatmap/RGI*heatmap*.png") optional true - path("heatmap") optional true - path("*_version.txt") + path "*RGI_${prefix}*" , emit: all optional true + tuple val(prefix), path("Parsed_RGI_${prefix}_hits.txt"), emit: parsed_hits optional true + tuple val(prefix), path("RGI_${prefix}.txt") , emit: raw_hits optional true + tuple val(prefix), path("heatmap/RGI*heatmap*.png") , emit: heatmap_png optional true + path("heatmap") , emit: heatmap optional true + path("*_version.txt") , emit: version script: """ diff --git a/modules/virulence/vfdb.nf b/modules/virulence/vfdb.nf index 841cd676..cca4dd68 100644 --- a/modules/virulence/vfdb.nf +++ b/modules/virulence/vfdb.nf @@ -8,10 +8,9 @@ process VFDB { file(bacannot_db) output: - // Outputs must be linked to each prefix (tag) - tuple val(prefix), path("${prefix}_vfdb_blastn_onGenes.summary.txt") - tuple val(prefix), path("${prefix}_vfdb_blastn_onGenes.txt") - path('*.txt') + tuple val(prefix), path("${prefix}_vfdb_blastn_onGenes.summary.txt"), emit: summary + tuple val(prefix), path("${prefix}_vfdb_blastn_onGenes.txt") , emit: results + path('*.txt') , emit: all script: """ diff --git a/modules/virulence/victors.nf b/modules/virulence/victors.nf index e51bf4c8..917d3553 100644 --- a/modules/virulence/victors.nf +++ b/modules/virulence/victors.nf @@ -8,10 +8,9 @@ process VICTORS { file(bacannot_db) output: - // Outputs must be linked to each prefix (tag) - tuple val(prefix), path("${prefix}_victors_blastp_onGenes.summary.txt") - tuple val(prefix), path("${prefix}_victors_blastp_onGenes.txt") - path('*.txt') + tuple val(prefix), path("${prefix}_victors_blastp_onGenes.summary.txt"), emit: summary + tuple val(prefix), path("${prefix}_victors_blastp_onGenes.txt") , emit: results + path('*.txt') , emit: all script: """ diff --git a/nextflow.config b/nextflow.config index 15af031d..c129c8c3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -104,7 +104,7 @@ manifest { homePage = "https://github.com/fmalmeida/bacannot" mainScript = "main.nf" nextflowVersion = ">=20.10.0" - version = '3.1.3' + version = '3.1.5' } // Function to ensure that resource requirements don't go beyond diff --git a/workflows/bacannot.nf b/workflows/bacannot.nf index 51fec3d4..afa46268 100644 --- a/workflows/bacannot.nf +++ b/workflows/bacannot.nf @@ -2,107 +2,42 @@ * Include modules (Execution setup) */ -// Unicycler assembly -include { UNICYCLER } from '../modules/assembly/unicycler.nf' - -// Flye assembly -include { FLYE } from '../modules/assembly/flye.nf' - -// Species identification -include { REFSEQ_MASHER } from '../modules/generic/mash.nf' - -// Prokka annotation -include { PROKKA } from '../modules/generic/prokka.nf' - -// Bakta annotation -include { BAKTA } from '../modules/generic/bakta.nf' - -// MLST annotation -include { MLST } from '../modules/generic/mlst.nf' - -// rRNA annotation -include { BARRNAP } from '../modules/generic/barrnap.nf' - -// Calculate GC content -include { COMPUTE_GC } from '../modules/generic/compute_gc.nf' - -// KOFAM annotation -include { KOFAMSCAN } from '../modules/KOs/kofamscan.nf' - -// KEGG decoder -include { KEGG_DECODER } from '../modules/KOs/kegg-decoder.nf' - -// Plasmid annotation with plasmidfinder -include { PLASMIDFINDER } from '../modules/MGEs/plasmidfinder.nf' - -// Plasmid annotation with platon -include { PLATON } from '../modules/MGEs/platon.nf' - -// Virulence annotation with VFDB -include { VFDB } from '../modules/virulence/vfdb.nf' - -// Virulence annotation with Victors -include { VICTORS } from '../modules/virulence/victors.nf' - -// Prophage annotation with PHAST -include { PHAST } from '../modules/prophages/phast.nf' - -// Prophage annotation with PHIGARO -include { PHIGARO } from '../modules/prophages/phigaro.nf' - -// Prophage annotation with phispy -include { PHISPY } from '../modules/prophages/phispy.nf' - -// ICE annotation with ICEberg db -include { ICEBERG } from '../modules/MGEs/iceberg.nf' - -// Genomic Islands detection with Islandpath-DIMOB -include { ISLANDPATH } from '../modules/MGEs/islandpath.nf' -include { DRAW_GIS } from '../modules/MGEs/draw_gis.nf' - -// IS identification -include { DIGIS } from '../modules/MGEs/digIS.nf' - -// AMR annotation with ARGMiner -include { ARGMINER } from '../modules/resistance/argminer.nf' - -// AMR annotation with Resfinder -include { RESFINDER } from '../modules/resistance/resfinder.nf' - -// AMR annotation with AMRFinderPlus -include { AMRFINDER } from '../modules/resistance/amrfinder.nf' - -// AMR annotation with CARD-RGI -include { CARD_RGI } from '../modules/resistance/rgi_annotation.nf' - -// Methylation calling (Nanopolish) -include { CALL_METHYLATION } from '../modules/generic/methylation.nf' - -// User's custom db annotation +include { UNICYCLER } from '../modules/assembly/unicycler.nf' +include { FLYE } from '../modules/assembly/flye.nf' +include { REFSEQ_MASHER } from '../modules/generic/mash.nf' +include { PROKKA } from '../modules/generic/prokka.nf' +include { BAKTA } from '../modules/generic/bakta.nf' +include { MLST } from '../modules/generic/mlst.nf' +include { BARRNAP } from '../modules/generic/barrnap.nf' +include { COMPUTE_GC } from '../modules/generic/compute_gc.nf' +include { KOFAMSCAN } from '../modules/KOs/kofamscan.nf' +include { KEGG_DECODER } from '../modules/KOs/kegg-decoder.nf' +include { PLASMIDFINDER } from '../modules/MGEs/plasmidfinder.nf' +include { PLATON } from '../modules/MGEs/platon.nf' +include { VFDB } from '../modules/virulence/vfdb.nf' +include { VICTORS } from '../modules/virulence/victors.nf' +include { PHAST } from '../modules/prophages/phast.nf' +include { PHIGARO } from '../modules/prophages/phigaro.nf' +include { PHISPY } from '../modules/prophages/phispy.nf' +include { ICEBERG } from '../modules/MGEs/iceberg.nf' +include { ISLANDPATH } from '../modules/MGEs/islandpath.nf' +include { DRAW_GIS } from '../modules/MGEs/draw_gis.nf' +include { DIGIS } from '../modules/MGEs/digIS.nf' +include { ARGMINER } from '../modules/resistance/argminer.nf' +include { RESFINDER } from '../modules/resistance/resfinder.nf' +include { AMRFINDER } from '../modules/resistance/amrfinder.nf' +include { CARD_RGI } from '../modules/resistance/rgi_annotation.nf' +include { CALL_METHYLATION } from '../modules/generic/methylation.nf' include { CUSTOM_DATABASE } from '../modules/generic/custom_database.nf' include { CUSTOM_DATABASE_REPORT } from '../modules/generic/custom_database_report.nf' include { GET_NCBI_PROTEIN } from '../modules/generic/ncbi_protein.nf' - -// Merging annotation in GFF -include { MERGE_ANNOTATIONS } from '../modules/generic/merge_annotations.nf' - -// Convert GFF to GBK -include { GFF2GBK } from '../modules/generic/gff2gbk.nf' - -// Convert GFF to SQL -include { CREATE_SQL } from '../modules/generic/gff2sql.nf' - -// JBrowse -include { JBROWSE } from '../modules/generic/jbrowse.nf' - -// Output reports -include { REPORT } from '../modules/generic/reports.nf' - -// sequenceserver generation -include { SEQUENCESERVER } from '../modules/generic/sequenceserver.nf' - -// antiSMASH -include { ANTISMASH } from '../modules/generic/antismash.nf' +include { MERGE_ANNOTATIONS } from '../modules/generic/merge_annotations.nf' +include { GFF2GBK } from '../modules/generic/gff2gbk.nf' +include { CREATE_SQL } from '../modules/generic/gff2sql.nf' +include { JBROWSE } from '../modules/generic/jbrowse.nf' +include { REPORT } from '../modules/generic/reports.nf' +include { SEQUENCESERVER } from '../modules/generic/sequenceserver.nf' +include { ANTISMASH } from '../modules/generic/antismash.nf' /* DEF WORKFLOW @@ -134,30 +69,33 @@ workflow BACANNOT { // First step -- Prokka or Bakta annotation if (params.bakta_db) { BAKTA( - parsed_inputs.annotation_ch.mix(FLYE.out[1], UNICYCLER.out[1]), + parsed_inputs.annotation_ch.mix( FLYE.out.results, UNICYCLER.out.results ), file(params.bakta_db, checkIfExists: true) ) annotation_out_ch = BAKTA.out } else { - PROKKA(parsed_inputs.annotation_ch.mix(FLYE.out[1], UNICYCLER.out[1]), dbs_ch) + PROKKA( + parsed_inputs.annotation_ch.mix( FLYE.out.results, UNICYCLER.out.results ), + dbs_ch + ) annotation_out_ch = PROKKA.out } // Second step -- MLST analysis - MLST( annotation_out_ch[3], dbs_ch ) + MLST( annotation_out_ch.genome, dbs_ch ) // Third step -- rRNA annotation - BARRNAP( annotation_out_ch[3] ) + BARRNAP( annotation_out_ch.genome ) // Fouth step -- calculate GC content for JBrowse - COMPUTE_GC( annotation_out_ch[3] ) + COMPUTE_GC( annotation_out_ch.genome ) // Fifth step -- run kofamscan if (params.skip_kofamscan == false) { - KOFAMSCAN( annotation_out_ch[4], dbs_ch ) - KEGG_DECODER( KOFAMSCAN.out[1] ) - kofamscan_output_ch = KOFAMSCAN.out[1] - kegg_decoder_svg_ch = KEGG_DECODER.out[1] + KOFAMSCAN( annotation_out_ch.proteins, dbs_ch ) + KEGG_DECODER( KOFAMSCAN.out.results ) + kofamscan_output_ch = KOFAMSCAN.out.results + kegg_decoder_svg_ch = KEGG_DECODER.out.results } else { kofamscan_output_ch = Channel.empty() kegg_decoder_svg_ch = Channel.empty() @@ -170,27 +108,27 @@ workflow BACANNOT { // plasmids if (params.skip_plasmid_search == false) { // plasmidfinder - PLASMIDFINDER( annotation_out_ch[3], dbs_ch ) - plasmidfinder_output_ch = PLASMIDFINDER.out[1] + PLASMIDFINDER( annotation_out_ch.genome, dbs_ch ) + plasmidfinder_output_ch = PLASMIDFINDER.out.results // platon - PLATON( annotation_out_ch[3], dbs_ch ) - platon_output_ch = PLATON.out[1] + PLATON( annotation_out_ch.genome, dbs_ch ) + platon_output_ch = PLATON.out.results } else { plasmidfinder_output_ch = Channel.empty() - platon_output_ch = Channel.empty() + platon_output_ch = Channel.empty() } // IslandPath software - ISLANDPATH(annotation_out_ch[2]) + ISLANDPATH( annotation_out_ch.gbk ) // Virulence search if (params.skip_virulence_search == false) { // VFDB - VFDB( annotation_out_ch[5], dbs_ch ) - vfdb_output_ch = VFDB.out[1] + VFDB( annotation_out_ch.genes, dbs_ch ) + vfdb_output_ch = VFDB.out.results // Victors db - VICTORS( annotation_out_ch[4], dbs_ch ) - victors_output_ch = VICTORS.out[1] + VICTORS( annotation_out_ch.proteins, dbs_ch ) + victors_output_ch = VICTORS.out.results } else { vfdb_output_ch = Channel.empty() victors_output_ch = Channel.empty() @@ -199,15 +137,15 @@ workflow BACANNOT { // Prophage search if (params.skip_prophage_search == false) { // PHAST db - PHAST( annotation_out_ch[4], dbs_ch ) - phast_output_ch = PHAST.out[1] + PHAST( annotation_out_ch.proteins, dbs_ch ) + phast_output_ch = PHAST.out.results // Phigaro software - PHIGARO( annotation_out_ch[3], dbs_ch ) - phigaro_output_tsv_ch = PHIGARO.out[0] - phigaro_output_bed_ch = PHIGARO.out[1] + PHIGARO( annotation_out_ch.genome, dbs_ch ) + phigaro_output_tsv_ch = PHIGARO.out.tsv + phigaro_output_bed_ch = PHIGARO.out.bed // PhiSpy - PHISPY( annotation_out_ch[2] ) - phispy_output_ch = PHISPY.out[1] + PHISPY( annotation_out_ch.gbk ) + phispy_output_ch = PHISPY.out.results } else { phast_output_ch = Channel.empty() phigaro_output_tsv_ch = Channel.empty() @@ -218,9 +156,9 @@ workflow BACANNOT { // ICEs search if (params.skip_iceberg_search == false) { // ICEberg db - ICEBERG( annotation_out_ch[4], annotation_out_ch[3], dbs_ch ) - iceberg_output_blastp_ch = ICEBERG.out[1] - iceberg_output_blastn_ch = ICEBERG.out[2] + ICEBERG( annotation_out_ch.proteins, annotation_out_ch.genome, dbs_ch ) + iceberg_output_blastp_ch = ICEBERG.out.results + iceberg_output_blastn_ch = ICEBERG.out.genome_summary } else { iceberg_output_blastp_ch = Channel.empty() iceberg_output_blastn_ch = Channel.empty() @@ -229,22 +167,22 @@ workflow BACANNOT { // AMR search if (params.skip_resistance_search == false) { // AMRFinderPlus - AMRFINDER( annotation_out_ch[4], dbs_ch ) - amrfinder_output_ch = AMRFINDER.out[0] + AMRFINDER( annotation_out_ch.proteins, dbs_ch ) + amrfinder_output_ch = AMRFINDER.out.resistance_results // CARD-RGI - CARD_RGI( annotation_out_ch[4], dbs_ch ) - rgi_output_ch = CARD_RGI.out[2] - rgi_output_parsed_ch = CARD_RGI.out[1] - rgi_heatmap_ch = CARD_RGI.out[3] + CARD_RGI( annotation_out_ch.proteins, dbs_ch ) + rgi_output_ch = CARD_RGI.out.raw_hits + rgi_output_parsed_ch = CARD_RGI.out.parsed_hits + rgi_heatmap_ch = CARD_RGI.out.heatmap_png // ARGMiner - ARGMINER( annotation_out_ch[4], dbs_ch ) - argminer_output_ch = ARGMINER.out[0] + ARGMINER( annotation_out_ch.proteins, dbs_ch ) + argminer_output_ch = ARGMINER.out.summary // Resfinder - RESFINDER( annotation_out_ch[7], dbs_ch ) - resfinder_output_tab_ch = RESFINDER.out[0] - resfinder_output_pointfinder_ch = RESFINDER.out[1] - resfinder_phenotable_ch = RESFINDER.out[2] - resfinder_gff_ch = RESFINDER.out[3] + RESFINDER( annotation_out_ch.genome_with_species, dbs_ch ) + resfinder_output_tab_ch = RESFINDER.out.results + resfinder_output_pointfinder_ch = RESFINDER.out.pointfinder_results + resfinder_phenotable_ch = RESFINDER.out.pheno_table + resfinder_gff_ch = RESFINDER.out.gff } else { rgi_output_ch = Channel.empty() rgi_output_parsed_ch = Channel.empty() @@ -260,9 +198,7 @@ workflow BACANNOT { /* Seventh step -- Methylation call */ - CALL_METHYLATION( annotation_out_ch[6] ) - methylation_out_1_ch = CALL_METHYLATION.out[2] - methylation_out_2_ch = CALL_METHYLATION.out[3] + CALL_METHYLATION( annotation_out_ch.genome_with_fast5 ) /* @@ -271,23 +207,24 @@ workflow BACANNOT { */ // species identification - REFSEQ_MASHER( annotation_out_ch[3] ) + REFSEQ_MASHER( annotation_out_ch.genome ) // IS identification - DIGIS( annotation_out_ch[3].join(annotation_out_ch[2]) ) + DIGIS( annotation_out_ch.genome.join(annotation_out_ch.gbk) ) // antiSMASH if (params.skip_antismash == false) { - ANTISMASH( annotation_out_ch[2], dbs_ch ) - antismash_output_ch = ANTISMASH.out[0] + ANTISMASH( annotation_out_ch.gbk, dbs_ch ) + antismash_output_ch = ANTISMASH.out.gff } else { antismash_output_ch = Channel.empty() } // sequenceserver SEQUENCESERVER( - annotation_out_ch[3].join(annotation_out_ch[5]) - .join(annotation_out_ch[4]) + annotation_out_ch.genome + .join( annotation_out_ch.genes ) + .join( annotation_out_ch.proteins ) ) // custom databases annotation @@ -295,90 +232,100 @@ workflow BACANNOT { if (params.custom_db || params.ncbi_proteins) { GET_NCBI_PROTEIN( ncbi_accs ) CUSTOM_DATABASE( - annotation_out_ch[1].join(annotation_out_ch[3]), - custom_db.mix(GET_NCBI_PROTEIN.out[0]) + annotation_out_ch.gff.join( annotation_out_ch.genome ), + custom_db.mix( GET_NCBI_PROTEIN.out.proteins ) ) - ch_custom_databases_annotations = CUSTOM_DATABASE.out[1].groupTuple() + ch_custom_databases_annotations = CUSTOM_DATABASE.out.gff.groupTuple() } /* Eighth step -- Merge all annotations */ MERGE_ANNOTATIONS( - annotation_out_ch[1].join(kofamscan_output_ch, remainder: true) - .join(vfdb_output_ch, remainder: true) - .join(victors_output_ch, remainder: true) - .join(amrfinder_output_ch, remainder: true) - .join(resfinder_gff_ch, remainder: true) - .join(rgi_output_ch, remainder: true) - .join(iceberg_output_blastp_ch, remainder: true) - .join(phast_output_ch, remainder: true) - .join(DIGIS.out[1], remainder: true) - .join(ch_custom_databases_annotations, remainder: true) + annotation_out_ch.gff + .join(kofamscan_output_ch, remainder: true) + .join(vfdb_output_ch, remainder: true) + .join(victors_output_ch, remainder: true) + .join(amrfinder_output_ch, remainder: true) + .join(resfinder_gff_ch, remainder: true) + .join(rgi_output_ch, remainder: true) + .join(iceberg_output_blastp_ch, remainder: true) + .join(phast_output_ch, remainder: true) + .join(DIGIS.out.gff, remainder: true) + .join(ch_custom_databases_annotations, remainder: true) ) /* Final step -- Create genome browser and reports' files */ // Plot genomic islands - DRAW_GIS( MERGE_ANNOTATIONS.out[0].join(ISLANDPATH.out[0]) ) + DRAW_GIS( + MERGE_ANNOTATIONS.out.gff.join( ISLANDPATH.out.results ) + ) // Convert GFF file to GBK file - GFF2GBK( MERGE_ANNOTATIONS.out[0].join(annotation_out_ch[3]) ) + GFF2GBK( + MERGE_ANNOTATIONS.out.gff.join( annotation_out_ch.genome ) + ) // Convert GFF file to sqldb CREATE_SQL( - MERGE_ANNOTATIONS.out[0].join(annotation_out_ch[5]) - .join(annotation_out_ch[4]) - .join(annotation_out_ch[3]) - .join(DIGIS.out[2] ) + MERGE_ANNOTATIONS.out.gff + .join( annotation_out_ch.genes ) + .join( annotation_out_ch.proteins ) + .join( annotation_out_ch.genome ) + .join( DIGIS.out.gff_and_sequences ) ) JBROWSE( - MERGE_ANNOTATIONS.out[0].join(annotation_out_ch[3]) - .join(annotation_out_ch[1]) - .join(BARRNAP.out[0]) - .join(COMPUTE_GC.out[0]) - .join(resfinder_gff_ch, remainder: true) - .join(phigaro_output_bed_ch,remainder: true) - .join(ISLANDPATH.out[0], remainder: true) - .join(methylation_out_1_ch, remainder: true) - .join(methylation_out_2_ch, remainder: true) - .join(phispy_output_ch, remainder: true) - .join(MERGE_ANNOTATIONS.out[1]) // parsed digIS - .join(antismash_output_ch, remainder: true) - .join(MERGE_ANNOTATIONS.out[2].groupTuple(), remainder: true) // parsed custom db + MERGE_ANNOTATIONS.out.gff + .join( annotation_out_ch.genome ) + .join( annotation_out_ch.gff ) + .join( BARRNAP.out.gff ) + .join( COMPUTE_GC.out.results ) + .join( resfinder_gff_ch, remainder: true ) + .join( phigaro_output_bed_ch, remainder: true ) + .join( ISLANDPATH.out.results, remainder: true ) + .join( CALL_METHYLATION.out.bedgraph, remainder: true ) + .join( CALL_METHYLATION.out.chr_sizes, remainder: true ) + .join( phispy_output_ch, remainder: true ) + .join( MERGE_ANNOTATIONS.out.digis_gff ) + .join( antismash_output_ch, remainder: true ) + .join( MERGE_ANNOTATIONS.out.customdb_gff.groupTuple(), remainder: true ) ) // Render reports if (params.custom_db || params.ncbi_proteins) { - CUSTOM_DATABASE_REPORT( CUSTOM_DATABASE.out[0].join( MERGE_ANNOTATIONS.out[0], remainder:true ) ) + CUSTOM_DATABASE_REPORT( + CUSTOM_DATABASE.out.summary.join( MERGE_ANNOTATIONS.out.gff, remainder:true ) + ) } REPORT( - annotation_out_ch[8].join(MERGE_ANNOTATIONS.out[0]) - .join(BARRNAP.out[0]) - .join(MLST.out[0]) - .join(kegg_decoder_svg_ch, remainder: true) - .join(REFSEQ_MASHER.out[0]) - .join(amrfinder_output_ch, remainder: true) - .join(rgi_output_ch, remainder: true) - .join(rgi_output_parsed_ch, remainder: true) - .join(rgi_heatmap_ch, remainder: true) - .join(argminer_output_ch, remainder: true) - .join(resfinder_output_tab_ch, remainder: true) - .join(resfinder_output_pointfinder_ch, remainder: true) - .join(resfinder_phenotable_ch, remainder: true) - .join(vfdb_output_ch, remainder: true) - .join(victors_output_ch, remainder: true) - .join(phigaro_output_tsv_ch, remainder: true) - .join(phispy_output_ch, remainder: true) - .join(iceberg_output_blastp_ch, remainder: true) - .join(iceberg_output_blastn_ch, remainder: true) - .join(plasmidfinder_output_ch, remainder: true) - .join(platon_output_ch, remainder: true) - .join(DRAW_GIS.out[1], remainder: true) - .join(phast_output_ch, remainder: true) - .join(MERGE_ANNOTATIONS.out[1]) // parsed digIS + annotation_out_ch[8] + .join( MERGE_ANNOTATIONS.out.gff ) + .join( BARRNAP.out.gff ) + .join( MLST.out.results ) + .join( kegg_decoder_svg_ch, remainder: true ) + .join( REFSEQ_MASHER.out.results ) + .join( amrfinder_output_ch, remainder: true ) + .join( rgi_output_ch, remainder: true ) + .join( rgi_output_parsed_ch, remainder: true ) + .join( rgi_heatmap_ch, remainder: true ) + .join( argminer_output_ch, remainder: true ) + .join( resfinder_output_tab_ch, remainder: true ) + .join( resfinder_output_pointfinder_ch, remainder: true ) + .join( resfinder_phenotable_ch, remainder: true ) + .join( vfdb_output_ch, remainder: true ) + .join( victors_output_ch, remainder: true ) + .join( phigaro_output_tsv_ch, remainder: true ) + .join( phispy_output_ch, remainder: true ) + .join( iceberg_output_blastp_ch, remainder: true ) + .join( iceberg_output_blastn_ch, remainder: true ) + .join( plasmidfinder_output_ch, remainder: true ) + .join( platon_output_ch, remainder: true ) + .join( DRAW_GIS.out.example, remainder: true ) + .join( phast_output_ch, remainder: true ) + .join( MERGE_ANNOTATIONS.out.digis_gff ) ) }