Skip to content

Commit

Permalink
adding rule to intersect bed (#65)
Browse files Browse the repository at this point in the history
* when using bedtools intersect, keep only the highest intersection of annotation

* Update merge_annotations.nf

* updating versioning information

* change modules to use named outputs for better readability

* fixed optional emissions

* fix awk print statement

* Update CHANGELOG.md

* Update CHANGELOG.md
  • Loading branch information
fmalmeida committed Sep 17, 2022
1 parent 620b3a5 commit 8841acf
Show file tree
Hide file tree
Showing 40 changed files with 277 additions and 343 deletions.
2 changes: 1 addition & 1 deletion .zenodo.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"description": "<p>The pipeline</p>\n\n<p>bacannot, is a customisable, easy to use, pipeline that uses state-of-the-art software for comprehensively annotating prokaryotic genomes having only Docker and Nextflow as dependencies. It is able to annotate and detect virulence and resistance genes, plasmids, secondary metabolites, genomic islands, prophages, ICEs, KO, and more, while providing nice an beautiful interactive documents for results exploration.</p>",
"license": "other-open",
"title": "fmalmeida/bacannot: A generic but comprehensive bacterial annotation pipeline",
"version": "v3.1.4",
"version": "v3.1.5",
"upload_type": "software",
"creators": [
{
Expand Down
6 changes: 6 additions & 0 deletions markdown/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

The tracking for changes started in v2.1

## v3.1.5 [17-September-2022]

* Fixes https://github.com/fmalmeida/bacannot/issues/64 reported by @fmalmeida, which highlights that the resfinder annotations were sometimes being duplicated because a single gene had intersections with more than one CDS region, even though only one of them was "true".
- To solve this issue, intersections are now sorted by length, and only the first occurrence (per gene) is kept.
* As reported in issue #39, named outputs are now used in modules for better readability.

## v3.1.4 [13-September-2022]

* Fixes https://github.com/fmalmeida/bacannot/issues/62 reported by @rujinlong, where Island-Path tool was failing because it was running on genbank files with no true CDS. This was happening because Bakta writes in the comments that the GBK has 0 CDS and, at first, the module was selecting GBK by checking if the CDS string was there. It has now been modified to also work with Bakta.
Expand Down
5 changes: 2 additions & 3 deletions modules/KOs/kegg-decoder.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@ process KEGG_DECODER {
tuple val(prefix), path('input_mapper.txt')

output:
// Grab all outputs
path("*") // Get all files to input directory
tuple val(prefix), path("*.svg") // get svg
path("*") , emit: all // Get all files to input directory
tuple val(prefix), path("*.svg"), emit: results // get svg

script:
"""
Expand Down
4 changes: 2 additions & 2 deletions modules/KOs/kofamscan.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ process KOFAMSCAN {

output:
// Grab all outputs
file("KOfamscan")
tuple val(prefix), file("KOfamscan/${prefix}_ko_forKEGGMapper.txt")
path("KOfamscan"), emit: all
tuple val(prefix), path("KOfamscan/${prefix}_ko_forKEGGMapper.txt"), emit: results

script:
"""
Expand Down
7 changes: 3 additions & 4 deletions modules/MGEs/digIS.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@ process DIGIS {
tuple val(prefix), path(genome), path(genbank)

output:
// Grab results
file("digIS")
tuple val(prefix), path("digIS/results/${prefix}.gff")
tuple val(prefix), path("${prefix}_IS.gff"), path("digIS/results/fastas/${prefix}_IS.fa"), path("digIS/results/fastas/${prefix}_IS.faa")
path("digIS") , emit: all
tuple val(prefix), path("digIS/results/${prefix}.gff"), emit: gff
tuple val(prefix), path("${prefix}_IS.gff"), path("digIS/results/fastas/${prefix}_IS.fa"), path("digIS/results/fastas/${prefix}_IS.faa"), emit: gff_and_sequences

script:
"""
Expand Down
5 changes: 2 additions & 3 deletions modules/MGEs/draw_gis.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@ process DRAW_GIS {
tuple val(prefix), file(gff), file(gis_bed)

output:
// Outputs must be linked to each prefix (tag)
tuple val(prefix), file("plots") optional true
tuple val(prefix), file("teste.png") optional true
tuple val(prefix), file("plots") , emit: all optional true
tuple val(prefix), file("teste.png"), emit: example optional true

script:
"""
Expand Down
9 changes: 4 additions & 5 deletions modules/MGEs/iceberg.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@ process ICEBERG {
file(bacannot_db)

output:
// Outputs must be linked to each prefix (tag)
tuple val(prefix), file("${prefix}_iceberg_blastp_onGenes.summary.txt")
tuple val(prefix), file("${prefix}_iceberg_blastp_onGenes.txt")
tuple val(prefix), file("${prefix}_iceberg_blastn_onGenome.summary.txt")
file('*.txt') // Grab all
tuple val(prefix), path("${prefix}_iceberg_blastp_onGenes.summary.txt") , emit: genes_summary
tuple val(prefix), path("${prefix}_iceberg_blastp_onGenes.txt") , emit: results
tuple val(prefix), path("${prefix}_iceberg_blastn_onGenome.summary.txt"), emit: genome_summary
path('*.txt') , emit: all

script:
"""
Expand Down
3 changes: 1 addition & 2 deletions modules/MGEs/islandpath.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ process ISLANDPATH {
tuple val(prefix), file("annotation.gbk")

output:
// Outputs must be linked to each prefix (tag)
tuple val(prefix), path("${prefix}_predicted_GIs.bed")
tuple val(prefix), path("${prefix}_predicted_GIs.bed"), emit: results

script:
"""
Expand Down
4 changes: 2 additions & 2 deletions modules/MGEs/plasmidfinder.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ process PLASMIDFINDER {
file(bacannot_db)

output:
tuple val(prefix), path("plasmidfinder")
tuple val(prefix), path("plasmidfinder/results_tab.tsv")
tuple val(prefix), path("plasmidfinder") , emit: all
tuple val(prefix), path("plasmidfinder/results_tab.tsv"), emit: results

script:
"""
Expand Down
6 changes: 3 additions & 3 deletions modules/MGEs/platon.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ process PLATON {
file(bacannot_db)

output:
path("platon")
tuple val(prefix), path("platon/${prefix}.tsv")
path("platon_version.txt")
path("platon") , emit: all
tuple val(prefix), path("platon/${prefix}.tsv"), emit: results
path("platon_version.txt") , emit: version

script:
"""
Expand Down
6 changes: 3 additions & 3 deletions modules/assembly/flye.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ process FLYE {
tuple val(prefix), val(entrypoint), file(sread1), file(sread2), file(sreads), file(lreads), val(lr_type), file(fast5), val(assembly), val(resfinder_species)

output:
file "flye_${prefix}" // Saves all files
path "flye_${prefix}", emit: all // Saves all files
// Keep tuple structure to mixing channels
tuple val("${prefix}"), val("${entrypoint}"), val("${sread1}"), val("${sread2}"), val("${sreads}"), file("${lreads}"), val("${lr_type}"), file("${fast5}"), file("flye_${prefix}.fasta"), val("${resfinder_species}")
file('flye_version.txt')
tuple val("${prefix}"), val("${entrypoint}"), val("${sread1}"), val("${sread2}"), val("${sreads}"), path("${lreads}"), val("${lr_type}"), path("${fast5}"), path("flye_${prefix}.fasta"), val("${resfinder_species}"), emit: results
path('flye_version.txt'), emit: version

script:
lr = (lr_type == 'nanopore') ? '--nano-raw' : '--pacbio-raw'
Expand Down
6 changes: 3 additions & 3 deletions modules/assembly/unicycler.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ process UNICYCLER {
tuple val(prefix), val(entrypoint), file(sread1), file(sread2), file(sreads), file(lreads), val(lr_type), file(fast5), val(assembly), val(resfinder_species)

output:
file "unicycler_${prefix}" // Save everything
path "unicycler_${prefix}", emit: all // Save everything
// Keep tuple structure to mixing channels
tuple val("${prefix}"), val("${entrypoint}"), val("${sread1}"), val("${sread2}"), val("${sreads}"), file("${lreads}"), val("${lr_type}"), file("${fast5}"), file("unicycler_${prefix}.fasta"), val("${resfinder_species}")
file('unicycler_version.txt')
tuple val("${prefix}"), val("${entrypoint}"), val("${sread1}"), val("${sread2}"), val("${sreads}"), path("${lreads}"), val("${lr_type}"), path("${fast5}"), path("unicycler_${prefix}.fasta"), val("${resfinder_species}"), emit: results
path('unicycler_version.txt'), emit: version

script:
unpaired_param = (sreads.getName() != "input.3") ? "-s $sreads" : ""
Expand Down
7 changes: 3 additions & 4 deletions modules/generic/antismash.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@ process ANTISMASH {
file(bacannot_db)

output:
// Grab results
tuple val(prefix), path("antiSMASH/regions.gff")
path("antiSMASH")
path("*_version.txt")
tuple val(prefix), path("antiSMASH/regions.gff"), emit: gff
path("antiSMASH") , emit: all
path("*_version.txt") , emit: version

script:
def gbk_suffix = (params.bakta_db) ? "gbff" : "gbk"
Expand Down
20 changes: 10 additions & 10 deletions modules/generic/bakta.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ process BAKTA {

output:
// Grab all outputs
file "annotation"
path "annotation", emit: all
// Outputs must be linked to each prefix (tag)
tuple val(prefix), file("annotation/${prefix}.gff3") // annotation in gff format
tuple val(prefix), file("annotation/${prefix}.gbff") // annotation in gbk format
tuple val(prefix), file("annotation/${prefix}.fna") // renamed genome
tuple val(prefix), file("annotation/${prefix}.faa") // gene aa sequences
tuple val(prefix), file("annotation/${prefix}.ffn") // gene nt sequences
tuple val(prefix), file("annotation/${prefix}.fna"), file("${lreads}"), file("${fast5}") // For methylation calling
tuple val(prefix), file("annotation/${prefix}.fna"), val("${resfinder_species}") // For resfinder
tuple val(prefix), file("annotation/${prefix}.txt") // bakta stats
file('bakta_version.txt') // Save bakta version
tuple val(prefix), path("annotation/${prefix}.gff3"), emit: gff // annotation in gff format
tuple val(prefix), path("annotation/${prefix}.gbff"), emit: gbk // annotation in gbk format
tuple val(prefix), path("annotation/${prefix}.fna") , emit: genome // renamed genome
tuple val(prefix), path("annotation/${prefix}.faa") , emit: proteins // gene aa sequences
tuple val(prefix), path("annotation/${prefix}.ffn") , emit: genes // gene nt sequences
tuple val(prefix), path("annotation/${prefix}.fna"), path("${lreads}"), path("${fast5}"), emit: genome_with_fast5 // For methylation calling
tuple val(prefix), path("annotation/${prefix}.fna"), val("${resfinder_species}"), emit: genome_with_species // For resfinder
tuple val(prefix), path("annotation/${prefix}.txt") , emit: summary // bakta stats
path('bakta_version.txt'), emit: version // Save bakta version

script:
"""
Expand Down
6 changes: 3 additions & 3 deletions modules/generic/barrnap.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ process BARRNAP {
tuple val(prefix), file(genome)

output:
tuple val(prefix), path("${prefix}_rRNA.gff")
tuple val(prefix), path("${prefix}_rRNA.fa")
path('barrnap_version.txt')
tuple val(prefix), path("${prefix}_rRNA.gff"), emit: gff
tuple val(prefix), path("${prefix}_rRNA.fa") , emit: fasta
path('barrnap_version.txt') , emit: version

script:
"""
Expand Down
3 changes: 1 addition & 2 deletions modules/generic/compute_gc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ process COMPUTE_GC {
tuple val(prefix), file(genome)

output:
// Outputs must be linked to each prefix (tag)
tuple val(prefix), path("input_GC_500_bps.sorted.bedGraph"), path("input.sizes")
tuple val(prefix), path("input_GC_500_bps.sorted.bedGraph"), path("input.sizes"), emit: results

script:
"""
Expand Down
9 changes: 4 additions & 5 deletions modules/generic/custom_database.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@ process CUSTOM_DATABASE {
each file(customDB)

output:
// Outputs must be linked to each prefix (tag)
tuple val(prefix), val("${customDB.baseName}"), path("${prefix}_${customDB.baseName}*.summary.txt")
tuple val(prefix), path("${customDB.baseName}_custom_db.gff")
path('*.txt') // Grab all
path(customDB)
tuple val(prefix), val("${customDB.baseName}"), path("${prefix}_${customDB.baseName}*.summary.txt"), emit: summary
tuple val(prefix), path("${customDB.baseName}_custom_db.gff") , emit: gff
path('*.txt') , emit: all
path(customDB) , emit: db

script:
"""
Expand Down
2 changes: 1 addition & 1 deletion modules/generic/custom_database_report.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ process CUSTOM_DATABASE_REPORT {
tuple val(prefix), val(customDB), file(custom_blast), file(custom_gff)

output:
file '*.html'
path '*.html', emit: results

script:
"""
Expand Down
2 changes: 1 addition & 1 deletion modules/generic/gff2gbk.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ process GFF2GBK {
tuple val(prefix), file(gff), file(input)

output:
file "*.genbank"
path "*.genbank", emit: results

"""
# Activate env
Expand Down
4 changes: 2 additions & 2 deletions modules/generic/gff2sql.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ process CREATE_SQL {
tuple val(prefix), file(gff), file(genes_nt), file(genes_aa), file(genome), file("digIS.gff"), file("digIS.fa"), file("digIS.faa")

output:
file "${prefix}.sqlite"
file "run_server.sh"
path "${prefix}.sqlite", emit: results
path "run_server.sh" , emit: script

script:
"""
Expand Down
2 changes: 1 addition & 1 deletion modules/generic/jbrowse.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ process JBROWSE {
tuple val(prefix), file(merged_gff), file(draft), file("prokka_gff"), file(barrnap), file(gc_bedGraph), file(gc_chrSizes), file(resfinder_gff), file(phigaro), file(genomic_islands), file("methylation"), file("chr.sizes"), file(phispy_tsv), file(digIS_gff), file(antiSMASH), file(custom_annotations)

output:
file "*"
path "*", emit: results

script:
"""
Expand Down
5 changes: 2 additions & 3 deletions modules/generic/mash.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@ process REFSEQ_MASHER {
tuple val(prefix), path(genome)

output:
// Grab results
tuple val(prefix), path("refseq_masher_results.txt")
path("*_version.txt")
tuple val(prefix), path("refseq_masher_results.txt"), emit: results
path("*_version.txt") , emit: version

script:
"""
Expand Down
12 changes: 6 additions & 6 deletions modules/generic/merge_annotations.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ process MERGE_ANNOTATIONS {
tuple val(prefix), file('prokka_gff'), file(kofamscan), file(vfdb), file(victors), file(amrfinder), file(resfinder), file(rgi), file(iceberg), file(phast), file('digis_gff'), file(custom_databases)

output:
tuple val(prefix), path("${prefix}.gff")
tuple val(prefix), path("transposable_elements_digis.gff")
tuple val(prefix), path("custom_database_*.gff") optional true
path("*.gff")
tuple val(prefix), path("${prefix}.gff") , emit: gff
tuple val(prefix), path("transposable_elements_digis.gff"), emit: digis_gff
tuple val(prefix), path("custom_database_*.gff") , emit: customdb_gff optional true
path("*.gff") , emit: all

script:
"""
Expand Down Expand Up @@ -81,7 +81,7 @@ process MERGE_ANNOTATIONS {
#### Resfinder
if [ ! \$(cat $resfinder | wc -l) -eq 0 ]
then
bedtools intersect -a $resfinder -b ${prefix}.gff -wo > resfinder_intersected.txt ;
bedtools intersect -a $resfinder -b ${prefix}.gff -wo | sort -k19,19 -r | awk -F '\\t' '!seen[\$9]++' > resfinder_intersected.txt ;
addBedtoolsIntersect.R -g ${prefix}.gff -t resfinder_intersected.txt --type Resistance --source Resfinder -o ${prefix}.gff ;
grep "Resfinder" ${prefix}.gff > resistance_resfinder.gff ;
rm -f resfinder_intersected.txt ;
Expand All @@ -93,7 +93,7 @@ process MERGE_ANNOTATIONS {
if [ ! \$(cat \$file | wc -l) -eq 0 ]
then
db=\${file%%_custom_db.gff} ;
bedtools intersect -a \${file} -b ${prefix}.gff -wo > bedtools_intersected.txt ;
bedtools intersect -a \${file} -b ${prefix}.gff -wo | sort -k19,19 -r | awk -F '\\t' '!seen[\$9]++' > bedtools_intersected.txt ;
addBedtoolsIntersect.R -g ${prefix}.gff -t bedtools_intersected.txt --type "CDS" --source "\${db}" -o ${prefix}.gff ;
grep "\${db}" ${prefix}.gff > custom_database_\${db}.gff ;
rm -f bedtools_intersected.txt ;
Expand Down
11 changes: 5 additions & 6 deletions modules/generic/methylation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,11 @@ process CALL_METHYLATION {
tuple val(prefix), file(draft), file(reads), file(fast5)

output:
// Grab all outputs
file "*_calls.tsv" optional true
file "*_frequency.tsv" optional true
tuple val(prefix), file("methylation_frequency.bedGraph") optional true
tuple val(prefix), file("chr.sizes") optional true
file('nanopolish_version.txt')
path "*_calls.tsv" , emit: results optional true
path "*_frequency.tsv" , emit: frequencies optional true
tuple val(prefix), path("methylation_frequency.bedGraph"), emit: bedgraph optional true
tuple val(prefix), path("chr.sizes") , emit: chr_sizes optional true
path('nanopolish_version.txt') , emit: version

when:
// When an entry does not exist, it is created as 'input'
Expand Down
6 changes: 3 additions & 3 deletions modules/generic/mlst.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ process MLST {
file(bacannot_db)

output:
tuple val(prefix), path("${prefix}_mlst_analysis.txt") optional true
tuple val(prefix), path("${prefix}_novel_alleles.fasta") optional true
path('mlst_version.txt')
tuple val(prefix), path("${prefix}_mlst_analysis.txt") , emit: results optional true
tuple val(prefix), path("${prefix}_novel_alleles.fasta"), emit: alleles optional true
path('mlst_version.txt') , emit: version

script:
"""
Expand Down
2 changes: 1 addition & 1 deletion modules/generic/ncbi_protein.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ process GET_NCBI_PROTEIN {
file(ncbi_accs)

output:
path("ncbi_protein.faa")
path("ncbi_protein.faa"), emit: proteins

script:
"""
Expand Down
20 changes: 10 additions & 10 deletions modules/generic/prokka.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ process PROKKA {

output:
// Grab all outputs
path("annotation")
path("annotation"), emit: all
// Outputs must be linked to each prefix (tag)
tuple val(prefix), path("annotation/${prefix}.gff")
tuple val(prefix), path("annotation/${prefix}.gbk")
tuple val(prefix), path("annotation/${prefix}.fna")
tuple val(prefix), path("annotation/${prefix}.faa")
tuple val(prefix), path("annotation/${prefix}.ffn")
tuple val(prefix), path("annotation/${prefix}.fna"), path("${lreads}"), path("${fast5}")
tuple val(prefix), path("annotation/${prefix}.fna"), val("${resfinder_species}")
tuple val(prefix), path("annotation/${prefix}.txt")
path('prokka_version.txt')
tuple val(prefix), path("annotation/${prefix}.gff"), emit: gff
tuple val(prefix), path("annotation/${prefix}.gbk"), emit: gbk
tuple val(prefix), path("annotation/${prefix}.fna"), emit: genome
tuple val(prefix), path("annotation/${prefix}.faa"), emit: proteins
tuple val(prefix), path("annotation/${prefix}.ffn"), emit: genes
tuple val(prefix), path("annotation/${prefix}.fna"), path("${lreads}"), path("${fast5}"), emit: genome_with_fast5
tuple val(prefix), path("annotation/${prefix}.fna"), val("${resfinder_species}"), emit: genome_with_species
tuple val(prefix), path("annotation/${prefix}.txt"), emit: summary
path('prokka_version.txt'), emit: version

script:
kingdom = (params.prokka_kingdom) ? "--kingdom ${params.prokka_kingdom}" : ''
Expand Down
2 changes: 1 addition & 1 deletion modules/generic/reports.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ process REPORT {
tuple val(prefix), file('annotation_stats.tsv'), file(gff), file(barrnap), file(mlst), file(keggsvg), file(refseq_masher_txt), file(amrfinder), file(rgi), file(rgi_parsed), file(rgi_heatmap), file(argminer_out), file(resfinder_tab), file(resfinder_point), file(resfinder_phenotable), file(vfdb_blastn), file(victors_blastp), file(phigaro_txt), file(phispy_tsv), file(iceberg_blastp), file(iceberg_blastn), file(plasmids_tsv), file(platon_tsv), file(gi_image), file(phast_blastp), file(digIS)

output:
file '*.html'
path '*.html', emit: results

script:
def generic_annotator = (params.bakta_db) ? "bakta" : "prokka"
Expand Down

0 comments on commit 8841acf

Please sign in to comment.