Skip to content

Commit

Permalink
refactor: Improve the way Bakta is used (#54)
Browse files Browse the repository at this point in the history
* chore: Update bakta module

* fix: Change path to bakta module

* refactor: Rework how use bakta param is used

- Now it's a bool whether to run Bakta or not
- If no --bakta-db is set but --use_bakta is set, the baktadbdownload
  module will run
- By default, prokka is still the one used

* feat: Add config for process single

* test: Use bakta in annotation test

* refactor: Use light bakta db in test

* refactor: Use light db by default

* Revert "test: Use bakta in annotation test"

This reverts commit 343ab08.

* test: Add light to ann test

* refactor: Make Bakta the default ann software
  • Loading branch information
jvfe committed Mar 7, 2023
1 parent fa56027 commit 3cb354c
Show file tree
Hide file tree
Showing 14 changed files with 166 additions and 58 deletions.
5 changes: 5 additions & 0 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ process {
// adding in your local modules too.
// TODO nf-core: Customise requirements for specific processes.
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
withLabel:process_single {
cpus = { check_max( 1 , 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
}
withLabel:process_low {
cpus = { check_max( 6 * task.attempt, 'cpus' ) }
memory = { check_max( 20.GB * task.attempt, 'memory' ) }
Expand Down
4 changes: 4 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ process {
]
}

withName: BAKTADBDOWNLOAD {
ext.args = '--type light'
}

withName: BAKTA {
publishDir = [
path: { "${params.outdir}/annotation/bakta/${meta.id}" },
Expand Down
7 changes: 6 additions & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ params {

// Input data
input_sample_table = "https://raw.githubusercontent.com/beiko-lab/arete/master/test/test_dataset.csv"
use_bakta = false
db_cache = false
use_full_alignment = false
use_fasttree = true
use_prokka = true
skip_kraken = true
skip_poppunk = true
light = true
Expand All @@ -31,6 +31,11 @@ process {
memory = 6.GB
time = 4.h

withLabel:process_single {
cpus = 1
memory = 6.GB
time = 4.h
}
withLabel:process_low {
cpus = 2
memory = 6.GB
Expand Down
9 changes: 7 additions & 2 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,14 @@
"https://github.com/nf-core/modules.git": {
"modules": {
"nf-core": {
"bakta": {
"bakta/bakta": {
"branch": "master",
"git_sha": "ade45f05a2659b5c130a483e09f50b7f33d075b2",
"git_sha": "280c5c86b3da7dfcc92ebd5420584dd6ff26c4a8",
"installed_by": ["modules"]
},
"bakta/baktadbdownload": {
"branch": "master",
"git_sha": "280c5c86b3da7dfcc92ebd5420584dd6ff26c4a8",
"installed_by": ["modules"]
},
"blast/blastn": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
process BAKTA {
process BAKTA_BAKTA {
tag "$meta.id"
label 'process_medium'

conda (params.enable_conda ? "bioconda::bakta=1.4.0" : null)
conda "bioconda::bakta=1.7.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bakta:1.4.0--pyhdfd78af_1' :
'quay.io/biocontainers/bakta:1.4.0--pyhdfd78af_1' }"
'https://depot.galaxyproject.org/singularity/bakta:1.7.0--pyhdfd78af_1' :
'quay.io/biocontainers/bakta:1.7.0--pyhdfd78af_1' }"

input:
tuple val(meta), path(fasta)
Expand Down Expand Up @@ -33,20 +33,20 @@ process BAKTA {
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def proteins_opt = proteins ? "--proteins ${proteins[0]}" : ""
def prodigal_opt = prodigal_tf ? "--prodigal-tf ${prodigal_tf[0]}" : ""
def prodigal_tf = prodigal_tf ? "--prodigal-tf ${prodigal_tf[0]}" : ""
"""
bakta \\
$fasta \\
$args \\
--threads $task.cpus \\
--prefix $prefix \\
--db $db \\
$proteins_opt \\
$prodigal_tf \\
$fasta
--db $db
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bakta: \$( echo \$(bakta --version 2>&1) | sed 's/^.*bakta //' )
bakta: \$(echo \$(bakta --version) 2>&1 | cut -f '2' -d ' ')
END_VERSIONS
"""

Expand All @@ -66,7 +66,7 @@ process BAKTA {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bakta: \$( echo \$(bakta --version 2>&1) | sed 's/^.*bakta //' )
bakta: \$(echo \$(bakta --version) 2>&1 | cut -f '2' -d ' ')
END_VERSIONS
"""
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: bakta
name: bakta_bakta
description: Annotation of bacterial genomes (isolates, MAGs) and plasmids
keywords:
- annotation
Expand Down Expand Up @@ -26,7 +26,7 @@ input:
- db:
type: file
description: |
Path to the Bakta database
Path to the Bakta database. Must have amrfinderplus database directory already installed within it (in a directory called 'amrfinderplus-db/').
- proteins:
type: file
description: FASTA/GenBank file of trusted proteins to first annotate from (optional)
Expand Down Expand Up @@ -88,3 +88,4 @@ output:
authors:
- "@rpetit3"
- "@oschwengers"
- "@jfy133"
43 changes: 43 additions & 0 deletions modules/nf-core/bakta/baktadbdownload/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Runs `bakta_db download` (plus any extra arguments supplied through
// `task.ext.args`) and emits whatever path matching `db*` the command leaves
// in the task work directory, together with a versions.yml file.
// The process takes no inputs.
process BAKTA_BAKTADBDOWNLOAD {
label 'process_single'

// Bakta 1.7.0 is pinned for conda and for both container sources. The
// Singularity image URL is used only when the container engine is
// 'singularity' and `task.ext.singularity_pull_docker_container` is not set;
// otherwise the quay.io image is used.
conda "bioconda::bakta=1.7.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bakta:1.7.0--pyhdfd78af_1' :
'quay.io/biocontainers/bakta:1.7.0--pyhdfd78af_1' }"

output:
// `db*` is a glob, so the emitted name depends on what the download creates.
path "db*" , emit: db
path "versions.yml" , emit: versions

// Runs unless `task.ext.when` is set to a falsy value.
when:
task.ext.when == null || task.ext.when

// Real run: download the database, then record the tool version.
// `args` defaults to an empty string when `task.ext.args` is unset.
// NOTE(review): in the version command `2>&1` is attached to `echo`, not to
// `bakta_db --version`; if the version were printed on stderr it would not be
// captured -- confirm against the tool's behaviour.
script:
def args = task.ext.args ?: ''
"""
bakta_db \\
download \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bakta: \$(echo \$(bakta_db --version) 2>&1 | cut -f '2' -d ' ')
END_VERSIONS
"""

// Stub run: only echoes the command that would have been executed, creates an
// empty `db` directory to satisfy the `db*` output, and still writes
// versions.yml (which calls `bakta_db --version`, so the tool must be
// available even in stub mode).
stub:
def args = task.ext.args ?: ''
"""
echo "bakta_db \\
download \\
$args"
mkdir db
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bakta: \$(echo \$(bakta_db --version) 2>&1 | cut -f '2' -d ' ')
END_VERSIONS
"""
}
31 changes: 31 additions & 0 deletions modules/nf-core/bakta/baktadbdownload/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name: "bakta_baktadbdownload"
description: Downloads BAKTA database from Zenodo
keywords:
- bakta
- annotation
- fasta
- bacteria
- database
- download
tools:
- bakta:
description: Rapid & standardized annotation of bacterial genomes, MAGs & plasmids
homepage: https://github.com/oschwengers/bakta
documentation: https://github.com/oschwengers/bakta
tool_dev_url: https://github.com/oschwengers/bakta
doi: "10.1099/mgen.0.000685"
licence: ["GPL v3"]

output:
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- db:
type: directory
description: BAKTA database directory
pattern: "db*/"

authors:
- "@jfy133"
- "@jasmezz"
3 changes: 2 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ params {
use_fasttree = true

// Annotation parameters
use_bakta = null
use_prokka = false
bakta_db = null
light = false

// References
Expand Down
31 changes: 22 additions & 9 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@
"properties": {
"skip_kraken": {
"type": "boolean",
"description": "Don't run Kraken2 taxonomic classification"
"description": "Don't run Kraken2 taxonomic classification",
"fa_icon": "fas fa-forward"
}
},
"fa_icon": "fas fa-align-left"
Expand All @@ -86,15 +87,21 @@
"default": "",
"fa_icon": "fas fa-edit",
"properties": {
"use_bakta": {
"bakta_db": {
"type": "string",
"default": "None",
"description": "Location to the bakta database"
"fa_icon": "fas fa-database"
},
"light": {
"type": "boolean",
"description": "Only run one DIAMOND annotation step - against VFDB",
"hidden": true
"hidden": true,
"fa_icon": "fas fa-feather-alt"
},
"use_prokka": {
"type": "boolean",
"fa_icon": "fas fa-bacterium",
"description": "Use Prokka (not Bakta) for annotating assemblies"
}
}
},
Expand All @@ -107,11 +114,14 @@
"properties": {
"use_full_alignment": {
"type": "boolean",
"description": "Use full alignment"
"description": "Use full alignment",
"fa_icon": "fas fa-align-justify"
},
"use_fasttree": {
"type": "boolean",
"description": "Use FastTree"
"description": "Use FastTree",
"fa_icon": "fas fa-tree",
"default": true
}
}
},
Expand All @@ -124,16 +134,19 @@
"properties": {
"skip_poppunk": {
"type": "boolean",
"description": "Skip PopPunk"
"description": "Skip PopPunk",
"fa_icon": "fas fa-forward"
},
"poppunk_model": {
"type": "string",
"default": "None",
"description": "Which PopPunk model to use (bgmm, dbscan, refine, threshold or lineage)"
"description": "Which PopPunk model to use (bgmm, dbscan, refine, threshold or lineage)",
"fa_icon": "fas fa-indent"
},
"run_poppunk_qc": {
"type": "boolean",
"description": "Whether to run the QC step for PopPunk"
"description": "Whether to run the QC step for PopPunk",
"fa_icon": "fas fa-industry"
}
}
},
Expand Down
40 changes: 24 additions & 16 deletions subworkflows/local/annotation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
// MODULE: Installed directly from nf-core/modules
//
include { PROKKA } from '../../modules/nf-core/prokka/main'
include { BAKTA } from '../../modules/nf-core/bakta/main'
include { BAKTA_BAKTA as BAKTA } from '../../modules/nf-core/bakta/bakta/main'
include { BAKTA_BAKTADBDOWNLOAD as BAKTADBDOWNLOAD } from '../../modules/nf-core/bakta/baktadbdownload/main'
include { GET_CAZYDB;
GET_VFDB;
GET_BACMET} from '../../modules/local/blast_databases.nf'
Expand Down Expand Up @@ -43,6 +44,7 @@ workflow ANNOTATE_ASSEMBLIES {
main:

//if (params.input_sample_table){ ch_input = file(params.input_sample_table) } else { exit 1, 'Input samplesheet not specified!' }
ch_multiqc_files = Channel.empty()
ch_software_versions = Channel.empty()
/*
* SUBWORKFLOW: Read in samplesheet, validate and stage input files
Expand Down Expand Up @@ -104,23 +106,33 @@ workflow ANNOTATE_ASSEMBLIES {
/*
* Run gene finding software (Prokka or Bakta)
*/
ch_ffn_files = Channel.empty()
ch_gff_files = Channel.empty()
if (bakta_db){
BAKTA(assemblies, bakta_db, [], [])
ch_software_versions = ch_software_versions.mix(BAKTA.out.versions.first().ifEmpty(null))
ch_ffn_files = ch_ffn_files.mix(BAKTA.out.ffn)
ch_gff_files = ch_gff_files.mix(BAKTA.out.gff)
}
else{
if (params.use_prokka) {

PROKKA (
assemblies,
[],
[]
) //Assembly, protein file, pre-trained prodigal
ch_software_versions = ch_software_versions.mix(PROKKA.out.versions.first().ifEmpty(null))
ch_ffn_files = ch_ffn_files.mix(PROKKA.out.ffn)
ch_gff_files = ch_gff_files.mix(PROKKA.out.gff)
ch_ffn_files = PROKKA.out.ffn
ch_gff_files = PROKKA.out.gff
ch_multiqc_files = ch_multiqc_files.mix(PROKKA.out.txt.collect{it[1]}.ifEmpty([]))

}
else {

if (bakta_db){
BAKTA(assemblies, bakta_db, [], [])
} else {
BAKTADBDOWNLOAD()
BAKTADBDOWNLOAD.out.db.set { bakta_db }
BAKTA(assemblies, bakta_db, [], [])
}

ch_software_versions = ch_software_versions.mix(BAKTA.out.versions.first().ifEmpty(null))
ch_ffn_files = BAKTA.out.ffn
ch_gff_files = BAKTA.out.gff

}

/*
Expand Down Expand Up @@ -151,10 +163,6 @@ workflow ANNOTATE_ASSEMBLIES {
CAZY_FILTER(DIAMOND_BLAST_CAZY.out.txt, "CAZY", blast_columns)
}
ch_software_versions = ch_software_versions.mix(DIAMOND_MAKE_VFDB.out.versions.ifEmpty(null))
ch_multiqc_files = Channel.empty()
if(!bakta_db){
ch_multiqc_files = ch_multiqc_files.mix(PROKKA.out.txt.collect{it[1]}.ifEmpty([]))
}

emit:
annotation_software = ch_software_versions
Expand Down
8 changes: 5 additions & 3 deletions tests/nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@ params {
cpus = 2
memory = 4.GB
time = 4.h

skip_kraken = true
light = true
}

process {
withLabel:process_single {
cpus = 1
memory = 6.GB
time = 4.h
}
withLabel:process_low {
cpus = 2
memory = 4.GB
Expand Down
2 changes: 2 additions & 0 deletions tests/subworkflows/local/annotation.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ nextflow_workflow {
when {
params {
// define parameters here. Example:
light = true
use_prokka = true
outdir = "$outputDir"
}
workflow {
Expand Down
Loading

0 comments on commit 3cb354c

Please sign in to comment.