Skip to content

Commit

Permalink
add gtdb tests, all large db tools allow tarball now
Browse files Browse the repository at this point in the history
  • Loading branch information
rpetit3 committed Nov 26, 2022
1 parent 17c5633 commit ebfd2e3
Show file tree
Hide file tree
Showing 70 changed files with 334 additions and 90 deletions.
1 change: 1 addition & 0 deletions conda/recipe/meta.yaml
Expand Up @@ -23,6 +23,7 @@ requirements:
- coreutils
- executor
- gsl <=2.6
- importlib-metadata <5
- mamba
- mash
- ncbi-amrfinderplus 3.10.30
Expand Down
2 changes: 2 additions & 0 deletions conf/tests.config
Expand Up @@ -28,9 +28,11 @@ params {
primers = "${params.test_data_dir}/datasets/blast/primers"
}
eggnog = "${params.test_data_dir}/datasets/eggnog"
eggnog_tarball = "${params.test_data_dir}/datasets/eggnog/eggnog.tar.gz"
gtdb = "${params.test_data_dir}/datasets/gtdb"
gtdb_tarball = "${params.test_data_dir}/datasets/gtdb/gtdb.tar.gz"
kraken2 = "${params.test_data_dir}/datasets/kraken2"
kraken2_tarball = "${params.test_data_dir}/datasets/kraken2/kraken2.tar.gz"
mapping = "${params.test_data_dir}/datasets/mapping-sequences"
minmer = "${params.test_data_dir}/datasets/minmer"
mlst = "${params.test_data_dir}/datasets/mlst"
Expand Down
30 changes: 27 additions & 3 deletions lib/WorkflowBactopiaTools.groovy
Expand Up @@ -51,15 +51,23 @@ class WorkflowBactopiaTools {
} else if (params.wf == "bakta") {
if (params.bakta_db) {
if (Utils.isLocal(params.bakta_db)) {
error += Utils.fileNotFound(params.bakta_db, 'bakta_db', log)
// A tarball is a single file, so validate the given path itself; for a
// directory, validate that it contains 'bakta.db'.
if (params.bakta_db.endsWith(".tar.gz")) {
    error += Utils.fileNotFound(params.bakta_db, 'bakta_db', log)
} else {
    error += Utils.fileNotFound("${params.bakta_db}/bakta.db", 'bakta_db', log)
}
}
} else {
missing_required += "--bakta_db"
}
} else if (params.wf == "eggnog") {
if (params.eggnog) {
if (Utils.isLocal(params.eggnog)) {
missing_file += Utils.fileNotFound("${params.eggnog}/eggnog.db", 'eggnog', log)
// A tarball is a single file, so validate the given path itself; for a
// directory, validate that it contains 'eggnog.db'.
if (params.eggnog.endsWith(".tar.gz")) {
    missing_file += Utils.fileNotFound(params.eggnog, 'eggnog', log)
} else {
    missing_file += Utils.fileNotFound("${params.eggnog}/eggnog.db", 'eggnog', log)
}
if (missing_file > 0 && params.download_eggnog == false) {
missing_required += "--eggnog"
}
Expand All @@ -71,12 +79,28 @@ class WorkflowBactopiaTools {
if (params.gtdb) {
if (!params.download_gtdb) {
if (Utils.isLocal(params.gtdb)) {
error += Utils.fileNotFound(params.gtdb, 'gtdb', log)
// A tarball is a single file, so validate the given path itself; for a
// directory, validate that it contains 'metadata/metadata.txt'.
if (params.gtdb.endsWith(".tar.gz")) {
    error += Utils.fileNotFound(params.gtdb, 'gtdb', log)
} else {
    error += Utils.fileNotFound("${params.gtdb}/metadata/metadata.txt", 'gtdb', log)
}
}
}
} else {
missing_required += "--gtdb"
}
} else if (params.wf == "kraken2") {
if (params.kraken2_db) {
if (Utils.isLocal(params.kraken2_db)) {
// A tarball is a single file, so validate the given path itself; for a
// directory, validate that it contains 'hash.k2d'.
if (params.kraken2_db.endsWith(".tar.gz")) {
    error += Utils.fileNotFound(params.kraken2_db, 'kraken2_db', log)
} else {
    error += Utils.fileNotFound("${params.kraken2_db}/hash.k2d", 'kraken2_db', log)
}
}
} else {
missing_required += "--kraken2_db"
}
} else if (params.wf == "mashdist" || params.wf == "merlin") {
if (params.mash_sketch) {
if (Utils.isLocal(params.mash_sketch)) {
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/abricate/run/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/abricate/run/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/abricate/run/params.json",
"title": "Abricate Module",
"description": "A module for mass screening of contigs for antimicrobial and virulence genes",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/agrvate/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/agrvate/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/agrvate/params.json",
"title": "AgrVATE Module",
"description": "A module for determining the agr group of Staphylococcus aureus",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/amrfinderplus/run/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/amrfinderplus/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/amrfinderplus/params.json",
"title": "AMRFinder+ Module",
"description": "A module to identify antimicrobial resistance in genes or proteins",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/ariba/getref/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/ariba/getref/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/ariba/getref/params.json",
"title": "Ariba GetRef Module",
"description": "A module for gene identification through local assemblies",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/ariba/run/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/ariba/run/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/ariba/run/params.json",
"title": "Ariba Run Module",
"description": "A module for gene identification through local assemblies",
"type": "object",
Expand Down
4 changes: 2 additions & 2 deletions modules/nf-core/bakta/download/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/bakta/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/bakta/params.json",
"title": "Bakta Module",
"description": "A module for rapidly annotating bacterial genomes",
"type": "object",
Expand All @@ -14,7 +14,7 @@
"properties": {
"bakta_db": {
"type": "string",
"description": "Path to the Bakta database",
"description": "Tarball or path to the Bakta database",
"fa_icon": "fas fa-file-alt",
"is_required": true
},
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/bakta/run/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/bakta/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/bakta/params.json",
"title": "Bakta Module",
"description": "A module for rapidly annotating bacterial genomes",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/busco/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/busco/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/busco/params.json",
"title": "BUSCO Module",
"description": "A module for assessing genome assembly and annotation completeness",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/checkm/lineagewf/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/checkm/lineagewf/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/checkm/lineagewf/params.json",
"title": "CheckM Module",
"description": "A module to assess the assembly quality of your samples",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/clonalframeml/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/clonalframeml/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/clonalframeml/params.json",
"title": "ClonalFrameML Module",
"description": "A module for identifying recombination events in bacteria",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/ectyper/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/ectyper/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/ectyper/params.json",
"title": "ECTyper Module",
"description": "A module for serotyping Escherichia coli genomes",
"type": "object",
Expand Down
16 changes: 11 additions & 5 deletions modules/nf-core/eggnog/download/main.nf
Expand Up @@ -17,10 +17,11 @@ process EGGNOG_DOWNLOAD {
'quay.io/biocontainers/eggnog-mapper:2.1.9--pyhdfd78af_0' }"

output:
path("eggnog/*") , emit: db
path "*.{log,err}", emit: logs , optional: true
path ".command.*" , emit: nf_logs
path "versions.yml" , emit: versions
path("eggnog/*") , emit: db, optional: true
path("eggnog.tar.gz"), emit: db_tarball, optional: true
path "*.{log,err}" , emit: logs, optional: true
path ".command.*" , emit: nf_logs
path "versions.yml" , emit: versions

script:
"""
Expand All @@ -30,9 +31,14 @@ process EGGNOG_DOWNLOAD {
-y \\
--data_dir eggnog/
# Optionally pack the downloaded database into a single tarball.
# The parameter is interpolated by Nextflow before bash runs, so it must use
# the script-block (Groovy) variable syntax rather than the shell-block one.
if [ "${params.eggnog_save_as_tarball}" == "true" ]; then
    tar -czf eggnog.tar.gz eggnog/
    rm -rf eggnog/
fi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
eggnog-mapper: \$( echo \$(emapper.py --version 2>&1)| sed 's/.* emapper-//')
eggnog-mapper: \$( echo \$(emapper.py --version 2>&1)| sed 's/.* emapper-//;s/ .*//')
END_VERSIONS
"""
}
1 change: 1 addition & 0 deletions modules/nf-core/eggnog/download/params.config
Expand Up @@ -5,6 +5,7 @@ This file includes default parameter values.
params {
// eggnog-mapper db download
eggnog = null
eggnog_save_as_tarball = false
download_eggnog = false
skip_diamond = false
install_hmm = false
Expand Down
10 changes: 8 additions & 2 deletions modules/nf-core/eggnog/download/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/eggnog/download/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/eggnog/download/params.json",
"title": "eggNOG Downloader Module",
"description": "A module for downloading eggNOG databases",
"type": "object",
Expand All @@ -14,7 +14,7 @@
"properties": {
"eggnog": {
"type": "string",
"description": "Path to existing or destination for eggNOG databases",
"description": "Tarball or path to eggNOG databases",
"fa_icon": "fas fa-expand-arrows-alt",
"is_required": true
},
Expand All @@ -24,6 +24,12 @@
"description": "Required if downloading latest eggNOG database, will overwrite existing databases.",
"fa_icon": "fas fa-expand-arrows-alt"
},
"eggnog_save_as_tarball": {
    "type": "boolean",
    "default": false,
    "description": "Save the eggNOG database as a single tarball",
    "fa_icon": "fas fa-expand-arrows-alt"
},
"skip_diamond": {
"type": "boolean",
"default": false,
Expand Down
13 changes: 11 additions & 2 deletions modules/nf-core/eggnog/mapper/main.nf
Expand Up @@ -38,17 +38,26 @@ process EGGNOG_MAPPER {

script:
def prefix = options.suffix ? "${options.suffix}" : "${meta.id}"
def is_tarball = db.getName().endsWith(".tar.gz") ? true : false
"""
if [ "$is_tarball" == "true" ]; then
mkdir database
tar -xzf $db -C database
EGGNOG_DB=\$(find database/ -name "eggnog.db" | sed 's=eggnog.db==')
else
EGGNOG_DB=\$(find $db/ -name "eggnog.db" | sed 's=eggnog.db==')
fi
emapper.py \\
$options.args \\
--cpu $task.cpus \\
--data_dir ./ \\
--data_dir \$EGGNOG_DB \\
--output $prefix \\
-i $fasta
cat <<-END_VERSIONS > versions.yml
"${task.process}":
eggnog-mapper: \$( echo \$(emapper.py --version 2>&1)| sed 's/.* emapper-//')
eggnog-mapper: \$( echo \$(emapper.py --version 2>&1)| sed 's/.* emapper-//;s/ .*//')
END_VERSIONS
"""
}
2 changes: 1 addition & 1 deletion modules/nf-core/emmtyper/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/emmtyper/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/emmtyper/params.json",
"title": "emmtyper Module",
"description": "A module for emm-typing of Streptococcus pyogenes",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/fastani/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/fastani/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/fastani/params.json",
"title": "fastANI Module",
"description": "A module for alignment-free computation Average Nucleotide Identity (ANI)",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/gamma/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/gamma/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/gamma/params.json",
"title": "GAMMA Module",
"description": "A module for identification, classification, and annotation of translated gene matches",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/genotyphi/parse/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/hpsuissero/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/genotyphi/parse/params.json",
"title": "GenoTyphi Module",
"description": "A module to genotype Salmonella Typhi from Mykrobe outputs",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/gtdbtk/classifywf/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/gtdbtk/classifywf/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/gtdbtk/classifywf/params.json",
"title": "GTDB Classify Module",
"description": "Identify marker genes and assign taxonomic classifications",
"type": "object",
Expand Down
4 changes: 2 additions & 2 deletions modules/nf-core/gtdbtk/setupdb/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/gtdbtk/setupdb/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/gtdbtk/setupdb/params.json",
"title": "GTDB Setup Module",
"description": "Identify marker genes and assign taxonomic classifications",
"type": "object",
Expand All @@ -14,7 +14,7 @@
"properties": {
"gtdb": {
"type": "string",
"description": "Location of a GTDB database. If a database is not found, you must use '--download_gtdb'",
"description": "Tarball or path of a GTDB database. If a database is not found, you must use '--download_gtdb'",
"fa_icon": "fas fa-expand-arrows-alt",
"is_required": true
},
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/gubbins/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/gubbins/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/gubbins/params.json",
"title": "Gubbins Module",
"description": "A module for identifying recombination events in bacteria",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/hicap/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/hicap/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/hicap/params.json",
"title": "hicap Module",
"description": "A module for in-silico typing of the H. influenzae capsule locus",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/hpsuissero/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/hpsuissero/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/hpsuissero/params.json",
"title": "HpsuisSero Module",
"description": "A module for serotype prediction of Haemophilus parasuis assemblies",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/iqtree/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/iqtree/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/iqtree/params.json",
"title": "IQ-TREE Module",
"description": "Phylogeny from a multiple sequence alignment using the maximum likelihood algorithm.",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/ismapper/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/ismapper/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/ismapper/params.json",
"title": "ISMapper Module",
"description": "A module for identifying insertion sites",
"type": "object",
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/kleborate/params.json
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/modules/kleborate/params.json",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/kleborate/params.json",
"title": "Kleborate Module",
"description": "A module to screen for MLST, sub-species, and other Klebsiella related genes of interest",
"type": "object",
Expand Down
11 changes: 10 additions & 1 deletion modules/nf-core/kraken2/main.nf
Expand Up @@ -36,9 +36,18 @@ process KRAKEN2 {
def paired = meta.single_end ? "" : "--paired"
def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq"
def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
def is_tarball = db.getName().endsWith(".tar.gz") ? true : false
"""
if [ "$is_tarball" == "true" ]; then
mkdir database
tar -xzf $db -C database
KRAKEN_DB=\$(find database/ -name "hash.k2d" | sed 's=hash.k2d==')
else
KRAKEN_DB=\$(find $db/ -name "hash.k2d" | sed 's=hash.k2d==')
fi
kraken2 \\
--db $db \\
--db \$KRAKEN_DB \\
--threads $task.cpus \\
--unclassified-out $unclassified \\
--classified-out $classified \\
Expand Down

0 comments on commit ebfd2e3

Please sign in to comment.