Skip to content

Commit

Permalink
add bactopia tool for defense-finder
Browse files Browse the repository at this point in the history
  • Loading branch information
rpetit3 committed Mar 27, 2024
1 parent 3055c95 commit 018f189
Show file tree
Hide file tree
Showing 18 changed files with 735 additions and 6 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@ description: A full list of Bactopia releases and a description of the changes.

## v3.0.2 bactopia/bactopia "" 2024/??/??

### `Added`
- Bactopia Tools (`bactopia --wf <NAME>`)
- `defensefinder` - Systematic search of all known anti-phage systems

### `Fixed`

- missing schema for clean-yer-reads

### `Enhancements to OSS`

- pin macsyfinder version in defense-finder [bioconda/bioconda-recipes#46824](https://github.com/bioconda/bioconda-recipes/pull/46824)

## v3.0.1 bactopia/bactopia "That's My Girl" - 2024/03/25

### `Added`
Expand Down
1 change: 1 addition & 0 deletions conf/tests.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ params {
proteins = "${params.test_data_dir}/datasets/blast/proteins"
primers = "${params.test_data_dir}/datasets/blast/primers"
}
defensefinder = "${params.test_data_dir}/datasets/defensefinder/defensefinder.tar"
eggnog = "${params.test_data_dir}/datasets/eggnog"
eggnog_tarball = "${params.test_data_dir}/datasets/eggnog/eggnog.tar.gz"
gtdb = "${params.test_data_dir}/datasets/gtdb/gtdbtk"
Expand Down
17 changes: 11 additions & 6 deletions conf/workflows.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ params {
]
modules = [
'abricate', 'abritamr', 'agrvate', 'amrfinderplus', 'blastn', 'blastp', 'blastx',
'btyper3', 'busco', 'bracken', 'checkm', 'ectyper', 'emmtyper', 'fastani', 'gamma',
'genotyphi', 'hicap', 'hpsuissero', 'ismapper', 'kleborate', 'kraken2', 'legsta',
'lissero', 'mashdist', 'mcroni', 'meningotype', 'midas', 'mlst', 'mobsuite',
'mykrobe', 'ngmaster', 'pasty', 'pbptyper', 'phispy', 'plasmidfinder',
'pneumocat', 'quast', 'rgi', 'seqsero2', 'seroba', 'shigatyper',
'btyper3', 'busco', 'bracken', 'checkm', 'defensefinder', 'ectyper', 'emmtyper',
'fastani', 'gamma', 'genotyphi', 'hicap', 'hpsuissero', 'ismapper', 'kleborate',
'kraken2', 'legsta', 'lissero', 'mashdist', 'mcroni', 'meningotype', 'midas',
'mlst', 'mobsuite', 'mykrobe', 'ngmaster', 'pasty', 'pbptyper', 'phispy',
'plasmidfinder', 'pneumocat', 'quast', 'rgi', 'seqsero2', 'seroba', 'shigatyper',
'shigeifinder', 'sistr', 'spatyper', 'ssuissero', 'staphopiasccmec',
'stecfinder', 'tblastn', 'tblastx', 'updater'
]
Expand Down Expand Up @@ -57,7 +57,7 @@ params {
description = "Use Bactopia's read QC steps to Clean-Yer-Reads"
includes = ["gather", "qc", "scrubber"]
is_workflow = true
modules = ["gather", "qc", "srahumanscrubber_initdb", "srahumanscrubber_scrub"]
modules = ["gather", "qc", "kraken2"]
}

'teton' {
Expand Down Expand Up @@ -245,6 +245,11 @@ params {
'custom_wget' {
path = "modules/nf-core/custom/wget"
}
// Registers the defense-finder module; `path` points at the run module directory
// (modules/nf-core/defensefinder/run).
// NOTE(review): `ext` is presumably the input file type the tool consumes (nucleotide FASTA) - confirm against the workflow loader.
'defensefinder' {
description = "Systematic search of all known anti-phage systems"
ext = "fna"
path = "modules/nf-core/defensefinder/run"
}
'ectyper' {
description = "In-silico prediction of Escherichia coli serotype"
ext = "fna"
Expand Down
74 changes: 74 additions & 0 deletions modules/nf-core/defensefinder/run/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Import generic module functions
include { get_resources; initOptions; saveFiles } from '../../../../lib/nf/functions'
// Module-level settings shared by the DEFENSEFINDER_RUN process below.
// Resource settings looked up from the active profile and the max memory/CPU params.
RESOURCES = get_resources(workflow.profile, params.max_memory, params.max_cpus)
// Use params.options when the caller supplied it, otherwise start from an empty map.
options = initOptions(params.containsKey("options") ? params.options : [:], 'defensefinder')
// Default the btype to "tools" unless the caller already set one.
options.btype = options.btype ?: "tools"
conda_tools = "bioconda::defense-finder=1.2.2"
// "=", ":" and " " are each replaced by "-", giving "bioconda--defense-finder-1.2.2".
conda_name = conda_tools.replace("=", "-").replace(":", "-").replace(" ", "-")
// Prefer an existing environment directory under params.condadir; otherwise fall back to the package spec.
conda_env = file("${params.condadir}/${conda_name}").exists() ? "${params.condadir}/${conda_name}" : conda_tools
// Versions used to locate the model archives and reported in versions.yml.
// DF_VERSION duplicates the version in conda_tools and the container tags; keep them in sync.
DF_VERSION = "1.2.2"
DF_MODELS_VERSION = "1.2.4"
CASFINDER_VERSION = "3.1.0"

// Run defense-finder on a single assembly.
//
// Inputs:
//   meta/fasta - sample metadata map and its assembly (plain or gzip-compressed FASTA)
//   db         - tarball holding models/defense-finder-models-v<ver>.tar.gz and
//                models/CasFinder-<ver>.tar.gz (as built by DEFENSEFINDER_UPDATE)
// Outputs:
//   genes, hmmer and systems TSVs, the protein file and its index, optionally the
//   archived raw MacSyFinder directory, plus logs and versions.yml.
process DEFENSEFINDER_RUN {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? conda_env : null)
    container "${ workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/defense-finder:1.2.2--pyhdfd78af_1' :
        'quay.io/biocontainers/defense-finder:1.2.2--pyhdfd78af_1' }"

    input:
    tuple val(meta), path(fasta)
    each path(db)

    output:
    tuple val(meta), path("*_defense_finder_genes.tsv")  , emit: genes_tsv
    tuple val(meta), path("*_defense_finder_hmmer.tsv")  , emit: hmmer_tsv
    tuple val(meta), path("*_defense_finder_systems.tsv"), emit: systems_tsv
    tuple val(meta), path("*.prt")                       , emit: proteins
    tuple val(meta), path("*.prt.idx")                   , emit: proteins_index
    tuple val(meta), path("${prefix}.macsydata.tar.gz")  , emit: macsydata_raw, optional: true
    path "*.{log,err}", emit: logs, optional: true
    path ".command.*", emit: nf_logs
    path "versions.yml", emit: versions

    script:
    prefix = options.suffix ? "${options.suffix}" : "${meta.id}"
    // The staged assembly may be gzip-compressed; defense-finder is given the decompressed copy.
    def is_compressed = fasta.getName().endsWith(".gz")
    def fasta_name = fasta.getName().replace(".gz", "")
    """
    set -x

    # Decompress the assembly when it was staged as a .gz file
    if [ "${is_compressed}" == "true" ]; then
        gzip -c -d ${fasta} > ${fasta_name}
    fi

    # Extract database
    tar -xf $db

    # Install both model sets into a local models directory
    macsydata \\
        install \\
        --target defense-finder/ \\
        models/defense-finder-models-v${DF_MODELS_VERSION}.tar.gz
    macsydata \\
        install \\
        --target defense-finder/ \\
        models/CasFinder-${CASFINDER_VERSION}.tar.gz

    defense-finder \\
        run \\
        $options.args \\
        --workers $task.cpus \\
        --models-dir defense-finder/ \\
        ${fasta_name}

    # Archive the raw MacSyFinder output only when the user asked to keep it
    if [ "${params.df_preserveraw}" == "true" ]; then
        tar -czf ${prefix}.macsydata.tar.gz defense-finder-tmp/
        rm -rf defense-finder-tmp/
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        defense-finder: ${DF_VERSION}
        defense-finder-models: ${DF_MODELS_VERSION}
        casfinder-models: ${CASFINDER_VERSION}
    END_VERSIONS
    """
}
58 changes: 58 additions & 0 deletions modules/nf-core/defensefinder/run/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Module metadata for modules/nf-core/defensefinder/run (process DEFENSEFINDER_RUN).
name: "defensefinder_run"
description: Systematic search of all known anti-phage systems
keywords:
  - fasta
  - anti-phage
  - defense systems
tools:
  - "defensefinder":
      description: "DefenseFinder systematically searches for all known anti-phage systems in prokaryotic genomes."
      homepage: "https://github.com/mdmparis/defense-finder"
      documentation: "https://github.com/mdmparis/defense-finder"
      tool_dev_url: "https://github.com/mdmparis/defense-finder"
      doi: "10.1038/s41467-022-30269-9"
      licence: "['GPL-3.0']"

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fasta:
      type: file
      description: Input assembly in FASTA format (optionally gzip-compressed)
      pattern: "*.{fna,fna.gz}"
  - db:
      type: file
      description: Tarball containing the defense-finder-models and CasFinder model archives
      pattern: "*.tar"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - genes_tsv:
      type: file
      description: Genes belonging to the detected defense systems
      pattern: "*_defense_finder_genes.tsv"
  - hmmer_tsv:
      type: file
      description: HMMER hits reported by defense-finder
      pattern: "*_defense_finder_hmmer.tsv"
  - systems_tsv:
      type: file
      description: Defense systems detected in the input
      pattern: "*_defense_finder_systems.tsv"
  - proteins:
      type: file
      description: Protein sequences generated for the input assembly
      pattern: "*.prt"
  - proteins_index:
      type: file
      description: Index of the protein sequence file
      pattern: "*.prt.idx"
  - macsydata_raw:
      type: file
      description: Archived raw MacSyFinder outputs (only when df_preserveraw is set)
      pattern: "*.macsydata.tar.gz"
  - logs:
      type: file
      description: Log and error files produced by the tool
      pattern: "*.{log,err}"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@rpetit3"
11 changes: 11 additions & 0 deletions modules/nf-core/defensefinder/run/params.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/*
This file includes default parameter values.
*/

// Defaults for the defense-finder run module; the matching schema lives in params.json.
params {
// defense-finder
// Keep the raw MacSyFinder outputs; when true the run module archives defense-finder-tmp/
df_preserveraw = false
// Advanced: run macsyfinder in no-cut-ga mode (validity of genes/systems found is not guaranteed)
df_nocutga = false
// Minimal coverage required for each profile
df_coverage = 0.4
// The macsyfinder --db-type option: "ordered_replicon", "gembase" or "unordered"
df_dbtype = "ordered_replicon"
}
52 changes: 52 additions & 0 deletions modules/nf-core/defensefinder/run/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/defensefinder/run/params.json",
"title": "defense-finder Module",
"description": "A module to identify anti-phage systems in assemblies",
"type": "object",
"definitions": {
"defensefinder_parameters": {
"title": "defense-finder Parameters",
"type": "object",
"description": "",
"default": "",
"fa_icon": "fas fa-exclamation-circle",
"properties": {
"df_coverage": {
"type": "number",
"description": "Minimal percentage of coverage for each profile",
"default": 0.4,
"fa_icon": "fas fa-file-alt"
},
"df_dbtype": {
"type": "string",
"description": "The macsyfinder --db-type option",
"default": "ordered_replicon",
"fa_icon": "fas fa-italic",
"enum": [
"ordered_replicon",
"gembase",
"unordered"
]
},
"df_preserveraw": {
"type": "boolean",
"description": "Preserve raw MacsyFinder outputs alongside Defense Finder results inside the output directory",
"fa_icon": "fas fa-fast-forward",
"hidden": true
},
"df_nocutga": {
"type": "boolean",
"description": "Advanced! Run macsyfinder in no-cut-ga mode. The validity of the genes and systems found is not guaranteed!",
"fa_icon": "fas fa-fast-forward",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/defensefinder_parameters"
}
]
}
47 changes: 47 additions & 0 deletions modules/nf-core/defensefinder/update/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Import generic module functions
include { get_resources; initOptions; saveFiles } from '../../../../lib/nf/functions'
// Module-level settings shared by the DEFENSEFINDER_UPDATE process below.
// Resource settings looked up from the active profile and the max memory/CPU params.
RESOURCES = get_resources(workflow.profile, params.max_memory, params.max_cpus)
// Use params.options when the caller supplied it, otherwise start from an empty map.
options = initOptions(params.containsKey("options") ? params.options : [:], 'defensefinder_update')
conda_tools = "bioconda::defense-finder=1.2.2"
// "=", ":" and " " are each replaced by "-", giving "bioconda--defense-finder-1.2.2".
conda_name = conda_tools.replace("=", "-").replace(":", "-").replace(" ", "-")
// Prefer an existing environment directory under params.condadir; otherwise fall back to the package spec.
conda_env = file("${params.condadir}/${conda_name}").exists() ? "${params.condadir}/${conda_name}" : conda_tools
// Versions used to build the download URLs and archive names, and reported in versions.yml.
// DF_VERSION duplicates the version in conda_tools and the container tags; keep them in sync.
DF_VERSION = "1.2.2"
DF_MODELS_VERSION = "1.2.4"
CASFINDER_VERSION = "3.1.0"

// Download the defense-finder and CasFinder model archives and bundle them into a
// single tarball that DEFENSEFINDER_RUN can unpack and install with macsydata.
//
// Output:
//   db - defense-finder-models-<DF_MODELS_VERSION>.tar containing a models/ directory
//        with both downloaded archives.
process DEFENSEFINDER_UPDATE {
    tag "update"
    label 'process_low'
    // storeDir keeps the bundle in the datasets cache so it is not rebuilt on later runs
    storeDir params.datasets_cache
    publishDir params.datasets_cache

    conda (params.enable_conda ? conda_env : null)
    // Same container build as the run module, so both steps use one image
    container "${ workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/defense-finder:1.2.2--pyhdfd78af_1' :
        'quay.io/biocontainers/defense-finder:1.2.2--pyhdfd78af_1' }"

    output:
    path "defense-finder-models-${DF_MODELS_VERSION}.tar", emit: db

    script:
    prefix = "defense-finder"
    """
    mkdir models

    # File names must match what DEFENSEFINDER_RUN passes to macsydata install
    wget \\
        -O models/defense-finder-models-v${DF_MODELS_VERSION}.tar.gz \\
        https://github.com/mdmparis/defense-finder-models/archive/refs/tags/${DF_MODELS_VERSION}.tar.gz
    wget \\
        -O models/CasFinder-${CASFINDER_VERSION}.tar.gz \\
        https://github.com/macsy-models/CasFinder/archive/refs/tags/${CASFINDER_VERSION}.tar.gz

    tar -cvf defense-finder-models-${DF_MODELS_VERSION}.tar models/

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        defense-finder: ${DF_VERSION}
        defense-finder-models: ${DF_MODELS_VERSION}
        casfinder-models: ${CASFINDER_VERSION}
    END_VERSIONS
    """
}
58 changes: 58 additions & 0 deletions modules/nf-core/defensefinder/update/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Module metadata for modules/nf-core/defensefinder/update (process DEFENSEFINDER_UPDATE).
# The process takes no inputs; it downloads the model archives and bundles them.
name: "defensefinder_update"
description: Download and bundle the models required by defense-finder
keywords:
  - download
  - models
  - anti-phage
tools:
  - "defensefinder":
      description: "DefenseFinder systematically searches for all known anti-phage systems in prokaryotic genomes."
      homepage: "https://github.com/mdmparis/defense-finder"
      documentation: "https://github.com/mdmparis/defense-finder"
      tool_dev_url: "https://github.com/mdmparis/defense-finder-models"
      doi: "10.1038/s41467-022-30269-9"
      licence: "['GPL-3.0']"

output:
  - db:
      type: file
      description: Tarball containing the downloaded defense-finder-models and CasFinder model archives
      pattern: "defense-finder-models-*.tar"
authors:
  - "@rpetit3"
7 changes: 7 additions & 0 deletions modules/nf-core/defensefinder/update/params.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
This file includes default parameter values.
*/

params {
// defense-finder update has no parameters
}

0 comments on commit 018f189

Please sign in to comment.