-
Notifications
You must be signed in to change notification settings - Fork 62
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
399 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
// Import generic module functions | ||
include { get_resources; initOptions; saveFiles } from '../../../lib/nf/functions' | ||
// Resource limits resolved from the active profile and the user-supplied caps.
RESOURCES = get_resources(workflow.profile, params.max_memory, params.max_cpus)

// Module options: use the caller's `params.options` when present, otherwise an empty map.
options = initOptions(params.containsKey("options") ? params.options : [:], 'bracken')
// Default the `btype` option to "tools" when it is unset/falsy.
options.btype = options.btype ? options.btype : "tools"

// Conda package spec, plus the name of a matching pre-built environment directory
// (every '=', ':' and ' ' in the spec becomes '-').
conda_tools = "bioconda::bactopia-teton=1.0.0"
conda_name = conda_tools.replaceAll("[=: ]", "-")

// Prefer the pre-built environment under params.condadir when it exists on disk;
// otherwise fall back to the package spec.
conda_env = conda_tools
if (file("${params.condadir}/${conda_name}").exists()) {
    conda_env = "${params.condadir}/${conda_name}"
}
|
||
/*
 * BRACKEN: classify reads with Kraken2, then re-estimate abundances with Bracken.
 *
 * Inputs
 *   meta  - sample map; `meta.id` and `meta.single_end` are read here
 *   reads - gzip-compressed FASTQ file(s) for the sample
 *   db    - Kraken2/Bracken database, either a directory or a `.tar.gz` tarball
 *
 * Outputs (see `emit` names below)
 *   tsv, classified, unclassified, kraken2_report, bracken_report, abundances,
 *   krona (only when Krona is not skipped), logs, nf_logs, versions
 *
 * Steps performed by the script
 *   1. Locate the directory holding `hash.k2d` (extracting the tarball first if needed).
 *   2. Run kraken2, writing classified/unclassified reads and a report.
 *   3. Pick the Bracken read length: user value, or the observed median read
 *      length snapped to one of 50/75/100/150/200/250/300.
 *   4. Run bracken and sort its table by `fraction_total_reads`.
 *   5. Compress the FASTQs, build the summary, and (optionally) Krona reports.
 *   6. Record tool versions in versions.yml.
 */
process BRACKEN {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? conda_env : null)
    container "${ workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bactopia-teton:1.0.0--hdfd78af_0' :
        'quay.io/biocontainers/bactopia-teton:1.0.0--hdfd78af_0' }"

    input:
    tuple val(meta), path(reads)
    path db

    output:
    tuple val(meta), path("${prefix}.bracken.tsv")        , emit: tsv
    // The leading '.' keeps "unclassified" files out of the "classified" channel;
    // a bare '*classified*' glob matches both sets of files.
    tuple val(meta), path('*.classified{.,_}*')           , emit: classified
    tuple val(meta), path('*.unclassified{.,_}*')         , emit: unclassified
    tuple val(meta), path("${prefix}.kraken2.report.txt") , emit: kraken2_report
    tuple val(meta), path("${prefix}.bracken.report.txt") , emit: bracken_report
    tuple val(meta), path("*.abundances.txt")             , emit: abundances
    // Krona reports are only written when params.skip_krona is false, so this
    // output must be optional or the task fails whenever Krona is skipped.
    tuple val(meta), path("*.krona.html")                 , emit: krona, optional: true
    path "*.{log,err}" , emit: logs, optional: true
    path ".command.*"  , emit: nf_logs
    path "versions.yml", emit: versions

    script:
    // `prefix` is intentionally not declared with `def`: the output block above reads it.
    prefix = options.suffix ? "${options.suffix}" : "${meta.id}"
    def paired = meta.single_end ? "" : "--paired"
    // For paired-end data the output name carries '#', the per-mate placeholder
    // accepted by kraken2's --classified-out/--unclassified-out (see Kraken2 manual).
    classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq"
    unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
    def is_tarball = db.getName().endsWith(".tar.gz")
    // Versions reported verbatim in versions.yml (not queried from the tools).
    def BRACKEN_VERSION = "2.7"
    def KRAKENTOOLS_VERSION = "1.2"
    """
    # Locate the database directory (the one containing hash.k2d)
    if [ "$is_tarball" == "true" ]; then
        mkdir database
        tar -xzf $db -C database
        KRAKEN_DB=\$(find database/ -name "hash.k2d" | sed 's=hash.k2d==')
    else
        KRAKEN_DB=\$(find $db/ -name "hash.k2d" | sed 's=hash.k2d==')
    fi

    kraken2 \\
        --db \$KRAKEN_DB \\
        --threads $task.cpus \\
        --unclassified-out $unclassified \\
        --classified-out $classified \\
        --report ${prefix}.kraken2.report.txt \\
        --gzip-compressed \\
        $paired \\
        $options.args \\
        $reads > kracken.out

    # Get read length
    if [ "${params.bracken_read_length}" == "0" ]; then
        OBS_READ_LENGTH=\$(zcat ${reads[0]} | fastq-scan -q | jq -r '.qc_stats.read_median')
        echo \$OBS_READ_LENGTH
        # Pre-built Bracken databases come with 50,75,100,150,200,250,300, split the difference
        if [ "\$OBS_READ_LENGTH" -gt 275 ]; then
            READ_LENGTH="300"
        elif [ "\$OBS_READ_LENGTH" -gt 225 ]; then
            READ_LENGTH="250"
        elif [ "\$OBS_READ_LENGTH" -gt 175 ]; then
            READ_LENGTH="200"
        elif [ "\$OBS_READ_LENGTH" -gt 125 ]; then
            READ_LENGTH="150"
        elif [ "\$OBS_READ_LENGTH" -gt 85 ]; then
            READ_LENGTH="100"
        elif [ "\$OBS_READ_LENGTH" -gt 65 ]; then
            READ_LENGTH="75"
        else
            READ_LENGTH="50"
        fi
    else
        # use user defined read length
        READ_LENGTH="${params.bracken_read_length}"
    fi

    bracken \\
        $options.args2 \\
        -d \$KRAKEN_DB \\
        -r \$READ_LENGTH \\
        -i ${prefix}.kraken2.report.txt \\
        -w ${prefix}.bracken.report.txt \\
        -o bracken.temp

    # Sort bracken report by 'fraction_total_reads' (column 7).
    # The table is tab-delimited and taxon names contain spaces, so the field
    # separator must be set explicitly or column 7 is mis-identified.
    head -n 1 bracken.temp > ${prefix}.bracken.abundances.txt
    grep -v "fraction_total_reads\$" bracken.temp | sort -t \$'\\t' -k 7,7 -rn >> ${prefix}.bracken.abundances.txt

    # Compress Kraken FASTQs
    pigz -p $task.cpus *.fastq

    # Adjust bracken to include unclassified and produce summary
    kraken-bracken-summary.py \\
        ${prefix} \\
        ${prefix}.kraken2.report.txt \\
        ${prefix}.bracken.report.txt \\
        ${prefix}.bracken.abundances.txt

    # Create a Krona report from reports
    if [ "${params.skip_krona}" == "false" ]; then
        # Kraken2
        kreport2krona.py \\
            --report ${prefix}.kraken2.report.txt \\
            --output kraken2-krona.temp
        ktImportText -o ${prefix}.kraken2.krona.html kraken2-krona.temp

        # Bracken
        kreport2krona.py \\
            --report ${prefix}.bracken.report.txt \\
            --output bracken-krona.temp
        ktImportText -o ${prefix}.bracken.krona.html bracken-krona.temp
        rm *-krona.temp
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bracken: ${BRACKEN_VERSION}
        fastq-scan: \$(echo \$(fastq-scan -v 2>&1) | sed 's/fastq-scan //')
        jq: \$(echo \$(jq --version 2>&1) | sed 's/jq-//')
        kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//')
        krakentools: ${KRAKENTOOLS_VERSION}
        krona: \$( echo \$(ktImportText 2>&1) | sed 's/^.*KronaTools //g; s/- ktImportText.*\$//g')
        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
        python: \$(echo \$(python --version 2>&1) | sed 's/Python //')
    END_VERSIONS
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
name: kraken2 | ||
description: Classifies metagenomic sequence data | ||
keywords: | ||
- classify | ||
- metagenomics | ||
- fastq | ||
- db | ||
tools: | ||
- kraken2: | ||
description: | | ||
Kraken2 is a taxonomic sequence classifier that assigns taxonomic labels to sequence reads | ||
homepage: https://ccb.jhu.edu/software/kraken2/ | ||
documentation: https://github.com/DerrickWood/kraken2/wiki/Manual | ||
doi: 10.1186/s13059-019-1891-0 | ||
licence: ['MIT'] | ||
input: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. [ id:'test', single_end:false ] | ||
- reads: | ||
type: file | ||
description: | | ||
List of input FastQ files of size 1 and 2 for single-end and paired-end data, | ||
respectively. | ||
- db: | ||
type: directory | ||
description: Kraken2 database | ||
output: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. [ id:'test', single_end:false ] | ||
- classified: | ||
type: file | ||
description: | | ||
Reads classified to belong to any of the taxa | ||
on the Kraken2 database. | ||
pattern: "*{fastq.gz}" | ||
- unclassified: | ||
type: file | ||
description: | | ||
Reads not classified to belong to any of the taxa | ||
on the Kraken2 database. | ||
pattern: "*{fastq.gz}" | ||
- report: | ||
type: file | ||
description: | | ||
Kraken2 report containing stats about classified | ||
and not classified reads. | ||
pattern: "*.{report.txt}" | ||
- versions: | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
authors: | ||
- "@joseespinosa" | ||
- "@drpatelh" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
/* | ||
This file includes default parameter values. | ||
*/ | ||
|
||
// Default values for the Kraken2/Bracken module parameters.
params { | ||
// Kraken2 | ||
// A single tarball or path to a Kraken2 formatted database (no default; must be supplied)
kraken2_db = null | ||
// Quick operation (use first hit or hits)
kraken2_quick_mode = false | ||
// Confidence score threshold between 0 and 1
kraken2_confidence = 0.0 | ||
// Minimum base quality used in classification
kraken2_minimum_base_quality = 0 | ||
// Format report output like Kraken 1's kraken-mpa-report
kraken2_use_mpa_style = false | ||
// Report counts for ALL taxa, even if counts are zero
kraken2_report_zero_counts = false | ||
// Include minimizer and distinct minimizer count information in report
kraken2_report_minimizer_data = false | ||
// Print scientific names instead of just taxids
kraken2_use_names = false | ||
// Avoid loading database into RAM
kraken2_memory_mapping = false | ||
// Minimum number of hit groups needed to make a call
kraken2_minimum_hit_groups = 2 | ||
// Bracken
// Read length to get all classifications for (0 = determine at runtime)
bracken_read_length = 0 | ||
// Level to estimate abundance at
bracken_level = "S" | ||
// Reads required PRIOR to abundance estimation to perform re-estimation
bracken_threshold = 0 | ||
// Skip the creation of a Krona report
skip_krona = false | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
{ | ||
"$schema": "http://json-schema.org/draft-07/schema", | ||
"$id": "https://raw.githubusercontent.com/bactopia/bactopia/master/modules/nf-core/kraken2/params.json", | ||
"title": "Kraken2 Module", | ||
"description": "A module for taxonomic classification of sequence reads", | ||
"type": "object", | ||
"definitions": { | ||
"kraken2_bracken_parameters": { | ||
"title": "Kraken2 and Bracken Parameters", | ||
"type": "object", | ||
"description": "", | ||
"default": "", | ||
"fa_icon": "fas fa-exclamation-circle", | ||
"properties": { | ||
"kraken2_db": { | ||
"type": "string", | ||
"description": "A single tarball or path to a Kraken2 formatted database", | ||
"fa_icon": "fas fa-expand-arrows-alt", | ||
"is_required": true | ||
}, | ||
"kraken2_quick_mode": { | ||
"type": "boolean", | ||
"default": false, | ||
"description": "Quick operation (use first hit or hits)", | ||
"fa_icon": "fas fa-expand-arrows-alt", | ||
"hidden": true | ||
}, | ||
"kraken2_confidence": { | ||
"type": "number", | ||
"default": 0.0, | ||
"description": "Confidence score threshold between 0 and 1", | ||
"fa_icon": "fas fa-expand-arrows-alt" | ||
}, | ||
"kraken2_minimum_base_quality": { | ||
"type": "integer", | ||
"default": 0, | ||
"description": "Minimum base quality used in classification", | ||
"fa_icon": "fas fa-expand-arrows-alt", | ||
"hidden": true | ||
}, | ||
"kraken2_use_mpa_style": { | ||
"type": "boolean", | ||
"default": false, | ||
"description": "Format report output like Kraken 1's kraken-mpa-report", | ||
"fa_icon": "fas fa-expand-arrows-alt" | ||
}, | ||
"kraken2_report_zero_counts": { | ||
"type": "boolean", | ||
"default": false, | ||
"description": "Report counts for ALL taxa, even if counts are zero", | ||
"fa_icon": "fas fa-expand-arrows-alt" | ||
}, | ||
"kraken2_report_minimizer_data": { | ||
"type": "boolean", | ||
"default": false, | ||
"description": "Include minimizer and distinct minimizer count information in report", | ||
"fa_icon": "fas fa-expand-arrows-alt", | ||
"hidden": true | ||
}, | ||
"kraken2_use_names": { | ||
"type": "boolean", | ||
"default": false, | ||
"description": "Print scientific names instead of just taxids", | ||
"fa_icon": "fas fa-expand-arrows-alt", | ||
"hidden": true | ||
}, | ||
"kraken2_memory_mapping": { | ||
"type": "boolean", | ||
"default": false, | ||
"description": "Avoid loading database into RAM", | ||
"fa_icon": "fas fa-expand-arrows-alt", | ||
"hidden": true | ||
}, | ||
"kraken2_minimum_hit_groups": { | ||
"type": "integer", | ||
"default": 2, | ||
"description": "Minimum number of hit groups needed to make a call", | ||
"fa_icon": "fas fa-expand-arrows-alt", | ||
"hidden": true | ||
}, | ||
"bracken_read_length": { | ||
"type": "integer", | ||
"default": 0, | ||
"description": "Read length to get all classifications for (0 = determine at runtime)", | ||
"fa_icon": "fas fa-expand-arrows-alt", | ||
"hidden": true | ||
}, | ||
"bracken_level": { | ||
"type": "string", | ||
"default": "S", | ||
"description": "Level to estimate abundance at", | ||
"fa_icon": "fas fa-expand-arrows-alt", | ||
"hidden": true | ||
}, | ||
"bracken_threshold": { | ||
"type": "integer", | ||
"default": 0, | ||
"description": "Reads required PRIOR to abundance estimation to perform re-estimation", | ||
"fa_icon": "fas fa-expand-arrows-alt", | ||
"hidden": true | ||
}, | ||
"skip_krona": { | ||
"type": "boolean", | ||
"default": false, | ||
"description": "Skip the creation of a Krona report", | ||
"fa_icon": "fas fa-expand-arrows-alt", | ||
"hidden": true | ||
} | ||
} | ||
} | ||
}, | ||
"allOf": [ | ||
{ | ||
"$ref": "#/definitions/kraken2_bracken_parameters" | ||
} | ||
] | ||
} |
Oops, something went wrong.