Skip to content

Commit

Permalink
fix: Make CheckM auto-detect file extensions (#76)
Browse files Browse the repository at this point in the history
* test: Compress test data FASTA files

* test: Change tests to use Channel.of

* Revert "test: Compress test data FASTA files"

This reverts commit 673fc7e.

* test: Remove duplicated assignment

* refactor: Move assemblyqc subwf into separate wf file

* fix: Change imports

* feat: Auto-detect fasta file extension

* test: Add assemblyqc test

* test: Change paths to file values

* test: Remove vibrant from testing

- Because its db fills up the runner storage

* refactor: Remove meta.id from checkm outdir

* Revert "refactor: Remove meta.id from checkm outdir"

This reverts commit ec98f36.

* test: Skip assemblyqc test on the CI

* tests: Fix trace size

* chore: Bump version and changelog
  • Loading branch information
jvfe committed Apr 10, 2023
1 parent aef2f6c commit 4f20672
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 43 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,17 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v2.0.1 dev - April 10, 2023

### `Fixed`

- Makes CheckM auto-detect FASTA file extensions
- So it doesn't fail with files without `.fna`

### `Added`

- Moves Assembly QC subworkflow into its own file

## v2.0 dev - April 5, 2023

### `Added`
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ manifest {
description = 'AMR/VF LGT-focused bacterial genomics workflow'
mainScript = 'main.nf'
nextflowVersion = '!>=21.04.0'
version = '2.0dev'
version = '2.0.1dev'
}

// Function to ensure that resource requirements don't go beyond
Expand Down
60 changes: 60 additions & 0 deletions subworkflows/local/assemblyqc.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
include { GET_DB_CACHE } from '../../modules/local/get_db_cache'
include { QUAST } from '../../modules/nf-core/quast/main'
include { KRAKEN2_KRAKEN2 as KRAKEN2_RUN } from '../../modules/nf-core/kraken2/kraken2/main'
include { CHECKM_LINEAGEWF } from '../../modules/nf-core/checkm/lineagewf/main'

workflow CHECK_ASSEMBLIES {
take:
assemblies
krakendb_cache
reference_genome
use_reference_genome

main:

ch_multiqc_files = Channel.empty()
ch_software_versions = Channel.empty()

///*
// * MODULE: Run Kraken2
// */
if (!params.skip_kraken) {
if(krakendb_cache) {
GET_DB_CACHE(krakendb_cache)
KRAKEN2_RUN(assemblies, GET_DB_CACHE.out.minikraken, false, true)
} else {
KRAKEN2_DB()
KRAKEN2_RUN(assemblies, KRAKEN2_DB.out.minikraken, false, true)
}

ch_software_versions = ch_software_versions.mix(KRAKEN2_RUN.out.versions.first().ifEmpty(null))
ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_RUN.out.report.collect{it[1]}.ifEmpty([]))
}

fasta_extension = assemblies.map{ id, path -> path.getExtension() }.first()

/*
* Module: CheckM Quality Check
*/
CHECKM_LINEAGEWF(assemblies, fasta_extension, [])
ch_software_versions = ch_software_versions.mix(CHECKM_LINEAGEWF.out.versions.first().ifEmpty(null))
/*
* Module: QUAST quality check
*/
// Need to reformat assembly channel for QUAST
// pattern adapted from nf-core/bacass
ch_assembly = Channel.empty()
ch_assembly = ch_assembly.mix(assemblies.dump(tag: 'assembly'))
ch_assembly
.map { meta, fasta -> fasta } //QUAST doesn't take the meta tag
.collect()
.set { ch_to_quast }
QUAST(ch_to_quast, reference_genome, [], use_reference_genome, false)
ch_software_versions = ch_software_versions.mix(QUAST.out.versions.ifEmpty(null))

ch_multiqc_files = ch_multiqc_files.mix(QUAST.out.tsv.collect())

emit:
assemblyqc_software = ch_software_versions
multiqc = ch_multiqc_files
}
38 changes: 38 additions & 0 deletions tests/subworkflows/local/assemblyqc.nf.test.skip
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
nextflow_workflow {

name "Test Workflow CHECK_ASSEMBLIES"
script "subworkflows/local/assemblyqc.nf"
workflow "CHECK_ASSEMBLIES"

test("Assembly QC runs without failures") {

when {
params {
skip_kraken = true
outdir = "$outputDir"
}
workflow {
"""
// define inputs of the workflow here. Example:
input[0] = Channel.of(
[[id:'SRR14022735'], file("$baseDir/test/SRR14022735_T1.scaffolds.fa")],
[[id:'SRR14022737'], file("$baseDir/test/SRR14022737_T1.scaffolds.fa")],
[[id:'SRR14022754'], file("$baseDir/test/SRR14022754_T1.scaffolds.fa")],
[[id:'SRR14022764'], file("$baseDir/test/SRR14022764_T1.scaffolds.fa")],
)
input[1] = []
// No ref genome
input[2] = []
input[3] = false
"""
}
}

then {
assert workflow.success
assert workflow.trace.tasks().size() >= 2
}

}

}
4 changes: 2 additions & 2 deletions tests/subworkflows/local/phylo.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ nextflow_workflow {
workflow {
"""
// define inputs of the workflow here. Example:
input[0] = ch = Channel.from([
input[0] = Channel.of(
[[id:'SRR14022735'], '$baseDir/test/SRR14022735_T1.gff'],
[[id:'SRR14022764'], '$baseDir/test/SRR14022764_T1.gff'],
[[id:'SRR14022754'], '$baseDir/test/SRR14022754_T1.gff']
])
)
// Don't use full alignment
input[1] = false
// Use fasttree
Expand Down
4 changes: 2 additions & 2 deletions tests/subworkflows/local/poppunk.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ nextflow_workflow {
workflow {
"""
// define inputs of the workflow here. Example:
input[0] = Channel.from([
input[0] = Channel.of(
[[id:'SRR14022735'], "$baseDir/test/SRR14022735_T1.scaffolds.fa"],
[[id:'SRR14022737'], "$baseDir/test/SRR14022737_T1.scaffolds.fa"],
[[id:'SRR14022754'], "$baseDir/test/SRR14022754_T1.scaffolds.fa"],
[[id:'SRR14022764'], "$baseDir/test/SRR14022764_T1.scaffolds.fa"],
])
)
"""
}
}
Expand Down
47 changes: 9 additions & 38 deletions workflows/arete.nf
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ include { INPUT_CHECK;

include { ASSEMBLE_SHORTREADS } from '../subworkflows/local/assembly'
include { ANNOTATE_ASSEMBLIES } from '../subworkflows/local/annotation'
include { CHECK_ASSEMBLIES } from '../subworkflows/local/assemblyqc'
include { PHYLOGENOMICS } from '../subworkflows/local/phylo'
include { RUN_POPPUNK } from '../subworkflows/local/poppunk'
include { SUBSET_GENOMES } from '../subworkflows/local/subsample'
Expand All @@ -59,8 +60,6 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { FASTQC as TRIM_FASTQC } from '../modules/nf-core/fastqc/main'
include { FASTP } from '../modules/nf-core/fastp/main'
include { UNICYCLER } from '../modules/nf-core/unicycler/main'
include { QUAST } from '../modules/nf-core/quast/main'
include { KRAKEN2_KRAKEN2 as KRAKEN2_RUN } from '../modules/nf-core/kraken2/kraken2/main'
include { PROKKA } from '../modules/nf-core/prokka/main'
include { GET_CAZYDB;
GET_VFDB;
Expand All @@ -73,7 +72,6 @@ include { DIAMOND_BLASTX as DIAMOND_BLAST_CAZY;
DIAMOND_BLASTX as DIAMOND_BLAST_BACMET } from '../modules/nf-core/diamond/blastx/main'
include { IQTREE } from '../modules/nf-core/iqtree/main'
include { SNPSITES } from '../modules/nf-core/snpsites/main'
include { CHECKM_LINEAGEWF } from '../modules/nf-core/checkm/lineagewf/main'
//
// MODULE: Local to the pipeline
//
Expand Down Expand Up @@ -446,41 +444,14 @@ workflow QUALITYCHECK{
*/
ANNOTATION_INPUT_CHECK(ch_input)

///*
// * MODULE: Run Kraken2
// */
if (!params.skip_kraken) {
if(db_cache) {
GET_DB_CACHE(db_cache)
KRAKEN2_RUN(ANNOTATION_INPUT_CHECK.out.genomes, GET_DB_CACHE.out.minikraken, false, true)
} else {
KRAKEN2_DB()
KRAKEN2_RUN(ANNOTATION_INPUT_CHECK.out.genomes, KRAKEN2_DB.out.minikraken, false, true)
}

ch_software_versions = ch_software_versions.mix(KRAKEN2_RUN.out.versions.first().ifEmpty(null))
ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_RUN.out.report.collect{it[1]}.ifEmpty([]))
}
/*
* Module: CheckM Quality Check
*/
CHECKM_LINEAGEWF(ANNOTATION_INPUT_CHECK.out.genomes, "fna", []) //todo figure out a way to infer the file extension during input check
ch_software_versions = ch_software_versions.mix(CHECKM_LINEAGEWF.out.versions.first().ifEmpty(null))
/*
* Module: QUAST quality check
*/
// Need to reformat assembly channel for QUAST
// pattern adapted from nf-core/bacass
ch_assembly = Channel.empty()
ch_assembly = ch_assembly.mix(ANNOTATION_INPUT_CHECK.out.genomes.dump(tag: 'assembly'))
ch_assembly
.map { meta, fasta -> fasta } //QUAST doesn't take the meta tag
.collect()
.set { ch_to_quast }
QUAST(ch_to_quast, ch_reference_genome, [], use_reference_genome, false)
ch_software_versions = ch_software_versions.mix(QUAST.out.versions.ifEmpty(null))

ch_multiqc_files = ch_multiqc_files.mix(QUAST.out.tsv.collect())
CHECK_ASSEMBLIES(
ANNOTATION_INPUT_CHECK.out.genomes,
db_cache,
ch_reference_genome,
use_reference_genome
)
ch_software_versions = ch_software_versions.mix(CHECK_ASSEMBLIES.out.assemblyqc_software)
ch_multiqc_files = ch_multiqc_files.mix(CHECK_ASSEMBLIES.out.multiqc)

MULTIQC(
ch_multiqc_files.collect(),
Expand Down

0 comments on commit 4f20672

Please sign in to comment.