Skip to content

Commit

Permalink
feat: Add gene order entry (#176)
Browse files Browse the repository at this point in the history
* feat: Add gene order entry

* docs: Add gene order entry

* docs: Update diagram
  • Loading branch information
jvfe committed Nov 18, 2023
1 parent 6b7e268 commit b22988b
Show file tree
Hide file tree
Showing 10 changed files with 260 additions and 122 deletions.
Binary file modified assets/arete.diagram.light.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified assets/arete.diagram.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/assets/arete.diagram.light.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/assets/arete.diagram.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
276 changes: 154 additions & 122 deletions docs/assets/arete.drawio.xml

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,26 @@ nextflow run beiko-lab/ARETE \
-profile docker
```

### Gene Order Entry

To execute the Gene Order analysis on pre-existing assemblies and RGI annotations:

```bash
nextflow run beiko-lab/ARETE \
-entry gene_order \
--input_sample_table gene_order_samplesheet.csv \
-profile docker
```

- `--input_sample_table` - A samplesheet listing, for each assembly, a sample name, a FASTA file, a GenBank file and an RGI output file:

```
sample,fna_file_path,gbk,rgi
SAMD00052607,SAMD00052607.faa,SAMD00052607.gbk,SAMD00052607_rgi.txt
SAMEA1466699,SAMEA1466699.faa,SAMEA1466699.gbk,SAMEA1466699_rgi.txt
SAMEA1486355,SAMEA1486355.faa,SAMEA1486355.gbk,SAMEA1486355_rgi.txt
```

## Updating the pipeline

When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To ensure that you're running the latest version of the pipeline, regularly update the cached version:
Expand Down
5 changes: 5 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ include { POPPUNK } from './workflows/arete'
include { RUN_RSPR } from './workflows/arete'
include { RUN_EVOLCCM } from './workflows/arete'
include { RUN_RECOMBINATION } from './workflows/arete'
include { RUN_GENE_ORDER } from './workflows/arete'

//
// WORKFLOW: Run main nf-core/arete analysis pipeline
Expand Down Expand Up @@ -81,6 +82,10 @@ workflow evolccm {
// Named workflow `recombination`: a thin wrapper that only invokes
// RUN_RECOMBINATION, which is included from './workflows/arete'.
workflow recombination {
RUN_RECOMBINATION()
}

// Named workflow `gene_order` (selected on the command line with `-entry gene_order`):
// a thin wrapper that only invokes RUN_GENE_ORDER, which is included from './workflows/arete'.
workflow gene_order {
RUN_GENE_ORDER()
}
/*
========================================================================================
RUN ALL WORKFLOWS
Expand Down
47 changes: 47 additions & 0 deletions subworkflows/local/geneorder_input_check.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
//
// Parse the gene order samplesheet and validate its sample IDs.
//
// take: samplesheet     - channel carrying the samplesheet CSV (must have a header row)
// emit: geneorder_input - one item per CSV row, as built by get_sample_info_geneorder():
//                         [ meta, assembly file, GenBank file, RGI file ]
//
workflow GENEORDER_INPUT_CHECK {
    take:
    samplesheet

    main:
    // One channel item per samplesheet row, converted to [ meta, fasta, gbk, rgi ]
    geneorder_input = samplesheet
        .splitCsv(header: true)
        .map { row -> get_sample_info_geneorder(row) }

    // Abort the run as soon as a sample ID containing a dot is seen
    geneorder_input
        .map { entry -> entry[0].id }
        .subscribe {
            if ( "$it".contains(".") ) {
                exit 1, "Please review data input, sampleIDs may not contain dots, but \"$it\" does."
            }
        }

    emit:
    geneorder_input
}

/*
 * Convert one samplesheet row into the tuple consumed by the gene order workflow.
 *
 * row     - map produced by splitCsv(header: true); reads the columns
 *           `sample`, `fna_file_path`, `gbk` and `rgi`.
 * returns - [ meta, assembly file, GenBank file, RGI file ], where meta is
 *           [ id: <sample>, single_end: true ].
 *
 * Terminates the run (exit 1) when one of the three path columns is missing/blank
 * or points to a file that does not exist.
 */
def get_sample_info_geneorder(row) {
    def meta = [:]
    meta.id = row.sample
    meta.single_end = true //Bit of a hack; call assemblies "single end" to allow passing to kraken

    // Validate a single path column and return it as a file object.
    // `label` is the human-readable name used in the error messages.
    def checked_file = { column, label ->
        def path = row[column]

        // A missing or blank column would otherwise reach file() and fail with a
        // cryptic error that does not mention the samplesheet at all.
        if (!path?.trim()) {
            exit 1, "ERROR: Please check input samplesheet -> ${label} file path (column '${column}') is missing or empty for sample '${row.sample}'!"
        }

        def resolved = file(path)
        if (!resolved.exists()) {
            print("***")
            print(path)
            print("***")
            exit 1, "ERROR: Please check input samplesheet -> ${label} file does not exist!\n${path}"
        }
        return resolved
    }

    // List elements are evaluated left to right, so the checks run in the same
    // order as the samplesheet columns: assembly, GenBank, RGI.
    return [
        meta,
        checked_file('fna_file_path', 'Assembly'),
        checked_file('gbk', 'GenBank'),
        checked_file('rgi', 'RGI')
    ]
}
4 changes: 4 additions & 0 deletions test/gene-order/gene_order_samplesheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
sample,fna_file_path,gbk,rgi
SAMD00052607,test/gene-order/FAA/SAMD00052607.faa,test/gene-order/GBK/SAMD00052607.gbk,test/gene-order/RGI/SAMD00052607_rgi.txt
SAMEA1466699,test/gene-order/FAA/SAMEA1466699.faa,test/gene-order/GBK/SAMEA1466699.gbk,test/gene-order/RGI/SAMEA1466699_rgi.txt
SAMEA1486355,test/gene-order/FAA/SAMEA1486355.faa,test/gene-order/GBK/SAMEA1486355.gbk,test/gene-order/RGI/SAMEA1486355_rgi.txt
30 changes: 30 additions & 0 deletions workflows/arete.nf
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { PHYLO_INPUT_CHECK } from '../subworkflows/local/phylo_input_check'
include { ANNOTATION_INPUT_CHECK } from '../subworkflows/local/annotation_input_check'
include { RSPR_INPUT_CHECK } from '../subworkflows/local/rspr_input_check'
include { GENEORDER_INPUT_CHECK } from '../subworkflows/local/geneorder_input_check'
include { ASSEMBLE_SHORTREADS } from '../subworkflows/local/assembly'
include { ANNOTATE_ASSEMBLIES } from '../subworkflows/local/annotation'
include { CHECK_ASSEMBLIES } from '../subworkflows/local/assemblyqc'
Expand Down Expand Up @@ -683,6 +684,35 @@ workflow RUN_RECOMBINATION {
ch_software_versions = ch_software_versions.mix(MULTIQC.out.versions.ifEmpty(null))

}

//
// Gene order entry workflow: reads the samplesheet given by
// --input_sample_table, validates it with GENEORDER_INPUT_CHECK and passes
// the per-sample assemblies, GenBank files and RGI outputs to GENE_ORDER.
//
workflow RUN_GENE_ORDER {
    // The samplesheet is mandatory for this entry point
    if (!params.input_sample_table) {
        exit 1, 'Input samplesheet not specified!'
    }
    ch_input = Channel.of(file(params.input_sample_table))

    GENEORDER_INPUT_CHECK(ch_input)

    // Each item is [ meta, assembly, gbk, rgi ]
    all_inputs = GENEORDER_INPUT_CHECK.out.geneorder_input

    // Split the combined tuple into one [ meta, file ] channel per input type
    assemblies = all_inputs.map { meta, assembly, gbk, rgi -> [meta, assembly] }
    gbks       = all_inputs.map { meta, assembly, gbk, rgi -> [meta, gbk] }
    rgis       = all_inputs.map { meta, assembly, gbk, rgi -> [meta, rgi] }

    GENE_ORDER(assemblies, gbks, rgis)
}
/*
========================================================================================
COMPLETION EMAIL AND SUMMARY
Expand Down

0 comments on commit b22988b

Please sign in to comment.