Input CSV bug fixes regarding sample ID #173

Status: Closed. Wants to merge 7 commits.
2 changes: 1 addition & 1 deletion assets/kma
Submodule kma updated from 50940e to beac9f
2 changes: 1 addition & 1 deletion assets/pointfinder_db
Submodule pointfinder_db updated from cb7806 to 3556ba
2 changes: 1 addition & 1 deletion assets/resfinder_db
Collaborator:

Not sure why these databases have two changes from the merge. Could it be the newline formatting on different platforms? @sylvinite @LordRust

Member:

Double-checked, and it's because the database it was "updated to" is actually 3 months older.
So the commits that change the submodule commit ID target need to be removed.
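As a sketch of what that cleanup could look like (assuming the default branch is named master, a clean working tree, and that these four submodule paths are the affected ones), the accidental pointer changes can be undone by restoring the gitlinks recorded on master:

```shell
# Sketch: restore the submodule commit targets recorded on master,
# instead of the accidentally committed (older) targets.
git checkout master -- assets/kma assets/pointfinder_db assets/resfinder_db assets/virulencefinder_db
git submodule update --init assets/kma assets/pointfinder_db assets/resfinder_db assets/virulencefinder_db
git commit -m "Restore submodule commit targets from master"
```

`git checkout master -- <path>` rewrites the index and worktree entry for those paths only, so the rest of the branch's changes stay intact.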

Submodule resfinder_db updated from 1a53e5 to 18e398
3 changes: 2 additions & 1 deletion assets/test_data/samplelist.csv
@@ -1,2 +1,3 @@
id,platform,read1,read2
p1000,illumina,/path/to/JASEN/assets/test_data/sequencing_data/saureus_100k/saureus_large_R1_001.fastq.gz,/path/to/JASEN/assets/test_data/sequencing_data/saureus_100k/saureus_large_R2_001.fastq.gz
Member:

Please let the example config remain as is. Make a separate config for your local system, or add it to .gitignore.

p1000,illumina,/projects/mikro/nobackup/gms/jasen2/JASEN/assets/test_data/sequencing_data/saureus_100k/saureus_large_R1_001.fastq.gz,/projects/mikro/nobackup/gms/jasen2/JASEN/assets/test_data/sequencing_data/saureus_100k/saureus_large_R2_001.fastq.gz
p1000,illumina,/projects/mikro/nobackup/gms/jasen2/JASEN/assets/test_data/sequencing_data/saureus_100k/saureus_large_R1_001.fastq.gz,/projects/mikro/nobackup/gms/jasen2/JASEN/assets/test_data/sequencing_data/saureus_100k/saureus_large_R2_001.fastq.gz
2 changes: 1 addition & 1 deletion assets/virulencefinder_db
Submodule virulencefinder_db updated from f678bd to 3bca2b
12 changes: 6 additions & 6 deletions configs/nextflow.base.config
@@ -1,22 +1,22 @@
params {
Member:

Please let the example config remain as is. Make a separate config for your local system, or add it to .gitignore.
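One way to follow this suggestion: keep `configs/nextflow.base.config` with its placeholder paths, and supply a small per-system override via Nextflow's `-c` flag (config files given later on the command line override earlier ones). A minimal sketch, using this system's paths as an example and a hypothetical `local.config` filename kept out of version control:

```groovy
// local.config (hypothetical, listed in .gitignore): per-system path overrides
params {
    root     = "/projects/mikro/nobackup/gms/jasen2/JASEN"
    krakenDb = "${root}/assets/krakenstd"
    workDir  = "/projects/mikro/nobackup/gms/jasen2/work"
    outdir   = "/projects/mikro/nobackup/gms/jasen2/results"
}
```

It would be applied with something like `nextflow run main.nf -c configs/nextflow.base.config -c local.config`, leaving the shared example config untouched.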

root = "/filepath/to/JASEN" //edit
root = "/projects/mikro/nobackup/gms/jasen2/JASEN" //edit
amrfinderDb = "${root}/assets/amrfinder_db/latest"
resfinderDb = "${root}/assets/resfinder_db"
pointfinderDb = "${root}/assets/pointfinder_db"
virulencefinderDb = "${root}/assets/virulencefinder_db"
mlstBlastDb = "${root}/assets/mlst_db/blast"
krakenDb = "/filepath/to/kraken_db" //edit if useKraken = true
workDir = "/filepath/to/workdir" //edit
krakenDb = "/projects/mikro/nobackup/gms/jasen2/JASEN/assets/krakenstd" //edit if useKraken = true
workDir = "/projects/mikro/nobackup/gms/jasen2/work" //edit
publishDir = ""
publishDirMode = 'copy'
publishDirOverwrite = true
outdir = "/filepath/to/outdir" //edit
outdir = "/projects/mikro/nobackup/gms/jasen2/results" //edit
scratch = true
containerDir = "${root}/container"
args = ""
prefix = ""
useKraken = true
useSkesa = true // To use spades set useSkesa = false
useSkesa = false // To use spades set useSkesa = false
}

profiles {
@@ -215,7 +215,7 @@ process {
}

singularity {
//runOptions = '--bind /local/' // Bind/mount directories to image //Remove "//" and edit directories you wish to bind
runOptions = '--bind /projects --bind /beegfs-storage' // Bind/mount directories to image //Remove "//" and edit directories you wish to bind
//autoMounts = true
enabled = true

165 changes: 165 additions & 0 deletions main.nf
@@ -1,14 +1,179 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl=2
idList = [] // Global variable to save IDs from samplesheet
Collaborator:

This should be removed


include { CALL_STAPHYLOCOCCUS_AUREUS } from './workflows/staphylococcus_aureus.nf'
include { CALL_MYCOBACTERIUM_TUBERCULOSIS } from './workflows/mycobacterium_tuberculosis.nf'

<<<<<<< HEAD
Member:

Contains bad merge

Collaborator:

Yeah this happens when you merge and have conflicts that you need to resolve. These lines should be deleted.
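Before committing a resolved merge, leftover markers like these are easy to catch mechanically. A sketch (assuming the markers sit at the start of a line, as git writes them):

```shell
# Sketch: list any leftover conflict markers in main.nf before committing
grep -nE '^(<<<<<<<|=======|>>>>>>>)' main.nf
```

`git diff --check` also flags leftover conflict markers (along with whitespace errors) across the whole tree.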



// Function for platform and paired-end or single-end
def get_meta(LinkedHashMap row) {
Collaborator:

This has been moved to methods/get_meta.nf. Remove this and copy the changes there. :)

platforms = ["illumina", "nanopore", "pacbio", "iontorrent"]
idErrorMessage = "ERROR: Please check input samplesheet -> ID '${row.id}' is not a unique name in the samplesheet."
platformErrorMessage = "ERROR: Please check input samplesheet -> Platform is not one of the following:!\n-${platforms.join('\n-')}"

idList.add(row.id)
identicalIds=idList.clone().unique().size() != idList.size()
correctPlatform = (row.platform in platforms)

if (correctPlatform && !identicalIds) {
if (row.read2) {
meta = tuple(row.id, tuple(file(row.read1), file(row.read2)), row.platform)
} else {
meta = tuple(row.id, tuple(file(row.read1)), row.platform)
}
} else if (!correctPlatform && identicalIds) {
exit 1, platformErrorMessage + "\n\n" + idErrorMessage
} else if (!correctPlatform && !identicalIds) {
exit 1, platformErrorMessage
} else if (correctPlatform && identicalIds) {
exit 1, idErrorMessage
}
return meta
}

workflow bacterial_default {
Channel.fromPath(params.csv).splitCsv(header:true)
.map{ row -> get_meta(row) }
.branch {
iontorrent: it[2] == "iontorrent"
illumina: it[2] == "illumina"
}
.set{ meta }

// load references
genomeReference = file(params.genomeReference, checkIfExists: true)
genomeReferenceDir = file(genomeReference.getParent(), checkIfExists: true)
// databases
amrfinderDb = file(params.amrfinderDb, checkIfExists: true)
mlstDb = file(params.mlstBlastDb, checkIfExists: true)
chewbbacaDb = file(params.chewbbacaDb, checkIfExists: true)
coreLociBed = file(params.coreLociBed, checkIfExists: true)
trainingFile = file(params.trainingFile, checkIfExists: true)
resfinderDb = file(params.resfinderDb, checkIfExists: true)
pointfinderDb = file(params.pointfinderDb, checkIfExists: true)
virulencefinderDb = file(params.virulencefinderDb, checkIfExists: true)

main:
// reads trim and clean
assembly_trim_clean(meta.iontorrent).set{ clean_meta }
meta.illumina.mix(clean_meta).set{ input_meta }
input_meta.map { sampleName, reads, platform -> [ sampleName, reads ] }.set{ reads }

// analysis metadata
save_analysis_metadata(input_meta)

// assembly and qc processing
bwa_mem_ref(reads, genomeReferenceDir)
samtools_index_ref(bwa_mem_ref.out.bam)

bwa_mem_ref.out.bam
.join(samtools_index_ref.out.bai)
.multiMap { id, bam, bai ->
bam: tuple(id, bam)
bai: bai
}
.set{ post_align_qc_ch }
post_align_qc(post_align_qc_ch.bam, post_align_qc_ch.bai, coreLociBed)

// assembly
skesa(input_meta)
spades_illumina(input_meta)
spades_iontorrent(input_meta)

Channel.empty().mix(skesa.out.fasta, spades_illumina.out.fasta, spades_iontorrent.out.fasta).set{ assembly }

// mask polymorph regions
bwa_index(assembly)
reads
.join(bwa_index.out.idx)
.multiMap { id, reads, bai ->
reads: tuple(id, reads)
bai: bai
}
.set { bwa_mem_dedup_ch }
bwa_mem_dedup(bwa_mem_dedup_ch.reads, bwa_mem_dedup_ch.bai)
samtools_index_assembly(bwa_mem_dedup.out.bam)

// construct freebayes input channels
assembly
.join(bwa_mem_dedup.out.bam)
.join(samtools_index_assembly.out.bai)
.multiMap { id, fasta, bam, bai ->
assembly: tuple(id, fasta)
mapping: tuple(bam, bai)
}
.set { freebayes_ch }

freebayes(freebayes_ch.assembly, freebayes_ch.mapping)
mask_polymorph_assembly(assembly.join(freebayes.out.vcf))
quast(assembly, genomeReference)
mlst(assembly, params.species, mlstDb)
// split assemblies and id into two seperate channels to enable re-pairing
// of results and id at a later stage. This to allow batch cgmlst/wgmlst analysis
mask_polymorph_assembly.out.fasta
.multiMap { sampleName, filePath ->
sampleName: sampleName
filePath: filePath
}
.set{ maskedAssemblyMap }

chewbbaca_create_batch_list(maskedAssemblyMap.filePath.collect())
chewbbaca_allelecall(maskedAssemblyMap.sampleName.collect(), chewbbaca_create_batch_list.out.list, chewbbacaDb, trainingFile)
chewbbaca_split_results(chewbbaca_allelecall.out.sampleName, chewbbaca_allelecall.out.calls)

// end point
export_to_cdm(chewbbaca_split_results.out.output.join(quast.out.qc).join(post_align_qc.out.qc))

// sourmash
sourmash(assembly)

// antimicrobial detection (amrfinderplus & abritamr)
amrfinderplus(assembly, amrfinderDb)
//abritamr(amrfinderplus.out.output)

// perform resistance prediction
resfinder(reads, params.species, resfinderDb, pointfinderDb)
virulencefinder(reads, params.useVirulenceDbs, virulencefinderDb)

// combine results for export
quast.out.qc
.join(post_align_qc.out.qc)
.join(mlst.out.json)
.join(chewbbaca_split_results.out.output)
.join(amrfinderplus.out.output)
.join(resfinder.out.json)
.join(resfinder.out.meta)
.join(virulencefinder.out.json)
.join(virulencefinder.out.meta)
.join(save_analysis_metadata.out.meta)
.set{ combinedOutput }

// Using kraken for species identificaiton
if ( params.useKraken ) {
krakenDb = file(params.krakenDb, checkIfExists: true)
kraken(reads, krakenDb)
bracken(kraken.out.report, krakenDb).output
combinedOutput = combinedOutput.join(bracken.out.output)
create_analysis_result(combinedOutput)
} else {
emptyBrackenOutput = reads.map { sampleName, reads -> [ sampleName, [] ] }
combinedOutput = combinedOutput.join(emptyBrackenOutput)
create_analysis_result(combinedOutput)
}

emit:
pipeline_result = create_analysis_result.output
cdm_import = export_to_cdm.output
=======
Collaborator (@ryanjameskennedy, Sep 5, 2023):

Everything above and including this line should be removed as it is in the workflows.

workflow {
if (workflow.profile == "staphylococcus_aureus") {
CALL_STAPHYLOCOCCUS_AUREUS()
} else if (workflow.profile == "mycobacterium_tuberculosis") {
CALL_MYCOBACTERIUM_TUBERCULOSIS()
}
>>>>>>> master
Collaborator:

Remove this line (leftover marker from the merge conflict).

}
49 changes: 40 additions & 9 deletions methods/get_meta.nf
@@ -1,14 +1,45 @@
idList = [] // Global variable to save IDs from samplesheet
Collaborator:

Can you not create a list within the get_meta.nf method instead?


// Function for platform and paired-end or single-end
def get_meta(LinkedHashMap row) {
platforms = ["illumina", "nanopore", "pacbio", "iontorrent"]
if (row.platform in platforms) {
if (row.read2) {
meta = tuple(row.id, tuple(file(row.read1), file(row.read2)), row.platform)
} else {
meta = tuple(row.id, tuple(file(row.read1)), row.platform)
}
platforms = ["illumina", "nanopore", "pacbio", "iontorrent"]

// Error messages
idErrorMessage = "\nERROR: Please check input samplesheet -> ID '${row.id}' is not a unique name in the samplesheet."
Collaborator:

I think this is a good effort, but I reckon we should consider making a new method called check_input.nf, because this check plays no role in the act of getting the metadata. Revert this method to its original format and create a separate method file.
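A possible shape for the suggested check_input.nf, shown here only as a sketch with hypothetical names: validating all rows up front also removes the need for the global `idList`, since duplicates can be counted across the whole samplesheet in one pass.

```groovy
// methods/check_input.nf (hypothetical): validate every samplesheet row at once
def check_input(List<Map> rows) {
    def platforms = ["illumina", "nanopore", "pacbio", "iontorrent"]
    def errors = []

    // Duplicate IDs are found by counting occurrences across all rows
    def duplicated = rows.countBy { it.id }.findAll { it.value > 1 }.keySet()
    duplicated.each { errors << "ERROR: ID '${it}' is not a unique name in the samplesheet." }

    rows.each { row ->
        if (!(row.platform in platforms)) {
            errors << "ERROR: ID '${row.id}': platform must be one of:\n-${platforms.join('\n-')}"
        }
        if (!(((String) row.id) ==~ /^[\x00-\x7F]+$/)) {
            errors << "ERROR: ID '${row.id}' must only consist of ASCII characters."
        }
        if (row.id.length() < 5 || row.id.length() > 50) {
            errors << "ERROR: ID '${row.id}' must be between 5-50 characters long."
        }
    }
    if (errors) {
        exit 1, errors.join('\n')
    }
}
```

get_meta would then shrink back to only building the (id, reads, platform) tuple, and the workflow could call check_input on the collected CSV rows before mapping them.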

platformErrorMessage = "\nERROR: Please check input samplesheet -> Platform is not one of the following:\n-${platforms.join('\n-')}"
charErrorMessage = "\nERROR in ID '${row.id}': Please check input samplesheet -> ID must only consist of ASCII characters."
lengthErrorMessage = "\nERROR in ID '${row.id}': Please check input samplesheet -> ID must only be between 5-50 characters long."
errorMessages = []

// Check if rows fullfill requirements
idList.add(row.id)
identicalIds=idList.clone().unique().size() != idList.size()
correctPlatform = (row.platform in platforms)
correctCharacters = ((String) row.id) =~ /^[\x00-\x7F]+$/
correctLength = (row.id.length() >= 5 && row.id.length() <= 50)

// Append error messages if false
if (identicalIds){
errorMessages+=idErrorMessage
}
if (!correctPlatform){
errorMessages+=platformErrorMessage
}
if (!correctCharacters){
errorMessages+=charErrorMessage
}
if (!correctLength){
errorMessages+=lengthErrorMessage
}

if (correctPlatform && !identicalIds && correctCharacters && correctLength) {
if (row.read2) {
meta = tuple(row.id, tuple(file(row.read1), file(row.read2)), row.platform)
} else {
exit 1, "ERROR: Please check input samplesheet -> Platform is not one of the following:!\n-${platforms.join('\n-')}"
meta = tuple(row.id, tuple(file(row.read1)), row.platform)
}
return meta
} else {
exit 1, errorMessages.each { println it }
}
return meta
}
20 changes: 14 additions & 6 deletions nextflow-modules/modules/chewbbaca/main.nf
@@ -59,14 +59,22 @@ process chewbbaca_create_batch_list {
script:
output = "batch_input.list"
"""
realpath $maskedAssembly > $output
touch /projects/mikro/nobackup/gms/jasen2/chewbbaca_temp.txt
Collaborator:

All of the changes in this file need to be reverted to the original.


for f in $maskedAssembly
do
echo "Processing $f" > /projects/mikro/nobackup/gms/jasen2/chewbbaca_temp.txt
done


"""
//realpath $maskedAssembly > $output

stub:
output = "batch_input.list"
"""
touch $output
"""
//stub:
//output = "batch_input.list"

//touch $output

}

process chewbbaca_split_results {