broadinstitute · bshifaw · Apr 11, 2023 · Apr 5, 2023 · Apr 5, 2023 · Apr 5, 2023
diff --git a/pipelines/ONT/Assembly/ONTAssembleWithCanu.wdl b/pipelines/ONT/Assembly/ONTAssembleWithCanu.wdl
@@ -1,11 +1,5 @@
 version 1.0
 
-######################################################################################
-## A workflow that performs single sample genome assembly on ONT reads from one or
-## more flow cells. The workflow merges multiple samples into a single BAM prior to
-## genome assembly and variant calling.
-######################################################################################
-
 import "../../../tasks/Utility/Utils.wdl" as Utils
 import "../../../tasks/Assembly/Canu.wdl" as Canu
 import "../../../tasks/Preprocessing/Medaka.wdl" as Medaka
@@ -14,22 +8,9 @@ import "../../../tasks/QC/Quast.wdl" as Quast
 import "../../../tasks/Utility/Finalize.wdl" as FF
 
 workflow ONTAssembleWithCanu {
-    input {
-        String gcs_fastq_dir
-
-        File ref_map_file
-
-        Float correct_error_rate = 0.15
-        Float trim_error_rate = 0.15
-        Float assemble_error_rate = 0.15
-        String medaka_model = "r941_prom_high_g360"
-
-        String participant_name
-        String prefix
-
-        String gcs_out_root_dir
+    meta {
+        description: "A workflow that performs single sample genome assembly on ONT reads from one or more flow cells. The workflow merges multiple samples into a single BAM prior to genome assembly and variant calling."
     }
-
     parameter_meta {
         gcs_fastq_dir:       "GCS path to unaligned CCS BAM files"
 
@@ -46,6 +27,22 @@ workflow ONTAssembleWithCanu {
         gcs_out_root_dir:    "GCS bucket to store the reads, variants, and metrics files"
     }
 
+    input {
+        String gcs_fastq_dir
+
+        File ref_map_file
+
+        Float correct_error_rate = 0.15
+        Float trim_error_rate = 0.15
+        Float assemble_error_rate = 0.15
+        String medaka_model = "r941_prom_high_g360"
+
+        String participant_name
+        String prefix
+
+        String gcs_out_root_dir
+    }
+
     Map[String, String] ref_map = read_map(ref_map_file)
 
     String outdir = sub(gcs_out_root_dir, "/$", "") + "/ONTAssembleWithCanu/~{prefix}"

diff --git a/pipelines/ONT/Assembly/ONTAssembleWithFlye.wdl b/pipelines/ONT/Assembly/ONTAssembleWithFlye.wdl
@@ -1,11 +1,5 @@
 version 1.0
 
-######################################################################################
-## A workflow that performs single sample genome assembly on ONT reads from one or
-## more flow cells. The workflow merges multiple samples into a single BAM prior to
-## genome assembly and variant calling.
-######################################################################################
-
 import "../../../tasks/Utility/Utils.wdl" as Utils
 import "../../../tasks/Assembly/Flye.wdl" as Flye
 import "../../../tasks/Preprocessing/Medaka.wdl" as Medaka
@@ -14,19 +8,9 @@ import "../../../tasks/QC/Quast.wdl" as Quast
 import "../../../tasks/Utility/Finalize.wdl" as FF
 
 workflow ONTAssembleWithFlye {
-    input {
-        String gcs_fastq_dir
-
-        File ref_map_file
-
-        String medaka_model = "r941_prom_high_g360"
-
-        String participant_name
-        String prefix
-
-        String gcs_out_root_dir
+    meta {
+        description: "Perform single sample genome assembly on ONT reads from one or more flow cells. The workflow merges multiple samples into a single BAM prior to genome assembly and variant calling."
     }
-
     parameter_meta {
         gcs_fastq_dir:       "GCS path to unaligned CCS BAM files"
 
@@ -40,6 +24,20 @@ workflow ONTAssembleWithFlye {
         gcs_out_root_dir:    "GCS bucket to store the reads, variants, and metrics files"
     }
 
+    input {
+        String gcs_fastq_dir
+
+        File ref_map_file
+
+        String medaka_model = "r941_prom_high_g360"
+
+        String participant_name
+        String prefix
+
+        String gcs_out_root_dir
+    }
+
+
     Map[String, String] ref_map = read_map(ref_map_file)
 
     String outdir = sub(gcs_out_root_dir, "/$", "") + "/ONTAssembleWithFlye/~{prefix}"

diff --git a/pipelines/ONT/Epigenomics/ONTMethylation.wdl b/pipelines/ONT/Epigenomics/ONTMethylation.wdl
@@ -7,6 +7,20 @@ import "../../../tasks/Preprocessing/Guppy.wdl" as Guppy
 import "../../../tasks/Utility/Finalize.wdl" as FF
 
 workflow ONTMethylation {
+
+    meta {
+        description: "ONT Methylation pipeline"
+    }
+    parameter_meta {
+        gcs_fast5_dir: "GCS directory containing fast5 files"
+        ref_map_file: "Reference map file"
+        variants: "VCF file containing variants"
+        variants_tbi: "Tabix index for VCF file"
+        participant_name: "Participant name"
+        prefix: "Prefix for output files"
+        gcs_out_root_dir: "GCS directory to write output files"
+    }
+
     input {
         String gcs_fast5_dir
 

diff --git a/pipelines/ONT/MultiAnalysis/ONTPfHrp2Hrp3Status.wdl b/pipelines/ONT/MultiAnalysis/ONTPfHrp2Hrp3Status.wdl
@@ -4,6 +4,15 @@ import "../../../structs/Structs.wdl"
 import "../../../tasks/Utility/Finalize.wdl" as FF
 
 workflow ONTPfHrp2Hrp3Status {
+
+    meta {
+        description: "Determine if HRP2 and HRP3 are deleted in a sample"
+    }
+    parameter_meta {
+        bam: "BAM file"
+        bai: "BAM index file"
+    }
+
     input {
         File bam
         File bai

diff --git a/pipelines/ONT/MultiAnalysis/ONTPfTypeDrugResistanceMarkers.wdl b/pipelines/ONT/MultiAnalysis/ONTPfTypeDrugResistanceMarkers.wdl
@@ -4,6 +4,16 @@ import "../../../structs/Structs.wdl"
 import "../../../tasks/Utility/Finalize.wdl" as FF
 
 workflow ONTPfTypeDrugResistanceMarkers {
+
+    meta {
+        description: "Workflow to generate a report of drug resistance markers"
+    }
+    parameter_meta {
+        vcf: "VCF file to process"
+        dir_prefix: "Prefix for output directory"
+        gcs_out_root_dir: "GCS output root directory"
+    }
+
     input {
         File vcf
 

diff --git a/pipelines/ONT/Preprocessing/ONTBasecall.wdl b/pipelines/ONT/Preprocessing/ONTBasecall.wdl
@@ -4,6 +4,18 @@ import "../../../tasks/Preprocessing/Guppy.wdl" as Guppy
 import "../../../tasks/Utility/Finalize.wdl" as FF
 
 workflow ONTBasecall {
+
+    meta {
+        description: "Basecall ONT reads"
+    }
+    parameter_meta {
+        gcs_fast5_dir: "GCS path to the directory containing fast5 files"
+        config: "Guppy config file"
+        barcode_kit: "Guppy barcode kit"
+        gcs_out_root_dir: "GCS path to the root directory for output"
+        prefix: "Prefix for output directory"
+    }
+
     input {
         String gcs_fast5_dir
         String config = "dna_r10.4.1_e8.2_400bps_sup.cfg"

diff --git a/pipelines/ONT/Preprocessing/ONTFlowcell.wdl b/pipelines/ONT/Preprocessing/ONTFlowcell.wdl
@@ -8,23 +8,10 @@ import "../../../tasks/Visualization/NanoPlot.wdl" as NP
 import "../../../tasks/Utility/Finalize.wdl" as FF
 
 workflow ONTFlowcell {
-    input {
-        File? final_summary
-        File? sequencing_summary
-        String? fastq_dir
-
-        File ref_map_file
-
-        String SM
-        String ID
-
-        Int num_shards = 300
-        String experiment_type
-        String dir_prefix
 
-        String gcs_out_root_dir
+    meta {
+        description: "Align ONT reads to a reference genome"
     }
-
     parameter_meta {
         final_summary:      "GCS path to '*final_summary*.txt*' file for basecalled fastq files"
         sequencing_summary: "GCS path to '*sequencing_summary*.txt*' file for basecalled fastq files"
@@ -42,6 +29,23 @@ workflow ONTFlowcell {
         gcs_out_root_dir:   "GCS bucket to store the reads, variants, and metrics files"
     }
 
+    input {
+        File? final_summary
+        File? sequencing_summary
+        String? fastq_dir
+
+        File ref_map_file
+
+        String SM
+        String ID
+
+        Int num_shards = 300
+        String experiment_type
+        String dir_prefix
+
+        String gcs_out_root_dir
+    }
+
     Map[String, String] ref_map = read_map(ref_map_file)
     Map[String, String] map_presets = {
         'DNA':  'map-ont',

diff --git a/pipelines/ONT/VariantCalling/ONTWholeGenome.wdl b/pipelines/ONT/VariantCalling/ONTWholeGenome.wdl
@@ -1,11 +1,5 @@
 version 1.0
 
-######################################################################################
-## A workflow that performs single sample variant calling on Oxford Nanopore reads
-## from one or more flow cells. The workflow merges multiple samples into a single BAM
-## prior to variant calling.
-######################################################################################
-
 import "../../../tasks/Utility/ONTUtils.wdl" as ONT
 import "../../../tasks/Utility/Utils.wdl" as Utils
 import "../../../tasks/VariantCalling/CallVariantsONT.wdl" as VAR
@@ -14,6 +8,31 @@ import "../../../tasks/Utility/Finalize.wdl" as FF
 import "../../../tasks/QC/SampleLevelAlignedMetrics.wdl" as COV
 
 workflow ONTWholeGenome {
+
+    meta {
+        description: "A workflow that performs single sample variant calling on Oxford Nanopore reads from one or more flow cells. The workflow merges multiple flowcells into a single BAM prior to variant calling."
+    }
+    parameter_meta {
+        aligned_bams:       "GCS path to aligned BAM files"
+        aligned_bais:       "GCS path to aligned BAM file indices"
+        participant_name:   "name of the participant from whom these samples were obtained"
+
+        ref_map_file:       "table indicating reference sequence and auxiliary file locations"
+        gcs_out_root_dir:   "GCS bucket to store the reads, variants, and metrics files"
+
+        call_svs:               "whether to call SVs"
+        fast_less_sensitive_sv: "to trade less sensitive SV calling for faster speed"
+
+        call_small_variants: "whether to call small variants"
+        call_small_vars_on_mitochondria: "if false, will not attempt to call variants on mitochondria; if true, some samples might fail (caller feature) due to lack of signal"
+        sites_vcf:     "for use with Clair"
+        sites_vcf_tbi: "for use with Clair"
+
+        run_dv_pepper_analysis:  "to turn on DV-Pepper analysis or not (non-trivial increase in cost and runtime)"
+        ref_scatter_interval_list_locator: "A file holding paths to interval_list files; needed only when running DV-Pepper"
+        ref_scatter_interval_list_ids:     "A file that gives short IDs to the interval_list files; needed only when running DV-Pepper"
+    }
+
     input {
         Array[File] aligned_bams
         Array[File] aligned_bais
@@ -42,27 +61,6 @@ workflow ONTWholeGenome {
         File? ref_scatter_interval_list_ids
     }
 
-    parameter_meta {
-        aligned_bams:       "GCS path to aligned BAM files"
-        aligned_bais:       "GCS path to aligned BAM file indices"
-        participant_name:   "name of the participant from whom these samples were obtained"
-
-        ref_map_file:       "table indicating reference sequence and auxillary file locations"
-        gcs_out_root_dir:   "GCS bucket to store the reads, variants, and metrics files"
-
-        call_svs:               "whether to call SVs"
-        fast_less_sensitive_sv: "to trade less sensitive SV calling for faster speed"
-
-        call_small_variants: "whether to call small variants"
-        call_small_vars_on_mitochondria: "if false, will not attempt to call variants on mitochondria; if true, some samples might fail (caller feature) due to lack of signal"
-        sites_vcf:     "for use with Clair"
-        sites_vcf_tbi: "for use with Clair"
-
-        run_dv_pepper_analysis:  "to turn on DV-Pepper analysis or not (non-trivial increase in cost and runtime)"
-        ref_scatter_interval_list_locator: "A file holding paths to interval_list files; needed only when running DV-Pepper"
-        ref_scatter_interval_list_ids:     "A file that gives short IDs to the interval_list files; needed only when running DV-Pepper"
-    }
-
     Map[String, String] ref_map = read_map(ref_map_file)
 
     String outdir = sub(gcs_out_root_dir, "/$", "") + "/ONTWholeGenome/~{participant_name}"

diff --git a/pipelines/PacBio/Alignment/PBFlowcell.wdl b/pipelines/PacBio/Alignment/PBFlowcell.wdl
@@ -1,12 +1,5 @@
 version 1.0
 
-##########################################################################################
-## A workflow that performs CCS correction on PacBio HiFi reads from a single flow cell.
-## The workflow shards the subreads into clusters and performs CCS in parallel on each cluster.
-## Ultimately, all the corrected reads (and uncorrected) are gathered into a single BAM.
-## Various metrics are produced along the way.
-##########################################################################################
-
 import "../../../tasks/Utility/PBUtils.wdl" as PB
 import "../../../tasks/Alignment/AlignReads.wdl" as AR
 import "../../../tasks/Utility/Utils.wdl" as Utils
@@ -20,6 +13,30 @@ import "../../../tasks/Transcriptomics/MASSeq.wdl" as MAS
 import "../../../tasks/Utility/JupyterNotebooks.wdl" as JUPYTER
 
 workflow PBFlowcell {
+
+    meta {
+        description: "The workflow performs the alignment of an SMRT cell's worth of data to a reference. For genomic sequencing data, the workflow also optionally performs CCS correction if the data is from a CCS library but did not get corrected on-instrument. For MAS-seq transcriptome data, this workflow will determine the most likely MAS-seq model, then it will use that model to annotate, segment, and filter the CCS reads. These CCS reads will then be aligned to the reference in transcriptome alignment mode. Note: Currently the MAS-seq workflow separates CLR reads, but does not process them."
+    }
+    parameter_meta {
+        bam:                "GCS path to raw subread bam"
+        ccs_report_txt:     "GCS path to CCS report txt, required if on-instrument corrected, otherwise CCS is run in this workflow for CCS libraries"
+        pbi:                "GCS path to pbi index for raw subread bam"
+        ref_map_file:       "table indicating reference sequence and auxiliary file locations"
+
+        SM:                 "the value to place in the BAM read group's SM field"
+        LB:                 "the value to place in the BAM read group's LB (library) field"
+
+        num_shards:         "number of shards into which fastq files should be batched"
+        experiment_type:    "type of experiment run (CLR, CCS, ISOSEQ, MASSEQ)"
+        dir_prefix:         "directory prefix for output files"
+
+        mas_seq_model:      "Longbow model to use for MAS-seq data."
+
+        DEBUG_MODE:         "[default valued] enables debugging tasks / subworkflows (default: false)"
+
+        gcs_out_root_dir:   "GCS bucket to store the reads, variants, and metrics files"
+    }
+
     input {
         File bam
         File pbi
@@ -45,26 +62,6 @@ workflow PBFlowcell {
         Boolean DEBUG_MODE = false
     }
 
-    parameter_meta {
-        bam:                "GCS path to raw subread bam"
-        ccs_report_txt:     "GCS path to CCS report txt, required if on-instrument corrected, otherwise CCS is run in this workflow for CCS libraries"
-        pbi:                "GCS path to pbi index for raw subread bam"
-        ref_map_file:       "table indicating reference sequence and auxillary file locations"
-
-        SM:                 "the value to place in the BAM read group's SM field"
-        LB:                 "the value to place in the BAM read group's LB (library) field"
-
-        num_shards:         "number of shards into which fastq files should be batched"
-        experiment_type:    "type of experiment run (CLR, CCS, ISOSEQ, MASSEQ)"
-        dir_prefix:         "directory prefix for output files"
-
-        mas_seq_model:      "Longbow model to use for MAS-seq data."
-
-        DEBUG_MODE:         "[default valued] enables debugging tasks / subworkflows (default: false)"
-
-        gcs_out_root_dir:   "GCS bucket to store the reads, variants, and metrics files"
-    }
-
     # Call our timestamp so we can store outputs without clobbering previous runs:
     call Utils.GetCurrentTimestampString as WdlExecutionStartTimestamp { input: }