Create GATK wrapper which includes a workflow for each WDL #63

Open
wants to merge 5 commits into
from
@@ -0,0 +1,72 @@
+# --------------------------------------------------------------------------------------------
+# This ASEReadCounter WDL task was generated on 10/04/16 for use with GATK version 3.6
+# For more information on using this wrapper, please see the WDL repository at
+# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md
+# Task Summary: Calculate read counts per allele for allele-specific expression analysis
+# --------------------------------------------------------------------------------------------
+
+task ASEReadCounter {
+  # Wraps the GATK ASEReadCounter walker (wrapper generated for GATK 3.6):
+  # calculates read counts per allele for allele-specific expression analysis.
+  #
+  # Inputs
+  #   gatk                      GenomeAnalysisTK jar passed to `java -jar`.
+  #   ref / refIndex / refDict  Reference fasta plus its index and dict. Only `ref`
+  #                             appears on the command line; the other two are declared
+  #                             but unused there (presumably so they get staged with the
+  #                             fasta - confirm against the execution backend).
+  #   input_file                One or more BAM/CRAM inputs; each is emitted as its own
+  #                             `--input_file` argument.
+  #   intervals                 Optional list; each entry becomes its own `--intervals`.
+  #   unsafe                    Optional. Only emitted when supplied, so safety checks
+  #                             stay enabled unless the caller opts out.
+  #   sitesVCFFile              Required value for `-sites`.
+  #   userString                Optional free-form text appended to the command line.
+  #   All remaining optional inputs fall back to the defaults written in the command.
+  #
+  # Output
+  #   taskOut                   The value given to `-o` (the literal "stdout" when `out`
+  #                             is not supplied, mirroring the command default).
+  #
+  # NOTE(review): input_file and sitesVCFFile are typed String, so the engine will not
+  # localize them; confirm this is intended for the target backend.
+  File gatk
+  File ref
+  File refIndex
+  File refDict
+  String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string
+  Array[String] input_file
+  Array[String] ? intervals
+  String ? unsafe
+  String ? countOverlapReadsType
+  String ? minBaseQuality
+  Int ? minDepthOfNonFilteredBase
+  Int ? minMappingQuality
+  String ? out
+  String ? outputFormat
+  String sitesVCFFile
+
+  # Array inputs are expanded with `sep` so every element gets its own flag; the
+  # optional array is guarded with defined() so no dangling flag is emitted when unset.
+  command {
+    java -jar ${gatk} \
+      -T ASEReadCounter \
+      -R ${ref} \
+      --input_file ${sep=" --input_file " input_file} \
+      ${true="--intervals " false="" defined(intervals)}${sep=" --intervals " intervals} \
+      ${default="" "--unsafe " + unsafe} \
+      -overlap ${default="COUNT_FRAGMENTS_REQUIRE_SAME_BASE" countOverlapReadsType} \
+      -mbq ${default="0" minBaseQuality} \
+      -minDepth ${default="-1" minDepthOfNonFilteredBase} \
+      -mmq ${default="0" minMappingQuality} \
+      -o ${default="stdout" out} \
+      --outputFormat ${default="RTABLE" outputFormat} \
+      -sites ${sitesVCFFile} \
+      ${default="\n" userString}
+  }
+
+  output {
+    #To track additional outputs from your task, please manually add them below
+    # `out` is optional, so fall back to the same default the command line uses.
+    String taskOut = select_first([out, "stdout"])
+  }
+
+  runtime {
+    docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830"
+  }
+
+  parameter_meta {
+    gatk: "Executable jar for the GenomeAnalysisTK"
+    ref: "fasta file of reference genome"
+    refIndex: "Index file of reference genome"
+    refDict: "dict file of reference genome"
+    userString: "An optional parameter which allows the user to specify additions to the command line at run time"
+    countOverlapReadsType: "Handling of overlapping reads from the same fragment"
+    minBaseQuality: "Minimum base quality"
+    minDepthOfNonFilteredBase: "Minimum number of bases that pass filters"
+    minMappingQuality: "Minimum read mapping quality"
+    out: "An output file created by the walker. Will overwrite contents if file exists"
+    outputFormat: "Format of the output file, can be CSV, TABLE, RTABLE"
+    sitesVCFFile: "Undocumented option"
+    input_file: "Input file containing sequence data (BAM or CRAM)"
+    intervals: "One or more genomic intervals over which to operate"
+    unsafe: "Enable unsafe operations: nothing will be checked at runtime"
+  }
+}
+
+workflow ASEReadCounterWf {
+ # Single-call workflow: runs the ASEReadCounter task on its own. The call wires in
+ # no inputs, so every task input has to be supplied from outside the workflow.
+ call ASEReadCounter
+}
@@ -0,0 +1,60 @@
+# --------------------------------------------------------------------------------------------
+# This AnalyzeCovariates WDL task was generated on 10/04/16 for use with GATK version 3.6
+# For more information on using this wrapper, please see the WDL repository at
+# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md
+# Task Summary: Create plots to visualize base recalibration results
+# --------------------------------------------------------------------------------------------
+
+task AnalyzeCovariates {
+  # Wraps the GATK AnalyzeCovariates walker (wrapper generated for GATK 3.6):
+  # creates plots to visualize base recalibration results.
+  #
+  # Inputs
+  #   gatk                         GenomeAnalysisTK jar passed to `java -jar`.
+  #   ref / refIndex / refDict     Reference fasta plus its index and dict. Only `ref`
+  #                                appears on the command line.
+  #   BQSR                         Optional covariates table (`--BQSR`).
+  #   beforeReportFile             Optional first-pass BQSR report (`-before`).
+  #   afterReportFile              Optional second-pass BQSR report (`-after`).
+  #   ignoreLastModificationTimes  Optional; `-ignoreLMT` is emitted only when true.
+  #   intermediateCsvFile          Optional location of the csv intermediate file (`-csv`).
+  #   plotsReportFile              Optional location of the output report (`-plots`).
+  #                                Typed String because it names a file the tool writes,
+  #                                not one that must exist before the task starts.
+  #   userString                   Optional free-form text appended to the command line.
+  #
+  # Output
+  #   taskOut                      The plots report location, or "" when none was given.
+  #
+  # NOTE(review): intermediateCsvFile is still typed File; if it is purely an output
+  # location it should probably become String as well - confirm against the GATK docs.
+  File gatk
+  File ref
+  File refIndex
+  File refDict
+  String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string
+  File ? BQSR
+  File ? afterReportFile
+  File ? beforeReportFile
+  Boolean ? ignoreLastModificationTimes
+  File ? intermediateCsvFile
+  String ? plotsReportFile
+
+  # The boolean is rendered as a bare switch (present/absent) rather than being
+  # followed by a literal true/false value.
+  command {
+    java -jar ${gatk} \
+      -T AnalyzeCovariates \
+      -R ${ref} \
+      ${default="" "--BQSR " + BQSR} \
+      ${default="" "-after " + afterReportFile} \
+      ${default="" "-before " + beforeReportFile} \
+      ${true="-ignoreLMT" false="" ignoreLastModificationTimes} \
+      ${default="" "-csv " + intermediateCsvFile} \
+      ${default="" "-plots " + plotsReportFile} \
+      ${default="\n" userString}
+  }
+
+  output {
+    #To track additional outputs from your task, please manually add them below
+    # This task declares no `out` input, so report the plots location instead
+    # (empty string when the caller did not request a report).
+    String taskOut = select_first([plotsReportFile, ""])
+  }
+
+  runtime {
+    docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830"
+  }
+
+  parameter_meta {
+    gatk: "Executable jar for the GenomeAnalysisTK"
+    ref: "fasta file of reference genome"
+    refIndex: "Index file of reference genome"
+    refDict: "dict file of reference genome"
+    userString: "An optional parameter which allows the user to specify additions to the command line at run time"
+    afterReportFile: "file containing the BQSR second-pass report file"
+    beforeReportFile: "file containing the BQSR first-pass report file"
+    ignoreLastModificationTimes: "do not emit warning messages related to suspicious last modification time order of inputs"
+    intermediateCsvFile: "location of the csv intermediate file"
+    plotsReportFile: "location of the output report"
+    BQSR: "Input covariates table file for on-the-fly base quality score recalibration"
+  }
+}
+
+workflow AnalyzeCovariatesWf {
+ # Single-call workflow: runs the AnalyzeCovariates task on its own. The call wires in
+ # no inputs, so every task input has to be supplied from outside the workflow.
+ call AnalyzeCovariates
+}
@@ -0,0 +1,80 @@
+# --------------------------------------------------------------------------------------------
+# This ApplyRecalibration WDL task was generated on 10/04/16 for use with GATK version 3.6
+# For more information on using this wrapper, please see the WDL repository at
+# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md
+# Task Summary: Apply a score cutoff to filter variants based on a recalibration table
+# --------------------------------------------------------------------------------------------
+
+task ApplyRecalibration {
+  # Wraps the GATK ApplyRecalibration walker (wrapper generated for GATK 3.6):
+  # applies a score cutoff to filter variants based on a recalibration table.
+  #
+  # Inputs
+  #   gatk                      GenomeAnalysisTK jar passed to `java -jar`.
+  #   ref / refIndex / refDict  Reference fasta plus its index and dict. Only `ref`
+  #                             appears on the command line.
+  #   task_input                Raw input variants; each entry becomes its own `-input`.
+  #   recal_file                Required value for `-recalFile`.
+  #   intervals                 Optional list; each entry becomes its own `--intervals`.
+  #   ntVal                     Optional value for `-nt`.
+  #   excludeFiltered, ignore_all_filters, useAlleleSpecificAnnotations
+  #                             Optional booleans; the matching switch is emitted only
+  #                             when the value is true.
+  #   mode                      Defaults to SNP on the command line.
+  #   out                       Optional `-o` value; the command defaults it to "stdout".
+  #   userString                Optional free-form text appended to the command line.
+  #
+  # Output
+  #   taskOut                   The value given to `-o` (the literal "stdout" when `out`
+  #                             is not supplied, mirroring the command default).
+  #
+  # NOTE(review): task_input and recal_file are typed String, so the engine will not
+  # localize them; confirm this is intended for the target backend.
+  File gatk
+  File ref
+  File refIndex
+  File refDict
+  String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string
+  Array[String] ? intervals
+  Int ? ntVal
+  Boolean ? excludeFiltered
+  Boolean ? ignore_all_filters
+  String ? ignore_filter
+  Array[String] task_input
+  Float ? lodCutoff
+  String ? mode
+  String ? out
+  String recal_file
+  File ? tranches_file
+  Float ? ts_filter_level
+  Boolean ? useAlleleSpecificAnnotations
+
+  # Array inputs are expanded with `sep` so every element gets its own flag; booleans
+  # are rendered as bare switches; `-nt` is separated from its value by a space.
+  command {
+    java -jar ${gatk} \
+      -T ApplyRecalibration \
+      -R ${ref} \
+      ${true="--intervals " false="" defined(intervals)}${sep=" --intervals " intervals} \
+      ${default="" "-nt " + ntVal} \
+      ${true="-ef" false="" excludeFiltered} \
+      ${true="-ignoreAllFilters" false="" ignore_all_filters} \
+      ${default="" "-ignoreFilter " + ignore_filter} \
+      -input ${sep=" -input " task_input} \
+      ${default="" "-lodCutoff " + lodCutoff} \
+      -mode ${default="SNP" mode} \
+      -o ${default="stdout" out} \
+      -recalFile ${recal_file} \
+      ${default="" "-tranchesFile " + tranches_file} \
+      ${default="" "-ts_filter_level " + ts_filter_level} \
+      ${true="-AS" false="" useAlleleSpecificAnnotations} \
+      ${default="\n" userString}
+  }
+
+  output {
+    #To track additional outputs from your task, please manually add them below
+    # `out` is optional, so fall back to the same default the command line uses.
+    String taskOut = select_first([out, "stdout"])
+  }
+
+  runtime {
+    docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830"
+  }
+
+  parameter_meta {
+    gatk: "Executable jar for the GenomeAnalysisTK"
+    ref: "fasta file of reference genome"
+    refIndex: "Index file of reference genome"
+    refDict: "dict file of reference genome"
+    userString: "An optional parameter which allows the user to specify additions to the command line at run time"
+    excludeFiltered: "Don't output filtered loci after applying the recalibration"
+    ignore_all_filters: "If specified, the variant recalibrator will ignore all input filters. Useful to rerun the VQSR from a filtered output file."
+    ignore_filter: "If specified, the recalibration will be applied to variants marked as filtered by the specified filter name in the input VCF file"
+    task_input: "The raw input variants to be recalibrated"
+    lodCutoff: "The VQSLOD score below which to start filtering"
+    mode: "Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously."
+    out: "The output filtered and recalibrated VCF file in which each variant is annotated with its VQSLOD value"
+    recal_file: "The input recal file used by ApplyRecalibration"
+    tranches_file: "The input tranches file describing where to cut the data"
+    ts_filter_level: "The truth sensitivity level at which to start filtering"
+    useAlleleSpecificAnnotations: "If specified, the tool will attempt to apply a filter to each allele based on the input tranches and allele-specific .recal file."
+    intervals: "One or more genomic intervals over which to operate"
+  }
+}
+
+workflow ApplyRecalibrationWf {
+ # Single-call workflow: runs the ApplyRecalibration task on its own. The call wires in
+ # no inputs, so every task input has to be supplied from outside the workflow.
+ call ApplyRecalibration
+}
@@ -0,0 +1,113 @@
+# --------------------------------------------------------------------------------------------
+# This BaseRecalibrator WDL task was generated on 10/04/16 for use with GATK version 3.6
+# For more information on using this wrapper, please see the WDL repository at
+# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md
+# Task Summary: Detect systematic errors in base quality scores
+# --------------------------------------------------------------------------------------------
+
+task BaseRecalibrator {
+  # Wraps the GATK BaseRecalibrator walker (wrapper generated for GATK 3.6):
+  # detects systematic errors in base quality scores.
+  #
+  # Inputs
+  #   gatk                      GenomeAnalysisTK jar passed to `java -jar`.
+  #   ref / refIndex / refDict  Reference fasta plus its index and dict. Only `ref`
+  #                             appears on the command line.
+  #   input_file                One or more BAM/CRAM inputs; each is emitted as its own
+  #                             `--input_file` argument.
+  #   intervals                 Optional list; each entry becomes its own `--intervals`.
+  #   knownSites                Optional list; each entry becomes its own `-knownSites`.
+  #                             Nothing is emitted when the list is not supplied.
+  #   nctVal                    Optional value for `-nct`.
+  #   out                       Required path of the recalibration table to create.
+  #                             Typed String because the file does not exist until the
+  #                             tool writes it.
+  #   list, lowMemoryMode, no_standard_covs, sort_by_all_columns,
+  #   run_without_dbsnp_potentially_ruining_quality
+  #                             Optional booleans; the matching switch is emitted only
+  #                             when the value is true.
+  #   userString                Optional free-form text appended to the command line.
+  #   All remaining optional inputs fall back to the defaults written in the command.
+  #
+  # Output
+  #   taskOut                   The path passed to `-o`.
+  #
+  # NOTE(review): input_file and knownSites are typed as Strings, so the engine will
+  # not localize them; confirm this is intended for the target backend.
+  File gatk
+  File ref
+  File refIndex
+  File refDict
+  String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string
+  Array[String] input_file
+  Array[String] ? intervals
+  File ? BQSR
+  Int ? nctVal
+  String ? binary_tag_name
+  Float ? bqsrBAQGapOpenPenalty
+  String ? covariate
+  String ? deletions_default_quality
+  Int ? indels_context_size
+  String ? insertions_default_quality
+  Array[String] ? knownSites
+  Boolean ? list
+  String ? low_quality_tail
+  Boolean ? lowMemoryMode
+  Int ? maximum_cycle_value
+  Int ? mismatches_context_size
+  String ? mismatches_default_quality
+  Boolean ? no_standard_covs
+  String out
+  Int ? quantizing_levels
+  Boolean ? run_without_dbsnp_potentially_ruining_quality
+  String ? solid_nocall_strategy
+  String ? solid_recal_mode
+  Boolean ? sort_by_all_columns
+
+  # Array inputs are expanded with `sep` so every element gets its own flag; optional
+  # arrays are guarded with defined() so no dangling flag is emitted when unset;
+  # booleans are rendered as bare switches; `-nct` is separated from its value.
+  command {
+    java -jar ${gatk} \
+      -T BaseRecalibrator \
+      -R ${ref} \
+      --input_file ${sep=" --input_file " input_file} \
+      ${true="--intervals " false="" defined(intervals)}${sep=" --intervals " intervals} \
+      ${default="" "--BQSR " + BQSR} \
+      ${default="" "-nct " + nctVal} \
+      ${default="" "-bintag " + binary_tag_name} \
+      -bqsrBAQGOP ${default="40.0" bqsrBAQGapOpenPenalty} \
+      ${default="" "-cov " + covariate} \
+      -ddq ${default="45" deletions_default_quality} \
+      -ics ${default="3" indels_context_size} \
+      -idq ${default="45" insertions_default_quality} \
+      ${true="-knownSites " false="" defined(knownSites)}${sep=" -knownSites " knownSites} \
+      ${true="-ls" false="" list} \
+      -lqt ${default="2" low_quality_tail} \
+      ${true="-lowMemoryMode" false="" lowMemoryMode} \
+      -maxCycle ${default="500" maximum_cycle_value} \
+      -mcs ${default="2" mismatches_context_size} \
+      -mdq ${default="-1" mismatches_default_quality} \
+      ${true="-noStandard" false="" no_standard_covs} \
+      -o ${out} \
+      -ql ${default="16" quantizing_levels} \
+      ${true="-run_without_dbsnp_potentially_ruining_quality" false="" run_without_dbsnp_potentially_ruining_quality} \
+      -solid_nocall_strategy ${default="THROW_EXCEPTION" solid_nocall_strategy} \
+      -sMode ${default="SET_Q_ZERO" solid_recal_mode} \
+      ${true="-sortAllCols" false="" sort_by_all_columns} \
+      ${default="\n" userString}
+  }
+
+  output {
+    #To track additional outputs from your task, please manually add them below
+    String taskOut = "${out}"
+  }
+
+  runtime {
+    docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830"
+  }
+
+  parameter_meta {
+    gatk: "Executable jar for the GenomeAnalysisTK"
+    ref: "fasta file of reference genome"
+    refIndex: "Index file of reference genome"
+    refDict: "dict file of reference genome"
+    userString: "An optional parameter which allows the user to specify additions to the command line at run time"
+    binary_tag_name: "the binary tag covariate name if using it"
+    bqsrBAQGapOpenPenalty: "BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets"
+    covariate: "One or more covariates to be used in the recalibration. Can be specified multiple times"
+    deletions_default_quality: "default quality for the base deletions covariate"
+    indels_context_size: "Size of the k-mer context to be used for base insertions and deletions"
+    insertions_default_quality: "default quality for the base insertions covariate"
+    knownSites: "A database of known polymorphic sites"
+    list: "List the available covariates and exit"
+    low_quality_tail: "minimum quality for the bases in the tail of the reads to be considered"
+    lowMemoryMode: "Reduce memory usage in multi-threaded code at the expense of threading efficiency"
+    maximum_cycle_value: "The maximum cycle value permitted for the Cycle covariate"
+    mismatches_context_size: "Size of the k-mer context to be used for base mismatches"
+    mismatches_default_quality: "default quality for the base mismatches covariate"
+    no_standard_covs: "Do not use the standard set of covariates, but rather just the ones listed using the -cov argument"
+    out: "The output recalibration table file to create"
+    quantizing_levels: "number of distinct quality scores in the quantized output"
+    run_without_dbsnp_potentially_ruining_quality: "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only."
+    solid_nocall_strategy: "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ"
+    solid_recal_mode: "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS"
+    sort_by_all_columns: "Sort the rows in the tables of reports"
+    input_file: "Input file containing sequence data (BAM or CRAM)"
+    intervals: "One or more genomic intervals over which to operate"
+    BQSR: "Input covariates table file for on-the-fly base quality score recalibration"
+  }
+}
+
+workflow BaseRecalibratorWf {
+ # Single-call workflow: runs the BaseRecalibrator task on its own. The call wires in
+ # no inputs, so every task input has to be supplied from outside the workflow.
+ call BaseRecalibrator
+}
Oops, something went wrong.