#!/bin/python
"""Generate one WDL task file (plus a single-call workflow) per GATK tool.

Usage: python <this script> <jsonDirectory> <gatkVersion>

<jsonDirectory> must contain:
  * engine_args_per_tool.json -- maps a tool name to a list of
    [flag, defaultValue, "required"|"optional"] engine-level arguments;
  * org_broadinstitute_gatk_engine_CommandLineGATK.php.json -- its
    "arguments" list supplies the type and summary of every engine flag;
  * one JSON file per tool, recognised by ".json" and "gatk_tools" in its
    file name.

For every tool whose "group" contains "Tools" the script writes
<jsonDirectory>/WDLTasks/<toolName>_<gatkVersion>.wdl.

This script was first committed as
scripts/wrappers/gatk/gatkToWdlWrapper_Firecloud.py and later renamed to
gatkToWdlWrapper_withWorkflow.py.
"""
import json
import sys
import datetime
import os

_ENGINE_JSON = "engine_args_per_tool.json"
_CMD_JSON = "org_broadinstitute_gatk_engine_CommandLineGATK.php.json"
_OUTPUT_SUBDIR = "WDLTasks"

# Horizontal rule that frames the header comment of every generated file.
_RULE = "# --------------------------------------------------------------------------------------------\n"

# "defaultValue" strings (compared after removing whitespace) that mean the
# argument has no usable default and must be omitted when the user gives none.
_NO_DEFAULT_VALUES = ("NA", "none", "[]")


def getWdlType(jsonType):
    """Translate a type name from the GATK JSON docs into a WDL type.

    "double" -> Float, "integer"/"int" -> Int, "boolean" -> Boolean and
    "file" -> File (all case-insensitive).  Any name containing "list" becomes
    Array[<element type>], where the element type is the text between the
    first "[" and the final character.  Everything else maps to String.
    """
    lowered = jsonType.lower()
    if lowered == "double":
        return "Float"
    if lowered in ("integer", "int"):
        return "Int"
    if lowered == "boolean":
        return "Boolean"
    if lowered == "file":
        return "File"
    if "list" in lowered:
        start = jsonType.find("[")
        if start < 0:
            # No element type given.  Slicing from find() == -1 used to chop
            # one character per recursion and nest Array[...] repeatedly.
            return "Array[String]"
        return "Array[" + getWdlType(jsonType[start + 1:len(jsonType) - 1]) + "]"
    return "String"


def getArgIndexByName(nameStr, args):
    """Return the index of the entry in args whose "name" equals nameStr.

    The comparison is case-insensitive.  Returns -1 when nothing matches;
    callers must check for it, because indexing a list with -1 silently
    selects the last element.
    """
    wanted = nameStr.lower()
    for index, arg in enumerate(args):
        if arg["name"].lower() == wanted:
            return index
    return -1


def checkName(nameStr):
    """Turn an argument name into the identifier used in the generated WDL.

    Every ":" is removed, and the name "input" is renamed to "task_input"
    (presumably because `input` is reserved in WDL -- TODO confirm).
    """
    cleaned = nameStr.replace(":", "")
    if cleaned == "input":
        return "task_input"
    return cleaned


def checkQuotes(myStr):
    """Return myStr with every double-quote character removed.

    The result is embedded inside a double-quoted WDL string literal.
    """
    return myStr.replace("\"", "")


def _loadJson(path):
    """Parse the JSON file at path and return the resulting object."""
    with open(path) as jsonFile:
        return json.load(jsonFile)


def _summaryFirstLine(summary):
    """Return summary cut at its first newline (unchanged if none is found)."""
    cutIndex = summary.find("\n")
    if cutIndex > 0:
        return summary[0:cutIndex]
    return summary


def _commandFlag(arg):
    """Return the command-line flag for a tool argument.

    The short synonym is used when there is one.  A "synonyms" value of "NA"
    means there is none, so the full name (e.g. "--outputFormat") is used.
    """
    if arg["synonyms"] != "NA":
        return arg["synonyms"]
    return arg["name"]


def _hasDefault(defaultValue):
    """Tell whether defaultValue is a real default rather than a placeholder."""
    return "".join(defaultValue.split()) not in _NO_DEFAULT_VALUES


def _resolveEngineArgs(toolName, engineData, cmdArgs):
    """Collect the engine-level arguments of toolName with type and summary.

    Each returned dict has the keys flag, name, default, status, type and
    summary.  An argument is reported and skipped when cmdArgs does not
    describe it or when its status is neither "required" nor "optional", so
    that the declaration, command and parameter_meta sections stay consistent.
    """
    resolved = []
    for entry in engineData.get(toolName, []):
        flag, default, status = entry[0], entry[1], entry[2]
        index = getArgIndexByName(flag, cmdArgs)
        if index < 0:
            print("There was an error resolving an engine argument: " + flag
                  + " is not described in " + _CMD_JSON + " (tool " + toolName + "); skipping it")
            continue
        if status.lower() not in ("required", "optional"):
            print("There was an error determining the optional status of an engine argument: Input received: "
                  + status + " for the argument " + toolName + ": " + flag)
            continue
        resolved.append({
            "flag": flag,
            # [2:] drops the leading "--" of the flag.
            "name": checkName(flag[2:]),
            "default": default,
            "status": status.lower(),
            "type": getWdlType(cmdArgs[index]["type"]),
            "summary": cmdArgs[index]["summary"],
        })
    return resolved


def _reportBadRequired(arg):
    """Report a tool argument whose "required" field is neither yes nor no."""
    print("There was an error determining optional status of an argument. Input received: "
          + arg["required"] + " for the argument " + arg["synonyms"])


def _requiredLine(flag, name):
    """Command line fragment for an argument that must always be supplied."""
    return "\t\t\t" + flag + " ${" + name + "} \\\n"


def _defaultedLine(flag, default, name):
    """Command line fragment for an optional argument with a default value."""
    return "\t\t\t" + flag + " ${default=\"" + checkQuotes(default) + "\" " + name + "} \\\n"


def _optionalLine(flag, name):
    """Command line fragment that vanishes when the argument is not supplied."""
    return "\t\t\t${default=\"\" \"" + flag + " \" + " + name + "} \\\n"


def _writeHeader(wdlFile, toolName, version, toolSummary):
    """Write the header comment and open the task block."""
    wdlFile.write(_RULE
                  + "# This " + toolName + " WDL task was generated on "
                  + datetime.date.today().strftime("%m/%d/%y")
                  + " for use with GATK version " + version + "\n"
                  + "# For more information on using this wrapper, please see the WDL repository at \n"
                  + "# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md\n"
                  + "# Task Summary: " + toolSummary + "\n"
                  + _RULE + "\n")
    wdlFile.write("task " + toolName + " { \n")


def _writeDeclarations(wdlFile, toolData, engineArgs):
    """Declare the default, engine-level, parallelism and tool inputs."""
    wdlFile.write("\tFile gatk\n"
                  + "\tFile ref\n"
                  + "\tFile refIndex\n"
                  + "\tFile refDict\n"
                  + "\tString ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string\n")
    for engineArg in engineArgs:
        separator = " " if engineArg["status"] == "required" else " ? "
        wdlFile.write("\t" + engineArg["type"] + separator + engineArg["name"] + "\n")
    for parallel in toolData["parallel"]:
        # "-nt" becomes the Int input "ntVal".
        wdlFile.write("\tInt ? " + parallel["arg"][1:] + "Val\n")
    for arg in toolData["arguments"]:
        if arg["required"] == "yes":
            separator = " "
        elif arg["required"] == "no":
            separator = " ? "
        else:
            _reportBadRequired(arg)
            continue
        wdlFile.write("\t" + getWdlType(arg["type"]) + separator + checkName(arg["name"][2:]) + "\n")


def _writeCommand(wdlFile, toolName, toolData, engineArgs):
    """Write the command block that invokes the tool through the GATK jar."""
    wdlFile.write("\n\tcommand {\n"
                  + "\t\tjava -jar ${gatk} \\\n"
                  + "\t\t\t-T " + toolName + " \\\n"
                  + "\t\t\t-R ${ref} \\\n")
    for engineArg in engineArgs:
        if engineArg["status"] == "required":
            wdlFile.write(_requiredLine(engineArg["flag"], engineArg["name"]))
        elif engineArg["default"].lower() == "null":
            wdlFile.write(_optionalLine(engineArg["flag"], engineArg["name"]))
        else:
            wdlFile.write(_defaultedLine(engineArg["flag"], engineArg["default"], engineArg["name"]))
    for parallel in toolData["parallel"]:
        # Flag and value are separated by a space, like every other optional
        # argument (this used to produce e.g. "-nt4").
        wdlFile.write(_optionalLine(parallel["arg"], parallel["arg"][1:] + "Val"))
    for arg in toolData["arguments"]:
        name = checkName(arg["name"][2:])
        if arg["required"] == "yes":
            wdlFile.write(_requiredLine(_commandFlag(arg), name))
        elif arg["required"] == "no":
            if _hasDefault(arg["defaultValue"]):
                wdlFile.write(_defaultedLine(_commandFlag(arg), arg["defaultValue"], name))
            else:
                wdlFile.write(_optionalLine(_commandFlag(arg), name))
        else:
            _reportBadRequired(arg)
    wdlFile.write("\t\t\t${default=\"\\n\" userString} \n"
                  + "\t}\n")


def _writeOutputAndRuntime(wdlFile):
    """Write the fixed output and runtime blocks."""
    # NOTE(review): taskOut references ${out} even for tools that declare no
    # `out` input -- confirm whether such tasks validate, or make this
    # conditional on the tool having that argument.
    wdlFile.write("\n\toutput {\n"
                  + "\t\t#To track additional outputs from your task, please manually add them below\n"
                  + "\t\tString taskOut = \"${out}\"\n"
                  + "\t}\n")
    #TODO Replace docker image with GATK-specific one
    wdlFile.write("\n\truntime {\n"
                  + "\t\tdocker: \"broadinstitute/genomes-in-the-cloud:2.2.2-1466113830\"\n"
                  + "\t}\n")


def _writeParameterMeta(wdlFile, toolData, engineArgs):
    """Write parameter_meta (tool arguments first, then engine arguments) and close the task."""
    wdlFile.write("\n\tparameter_meta {\n"
                  + "\t\tgatk: \"Executable jar for the GenomeAnalysisTK\"\n"
                  + "\t\tref: \"fasta file of reference genome\"\n"
                  + "\t\trefIndex: \"Index file of reference genome\"\n"
                  + "\t\trefDict: \"dict file of reference genome\"\n"
                  + "\t\tuserString: \"An optional parameter which allows the user to specify additions to the command line at run time\"\n")
    for arg in toolData["arguments"]:
        wdlFile.write("\t\t" + checkName(arg["name"][2:]) + ": \"" + checkQuotes(arg["summary"]) + "\"\n")
    for engineArg in engineArgs:
        wdlFile.write("\t\t" + engineArg["name"] + ": \"" + checkQuotes(engineArg["summary"]) + "\"\n")
    wdlFile.write("\t}\n}\n")


def _writeWorkflow(wdlFile, toolName):
    """Write a workflow that does nothing but call the task."""
    wdlFile.write("\nworkflow " + toolName + "Wf { \n"
                  + "\tcall " + toolName + "\n"
                  + "}\n")


def _writeToolWdl(wdlFile, toolData, engineArgs, version):
    """Write the complete WDL document for one tool to wdlFile."""
    toolName = toolData["name"]
    _writeHeader(wdlFile, toolName, version, _summaryFirstLine(toolData["summary"]))
    _writeDeclarations(wdlFile, toolData, engineArgs)
    _writeCommand(wdlFile, toolName, toolData, engineArgs)
    _writeOutputAndRuntime(wdlFile)
    _writeParameterMeta(wdlFile, toolData, engineArgs)
    _writeWorkflow(wdlFile, toolName)


def main(argv=None):
    """Generate the WDL files; argv is [jsonDirectory, gatkVersion].

    Defaults to the process command line.  Returns 0 on success and 2 when
    the arguments are missing.
    """
    if argv is None:
        argv = sys.argv[1:]
    if len(argv) < 2:
        sys.stderr.write("usage: " + os.path.basename(sys.argv[0]) + " <jsonDirectory> <gatkVersion>\n")
        return 2
    directory, version = argv[0], argv[1]

    engineData = _loadJson(os.path.join(directory, _ENGINE_JSON))
    cmdArgs = _loadJson(os.path.join(directory, _CMD_JSON))["arguments"]

    outputDir = os.path.join(directory, _OUTPUT_SUBDIR)
    if not os.path.isdir(outputDir):
        os.makedirs(outputDir)

    for fileName in sorted(os.listdir(directory)):
        if ".json" not in fileName or "gatk_tools" not in fileName:
            continue
        toolData = _loadJson(os.path.join(directory, fileName))
        if "Tools" not in toolData["group"]:
            continue
        engineArgs = _resolveEngineArgs(toolData["name"], engineData, cmdArgs)
        wdlPath = os.path.join(outputDir, toolData["name"] + "_" + version + ".wdl")
        # `with` closes the file even if a malformed tool JSON raises midway.
        with open(wdlPath, "w") as wdlFile:
            _writeToolWdl(wdlFile, toolData, engineArgs, version)
    return 0


if __name__ == "__main__":
    sys.exit(main())
+ File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + File ? BQSR + File ? afterReportFile + File ? beforeReportFile + Boolean ? ignoreLastModificationTimes + File ? intermediateCsvFile + File ? plotsReportFile + + command { + java -jar ${gatk} \ + -T AnalyzeCovariates \ + -R ${ref} \ + ${default="" "--BQSR " + BQSR} \ + ${default="" "-after " + afterReportFile} \ + ${default="" "-before " + beforeReportFile} \ + -ignoreLMT ${default="false" ignoreLastModificationTimes} \ + ${default="" "-csv " + intermediateCsvFile} \ + ${default="" "-plots " + plotsReportFile} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + afterReportFile: "file containing the BQSR second-pass report file" + beforeReportFile: "file containing the BQSR first-pass report file" + ignoreLastModificationTimes: "do not emit warning messages related to suspicious last modification time order of inputs" + intermediateCsvFile: "location of the csv intermediate file" + plotsReportFile: "location of the output report" + BQSR: "Input covariates table file for on-the-fly base quality score recalibration" + } +} + +workflow AnalyzeCovariatesWf { + call AnalyzeCovariates +} From dc1be28533b128030d0475ec7ae7a2bd0a0894d1 Mon Sep 17 00:00:00 2001 From: knoblett Date: Thu, 6 Oct 2016 14:02:15 -0400 Subject: [PATCH 4/5] Move to GATKToolWorkflows_3.6 subfolder --- .../wrappers/gatk/{ => 
GATKToolWorkflows_3.6}/AnalyzeCovariates_3.6.wdl | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/wrappers/gatk/{ => GATKToolWorkflows_3.6}/AnalyzeCovariates_3.6.wdl (100%) diff --git a/scripts/wrappers/gatk/AnalyzeCovariates_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/AnalyzeCovariates_3.6.wdl similarity index 100% rename from scripts/wrappers/gatk/AnalyzeCovariates_3.6.wdl rename to scripts/wrappers/gatk/GATKToolWorkflows_3.6/AnalyzeCovariates_3.6.wdl From 55d93181ad14d640e524f236598a381c6d871022 Mon Sep 17 00:00:00 2001 From: knoblett Date: Thu, 6 Oct 2016 14:02:59 -0400 Subject: [PATCH 5/5] Add remaining GATK 3.6 tool workflows. --- .../GATKToolWorkflows_3.6/ASEReadCounter_3.6.wdl | 72 ++++++ .../ApplyRecalibration_3.6.wdl | 80 +++++++ .../GATKToolWorkflows_3.6/BaseRecalibrator_3.6.wdl | 113 ++++++++++ .../CalculateGenotypePosteriors_3.6.wdl | 78 +++++++ .../GATKToolWorkflows_3.6/CallableLoci_3.6.wdl | 78 +++++++ .../gatk/GATKToolWorkflows_3.6/CatVariants_3.6.wdl | 72 ++++++ .../gatk/GATKToolWorkflows_3.6/CheckPileup_3.6.wdl | 59 +++++ .../gatk/GATKToolWorkflows_3.6/ClipReads_3.6.wdl | 69 ++++++ .../GATKToolWorkflows_3.6/CombineGVCFs_3.6.wdl | 66 ++++++ .../GATKToolWorkflows_3.6/CombineVariants_3.6.wdl | 89 ++++++++ .../CompareCallableLoci_3.6.wdl | 57 +++++ .../gatk/GATKToolWorkflows_3.6/ContEst_3.6.wdl | 99 ++++++++ .../gatk/GATKToolWorkflows_3.6/CountBases_3.6.wdl | 48 ++++ .../GATKToolWorkflows_3.6/CountIntervals_3.6.wdl | 54 +++++ .../gatk/GATKToolWorkflows_3.6/CountLoci_3.6.wdl | 55 +++++ .../gatk/GATKToolWorkflows_3.6/CountMales_3.6.wdl | 51 +++++ .../GATKToolWorkflows_3.6/CountRODsByRef_3.6.wdl | 54 +++++ .../gatk/GATKToolWorkflows_3.6/CountRODs_3.6.wdl | 61 +++++ .../GATKToolWorkflows_3.6/CountReadEvents_3.6.wdl | 51 +++++ .../gatk/GATKToolWorkflows_3.6/CountReads_3.6.wdl | 50 +++++ .../CountTerminusEvent_3.6.wdl | 51 +++++ .../GATKToolWorkflows_3.6/DepthOfCoverage_3.6.wdl | 116 ++++++++++ 
.../GATKToolWorkflows_3.6/DiagnoseTargets_3.6.wdl | 84 +++++++ .../gatk/GATKToolWorkflows_3.6/DiffObjects_3.6.wdl | 75 +++++++ .../ErrorRatePerCycle_3.6.wdl | 57 +++++ .../gatk/GATKToolWorkflows_3.6/FastaStats_3.6.wdl | 48 ++++ .../FindCoveredIntervals_3.6.wdl | 87 ++++++++ .../gatk/GATKToolWorkflows_3.6/FlagStat_3.6.wdl | 53 +++++ .../GATKPaperGenotyper_3.6.wdl | 56 +++++ .../GCContentByInterval_3.6.wdl | 48 ++++ .../GenotypeConcordance_3.6.wdl | 69 ++++++ .../GATKToolWorkflows_3.6/GenotypeGVCFs_3.6.wdl | 92 ++++++++ .../GATKToolWorkflows_3.6/HaplotypeCaller_3.6.wdl | 227 +++++++++++++++++++ .../HaplotypeResolver_3.6.wdl | 57 +++++ .../GATKToolWorkflows_3.6/IndelRealigner_3.6.wdl | 90 ++++++++ .../LeftAlignAndTrimVariants_3.6.wdl | 60 +++++ .../GATKToolWorkflows_3.6/LeftAlignIndels_3.6.wdl | 51 +++++ .../gatk/GATKToolWorkflows_3.6/MuTect2_3.6.wdl | 248 +++++++++++++++++++++ .../PhaseByTransmission_3.6.wdl | 63 ++++++ .../gatk/GATKToolWorkflows_3.6/Pileup_3.6.wdl | 61 +++++ .../gatk/GATKToolWorkflows_3.6/PrintRODs_3.6.wdl | 48 ++++ .../gatk/GATKToolWorkflows_3.6/PrintReads_3.6.wdl | 74 ++++++ .../QualifyMissingIntervals_3.6.wdl | 74 ++++++ .../RandomlySplitVariants_3.6.wdl | 66 ++++++ .../ReadBackedPhasing_3.6.wdl | 81 +++++++ .../ReadClippingStats_3.6.wdl | 57 +++++ .../ReadGroupProperties_3.6.wdl | 51 +++++ .../ReadLengthDistribution_3.6.wdl | 51 +++++ .../RealignerTargetCreator_3.6.wdl | 68 ++++++ .../RegenotypeVariants_3.6.wdl | 53 +++++ .../GATKToolWorkflows_3.6/SelectHeaders_3.6.wdl | 65 ++++++ .../GATKToolWorkflows_3.6/SelectVariants_3.6.wdl | 161 +++++++++++++ .../SimulateReadsForVariants_3.6.wdl | 66 ++++++ .../GATKToolWorkflows_3.6/SplitNCigarReads_3.6.wdl | 66 ++++++ .../GATKToolWorkflows_3.6/SplitSamFile_3.6.wdl | 51 +++++ .../GATKToolWorkflows_3.6/UnifiedGenotyper_3.6.wdl | 151 +++++++++++++ .../GATKToolWorkflows_3.6/ValidateVariants_3.6.wdl | 63 ++++++ .../ValidationSiteSelector_3.6.wdl | 84 +++++++ .../GATKToolWorkflows_3.6/VariantAnnotator_3.6.wdl | 
95 ++++++++ .../gatk/GATKToolWorkflows_3.6/VariantEval_3.6.wdl | 116 ++++++++++ .../VariantFiltration_3.6.wdl | 96 ++++++++ .../VariantRecalibrator_3.6.wdl | 134 +++++++++++ .../VariantsToAllelicPrimitives_3.6.wdl | 51 +++++ .../VariantsToBinaryPed_3.6.wdl | 75 +++++++ .../GATKToolWorkflows_3.6/VariantsToTable_3.6.wdl | 72 ++++++ .../GATKToolWorkflows_3.6/VariantsToVCF_3.6.wdl | 57 +++++ 66 files changed, 5075 insertions(+) create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/ASEReadCounter_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/ApplyRecalibration_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/BaseRecalibrator_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/CalculateGenotypePosteriors_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/CallableLoci_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/CatVariants_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/CheckPileup_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/ClipReads_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/CombineGVCFs_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/CombineVariants_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/CompareCallableLoci_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/ContEst_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountBases_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountIntervals_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountLoci_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountMales_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountRODsByRef_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountRODs_3.6.wdl create mode 100644 
scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountReadEvents_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountReads_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountTerminusEvent_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/DepthOfCoverage_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/DiagnoseTargets_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/DiffObjects_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/ErrorRatePerCycle_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/FastaStats_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/FindCoveredIntervals_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/FlagStat_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/GATKPaperGenotyper_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/GCContentByInterval_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/GenotypeConcordance_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/GenotypeGVCFs_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/HaplotypeCaller_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/HaplotypeResolver_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/IndelRealigner_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/LeftAlignAndTrimVariants_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/LeftAlignIndels_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/MuTect2_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/PhaseByTransmission_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/Pileup_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/PrintRODs_3.6.wdl create mode 100644 
scripts/wrappers/gatk/GATKToolWorkflows_3.6/PrintReads_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/QualifyMissingIntervals_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/RandomlySplitVariants_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadBackedPhasing_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadClippingStats_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadGroupProperties_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadLengthDistribution_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/RealignerTargetCreator_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/RegenotypeVariants_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/SelectHeaders_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/SelectVariants_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/SimulateReadsForVariants_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/SplitNCigarReads_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/SplitSamFile_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/UnifiedGenotyper_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/ValidateVariants_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/ValidationSiteSelector_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantAnnotator_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantEval_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantFiltration_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantRecalibrator_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToAllelicPrimitives_3.6.wdl create mode 100644 
scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToBinaryPed_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToTable_3.6.wdl create mode 100644 scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToVCF_3.6.wdl diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ASEReadCounter_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ASEReadCounter_3.6.wdl new file mode 100644 index 0000000..426a052 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ASEReadCounter_3.6.wdl @@ -0,0 +1,72 @@ +# -------------------------------------------------------------------------------------------- +# This ASEReadCounter WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Calculate read counts per allele for allele-specific expression analysis +# -------------------------------------------------------------------------------------------- + +task ASEReadCounter { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + String unsafe + String ? countOverlapReadsType + String ? minBaseQuality + Int ? minDepthOfNonFilteredBase + Int ? minMappingQuality + String ? out + String ? 
outputFormat + String sitesVCFFile + + command { + java -jar ${gatk} \ + -T ASEReadCounter \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + --unsafe ${unsafe} \ + -overlap ${default="COUNT_FRAGMENTS_REQUIRE_SAME_BASE" countOverlapReadsType} \ + -mbq ${default="0" minBaseQuality} \ + -minDepth ${default="-1" minDepthOfNonFilteredBase} \ + -mmq ${default="0" minMappingQuality} \ + -o ${default="stdout" out} \ + outputFormat ${default="RTABLE" outputFormat} \ + -sites ${sitesVCFFile} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + countOverlapReadsType: "Handling of overlapping reads from the same fragment" + minBaseQuality: "Minimum base quality" + minDepthOfNonFilteredBase: "Minimum number of bases that pass filters" + minMappingQuality: "Minimum read mapping quality" + out: "An output file created by the walker. 
Will overwrite contents if file exists" + outputFormat: "Format of the output file, can be CSV, TABLE, RTABLE" + sitesVCFFile: "Undocumented option" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + unsafe: "Enable unsafe operations: nothing will be checked at runtime" + } +} + +workflow ASEReadCounterWf { + call ASEReadCounter +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ApplyRecalibration_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ApplyRecalibration_3.6.wdl new file mode 100644 index 0000000..f60616f --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ApplyRecalibration_3.6.wdl @@ -0,0 +1,80 @@ +# -------------------------------------------------------------------------------------------- +# This ApplyRecalibration WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Apply a score cutoff to filter variants based on a recalibration table +# -------------------------------------------------------------------------------------------- + +task ApplyRecalibration { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Int ? ntVal + Boolean ? excludeFiltered + Boolean ? ignore_all_filters + String ? ignore_filter + Array[String] task_input + Float ? lodCutoff + String ? mode + String ? out + String recal_file + File ? tranches_file + Float ? ts_filter_level + Boolean ? 
useAlleleSpecificAnnotations + + command { + java -jar ${gatk} \ + -T ApplyRecalibration \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nt" + ntVal} \ + -ef ${default="false" excludeFiltered} \ + -ignoreAllFilters ${default="false" ignore_all_filters} \ + ${default="" "-ignoreFilter " + ignore_filter} \ + -input ${task_input} \ + ${default="" "-lodCutoff " + lodCutoff} \ + -mode ${default="SNP" mode} \ + -o ${default="stdout" out} \ + -recalFile ${recal_file} \ + ${default="" "-tranchesFile " + tranches_file} \ + ${default="" "-ts_filter_level " + ts_filter_level} \ + -AS ${default="false" useAlleleSpecificAnnotations} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + excludeFiltered: "Don't output filtered loci after applying the recalibration" + ignore_all_filters: "If specified, the variant recalibrator will ignore all input filters. Useful to rerun the VQSR from a filtered output file." + ignore_filter: "If specified, the recalibration will be applied to variants marked as filtered by the specified filter name in the input VCF file" + task_input: "The raw input variants to be recalibrated" + lodCutoff: "The VQSLOD score below which to start filtering" + mode: "Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously." 
+ out: "The output filtered and recalibrated VCF file in which each variant is annotated with its VQSLOD value" + recal_file: "The input recal file used by ApplyRecalibration" + tranches_file: "The input tranches file describing where to cut the data" + ts_filter_level: "The truth sensitivity level at which to start filtering" + useAlleleSpecificAnnotations: "If specified, the tool will attempt to apply a filter to each allele based on the input tranches and allele-specific .recal file." + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow ApplyRecalibrationWf { + call ApplyRecalibration +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/BaseRecalibrator_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/BaseRecalibrator_3.6.wdl new file mode 100644 index 0000000..d044319 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/BaseRecalibrator_3.6.wdl @@ -0,0 +1,113 @@ +# -------------------------------------------------------------------------------------------- +# This BaseRecalibrator WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Detect systematic errors in base quality scores +# -------------------------------------------------------------------------------------------- + +task BaseRecalibrator { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + File ? BQSR + Int ? nctVal + String ? binary_tag_name + Float ? bqsrBAQGapOpenPenalty + String ? covariate + String ? deletions_default_quality + Int ? indels_context_size + String ? insertions_default_quality + Array[String] ? knownSites + Boolean ? list + String ? 
low_quality_tail + Boolean ? lowMemoryMode + Int ? maximum_cycle_value + Int ? mismatches_context_size + String ? mismatches_default_quality + Boolean ? no_standard_covs + File out + Int ? quantizing_levels + Boolean ? run_without_dbsnp_potentially_ruining_quality + String ? solid_nocall_strategy + String ? solid_recal_mode + Boolean ? sort_by_all_columns + + command { + java -jar ${gatk} \ + -T BaseRecalibrator \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="" "--BQSR " + BQSR} \ + ${default="" "-nct" + nctVal} \ + ${default="" "-bintag " + binary_tag_name} \ + -bqsrBAQGOP ${default="40.0" bqsrBAQGapOpenPenalty} \ + ${default="" "-cov " + covariate} \ + -ddq ${default="45" deletions_default_quality} \ + -ics ${default="3" indels_context_size} \ + -idq ${default="45" insertions_default_quality} \ + -knownSites ${default="[]" knownSites} \ + -ls ${default="false" list} \ + -lqt ${default="2" low_quality_tail} \ + -lowMemoryMode ${default="false" lowMemoryMode} \ + -maxCycle ${default="500" maximum_cycle_value} \ + -mcs ${default="2" mismatches_context_size} \ + -mdq ${default="-1" mismatches_default_quality} \ + -noStandard ${default="false" no_standard_covs} \ + -o ${out} \ + -ql ${default="16" quantizing_levels} \ + -run_without_dbsnp_potentially_ruining_quality ${default="false" run_without_dbsnp_potentially_ruining_quality} \ + -solid_nocall_strategy ${default="THROW_EXCEPTION" solid_nocall_strategy} \ + -sMode ${default="SET_Q_ZERO" solid_recal_mode} \ + -sortAllCols ${default="false" sort_by_all_columns} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: 
"dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + binary_tag_name: "the binary tag covariate name if using it" + bqsrBAQGapOpenPenalty: "BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets" + covariate: "One or more covariates to be used in the recalibration. Can be specified multiple times" + deletions_default_quality: "default quality for the base deletions covariate" + indels_context_size: "Size of the k-mer context to be used for base insertions and deletions" + insertions_default_quality: "default quality for the base insertions covariate" + knownSites: "A database of known polymorphic sites" + list: "List the available covariates and exit" + low_quality_tail: "minimum quality for the bases in the tail of the reads to be considered" + lowMemoryMode: "Reduce memory usage in multi-threaded code at the expense of threading efficiency" + maximum_cycle_value: "The maximum cycle value permitted for the Cycle covariate" + mismatches_context_size: "Size of the k-mer context to be used for base mismatches" + mismatches_default_quality: "default quality for the base mismatches covariate" + no_standard_covs: "Do not use the standard set of covariates, but rather just the ones listed using the -cov argument" + out: "The output recalibration table file to create" + quantizing_levels: "number of distinct quality scores in the quantized output" + run_without_dbsnp_potentially_ruining_quality: "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only." + solid_nocall_strategy: "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ" + solid_recal_mode: "How should we recalibrate solid bases in which the reference was inserted? 
Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS" + sort_by_all_columns: "Sort the rows in the tables of reports" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + BQSR: "Input covariates table file for on-the-fly base quality score recalibration" + } +} + +workflow BaseRecalibratorWf { + call BaseRecalibrator +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CalculateGenotypePosteriors_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CalculateGenotypePosteriors_3.6.wdl new file mode 100644 index 0000000..c742685 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CalculateGenotypePosteriors_3.6.wdl @@ -0,0 +1,78 @@ +# -------------------------------------------------------------------------------------------- +# This CalculateGenotypePosteriors WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Calculate genotype posterior likelihoods given panel data +# -------------------------------------------------------------------------------------------- + +task CalculateGenotypePosteriors { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Boolean ? defaultToAC + Float ? deNovoPrior + Boolean ? discoveredACpriorsOff + Float ? globalPrior + Boolean ? ignoreInputSamples + Int ? numRefSamplesIfNoCall + String ? out + Boolean ? skipFamilyPriors + Boolean ? skipPopulationPriors + Array[String] ? 
supporting + String variant + + command { + java -jar ${gatk} \ + -T CalculateGenotypePosteriors \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -useAC ${default="false" defaultToAC} \ + -DNP ${default="1.0E-6" deNovoPrior} \ + -useACoff ${default="false" discoveredACpriorsOff} \ + -G ${default="0.001" globalPrior} \ + -ext ${default="false" ignoreInputSamples} \ + -nrs ${default="0" numRefSamplesIfNoCall} \ + -o ${default="stdout" out} \ + -skipFam ${default="false" skipFamilyPriors} \ + -skipPop ${default="false" skipPopulationPriors} \ + -supporting ${default="[]" supporting} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + defaultToAC: "Use the AC field as opposed to MLEAC. Does nothing if VCF lacks MLEAC field" + deNovoPrior: "The de novo mutation prior" + discoveredACpriorsOff: "Do not use discovered allele count in the input callset for variants that do not appear in the external callset. " + globalPrior: "The global Dirichlet prior parameters for the allele frequency" + ignoreInputSamples: "Use external information only; do not inform genotype priors by the discovered allele frequency in the callset whose posteriors are being calculated. Useful for callsets containing related individuals." 
+ numRefSamplesIfNoCall: "The number of homozygous reference to infer were seen at a position where an other callset contains no site or genotype information" + out: "File to which variants should be written" + skipFamilyPriors: "Skip application of family-based priors" + skipPopulationPriors: "Skip application of population-based priors" + supporting: "Other callsets to use in generating genotype posteriors" + variant: "Input VCF file" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow CalculateGenotypePosteriorsWf { + call CalculateGenotypePosteriors +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CallableLoci_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CallableLoci_3.6.wdl new file mode 100644 index 0000000..c3e0bab --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CallableLoci_3.6.wdl @@ -0,0 +1,78 @@ +# -------------------------------------------------------------------------------------------- +# This CallableLoci WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Collect statistics on callable, uncallable, poorly mapped, and other parts of the genome +# -------------------------------------------------------------------------------------------- + +task CallableLoci { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + String ? format + Int ? maxDepth + Float ? maxFractionOfReadsWithLowMAPQ + String ? maxLowMAPQ + String ? minBaseQuality + Int ? minDepth + Int ? minDepthForLowMAPQ + String ? minMappingQuality + String ? 
out + File summary + + command { + java -jar ${gatk} \ + -T CallableLoci \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + -format ${default="BED" format} \ + -maxDepth ${default="-1" maxDepth} \ + -frlmq ${default="0.1" maxFractionOfReadsWithLowMAPQ} \ + -mlmq ${default="1" maxLowMAPQ} \ + -mbq ${default="20" minBaseQuality} \ + -minDepth ${default="4" minDepth} \ + -mdflmq ${default="10" minDepthForLowMAPQ} \ + -mmq ${default="10" minMappingQuality} \ + -o ${default="stdout" out} \ + -summary ${summary} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + format: "Output format" + maxDepth: "Maximum read depth before a locus is considered poorly mapped" + maxFractionOfReadsWithLowMAPQ: "If the fraction of reads at a base with low mapping quality exceeds this value, the site may be poorly mapped" + maxLowMAPQ: "Maximum value for MAPQ to be considered a problematic mapped read." + minBaseQuality: "Minimum quality of bases to count towards depth." + minDepth: "Minimum QC+ read depth before a locus is considered callable" + minDepthForLowMAPQ: "Minimum read depth before a locus is considered a potential candidate for poorly mapped" + minMappingQuality: "Minimum mapping quality of reads to count towards depth." + out: "An output file created by the walker. 
Will overwrite contents if file exists" + summary: "Name of file for output summary" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow CallableLociWf { + call CallableLoci +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CatVariants_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CatVariants_3.6.wdl new file mode 100644 index 0000000..ec68ab9 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CatVariants_3.6.wdl @@ -0,0 +1,72 @@ +# -------------------------------------------------------------------------------------------- +# This CatVariants WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Concatenate VCF files of non-overlapping genome intervals, all with the same set of samples +# -------------------------------------------------------------------------------------------- + +task CatVariants { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Boolean ? assumeSorted + Boolean ? help + String ? log_to_file + String ? logging_level + File outputFile + File reference + Array[File] variant + Int ? variant_index_parameter + String ? variant_index_type + Boolean ? 
version + + command { + java -jar ${gatk} \ + -T CatVariants \ + -R ${ref} \ + -assumeSorted ${default="false" assumeSorted} \ + -h ${default="false" help} \ + ${default="" "-log " + log_to_file} \ + -l ${default="INFO" logging_level} \ + -out ${outputFile} \ + -R ${reference} \ + -V ${variant} \ + --variant_index_parameter ${default="-1" variant_index_parameter} \ + --variant_index_type ${default="DYNAMIC_SEEK" variant_index_type} \ + -version ${default="false" version} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${outputFile}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + assumeSorted: "assumeSorted should be true if the input files are already sorted (based on the position of the variants)" + help: "Generate the help message" + log_to_file: "Set the logging location" + logging_level: "Set the minimum level of logging" + outputFile: "output file" + reference: "genome reference file .fasta" + variant: "Input VCF file/s" + variant_index_parameter: "the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator" + variant_index_type: "which type of IndexCreator to use for VCF/BCF indices" + version: "Output version information" + } +} + +workflow CatVariantsWf { + call CatVariants +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CheckPileup_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CheckPileup_3.6.wdl new file mode 100644 index 0000000..4c0beda --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CheckPileup_3.6.wdl @@ -0,0 +1,59 @@ +# 
-------------------------------------------------------------------------------------------- +# This CheckPileup WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Compare GATK's internal pileup to a reference Samtools pileup +# -------------------------------------------------------------------------------------------- + +task CheckPileup { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + Int ? ntVal + Boolean ? continue_after_error + String ? out + String pileup + + command { + java -jar ${gatk} \ + -T CheckPileup \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nt " + ntVal} \ + --continue_after_error ${default="false" continue_after_error} \ + -o ${default="stdout" out} \ + -pileup ${pileup} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + continue_after_error: "Continue after encountering an error" + out: "An output file created by the walker. 
Will overwrite contents if file exists" + pileup: "Pileup generated by Samtools" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow CheckPileupWf { + call CheckPileup +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ClipReads_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ClipReads_3.6.wdl new file mode 100644 index 0000000..7ea5581 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ClipReads_3.6.wdl @@ -0,0 +1,69 @@ +# -------------------------------------------------------------------------------------------- +# This ClipReads WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Read clipping based on quality, position or sequence matching +# -------------------------------------------------------------------------------------------- + +task ClipReads { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + String ? clipRepresentation + String ? clipSequence + String ? clipSequencesFile + String ? cyclesToTrim + String ? out + String ? outputStatistics + Int ? 
qTrimmingThreshold + + command { + java -jar ${gatk} \ + -T ClipReads \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + -CR ${default="WRITE_NS" clipRepresentation} \ + ${default="" "-X " + clipSequence} \ + ${default="" "-XF " + clipSequencesFile} \ + ${default="" "-CT " + cyclesToTrim} \ + -o ${default="stdout" out} \ + ${default="" "-os " + outputStatistics} \ + -QT ${default="-1" qTrimmingThreshold} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + clipRepresentation: "How should we actually clip the bases?" 
+ clipSequence: "Remove sequences within reads matching this sequence" + clipSequencesFile: "Remove sequences within reads matching the sequences in this FASTA file" + cyclesToTrim: "String indicating machine cycles to clip from the reads" + out: "Write BAM output here" + outputStatistics: "File to output statistics" + qTrimmingThreshold: "If provided, the Q-score clipper will be applied" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow ClipReadsWf { + call ClipReads +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CombineGVCFs_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CombineGVCFs_3.6.wdl new file mode 100644 index 0000000..3112608 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CombineGVCFs_3.6.wdl @@ -0,0 +1,66 @@ +# -------------------------------------------------------------------------------------------- +# This CombineGVCFs WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Combine per-sample gVCF files produced by HaplotypeCaller into a multi-sample gVCF file +# -------------------------------------------------------------------------------------------- + +task CombineGVCFs { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Array[String] ? annotation + Int ? breakBandsAtMultiplesOf + Boolean ? convertToBasePairResolution + String ? dbsnp + String ? group + String ? 
out + Array[String] variant + + command { + java -jar ${gatk} \ + -T CombineGVCFs \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -A ${default="[AS_RMSMappingQuality]" annotation} \ + -breakBandsAtMultiplesOf ${default="0" breakBandsAtMultiplesOf} \ + -bpResolution ${default="false" convertToBasePairResolution} \ + ${default="" "-D " + dbsnp} \ + -G ${default="[StandardAnnotation]" group} \ + -o ${default="stdout" out} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + annotation: "One or more specific annotations to recompute. 
The single value 'none' removes the default annotations" + breakBandsAtMultiplesOf: "If > 0, reference bands will be broken up at genomic positions that are multiples of this number" + convertToBasePairResolution: "If specified, convert banded gVCFs to all-sites gVCFs" + dbsnp: "dbSNP file" + group: "One or more classes/groups of annotations to apply to variant calls" + out: "File to which the combined gVCF should be written" + variant: "One or more input gVCF files" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow CombineGVCFsWf { + call CombineGVCFs +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CombineVariants_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CombineVariants_3.6.wdl new file mode 100644 index 0000000..ad2a8bb --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CombineVariants_3.6.wdl @@ -0,0 +1,89 @@ +# -------------------------------------------------------------------------------------------- +# This CombineVariants WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Combine variant records from different sources +# -------------------------------------------------------------------------------------------- + +task CombineVariants { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Int ? ntVal + Boolean ? assumeIdenticalSamples + Boolean ? excludeNonVariants + Boolean ? filteredAreUncalled + String ? filteredrecordsmergetype + String ? genotypemergeoption + Boolean ? mergeInfoWithMaxAC + Boolean ? minimalVCF + Int ? minimumN + String ? out + Boolean ? printComplexMerges + String ? rod_priority_list + String ? setKey + Boolean ? 
suppressCommandLineHeader + Array[String] variant + + command { + java -jar ${gatk} \ + -T CombineVariants \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nt " + ntVal} \ + -assumeIdenticalSamples ${default="false" assumeIdenticalSamples} \ + -env ${default="false" excludeNonVariants} \ + -filteredAreUncalled ${default="false" filteredAreUncalled} \ + -filteredRecordsMergeType ${default="KEEP_IF_ANY_UNFILTERED" filteredrecordsmergetype} \ + ${default="" "-genotypeMergeOptions " + genotypemergeoption} \ + -mergeInfoWithMaxAC ${default="false" mergeInfoWithMaxAC} \ + -minimalVCF ${default="false" minimalVCF} \ + -minN ${default="1" minimumN} \ + -o ${default="stdout" out} \ + -printComplexMerges ${default="false" printComplexMerges} \ + ${default="" "-priority " + rod_priority_list} \ + -setKey ${default="set" setKey} \ + -suppressCommandLineHeader ${default="false" suppressCommandLineHeader} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + assumeIdenticalSamples: "Assume input VCFs have identical sample sets and disjoint calls" + excludeNonVariants: "Exclude sites where no variation is present after merging" + filteredAreUncalled: "Treat filtered variants as uncalled" + filteredrecordsmergetype: "Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields" + genotypemergeoption: "Determines how we should merge genotype records for samples shared across the ROD files" + 
mergeInfoWithMaxAC: "Use the INFO content of the record with the highest AC" + minimalVCF: "Emit a sites-only file" + minimumN: "Minimum number of input files the site must be observed in to be included" + out: "File to which variants should be written" + printComplexMerges: "Emit interesting sites requiring complex compatibility merging to file" + rod_priority_list: "Ordered list specifying priority for merging" + setKey: "Key name for the set attribute" + suppressCommandLineHeader: "Do not output the command line to the header" + variant: "VCF files to merge together" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow CombineVariantsWf { + call CombineVariants +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CompareCallableLoci_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CompareCallableLoci_3.6.wdl new file mode 100644 index 0000000..b6c4493 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CompareCallableLoci_3.6.wdl @@ -0,0 +1,57 @@ +# -------------------------------------------------------------------------------------------- +# This CompareCallableLoci WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Compare callability statistics +# -------------------------------------------------------------------------------------------- + +task CompareCallableLoci { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + String comp1 + String comp2 + String ? out + String ? 
printstate + + command { + java -jar ${gatk} \ + -T CompareCallableLoci \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -comp1 ${comp1} \ + -comp2 ${comp2} \ + -o ${default="stdout" out} \ + ${default="" "-printState " + printstate} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + comp1: "First comparison track name" + comp2: "Second comparison track name" + out: "An output file created by the walker. Will overwrite contents if file exists" + printstate: "If provided, prints sites satisfying this state pair" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow CompareCallableLociWf { + call CompareCallableLoci +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ContEst_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ContEst_3.6.wdl new file mode 100644 index 0000000..5710894 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ContEst_3.6.wdl @@ -0,0 +1,99 @@ +# -------------------------------------------------------------------------------------------- +# This ContEst WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Estimate cross-sample contamination +# -------------------------------------------------------------------------------------------- + +task ContEst { + File gatk + File ref + File refIndex 
+ File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Boolean input_fileeval + Boolean ? input_filegenotype + Array[String] ? intervals + String ? base_report + Float ? beta_threshold + String ? genotype_mode + String ? genotypes + String ? lane_level_contamination + String ? likelihood_file + Int ? min_mapq + Int ? min_qscore + Int ? minimum_base_count + String ? out + String popfile + String ? population + Float ? precision + String ? sample_name + Float ? trim_fraction + Boolean ? verify_sample + + command { + java -jar ${gatk} \ + -T ContEst \ + -R ${ref} \ + --input_file:eval ${input_fileeval} \ + ${default="" "--input_file:genotype " + input_filegenotype} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-br " + base_report} \ + --beta_threshold ${default="0.95" beta_threshold} \ + -gm ${default="HARD_THRESHOLD" genotype_mode} \ + ${default="" "-genotypes " + genotypes} \ + ${default="" "-llc " + lane_level_contamination} \ + ${default="" "-lf " + likelihood_file} \ + --min_mapq ${default="20" min_mapq} \ + --min_qscore ${default="20" min_qscore} \ + -mbc ${default="500" minimum_base_count} \ + -o ${default="stdout" out} \ + -pf ${popfile} \ + -population ${default="CEU" population} \ + -pc ${default="0.1" precision} \ + -sn ${default="unknown" sample_name} \ + --trim_fraction ${default="0.01" trim_fraction} \ + -vs ${default="false" verify_sample} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command 
line at run time" + base_report: "Where to write a full report about the loci we processed" + beta_threshold: "threshold for p(f>=0.5) to trim" + genotype_mode: "which approach should we take to getting the genotypes (only in array-free mode)" + genotypes: "the genotype information for our sample" + lane_level_contamination: "set to META (default), SAMPLE or READGROUP to produce per-bam, per-sample or per-lane estimates" + likelihood_file: "write the likelihood values to the specified location" + min_mapq: "threshold for minimum mapping quality score" + min_qscore: "threshold for minimum base quality score" + minimum_base_count: "what minimum number of bases do we need to see to call contamination in a lane / sample?" + out: "An output file created by the walker. Will overwrite contents if file exists" + popfile: "the variant file containing information about the population allele frequencies" + population: "evaluate contamination for just a single contamination population" + precision: "the degree of precision to which the contamination tool should estimate (e.g. the bin size)" + sample_name: "The sample name; used to extract the correct genotypes from multi-sample truth vcfs" + trim_fraction: "at most, what fraction of sites should be trimmed based on BETA_THRESHOLD" + verify_sample: "should we verify that the sample name is in the genotypes file?" 
+ input_fileeval: "Output version information" + input_filegenotype: "Output version information" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow ContEstWf { + call ContEst +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountBases_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountBases_3.6.wdl new file mode 100644 index 0000000..fc6254c --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountBases_3.6.wdl @@ -0,0 +1,48 @@ +# -------------------------------------------------------------------------------------------- +# This CountBases WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Count the number of bases in a set of reads +# -------------------------------------------------------------------------------------------- + +task CountBases { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? 
intervals + + command { + java -jar ${gatk} \ + -T CountBases \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow CountBasesWf { + call CountBases +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountIntervals_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountIntervals_3.6.wdl new file mode 100644 index 0000000..7e1c7a6 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountIntervals_3.6.wdl @@ -0,0 +1,54 @@ +# -------------------------------------------------------------------------------------------- +# This CountIntervals WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Count contiguous regions in an interval list +# -------------------------------------------------------------------------------------------- + +task CountIntervals { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Array[String] ? check + Int ? numOverlaps + String ? 
out + + command { + java -jar ${gatk} \ + -T CountIntervals \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -check ${default="[]" check} \ + -no ${default="2" numOverlaps} \ + -o ${default="stdout" out} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + check: "Any number of RODs" + numOverlaps: "Count all occurrences of X or more overlapping intervals; defaults to 2" + out: "An output file created by the walker. Will overwrite contents if file exists" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow CountIntervalsWf { + call CountIntervals +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountLoci_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountLoci_3.6.wdl new file mode 100644 index 0000000..4c4dc08 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountLoci_3.6.wdl @@ -0,0 +1,55 @@ +# -------------------------------------------------------------------------------------------- +# This CountLoci WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Count the total number of covered loci +# -------------------------------------------------------------------------------------------- + +task CountLoci { + File gatk + File ref + File refIndex + File refDict + String ? 
userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + Int ? nctVal + Int ? ntVal + String ? out + + command { + java -jar ${gatk} \ + -T CountLoci \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nct" + nctVal} \ + ${default="" "-nt" + ntVal} \ + -o ${default="stdout" out} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + out: "An output file created by the walker. 
Will overwrite contents if file exists" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow CountLociWf { + call CountLoci +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountMales_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountMales_3.6.wdl new file mode 100644 index 0000000..4f61edf --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountMales_3.6.wdl @@ -0,0 +1,51 @@ +# -------------------------------------------------------------------------------------------- +# This CountMales WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Count the number of reads seen from male samples +# -------------------------------------------------------------------------------------------- + +task CountMales { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + String ? 
out + + command { + java -jar ${gatk} \ + -T CountMales \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + -o ${default="stdout" out} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + out: "An output file created by the walker. Will overwrite contents if file exists" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow CountMalesWf { + call CountMales +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountRODsByRef_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountRODsByRef_3.6.wdl new file mode 100644 index 0000000..46d4704 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountRODsByRef_3.6.wdl @@ -0,0 +1,54 @@ +# -------------------------------------------------------------------------------------------- +# This CountRODsByRef WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Count the number of ROD objects encountered along the reference +# -------------------------------------------------------------------------------------------- + +task CountRODsByRef { + File gatk + File ref + File refIndex + File refDict + String ? 
userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Array[String] ? rod + Boolean ? showSkipped + Boolean ? verbose + + command { + java -jar ${gatk} \ + -T CountRODsByRef \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -rod ${default="[]" rod} \ + -s ${default="false" showSkipped} \ + -v ${default="false" verbose} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + rod: "Input VCF file(s)" + showSkipped: "If true, this tool will print out the skipped locations" + verbose: "If true, this tool will print out detailed information about the rods it finds and locations" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow CountRODsByRefWf { + call CountRODsByRef +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountRODs_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountRODs_3.6.wdl new file mode 100644 index 0000000..abd5e76 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountRODs_3.6.wdl @@ -0,0 +1,61 @@ +# -------------------------------------------------------------------------------------------- +# This CountRODs WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Count the number of ROD objects encountered +# 
-------------------------------------------------------------------------------------------- + +task CountRODs { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Int ? nctVal + Int ? ntVal + String ? out + Array[String] rod + Boolean ? showSkipped + Boolean ? verbose + + command { + java -jar ${gatk} \ + -T CountRODs \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nct" + nctVal} \ + ${default="" "-nt" + ntVal} \ + -o ${default="stdout" out} \ + -rod ${rod} \ + -s ${default="false" showSkipped} \ + -v ${default="false" verbose} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + out: "An output file created by the walker. 
Will overwrite contents if file exists" + rod: "Input VCF file(s)" + showSkipped: "If true, this tool will print out the skipped locations" + verbose: "If true, this tool will print out detailed information about the rods it finds and locations" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow CountRODsWf { + call CountRODs +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountReadEvents_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountReadEvents_3.6.wdl new file mode 100644 index 0000000..700142d --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountReadEvents_3.6.wdl @@ -0,0 +1,51 @@ +# -------------------------------------------------------------------------------------------- +# This CountReadEvents WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Count the number of read events +# -------------------------------------------------------------------------------------------- + +task CountReadEvents { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + String ? 
out + + command { + java -jar ${gatk} \ + -T CountReadEvents \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + -o ${default="stdout" out} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + out: "An output file created by the walker. Will overwrite contents if file exists" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow CountReadEventsWf { + call CountReadEvents +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountReads_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountReads_3.6.wdl new file mode 100644 index 0000000..30d771d --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountReads_3.6.wdl @@ -0,0 +1,50 @@ +# -------------------------------------------------------------------------------------------- +# This CountReads WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Count the number of reads +# -------------------------------------------------------------------------------------------- + +task CountReads { + File gatk + File ref + File refIndex + File refDict + String ? 
userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + Int ? nctVal + + command { + java -jar ${gatk} \ + -T CountReads \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nct" + nctVal} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow CountReadsWf { + call CountReads +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountTerminusEvent_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountTerminusEvent_3.6.wdl new file mode 100644 index 0000000..ea53c5c --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/CountTerminusEvent_3.6.wdl @@ -0,0 +1,51 @@ +# -------------------------------------------------------------------------------------------- +# This CountTerminusEvent WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Count the number of reads ending in insertions, deletions or soft-clips +# -------------------------------------------------------------------------------------------- + +task CountTerminusEvent { + File gatk + File ref + 
File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + String ? out + + command { + java -jar ${gatk} \ + -T CountTerminusEvent \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + -o ${default="stdout" out} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + out: "An output file created by the walker. Will overwrite contents if file exists" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow CountTerminusEventWf { + call CountTerminusEvent +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/DepthOfCoverage_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/DepthOfCoverage_3.6.wdl new file mode 100644 index 0000000..baf0188 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/DepthOfCoverage_3.6.wdl @@ -0,0 +1,116 @@ +# -------------------------------------------------------------------------------------------- +# This DepthOfCoverage WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Assess sequence coverage by a wide array of metrics, partitioned by sample, read group, 
or library +# -------------------------------------------------------------------------------------------- + +task DepthOfCoverage { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + Int ? ntVal + File ? calculateCoverageOverGenes + String ? countType + Boolean ? ignoreDeletionSites + Boolean ? includeDeletions + Boolean ? includeRefNSites + String ? maxBaseQuality + Int ? maxMappingQuality + String ? minBaseQuality + Int ? minMappingQuality + Int ? nBins + Boolean ? omitDepthOutputAtEachBase + Boolean ? omitIntervalStatistics + Boolean ? omitLocusTable + Boolean ? omitPerSampleStats + String ? out + String ? outputFormat + String ? partitionType + Boolean ? printBaseCounts + Boolean ? printBinEndpointsAndExit + Int ? start + Int ? stop + String ? summaryCoverageThreshold + + command { + java -jar ${gatk} \ + -T DepthOfCoverage \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nt" + ntVal} \ + ${default="" "-geneList " + calculateCoverageOverGenes} \ + countType ${default="COUNT_READS" countType} \ + ignoreDeletionSites ${default="false" ignoreDeletionSites} \ + -dels ${default="false" includeDeletions} \ + includeRefNSites ${default="false" includeRefNSites} \ + maxBaseQuality ${default="127" maxBaseQuality} \ + maxMappingQuality ${default="2147483647" maxMappingQuality} \ + -mbq ${default="-1" minBaseQuality} \ + -mmq ${default="-1" minMappingQuality} \ + nBins ${default="499" nBins} \ + -omitBaseOutput ${default="false" omitDepthOutputAtEachBase} \ + -omitIntervals ${default="false" omitIntervalStatistics} \ + -omitLocusTable ${default="false" omitLocusTable} \ + -omitSampleSummary ${default="false" omitPerSampleStats} \ + -o ${default="None" out} \ + outputFormat ${default="rtable" outputFormat} \ + -pt ${default="[sample]" 
partitionType} \ + -baseCounts ${default="false" printBaseCounts} \ + printBinEndpointsAndExit ${default="false" printBinEndpointsAndExit} \ + start ${default="1" start} \ + stop ${default="500" stop} \ + -ct ${default="[15]" summaryCoverageThreshold} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + calculateCoverageOverGenes: "Calculate coverage statistics over this list of genes" + countType: "How should overlapping reads from the same fragment be handled?" + ignoreDeletionSites: "Ignore sites consisting only of deletions" + includeDeletions: "Include information on deletions" + includeRefNSites: "Include sites where the reference is N" + maxBaseQuality: "Maximum quality of bases to count towards depth" + maxMappingQuality: "Maximum mapping quality of reads to count towards depth" + minBaseQuality: "Minimum quality of bases to count towards depth" + minMappingQuality: "Minimum mapping quality of reads to count towards depth" + nBins: "Number of bins to use for granular binning" + omitDepthOutputAtEachBase: "Do not output depth of coverage at each base" + omitIntervalStatistics: "Do not calculate per-interval statistics" + omitLocusTable: "Do not calculate per-sample per-depth counts of loci" + omitPerSampleStats: "Do not output the summary files per-sample" + out: "An output file created by the walker. 
Will overwrite contents if file exists" + outputFormat: "The format of the output file" + partitionType: "Partition type for depth of coverage" + printBaseCounts: "Add base counts to per-locus output" + printBinEndpointsAndExit: "Print the bin values and exit immediately" + start: "Starting (left endpoint) for granular binning" + stop: "Ending (right endpoint) for granular binning" + summaryCoverageThreshold: "Coverage threshold (in percent) for summarizing statistics" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow DepthOfCoverageWf { + call DepthOfCoverage +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/DiagnoseTargets_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/DiagnoseTargets_3.6.wdl new file mode 100644 index 0000000..9585f78 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/DiagnoseTargets_3.6.wdl @@ -0,0 +1,84 @@ +# -------------------------------------------------------------------------------------------- +# This DiagnoseTargets WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Analyze coverage distribution and validate read mates per interval and per sample +# -------------------------------------------------------------------------------------------- + +task DiagnoseTargets { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + Float ? bad_mate_status_threshold + Float ? coverage_status_threshold + Float ? excessive_coverage_status_threshold + Int ? maximum_coverage + Int ? maximum_insert_size + Int ? minimum_base_quality + Int ? 
minimum_coverage + Int ? minimum_mapping_quality + String ? missing_intervals + String ? out + Float ? quality_status_threshold + Float ? voting_status_threshold + + command { + java -jar ${gatk} \ + -T DiagnoseTargets \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + -stBM ${default="0.5" bad_mate_status_threshold} \ + -stC ${default="0.2" coverage_status_threshold} \ + -stXC ${default="0.2" excessive_coverage_status_threshold} \ + -max ${default="1073741823" maximum_coverage} \ + -ins ${default="500" maximum_insert_size} \ + -BQ ${default="20" minimum_base_quality} \ + -min ${default="5" minimum_coverage} \ + -MQ ${default="20" minimum_mapping_quality} \ + ${default="" "-missing " + missing_intervals} \ + -o ${default="stdout" out} \ + -stQ ${default="0.5" quality_status_threshold} \ + -stV ${default="0.5" voting_status_threshold} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + bad_mate_status_threshold: "The proportion of the loci needed for calling BAD_MATE" + coverage_status_threshold: "The proportion of the loci needed for calling LOW_COVERAGE and COVERAGE_GAPS" + excessive_coverage_status_threshold: "The proportion of the loci needed for calling EXCESSIVE_COVERAGE" + maximum_coverage: "The maximum allowable coverage, used for calling EXCESSIVE_COVERAGE" + maximum_insert_size: "The maximum allowed distance between a read and its mate" + minimum_base_quality: "The minimum Base Quality that is considered for calls" + 
minimum_coverage: "The minimum allowable coverage, used for calling LOW_COVERAGE" + minimum_mapping_quality: "The minimum read mapping quality considered for calls" + missing_intervals: "Produces a file with the intervals that don't pass filters" + out: "File to which interval statistics should be written" + quality_status_threshold: "The proportion of the loci needed for calling POOR_QUALITY" + voting_status_threshold: "The needed proportion of samples containing a call for the interval to adopt the call " + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow DiagnoseTargetsWf { + call DiagnoseTargets +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/DiffObjects_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/DiffObjects_3.6.wdl new file mode 100644 index 0000000..0999851 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/DiffObjects_3.6.wdl @@ -0,0 +1,75 @@ +# -------------------------------------------------------------------------------------------- +# This DiffObjects WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: A generic engine for comparing tree-structured objects +# -------------------------------------------------------------------------------------------- + +task DiffObjects { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Boolean ? doPairwise + Int ? iterations + File master + Int ? maxCount1Diffs + Int ? maxDiffs + Int ? maxObjectsToRead + Int ? maxRawDiffsToSummarize + Int ? minCountForDiff + String ? out + Boolean ? 
showItemizedDifferences + File test + + command { + java -jar ${gatk} \ + -T DiffObjects \ + -R ${ref} \ + -doPairwise ${default="false" doPairwise} \ + iterations ${default="1" iterations} \ + -m ${master} \ + -M1 ${default="0" maxCount1Diffs} \ + -M ${default="0" maxDiffs} \ + -motr ${default="-1" maxObjectsToRead} \ + -maxRawDiffsToSummarize ${default="-1" maxRawDiffsToSummarize} \ + -MCFD ${default="1" minCountForDiff} \ + -o ${default="stdout" out} \ + -SID ${default="false" showItemizedDifferences} \ + -t ${test} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + doPairwise: "If provided, we will compute the minimum pairwise differences to summary, which can be extremely expensive" + iterations: "Number of iterations to perform, should be 1 unless you are doing memory testing" + master: "Master file: expected results" + maxCount1Diffs: "Max. number of diffs occuring exactly once in the file to process" + maxDiffs: "Max. number of diffs to process" + maxObjectsToRead: "Max. number of objects to read from the files. -1 [default] means unlimited" + maxRawDiffsToSummarize: "Max. number of differences to include in the summary. -1 [default] means unlimited" + minCountForDiff: "Min number of observations for a records to display" + out: "File to which results should be written" + showItemizedDifferences: "Should we enumerate all differences between the files?" 
+ test: "Test file: new results to compare to the master file" + } +} + +workflow DiffObjectsWf { + call DiffObjects +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ErrorRatePerCycle_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ErrorRatePerCycle_3.6.wdl new file mode 100644 index 0000000..3bad8dd --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ErrorRatePerCycle_3.6.wdl @@ -0,0 +1,57 @@ +# -------------------------------------------------------------------------------------------- +# This ErrorRatePerCycle WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Compute the read error rate per position +# -------------------------------------------------------------------------------------------- + +task ErrorRatePerCycle { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + Int ? min_base_quality_score + Int ? min_mapping_quality_score + String ? 
out + + command { + java -jar ${gatk} \ + -T ErrorRatePerCycle \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + -mbq ${default="0" min_base_quality_score} \ + -mmq ${default="20" min_mapping_quality_score} \ + -o ${default="stdout" out} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + min_base_quality_score: "Minimum base quality required to consider a base for calling" + min_mapping_quality_score: "Minimum read mapping quality required to consider a read for calling" + out: "An output file created by the walker. 
Will overwrite contents if file exists" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow ErrorRatePerCycleWf { + call ErrorRatePerCycle +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/FastaStats_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/FastaStats_3.6.wdl new file mode 100644 index 0000000..b215b94 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/FastaStats_3.6.wdl @@ -0,0 +1,48 @@ +# -------------------------------------------------------------------------------------------- +# This FastaStats WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Calculate basic statistics about the reference sequence itself +# -------------------------------------------------------------------------------------------- + +task FastaStats { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + String ? 
out + + command { + java -jar ${gatk} \ + -T FastaStats \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -o ${default="stdout" out} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + out: "An output file created by the walker. Will overwrite contents if file exists" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow FastaStatsWf { + call FastaStats +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/FindCoveredIntervals_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/FindCoveredIntervals_3.6.wdl new file mode 100644 index 0000000..2ee4e78 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/FindCoveredIntervals_3.6.wdl @@ -0,0 +1,87 @@ +# -------------------------------------------------------------------------------------------- +# This FindCoveredIntervals WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Outputs a list of intervals that are covered to or above a given threshold +# -------------------------------------------------------------------------------------------- + +task FindCoveredIntervals { + File gatk + File ref + File refIndex + File refDict + String ? 
userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + Float ? activeProbabilityThreshold + Int ? activeRegionExtension + Array[String] ? activeRegionIn + Int ? activeRegionMaxSize + String ? activeRegionOut + String ? activityProfileOut + Float ? bandPassSigma + Int ? coverage_threshold + Boolean ? forceActive + Int ? minBaseQuality + Int ? minMappingQuality + String ? out + Boolean ? uncovered + + command { + java -jar ${gatk} \ + -T FindCoveredIntervals \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + -ActProbThresh ${default="0.002" activeProbabilityThreshold} \ + ${default="" "-activeRegionExtension " + activeRegionExtension} \ + ${default="" "-AR " + activeRegionIn} \ + ${default="" "-activeRegionMaxSize " + activeRegionMaxSize} \ + ${default="" "-ARO " + activeRegionOut} \ + ${default="" "-APO " + activityProfileOut} \ + ${default="" "-bandPassSigma " + bandPassSigma} \ + -cov ${default="20" coverage_threshold} \ + -forceActive ${default="false" forceActive} \ + -minBQ ${default="0" minBaseQuality} \ + -minMQ ${default="0" minMappingQuality} \ + -o ${default="stdout" out} \ + -u ${default="false" uncovered} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + activeProbabilityThreshold: "Threshold for the probability of a profile state being active." 
+ activeRegionExtension: "The active region extension; if not provided defaults to Walker annotated default" + activeRegionIn: "Use this interval list file as the active regions to process" + activeRegionMaxSize: "The active region maximum size; if not provided defaults to Walker annotated default" + activeRegionOut: "Output the active region to this IGV formatted file" + activityProfileOut: "Output the raw activity profile results in IGV format" + bandPassSigma: "The sigma of the band pass filter Gaussian kernel; if not provided defaults to Walker annotated default" + coverage_threshold: "The minimum allowable coverage to be considered covered" + forceActive: "If provided, all bases will be tagged as active" + minBaseQuality: "The minimum allowable base quality score to be counted for coverage" + minMappingQuality: "The minimum allowable mapping quality score to be counted for coverage" + out: "An output file created by the walker. Will overwrite contents if file exists" + uncovered: "output intervals that fail the coverage threshold instead" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow FindCoveredIntervalsWf { + call FindCoveredIntervals +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/FlagStat_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/FlagStat_3.6.wdl new file mode 100644 index 0000000..f5f4d49 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/FlagStat_3.6.wdl @@ -0,0 +1,53 @@ +# -------------------------------------------------------------------------------------------- +# This FlagStat WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Collect statistics about sequence reads based on their SAM flags +# 
-------------------------------------------------------------------------------------------- + +task FlagStat { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + Int ? nctVal + String ? out + + command { + java -jar ${gatk} \ + -T FlagStat \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nct " + nctVal} \ + -o ${default="stdout" out} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + out: "An output file created by the walker. 
Will overwrite contents if file exists" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow FlagStatWf { + call FlagStat +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/GATKPaperGenotyper_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/GATKPaperGenotyper_3.6.wdl new file mode 100644 index 0000000..b986460 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/GATKPaperGenotyper_3.6.wdl @@ -0,0 +1,56 @@ +# -------------------------------------------------------------------------------------------- +# This GATKPaperGenotyper WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Simple Bayesian genotyper used in the original GATK paper +# -------------------------------------------------------------------------------------------- + +task GATKPaperGenotyper { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + Int ? ntVal + Float ? log_odds_score + String ? 
out + + command { + java -jar ${gatk} \ + -T GATKPaperGenotyper \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nt " + ntVal} \ + -LOD ${default="3.0" log_odds_score} \ + -o ${default="stdout" out} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + log_odds_score: "The LOD threshold for us to call confidently a genotype" + out: "An output file created by the walker. Will overwrite contents if file exists" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow GATKPaperGenotyperWf { + call GATKPaperGenotyper +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/GCContentByInterval_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/GCContentByInterval_3.6.wdl new file mode 100644 index 0000000..87306b3 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/GCContentByInterval_3.6.wdl @@ -0,0 +1,48 @@ +# -------------------------------------------------------------------------------------------- +# This GCContentByInterval WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Calculates the GC content of the reference sequence for each interval +# 
-------------------------------------------------------------------------------------------- + +task GCContentByInterval { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + String ? out + + command { + java -jar ${gatk} \ + -T GCContentByInterval \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -o ${default="stdout" out} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + out: "An output file created by the walker. 
Will overwrite contents if file exists" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow GCContentByIntervalWf { + call GCContentByInterval +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/GenotypeConcordance_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/GenotypeConcordance_3.6.wdl new file mode 100644 index 0000000..939bd86 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/GenotypeConcordance_3.6.wdl @@ -0,0 +1,69 @@ +# -------------------------------------------------------------------------------------------- +# This GenotypeConcordance WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Genotype concordance between two callsets +# -------------------------------------------------------------------------------------------- + +task GenotypeConcordance { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + String comp + String eval + Array[String] ? genotypeFilterExpressionComp + Array[String] ? genotypeFilterExpressionEval + Boolean ? ignoreFilters + Boolean ? moltenize + String ? out + String ? 
printInterestingSites + + command { + java -jar ${gatk} \ + -T GenotypeConcordance \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -comp ${comp} \ + -eval ${eval} \ + -gfc ${default="[]" genotypeFilterExpressionComp} \ + -gfe ${default="[]" genotypeFilterExpressionEval} \ + --ignoreFilters ${default="false" ignoreFilters} \ + -moltenize ${default="false" moltenize} \ + -o ${default="stdout" out} \ + ${default="" "-sites " + printInterestingSites} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + comp: "The variants and genotypes to compare against" + eval: "The variants and genotypes to evaluate" + genotypeFilterExpressionComp: "One or more criteria to use to set COMP genotypes to no-call. These genotype-level filters are only applied to the COMP rod." + genotypeFilterExpressionEval: "One or more criteria to use to set EVAL genotypes to no-call. These genotype-level filters are only applied to the EVAL rod." + ignoreFilters: "Filters will be ignored" + moltenize: "Molten rather than tabular output" + out: "An output file created by the walker. Will overwrite contents if file exists" + printInterestingSites: "File to output the discordant sites and genotypes." 
+ intervals: "One or more genomic intervals over which to operate" + } +} + +workflow GenotypeConcordanceWf { + call GenotypeConcordance +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/GenotypeGVCFs_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/GenotypeGVCFs_3.6.wdl new file mode 100644 index 0000000..79d2880 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/GenotypeGVCFs_3.6.wdl @@ -0,0 +1,92 @@ +# -------------------------------------------------------------------------------------------- +# This GenotypeGVCFs WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Perform joint genotyping on gVCF files produced by HaplotypeCaller +# -------------------------------------------------------------------------------------------- + +task GenotypeGVCFs { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Int ? ntVal + Boolean ? annotateNDA + Array[String] ? annotation + String ? dbsnp + Array[String] ? group + Float ? heterozygosity + Boolean ? includeNonVariantSites + Float ? indel_heterozygosity + Array[Float] ? input_prior + Int ? max_alternate_alleles + Int ? max_num_PL_values + String ? out + Int ? sample_ploidy + Float ? standard_min_confidence_threshold_for_calling + Float ? 
standard_min_confidence_threshold_for_emitting + Array[String] variant + + command { + java -jar ${gatk} \ + -T GenotypeGVCFs \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nt " + ntVal} \ + -nda ${default="false" annotateNDA} \ + -A ${default="[]" annotation} \ + ${default="" "-D " + dbsnp} \ + -G ${default="[StandardAnnotation]" group} \ + -hets ${default="0.001" heterozygosity} \ + -allSites ${default="false" includeNonVariantSites} \ + -indelHeterozygosity ${default="1.25E-4" indel_heterozygosity} \ + -inputPrior ${default="[]" input_prior} \ + -maxAltAlleles ${default="6" max_alternate_alleles} \ + -maxNumPLValues ${default="100" max_num_PL_values} \ + -o ${default="stdout" out} \ + -ploidy ${default="2" sample_ploidy} \ + -stand_call_conf ${default="30.0" standard_min_confidence_threshold_for_calling} \ + -stand_emit_conf ${default="30.0" standard_min_confidence_threshold_for_emitting} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + annotateNDA: "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site" + annotation: "One or more specific annotations to recompute. 
The single value 'none' removes the default annotations" + dbsnp: "dbSNP file" + group: "One or more classes/groups of annotations to apply to variant calls" + heterozygosity: "Heterozygosity value used to compute prior likelihoods for any locus" + includeNonVariantSites: "Include loci found to be non-variant after genotyping" + indel_heterozygosity: "Heterozygosity for indel calling" + input_prior: "Input prior for calls" + max_alternate_alleles: "Maximum number of alternate alleles to genotype" + max_num_PL_values: "Maximum number of PL values to output" + out: "File to which variants should be written" + sample_ploidy: "Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)." + standard_min_confidence_threshold_for_calling: "The minimum phred-scaled confidence threshold at which variants should be called" + standard_min_confidence_threshold_for_emitting: "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)" + variant: "One or more input gVCF files" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow GenotypeGVCFsWf { + call GenotypeGVCFs +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/HaplotypeCaller_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/HaplotypeCaller_3.6.wdl new file mode 100644 index 0000000..3622a58 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/HaplotypeCaller_3.6.wdl @@ -0,0 +1,227 @@ +# -------------------------------------------------------------------------------------------- +# This HaplotypeCaller WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Call germline SNPs and indels via local re-assembly of haplotypes +# 
-------------------------------------------------------------------------------------------- + +task HaplotypeCaller { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + File ? BQSR + Int ? nctVal + Float ? activeProbabilityThreshold + Int ? activeRegionExtension + Array[String] ? activeRegionIn + Int ? activeRegionMaxSize + String ? activeRegionOut + String ? activityProfileOut + String ? alleles + Boolean ? allowNonUniqueKmersInRef + Boolean ? allSitePLs + Boolean ? annotateNDA + Array[String] ? annotation + String ? bamOutput + String ? bamWriterType + Float ? bandPassSigma + Array[String] ? comp + Boolean ? consensus + File ? contamination_fraction_per_sample_file + Float ? contamination_fraction_to_filter + String ? dbsnp + Boolean ? debug + Boolean ? disableOptimizations + Boolean ? doNotRunPhysicalPhasing + Boolean ? dontIncreaseKmerSizesForCycles + Boolean ? dontTrimActiveRegions + Boolean ? dontUseSoftClippedBases + Boolean ? emitDroppedReads + String ? emitRefConfidence + Array[String] ? excludeAnnotation + Boolean ? forceActive + Int ? gcpHMM + String ? genotyping_mode + String ? graphOutput + Array[String] ? group + Array[Int] ? GVCFGQBands + Float ? heterozygosity + Float ? indel_heterozygosity + Int ? indelSizeToEliminateInRefModel + Array[Float] ? input_prior + Array[Int] ? kmerSize + Int ? max_alternate_alleles + Int ? max_num_PL_values + Int ? maxNumHaplotypesInPopulation + Int ? maxReadsInRegionPerSample + String ? min_base_quality_score + Int ? minDanglingBranchLength + Int ? minPruning + Int ? minReadsPerAlignmentStart + Int ? numPruningSamples + String ? out + String ? output_mode + String ? pcr_indel_model + Int ? phredScaledGlobalReadMismappingRate + String ? sample_name + Int ? sample_ploidy + Float ? standard_min_confidence_threshold_for_calling + Float ? 
standard_min_confidence_threshold_for_emitting + Boolean ? useAllelesTrigger + Boolean ? useFilteredReadsForAnnotations + + command { + java -jar ${gatk} \ + -T HaplotypeCaller \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="" "--BQSR " + BQSR} \ + ${default="" "-nct " + nctVal} \ + -ActProbThresh ${default="0.002" activeProbabilityThreshold} \ + ${default="" "-activeRegionExtension " + activeRegionExtension} \ + ${default="" "-AR " + activeRegionIn} \ + ${default="" "-activeRegionMaxSize " + activeRegionMaxSize} \ + ${default="" "-ARO " + activeRegionOut} \ + ${default="" "-APO " + activityProfileOut} \ + ${default="" "-alleles " + alleles} \ + -allowNonUniqueKmersInRef ${default="false" allowNonUniqueKmersInRef} \ + -allSitePLs ${default="false" allSitePLs} \ + -nda ${default="false" annotateNDA} \ + -A ${default="[]" annotation} \ + ${default="" "-bamout " + bamOutput} \ + -bamWriterType ${default="CALLED_HAPLOTYPES" bamWriterType} \ + ${default="" "-bandPassSigma " + bandPassSigma} \ + -comp ${default="[]" comp} \ + -consensus ${default="false" consensus} \ + ${default="" "-contaminationFile " + contamination_fraction_per_sample_file} \ + -contamination ${default="0.0" contamination_fraction_to_filter} \ + ${default="" "-D " + dbsnp} \ + -debug ${default="false" debug} \ + -disableOptimizations ${default="false" disableOptimizations} \ + -doNotRunPhysicalPhasing ${default="false" doNotRunPhysicalPhasing} \ + -dontIncreaseKmerSizesForCycles ${default="false" dontIncreaseKmerSizesForCycles} \ + -dontTrimActiveRegions ${default="false" dontTrimActiveRegions} \ + -dontUseSoftClippedBases ${default="false" dontUseSoftClippedBases} \ + -edr ${default="false" emitDroppedReads} \ + -ERC ${default="false" emitRefConfidence} \ + -XA ${default="[]" excludeAnnotation} \ + -forceActive ${default="false" forceActive} \ + -gcpHMM ${default="10" gcpHMM} \ + -gt_mode ${default="DISCOVERY" genotyping_mode} \ + ${default="" 
"-graph " + graphOutput} \ + -G ${default="[StandardAnnotation, StandardHCAnnotation]" group} \ + -GQB ${default="[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 70, 80, 90, 99]" GVCFGQBands} \ + -hets ${default="0.001" heterozygosity} \ + -indelHeterozygosity ${default="1.25E-4" indel_heterozygosity} \ + -ERCIS ${default="10" indelSizeToEliminateInRefModel} \ + -inputPrior ${default="[]" input_prior} \ + -kmerSize ${default="[10, 25]" kmerSize} \ + -maxAltAlleles ${default="6" max_alternate_alleles} \ + -maxNumPLValues ${default="100" max_num_PL_values} \ + -maxNumHaplotypesInPopulation ${default="128" maxNumHaplotypesInPopulation} \ + -maxReadsInRegionPerSample ${default="10000" maxReadsInRegionPerSample} \ + -mbq ${default="10" min_base_quality_score} \ + -minDanglingBranchLength ${default="4" minDanglingBranchLength} \ + -minPruning ${default="2" minPruning} \ + -minReadsPerAlignStart ${default="10" minReadsPerAlignmentStart} \ + -numPruningSamples ${default="1" numPruningSamples} \ + -o ${default="stdout" out} \ + -out_mode ${default="EMIT_VARIANTS_ONLY" output_mode} \ + -pcrModel ${default="CONSERVATIVE" pcr_indel_model} \ + -globalMAPQ ${default="45" phredScaledGlobalReadMismappingRate} \ + ${default="" "-sn " + sample_name} \ + -ploidy ${default="2" sample_ploidy} \ + -stand_call_conf ${default="30.0" standard_min_confidence_threshold_for_calling} \ + -stand_emit_conf ${default="30.0" standard_min_confidence_threshold_for_emitting} \ + -allelesTrigger ${default="false" useAllelesTrigger} \ + -useFilteredReadsForAnnotations ${default="false" useFilteredReadsForAnnotations} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: 
"broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + activeProbabilityThreshold: "Threshold for the probability of a profile state being active." + activeRegionExtension: "The active region extension; if not provided defaults to Walker annotated default" + activeRegionIn: "Use this interval list file as the active regions to process" + activeRegionMaxSize: "The active region maximum size; if not provided defaults to Walker annotated default" + activeRegionOut: "Output the active region to this IGV formatted file" + activityProfileOut: "Output the raw activity profile results in IGV format" + alleles: "The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES" + allowNonUniqueKmersInRef: "Allow graphs that have non-unique kmers in the reference" + allSitePLs: "Annotate all sites with PLs" + annotateNDA: "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site" + annotation: "One or more specific annotations to apply to variant calls" + bamOutput: "File to which assembled haplotypes should be written" + bamWriterType: "Which haplotypes should be written to the BAM" + bandPassSigma: "The sigma of the band pass filter Gaussian kernel; if not provided defaults to Walker annotated default" + comp: "Comparison VCF file" + consensus: "1000G consensus mode" + contamination_fraction_per_sample_file: "Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be (Contamination is double) per line; No header." 
+ contamination_fraction_to_filter: "Fraction of contamination in sequencing data (for all samples) to aggressively remove" + dbsnp: "dbSNP file" + debug: "Print out very verbose debug information about each triggering active region" + disableOptimizations: "Don't skip calculations in ActiveRegions with no variants" + doNotRunPhysicalPhasing: "Disable physical phasing" + dontIncreaseKmerSizesForCycles: "Disable iterating over kmer sizes when graph cycles are detected" + dontTrimActiveRegions: "If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping" + dontUseSoftClippedBases: "Do not analyze soft clipped bases in the reads" + emitDroppedReads: "Emit reads that are dropped for filtering, trimming, realignment failure" + emitRefConfidence: "Mode for emitting reference confidence scores" + excludeAnnotation: "One or more specific annotations to exclude" + forceActive: "If provided, all bases will be tagged as active" + gcpHMM: "Flat gap continuation penalty for use in the Pair HMM" + genotyping_mode: "Specifies how to determine the alternate alleles to use for genotyping" + graphOutput: "Write debug assembly graph information to this file" + group: "One or more classes/groups of annotations to apply to variant calls" + GVCFGQBands: "GQ thresholds for reference confidence bands" + heterozygosity: "Heterozygosity value used to compute prior likelihoods for any locus" + indel_heterozygosity: "Heterozygosity for indel calling" + indelSizeToEliminateInRefModel: "The size of an indel to check for in the reference model" + input_prior: "Input prior for calls" + kmerSize: "Kmer size to use in the read threading assembler" + max_alternate_alleles: "Maximum number of alternate alleles to genotype" + max_num_PL_values: "Maximum number of PL values to output" + maxNumHaplotypesInPopulation: "Maximum number of haplotypes to consider for your population" + maxReadsInRegionPerSample: "Maximum reads in 
an active region" + min_base_quality_score: "Minimum base quality required to consider a base for calling" + minDanglingBranchLength: "Minimum length of a dangling branch to attempt recovery" + minPruning: "Minimum support to not prune paths in the graph" + minReadsPerAlignmentStart: "Minimum number of reads sharing the same alignment start for each genomic location in an active region" + numPruningSamples: "Number of samples that must pass the minPruning threshold" + out: "File to which variants should be written" + output_mode: "Specifies which type of calls we should output" + pcr_indel_model: "The PCR indel model to use" + phredScaledGlobalReadMismappingRate: "The global assumed mismapping rate for reads" + sample_name: "Name of single sample to use from a multi-sample bam" + sample_ploidy: "Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)." + standard_min_confidence_threshold_for_calling: "The minimum phred-scaled confidence threshold at which variants should be called" + standard_min_confidence_threshold_for_emitting: "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)" + useAllelesTrigger: "Use additional trigger on variants found in an external alleles file" + useFilteredReadsForAnnotations: "Use the contamination-filtered read maps for the purposes of annotating variants" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + BQSR: "Input covariates table file for on-the-fly base quality score recalibration" + } +} + +workflow HaplotypeCallerWf { + call HaplotypeCaller +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/HaplotypeResolver_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/HaplotypeResolver_3.6.wdl new file mode 100644 index 0000000..d09edc6 --- /dev/null +++ 
b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/HaplotypeResolver_3.6.wdl @@ -0,0 +1,57 @@ +# -------------------------------------------------------------------------------------------- +# This HaplotypeResolver WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Haplotype-based resolution of variants in separate callsets. +# -------------------------------------------------------------------------------------------- + +task HaplotypeResolver { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + String ? out + String ? setKey + String ? statusKey + Array[String] variant + + command { + java -jar ${gatk} \ + -T HaplotypeResolver \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -o ${default="stdout" out} \ + -setKey ${default="set" setKey} \ + -statusKey ${default="status" statusKey} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + out: "File to which variants should be written" + setKey: "Key used in the INFO key=value tag emitted describing which set the combined VCF record came from" + statusKey: "Key used in the INFO key=value tag emitted describing the extent to which records match" + variant: 
"Input VCF file" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow HaplotypeResolverWf { + call HaplotypeResolver +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/IndelRealigner_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/IndelRealigner_3.6.wdl new file mode 100644 index 0000000..fcd1b8a --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/IndelRealigner_3.6.wdl @@ -0,0 +1,90 @@ +# -------------------------------------------------------------------------------------------- +# This IndelRealigner WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Perform local realignment of reads around indels +# -------------------------------------------------------------------------------------------- + +task IndelRealigner { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + String ? consensusDeterminationModel + Float ? entropyThreshold + Array[String] ? knownAlleles + Float ? LODThresholdForCleaning + Int ? maxConsensuses + Int ? maxIsizeForMovement + Int ? maxPositionalMoveAllowed + Int ? maxReadsForConsensuses + Int ? maxReadsForRealignment + Int ? maxReadsInMemory + Boolean ? noOriginalAlignmentTags + String ? nWayOut + String ? 
out + String targetIntervals + + command { + java -jar ${gatk} \ + -T IndelRealigner \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + -model ${default="USE_READS" consensusDeterminationModel} \ + -entropy ${default="0.15" entropyThreshold} \ + -known ${default="[]" knownAlleles} \ + -LOD ${default="5.0" LODThresholdForCleaning} \ + -maxConsensuses ${default="30" maxConsensuses} \ + -maxIsize ${default="3000" maxIsizeForMovement} \ + -maxPosMove ${default="200" maxPositionalMoveAllowed} \ + -greedy ${default="120" maxReadsForConsensuses} \ + -maxReads ${default="20000" maxReadsForRealignment} \ + -maxInMemory ${default="150000" maxReadsInMemory} \ + -noTags ${default="false" noOriginalAlignmentTags} \ + ${default="" "-nWayOut " + nWayOut} \ + ${default="" "-o " + out} \ + -targetIntervals ${targetIntervals} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + consensusDeterminationModel: "Determines how to compute the possible alternate consenses" + entropyThreshold: "Percentage of mismatches at a locus to be considered having high entropy (0.0 < entropy <= 1.0)" + knownAlleles: "Input VCF file(s) with known indels" + LODThresholdForCleaning: "LOD threshold above which the cleaner will clean" + maxConsensuses: "Max alternate consensuses to try (necessary to improve performance in deep coverage)" + maxIsizeForMovement: "maximum insert size of read pairs that we attempt to realign" + maxPositionalMoveAllowed: "Maximum positional 
move in basepairs that a read can be adjusted during realignment" + maxReadsForConsensuses: "Max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage)" + maxReadsForRealignment: "Max reads allowed at an interval for realignment" + maxReadsInMemory: "max reads allowed to be kept in memory at a time by the SAMFileWriter" + noOriginalAlignmentTags: "Don't output the original cigar or alignment start tags for each realigned read in the output bam" + nWayOut: "Generate one output file for each input (-I) bam file (not compatible with -output)" + out: "Output bam" + targetIntervals: "Intervals file output from RealignerTargetCreator" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow IndelRealignerWf { + call IndelRealigner +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/LeftAlignAndTrimVariants_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/LeftAlignAndTrimVariants_3.6.wdl new file mode 100644 index 0000000..88ee2db --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/LeftAlignAndTrimVariants_3.6.wdl @@ -0,0 +1,60 @@ +# -------------------------------------------------------------------------------------------- +# This LeftAlignAndTrimVariants WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Left-align indels in a variant callset +# -------------------------------------------------------------------------------------------- + +task LeftAlignAndTrimVariants { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Boolean ? dontTrimAlleles + Boolean ? 
keepOriginalAC + String ? out + Boolean ? splitMultiallelics + String variant + + command { + java -jar ${gatk} \ + -T LeftAlignAndTrimVariants \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -notrim ${default="false" dontTrimAlleles} \ + -keepOriginalAC ${default="false" keepOriginalAC} \ + -o ${default="stdout" out} \ + -split ${default="false" splitMultiallelics} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + dontTrimAlleles: "Do not Trim alleles to remove bases common to all of them" + keepOriginalAC: "Store the original AC, AF, and AN values after subsetting" + out: "File to which variants should be written" + splitMultiallelics: "Split multiallelic records and left-align individual alleles" + variant: "Input VCF file" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow LeftAlignAndTrimVariantsWf { + call LeftAlignAndTrimVariants +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/LeftAlignIndels_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/LeftAlignIndels_3.6.wdl new file mode 100644 index 0000000..ab0ab7d --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/LeftAlignIndels_3.6.wdl @@ -0,0 +1,51 @@ +# -------------------------------------------------------------------------------------------- +# This LeftAlignIndels WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# 
https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Left-align indels within reads in a bam file +# -------------------------------------------------------------------------------------------- + +task LeftAlignIndels { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + String ? out + + command { + java -jar ${gatk} \ + -T LeftAlignIndels \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + -o ${default="stdout" out} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + out: "Output bam" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow LeftAlignIndelsWf { + call LeftAlignIndels +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/MuTect2_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/MuTect2_3.6.wdl new file mode 100644 index 0000000..2caacef --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/MuTect2_3.6.wdl @@ -0,0 +1,248 @@ +# -------------------------------------------------------------------------------------------- +# This MuTect2 WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# 
https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Call somatic SNPs and indels via local re-assembly of haplotypes +# -------------------------------------------------------------------------------------------- + +task MuTect2 { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Boolean input_filetumor + Boolean ? input_filenormal + Array[String] ? intervals + File ? BQSR + Int ? nctVal + Float ? activeProbabilityThreshold + Int ? activeRegionExtension + Array[String] ? activeRegionIn + Int ? activeRegionMaxSize + String ? activeRegionOut + String ? activityProfileOut + String ? alleles + Boolean ? allowNonUniqueKmersInRef + Boolean ? allSitePLs + Boolean ? annotateNDA + Array[String] ? annotation + Boolean ? artifact_detection_mode + String ? bamOutput + String ? bamWriterType + Float ? bandPassSigma + Array[String] ? comp + Boolean ? consensus + File ? contamination_fraction_per_sample_file + Float ? contamination_fraction_to_filter + Array[String] ? cosmic + String ? dbsnp + Float ? dbsnp_normal_lod + Boolean ? debug + String ? debug_read_name + Boolean ? disableOptimizations + Boolean ? doNotRunPhysicalPhasing + Boolean ? dontIncreaseKmerSizesForCycles + Boolean ? dontTrimActiveRegions + Boolean ? dontUseSoftClippedBases + Boolean ? emitDroppedReads + String ? emitRefConfidence + Array[String] ? excludeAnnotation + Boolean ? forceActive + Int ? gcpHMM + String ? genotyping_mode + String ? graphOutput + String ? group + Float ? heterozygosity + Float ? indel_heterozygosity + Float ? initial_normal_lod + Float ? initial_tumor_lod + Array[Float] ? input_prior + Array[Int] ? kmerSize + Boolean ? m2debug + Float ? max_alt_allele_in_normal_fraction + Int ? max_alt_alleles_in_normal_count + Int ? max_alt_alleles_in_normal_qscore_sum + Int ? max_alternate_alleles + Int ? max_num_PL_values + Int ? 
maxNumHaplotypesInPopulation + String ? min_base_quality_score + Int ? minDanglingBranchLength + Int ? minPruning + Float ? normal_lod + Array[String] ? normal_panel + Int ? numPruningSamples + String ? out + String ? output_mode + Int ? phredScaledGlobalReadMismappingRate + Int ? sample_ploidy + Float ? standard_min_confidence_threshold_for_calling + Float ? standard_min_confidence_threshold_for_emitting + Float ? tumor_lod + Boolean ? useFilteredReadsForAnnotations + + command { + java -jar ${gatk} \ + -T MuTect2 \ + -R ${ref} \ + --input_file:tumor ${input_filetumor} \ + ${default="" "--input_file:normal " + input_filenormal} \ + ${default="" "--intervals " + intervals} \ + ${default="" "--BQSR " + BQSR} \ + ${default="" "-nct" + nctVal} \ + -ActProbThresh ${default="0.002" activeProbabilityThreshold} \ + ${default="" "-activeRegionExtension " + activeRegionExtension} \ + ${default="" "-AR " + activeRegionIn} \ + ${default="" "-activeRegionMaxSize " + activeRegionMaxSize} \ + ${default="" "-ARO " + activeRegionOut} \ + ${default="" "-APO " + activityProfileOut} \ + ${default="" "-alleles " + alleles} \ + -allowNonUniqueKmersInRef ${default="false" allowNonUniqueKmersInRef} \ + -allSitePLs ${default="false" allSitePLs} \ + -nda ${default="false" annotateNDA} \ + -A ${default="[DepthPerAlleleBySample, BaseQualitySumPerAlleleBySample, TandemRepeatAnnotator, OxoGReadCounts]" annotation} \ + artifact_detection_mode ${default="false" artifact_detection_mode} \ + ${default="" "-bamout " + bamOutput} \ + -bamWriterType ${default="CALLED_HAPLOTYPES" bamWriterType} \ + ${default="" "-bandPassSigma " + bandPassSigma} \ + -comp ${default="[]" comp} \ + -consensus ${default="false" consensus} \ + ${default="" "-contaminationFile " + contamination_fraction_per_sample_file} \ + -contamination ${default="0.0" contamination_fraction_to_filter} \ + -cosmic ${default="[]" cosmic} \ + ${default="" "-D " + dbsnp} \ + dbsnp_normal_lod ${default="5.5" dbsnp_normal_lod} \ + -debug 
${default="false" debug} \ + ${default="" "debug_read_name " + debug_read_name} \ + -disableOptimizations ${default="false" disableOptimizations} \ + -doNotRunPhysicalPhasing ${default="false" doNotRunPhysicalPhasing} \ + -dontIncreaseKmerSizesForCycles ${default="false" dontIncreaseKmerSizesForCycles} \ + -dontTrimActiveRegions ${default="false" dontTrimActiveRegions} \ + -dontUseSoftClippedBases ${default="false" dontUseSoftClippedBases} \ + -edr ${default="false" emitDroppedReads} \ + -ERC ${default="NONE" emitRefConfidence} \ + -XA ${default="[SpanningDeletions]" excludeAnnotation} \ + -forceActive ${default="false" forceActive} \ + -gcpHMM ${default="10" gcpHMM} \ + -gt_mode ${default="DISCOVERY" genotyping_mode} \ + ${default="" "-graph " + graphOutput} \ + -G ${default="[]" group} \ + -hets ${default="0.001" heterozygosity} \ + -indelHeterozygosity ${default="1.25E-4" indel_heterozygosity} \ + initial_normal_lod ${default="0.5" initial_normal_lod} \ + initial_tumor_lod ${default="4.0" initial_tumor_lod} \ + -inputPrior ${default="[]" input_prior} \ + -kmerSize ${default="[10, 25]" kmerSize} \ + -m2debug ${default="false" m2debug} \ + max_alt_allele_in_normal_fraction ${default="0.03" max_alt_allele_in_normal_fraction} \ + max_alt_alleles_in_normal_count ${default="1" max_alt_alleles_in_normal_count} \ + max_alt_alleles_in_normal_qscore_sum ${default="20" max_alt_alleles_in_normal_qscore_sum} \ + -maxAltAlleles ${default="6" max_alternate_alleles} \ + -maxNumPLValues ${default="100" max_num_PL_values} \ + -maxNumHaplotypesInPopulation ${default="128" maxNumHaplotypesInPopulation} \ + -mbq ${default="10" min_base_quality_score} \ + -minDanglingBranchLength ${default="4" minDanglingBranchLength} \ + -minPruning ${default="2" minPruning} \ + normal_lod ${default="2.2" normal_lod} \ + -PON ${default="[]" normal_panel} \ + -numPruningSamples ${default="1" numPruningSamples} \ + -o ${default="stdout" out} \ + -out_mode ${default="EMIT_VARIANTS_ONLY" output_mode} \ 
+ -globalMAPQ ${default="45" phredScaledGlobalReadMismappingRate} \ + -ploidy ${default="2" sample_ploidy} \ + -stand_call_conf ${default="30.0" standard_min_confidence_threshold_for_calling} \ + -stand_emit_conf ${default="30.0" standard_min_confidence_threshold_for_emitting} \ + tumor_lod ${default="6.3" tumor_lod} \ + -useFilteredReadsForAnnotations ${default="false" useFilteredReadsForAnnotations} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + activeProbabilityThreshold: "Threshold for the probability of a profile state being active." 
+ activeRegionExtension: "The active region extension; if not provided defaults to Walker annotated default" + activeRegionIn: "Use this interval list file as the active regions to process" + activeRegionMaxSize: "The active region maximum size; if not provided defaults to Walker annotated default" + activeRegionOut: "Output the active region to this IGV formatted file" + activityProfileOut: "Output the raw activity profile results in IGV format" + alleles: "The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES" + allowNonUniqueKmersInRef: "Allow graphs that have non-unique kmers in the reference" + allSitePLs: "Annotate all sites with PLs" + annotateNDA: "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site" + annotation: "One or more specific annotations to apply to variant calls" + artifact_detection_mode: "Enable artifact detection for creating panels of normals" + bamOutput: "File to which assembled haplotypes should be written" + bamWriterType: "Which haplotypes should be written to the BAM" + bandPassSigma: "The sigma of the band pass filter Gaussian kernel; if not provided defaults to Walker annotated default" + comp: "comparison VCF file" + consensus: "1000G consensus mode" + contamination_fraction_per_sample_file: "Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be (Contamination is double) per line; No header." 
+ contamination_fraction_to_filter: "Fraction of contamination in sequencing data (for all samples) to aggressively remove" + cosmic: "VCF file of COSMIC sites" + dbsnp: "dbSNP file" + dbsnp_normal_lod: "LOD threshold for calling normal non-variant at dbsnp sites" + debug: "Print out very verbose debug information about each triggering active region" + debug_read_name: "trace this read name through the calling process" + disableOptimizations: "Don't skip calculations in ActiveRegions with no variants" + doNotRunPhysicalPhasing: "Disable physical phasing" + dontIncreaseKmerSizesForCycles: "Disable iterating over kmer sizes when graph cycles are detected" + dontTrimActiveRegions: "If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping" + dontUseSoftClippedBases: "If specified, we will not analyze soft clipped bases in the reads" + emitDroppedReads: "Emit reads that are dropped for filtering, trimming, realignment failure" + emitRefConfidence: "Mode for emitting reference confidence scores" + excludeAnnotation: "One or more specific annotations to exclude" + forceActive: "If provided, all bases will be tagged as active" + gcpHMM: "Flat gap continuation penalty for use in the Pair HMM" + genotyping_mode: "Specifies how to determine the alternate alleles to use for genotyping" + graphOutput: "Write debug assembly graph information to this file" + group: "One or more classes/groups of annotations to apply to variant calls" + heterozygosity: "Heterozygosity value used to compute prior likelihoods for any locus" + indel_heterozygosity: "Heterozygosity for indel calling" + initial_normal_lod: "Initial LOD threshold for calling normal variant" + initial_tumor_lod: "Initial LOD threshold for calling tumor variant" + input_prior: "Input prior for calls" + kmerSize: "Kmer size to use in the read threading assembler" + m2debug: "Print out very verbose M2 debug information" + 
max_alt_allele_in_normal_fraction: "Threshold for maximum alternate allele fraction in normal" + max_alt_alleles_in_normal_count: "Threshold for maximum alternate allele counts in normal" + max_alt_alleles_in_normal_qscore_sum: "Threshold for maximum alternate allele quality score sum in normal" + max_alternate_alleles: "Maximum number of alternate alleles to genotype" + max_num_PL_values: "Maximum number of PL values to output" + maxNumHaplotypesInPopulation: "Maximum number of haplotypes to consider for your population" + min_base_quality_score: "Minimum base quality required to consider a base for calling" + minDanglingBranchLength: "Minimum length of a dangling branch to attempt recovery" + minPruning: "Minimum support to not prune paths in the graph" + normal_lod: "LOD threshold for calling normal non-germline" + normal_panel: "VCF file of sites observed in normal" + numPruningSamples: "Number of samples that must pass the minPruning threshold" + out: "File to which variants should be written" + output_mode: "Specifies which type of calls we should output" + phredScaledGlobalReadMismappingRate: "The global assumed mismapping rate for reads" + sample_ploidy: "Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)." 
+ standard_min_confidence_threshold_for_calling: "The minimum phred-scaled confidence threshold at which variants should be called" + standard_min_confidence_threshold_for_emitting: "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)" + tumor_lod: "LOD threshold for calling tumor variant" + useFilteredReadsForAnnotations: "Use the contamination-filtered read maps for the purposes of annotating variants" + input_filetumor: "Output version information" + input_filenormal: "Output version information" + intervals: "One or more genomic intervals over which to operate" + BQSR: "Input covariates table file for on-the-fly base quality score recalibration" + } +} + +workflow MuTect2Wf { + call MuTect2 +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/PhaseByTransmission_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/PhaseByTransmission_3.6.wdl new file mode 100644 index 0000000..eb58f3a --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/PhaseByTransmission_3.6.wdl @@ -0,0 +1,63 @@ +# -------------------------------------------------------------------------------------------- +# This PhaseByTransmission WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Compute the most likely genotype combination and phasing for trios and parent/child pairs +# -------------------------------------------------------------------------------------------- + +task PhaseByTransmission { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + Float ? DeNovoPrior + Boolean ? FatherAlleleFirst + String ? 
MendelianViolationsFile + String ? out + String variant + + command { + java -jar ${gatk} \ + -T PhaseByTransmission \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + -prior ${default="1.0E-8" DeNovoPrior} \ + -fatherAlleleFirst ${default="false" FatherAlleleFirst} \ + ${default="" "-mvf " + MendelianViolationsFile} \ + -o ${default="stdout" out} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + DeNovoPrior: "Prior for de novo mutations. Default: 1e-8" + FatherAlleleFirst: "Ouputs the father allele as the first allele in phased child genotype. i.e. father|mother rather than mother|father." + MendelianViolationsFile: "File to output the mendelian violation details." + out: "An output file created by the walker. 
Will overwrite contents if file exists" + variant: "Input VCF file" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow PhaseByTransmissionWf { + call PhaseByTransmission +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/Pileup_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/Pileup_3.6.wdl new file mode 100644 index 0000000..c32677c --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/Pileup_3.6.wdl @@ -0,0 +1,61 @@ +# -------------------------------------------------------------------------------------------- +# This Pileup WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Print read alignments in Pileup-style format +# -------------------------------------------------------------------------------------------- + +task Pileup { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + Int ? nctVal + Int ? ntVal + Array[String] ? metadata + String ? out + Boolean ? 
showVerbose + + command { + java -jar ${gatk} \ + -T Pileup \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nct" + nctVal} \ + ${default="" "-nt" + ntVal} \ + -metadata ${default="[]" metadata} \ + -o ${default="stdout" out} \ + -verbose ${default="false" showVerbose} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + metadata: "ROD file containing metadata" + out: "An output file created by the walker. Will overwrite contents if file exists" + showVerbose: "Add an extra verbose section to the pileup output" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow PileupWf { + call Pileup +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/PrintRODs_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/PrintRODs_3.6.wdl new file mode 100644 index 0000000..a47b63f --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/PrintRODs_3.6.wdl @@ -0,0 +1,48 @@ +# -------------------------------------------------------------------------------------------- +# This PrintRODs WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Print out all of the RODs in the input data set +# 
-------------------------------------------------------------------------------------------- + +task PrintRODs { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + String task_input + String ? out + + command { + java -jar ${gatk} \ + -T PrintRODs \ + -R ${ref} \ + -input ${task_input} \ + -o ${default="stdout" out} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + task_input: "The input ROD which should be printed out." + out: "An output file created by the walker. 
Will overwrite contents if file exists" + } +} + +workflow PrintRODsWf { + call PrintRODs +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/PrintReads_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/PrintReads_3.6.wdl new file mode 100644 index 0000000..340340e --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/PrintReads_3.6.wdl @@ -0,0 +1,74 @@ +# -------------------------------------------------------------------------------------------- +# This PrintReads WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Write out sequence read data (for filtering, merging, subsetting etc) +# -------------------------------------------------------------------------------------------- + +task PrintReads { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + File ? BQSR + Int ? nctVal + Int ? number + String ? out + String ? platform + String ? readGroup + String ? sample_file + String ? sample_name + Boolean ? 
simplify + + command { + java -jar ${gatk} \ + -T PrintReads \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="" "--BQSR " + BQSR} \ + ${default="" "-nct" + nctVal} \ + -n ${default="-1" number} \ + -o ${default="stdout" out} \ + ${default="" "-platform " + platform} \ + ${default="" "-readGroup " + readGroup} \ + -sf ${default="[]" sample_file} \ + -sn ${default="[]" sample_name} \ + -s ${default="false" simplify} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + number: "Print the first n reads from the file, discarding the rest" + out: "Write output to this BAM filename instead of STDOUT" + platform: "Exclude all reads with this platform from the output" + readGroup: "Exclude all reads with this read group from the output" + sample_file: "File containing a list of samples (one per line). Can be specified multiple times" + sample_name: "Sample name to be included in the analysis. Can be specified multiple times." 
+ simplify: "Simplify all reads" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + BQSR: "Input covariates table file for on-the-fly base quality score recalibration" + } +} + +workflow PrintReadsWf { + call PrintReads +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/QualifyMissingIntervals_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/QualifyMissingIntervals_3.6.wdl new file mode 100644 index 0000000..626743e --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/QualifyMissingIntervals_3.6.wdl @@ -0,0 +1,74 @@ +# -------------------------------------------------------------------------------------------- +# This QualifyMissingIntervals WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Collect quality metrics for a set of intervals +# -------------------------------------------------------------------------------------------- + +task QualifyMissingIntervals { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + Int ? nctVal + String ? baitsfile + Int ? coveragethreshold + Float ? gcthreshold + String ? intervalsizethreshold + String ? mappingthreshold + String ? out + String ? 
qualthreshold + String targetsfile + + command { + java -jar ${gatk} \ + -T QualifyMissingIntervals \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nct" + nctVal} \ + ${default="" "-baits " + baitsfile} \ + -cov ${default="20" coveragethreshold} \ + -gc ${default="0.3" gcthreshold} \ + -size ${default="10" intervalsizethreshold} \ + -mmq ${default="20" mappingthreshold} \ + -o ${default="stdout" out} \ + -mbq ${default="20" qualthreshold} \ + -targets ${targetsfile} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + baitsfile: "Undocumented option" + coveragethreshold: "minimum coverage to be considered sequenceable" + gcthreshold: "upper and lower bound for an interval to be considered high/low GC content" + intervalsizethreshold: "minimum interval length to be considered" + mappingthreshold: "minimum mapping quality for it to be considered usable" + out: "An output file created by the walker. 
Will overwrite contents if file exists" + qualthreshold: "minimum base quality for it to be considered usable" + targetsfile: "Undocumented option" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow QualifyMissingIntervalsWf { + call QualifyMissingIntervals +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/RandomlySplitVariants_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/RandomlySplitVariants_3.6.wdl new file mode 100644 index 0000000..40e8108 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/RandomlySplitVariants_3.6.wdl @@ -0,0 +1,66 @@ +# -------------------------------------------------------------------------------------------- +# This RandomlySplitVariants WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Randomly split variants into different sets +# -------------------------------------------------------------------------------------------- + +task RandomlySplitVariants { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Float ? fractionToOut1 + Int ? numOfOutputVCFFiles + String ? out1 + File ? out2 + String ? prefixForAllOutputFileNames + Boolean ? 
splitToManyFiles + String variant + + command { + java -jar ${gatk} \ + -T RandomlySplitVariants \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -fraction ${default="0.5" fractionToOut1} \ + -N ${default="-1" numOfOutputVCFFiles} \ + -o1 ${default="stdout" out1} \ + ${default="" "-o2 " + out2} \ + ${default="" "-baseOutputName " + prefixForAllOutputFileNames} \ + -splitToMany ${default="false" splitToManyFiles} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + fractionToOut1: "Fraction of records to be placed in out1 (must be 0 >= fraction <= 1); all other records are placed in out2" + numOfOutputVCFFiles: "number of output VCF files. Only works with SplitToMany = true" + out1: "File #1 to which variants should be written" + out2: "File #2 to which variants should be written" + prefixForAllOutputFileNames: "the name of the output VCF file will be: .split..vcf. Required with SplitToMany option" + splitToManyFiles: "split (with uniform distribution) to more than 2 files. 
numOfFiles and baseOutputName parameters are required" + variant: "Input VCF file" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow RandomlySplitVariantsWf { + call RandomlySplitVariants +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadBackedPhasing_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadBackedPhasing_3.6.wdl new file mode 100644 index 0000000..9f555ce --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadBackedPhasing_3.6.wdl @@ -0,0 +1,81 @@ +# -------------------------------------------------------------------------------------------- +# This ReadBackedPhasing WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Annotate physical phasing information +# -------------------------------------------------------------------------------------------- + +task ReadBackedPhasing { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + Int ? cacheWindowSize + Boolean ? debug + Boolean ? enableMergePhasedSegregatingPolymorphismsToMNP + Int ? maxGenomicDistanceForMNP + Int ? maxPhaseSites + Int ? min_base_quality_score + Int ? min_mapping_quality_score + String ? out + Float ? phaseQualityThresh + String ? 
sampleToPhase + String variant + + command { + java -jar ${gatk} \ + -T ReadBackedPhasing \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + -cacheWindow ${default="20000" cacheWindowSize} \ + -debug ${default="false" debug} \ + -enableMergeToMNP ${default="false" enableMergePhasedSegregatingPolymorphismsToMNP} \ + -maxDistMNP ${default="1" maxGenomicDistanceForMNP} \ + -maxSites ${default="10" maxPhaseSites} \ + -mbq ${default="17" min_base_quality_score} \ + -mmq ${default="20" min_mapping_quality_score} \ + -o ${default="stdout" out} \ + -phaseThresh ${default="20.0" phaseQualityThresh} \ + ${default="" "-sampleToPhase " + sampleToPhase} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + cacheWindowSize: "The window size (in bases) to cache variant sites and their reads for the phasing procedure" + debug: "If specified, print out very verbose debug information (if -l DEBUG is also specified)" + enableMergePhasedSegregatingPolymorphismsToMNP: "Merge consecutive phased sites into MNP records" + maxGenomicDistanceForMNP: "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records into a MNP record" + maxPhaseSites: "The maximum number of successive heterozygous sites permitted to be used by the phasing algorithm" + min_base_quality_score: "Minimum base quality required to consider a base for phasing" + min_mapping_quality_score: "Minimum read mapping 
quality required to consider a read for phasing" + out: "File to which variants should be written" + phaseQualityThresh: "The minimum phasing quality score required to output phasing" + sampleToPhase: "Only include these samples when phasing" + variant: "Input VCF file" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow ReadBackedPhasingWf { + call ReadBackedPhasing +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadClippingStats_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadClippingStats_3.6.wdl new file mode 100644 index 0000000..c2dc3e4 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadClippingStats_3.6.wdl @@ -0,0 +1,57 @@ +# -------------------------------------------------------------------------------------------- +# This ReadClippingStats WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Collect read clipping statistics +# -------------------------------------------------------------------------------------------- + +task ReadClippingStats { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + Boolean ? include_unmapped + String ? out + Int ? 
skip + + command { + java -jar ${gatk} \ + -T ReadClippingStats \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + -u ${default="false" include_unmapped} \ + -o ${default="stdout" out} \ + -skip ${default="1" skip} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + include_unmapped: "Include unmapped reads in the analysis" + out: "An output file created by the walker. Will overwrite contents if file exists" + skip: "Do not print all reads, skip some." 
+ input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow ReadClippingStatsWf { + call ReadClippingStats +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadGroupProperties_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadGroupProperties_3.6.wdl new file mode 100644 index 0000000..4d7994f --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadGroupProperties_3.6.wdl @@ -0,0 +1,51 @@ +# -------------------------------------------------------------------------------------------- +# This ReadGroupProperties WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Collect statistics about read groups and their properties +# -------------------------------------------------------------------------------------------- + +task ReadGroupProperties { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Int ? max_values_for_median + String ? 
out + + command { + java -jar ${gatk} \ + -T ReadGroupProperties \ + -R ${ref} \ + --input_file ${input_file} \ + -maxElementsForMedian ${default="10000" max_values_for_median} \ + -o ${default="stdout" out} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + max_values_for_median: "Calculate median from the first maxElementsForMedian values observed" + out: "An output file created by the walker. Will overwrite contents if file exists" + input_file: "Input file containing sequence data (BAM or CRAM)" + } +} + +workflow ReadGroupPropertiesWf { + call ReadGroupProperties +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadLengthDistribution_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadLengthDistribution_3.6.wdl new file mode 100644 index 0000000..fd57f27 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ReadLengthDistribution_3.6.wdl @@ -0,0 +1,51 @@ +# -------------------------------------------------------------------------------------------- +# This ReadLengthDistribution WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Collect read length statistics +# -------------------------------------------------------------------------------------------- + +task ReadLengthDistribution { + File gatk + File ref + File refIndex + File refDict + 
String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + String ? out + + command { + java -jar ${gatk} \ + -T ReadLengthDistribution \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + -o ${default="stdout" out} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + out: "An output file created by the walker. Will overwrite contents if file exists" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow ReadLengthDistributionWf { + call ReadLengthDistribution +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/RealignerTargetCreator_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/RealignerTargetCreator_3.6.wdl new file mode 100644 index 0000000..83cfb57 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/RealignerTargetCreator_3.6.wdl @@ -0,0 +1,68 @@ +# -------------------------------------------------------------------------------------------- +# This RealignerTargetCreator WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Define intervals to target for local realignment +# 
-------------------------------------------------------------------------------------------- + +task RealignerTargetCreator { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + Int ? ntVal + Array[String] ? known + Int ? maxIntervalSize + Int ? minReadsAtLocus + Float ? mismatchFraction + File ? out + Int ? windowSize + + command { + java -jar ${gatk} \ + -T RealignerTargetCreator \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nt" + ntVal} \ + -known ${default="[]" known} \ + -maxInterval ${default="500" maxIntervalSize} \ + -minReads ${default="4" minReadsAtLocus} \ + -mismatch ${default="0.0" mismatchFraction} \ + ${default="" "-o " + out} \ + -window ${default="10" windowSize} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + known: "Input VCF file with known indels" + maxIntervalSize: "maximum interval size; any intervals larger than this value will be dropped" + minReadsAtLocus: "minimum reads at a locus to enable using the entropy calculation" + mismatchFraction: "fraction of base qualities needing to mismatch for a position to have high entropy" + out: "An output file created by the walker. 
Will overwrite contents if file exists" + windowSize: "window size for calculating entropy or SNP clusters" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow RealignerTargetCreatorWf { + call RealignerTargetCreator +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/RegenotypeVariants_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/RegenotypeVariants_3.6.wdl new file mode 100644 index 0000000..01e07b6 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/RegenotypeVariants_3.6.wdl @@ -0,0 +1,53 @@ +# -------------------------------------------------------------------------------------------- +# This RegenotypeVariants WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Regenotypes the variants from a VCF containing PLs or GLs. +# -------------------------------------------------------------------------------------------- + +task RegenotypeVariants { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Int ? ntVal + String ? 
out + String variant + + command { + java -jar ${gatk} \ + -T RegenotypeVariants \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nt" + ntVal} \ + -o ${default="stdout" out} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + out: "File to which variants should be written" + variant: "Input VCF file" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow RegenotypeVariantsWf { + call RegenotypeVariants +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/SelectHeaders_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/SelectHeaders_3.6.wdl new file mode 100644 index 0000000..944ec13 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/SelectHeaders_3.6.wdl @@ -0,0 +1,65 @@ +# -------------------------------------------------------------------------------------------- +# This SelectHeaders WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Selects headers from a VCF source +# -------------------------------------------------------------------------------------------- + +task SelectHeaders { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? 
intervals + Int ? ntVal + String ? exclude_header_name + String ? header_expression + String ? header_name + Boolean ? include_interval_names + String ? out + String variant + + command { + java -jar ${gatk} \ + -T SelectHeaders \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nt" + ntVal} \ + ${default="" "-xl_hn " + exclude_header_name} \ + ${default="" "-he " + header_expression} \ + ${default="" "-hn " + header_name} \ + -iln ${default="false" include_interval_names} \ + -o ${default="stdout" out} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + exclude_header_name: "Exclude header. Can be specified multiple times" + header_expression: "Regular expression to select many headers from the tracks provided. Can be specified multiple times" + header_name: "Include header. 
Can be specified multiple times" + include_interval_names: "If set the interval file name minus the file extension, or the command line intervals, will be added to the headers" + out: "File to which variants should be written" + variant: "Input VCF file" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow SelectHeadersWf { + call SelectHeaders +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/SelectVariants_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/SelectVariants_3.6.wdl new file mode 100644 index 0000000..82ed7ac --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/SelectVariants_3.6.wdl @@ -0,0 +1,161 @@ +# -------------------------------------------------------------------------------------------- +# This SelectVariants WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Select a subset of variants from a larger callset +# -------------------------------------------------------------------------------------------- + +task SelectVariants { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Int ? ntVal + String ? concordance + String ? discordance + String ? exclude_sample_expressions + String ? exclude_sample_file + String ? exclude_sample_name + Boolean ? excludeFiltered + File ? excludeIDs + Boolean ? excludeNonVariants + Boolean ? forceValidOutput + Boolean ? invertMendelianViolation + Boolean ? invertselect + File ? keepIDs + Boolean ? keepOriginalAC + Boolean ? keepOriginalDP + Int ? maxFilteredGenotypes + Float ? maxFractionFilteredGenotypes + Int ? maxIndelSize + Float ? maxNOCALLfraction + Int ? maxNOCALLnumber + Boolean ? 
mendelianViolation + Float ? mendelianViolationQualThreshold + Int ? minFilteredGenotypes + Float ? minFractionFilteredGenotypes + Int ? minIndelSize + String ? out + Boolean ? preserveAlleles + Float ? remove_fraction_genotypes + Boolean ? removeUnusedAlternates + String ? restrictAllelesTo + String ? sample_expressions + String ? sample_file + String ? sample_name + Float ? select_random_fraction + Array[String] ? selectexpressions + Array[String] ? selectTypeToExclude + Array[String] ? selectTypeToInclude + Boolean ? setFilteredGtToNocall + String variant + + command { + java -jar ${gatk} \ + -T SelectVariants \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nt" + ntVal} \ + ${default="" "-conc " + concordance} \ + ${default="" "-disc " + discordance} \ + -xl_se ${default="[]" exclude_sample_expressions} \ + -xl_sf ${default="[]" exclude_sample_file} \ + -xl_sn ${default="[]" exclude_sample_name} \ + -ef ${default="false" excludeFiltered} \ + ${default="" "-xlIDs " + excludeIDs} \ + -env ${default="false" excludeNonVariants} \ + forceValidOutput ${default="false" forceValidOutput} \ + -invMv ${default="false" invertMendelianViolation} \ + -invertSelect ${default="false" invertselect} \ + ${default="" "-IDs " + keepIDs} \ + -keepOriginalAC ${default="false" keepOriginalAC} \ + -keepOriginalDP ${default="false" keepOriginalDP} \ + maxFilteredGenotypes ${default="2147483647" maxFilteredGenotypes} \ + maxFractionFilteredGenotypes ${default="1.0" maxFractionFilteredGenotypes} \ + maxIndelSize ${default="2147483647" maxIndelSize} \ + maxNOCALLfraction ${default="1.0" maxNOCALLfraction} \ + maxNOCALLnumber ${default="2147483647" maxNOCALLnumber} \ + -mv ${default="false" mendelianViolation} \ + -mvq ${default="0.0" mendelianViolationQualThreshold} \ + minFilteredGenotypes ${default="0" minFilteredGenotypes} \ + minFractionFilteredGenotypes ${default="0.0" minFractionFilteredGenotypes} \ + minIndelSize ${default="0" minIndelSize} \ + -o 
${default="stdout" out} \ + -noTrim ${default="false" preserveAlleles} \ + -fractionGenotypes ${default="0.0" remove_fraction_genotypes} \ + -trimAlternates ${default="false" removeUnusedAlternates} \ + -restrictAllelesTo ${default="ALL" restrictAllelesTo} \ + ${default="" "-se " + sample_expressions} \ + ${default="" "-sf " + sample_file} \ + -sn ${default="[]" sample_name} \ + -fraction ${default="0.0" select_random_fraction} \ + -select ${default="[]" selectexpressions} \ + -xlSelectType ${default="[]" selectTypeToExclude} \ + -selectType ${default="[]" selectTypeToInclude} \ + setFilteredGtToNocall ${default="false" setFilteredGtToNocall} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + concordance: "Output variants also called in this comparison track" + discordance: "Output variants not called in this comparison track" + exclude_sample_expressions: "List of sample expressions to exclude" + exclude_sample_file: "List of samples to exclude" + exclude_sample_name: "Exclude genotypes from this sample" + excludeFiltered: "Don't include filtered sites" + excludeIDs: "List of variant IDs to select" + excludeNonVariants: "Don't include non-variant sites" + forceValidOutput: "Forces output VCF to be compliant to up-to-date version" + invertMendelianViolation: "Output non-mendelian violation sites only" + invertselect: "Invert the selection criteria for -select" + keepIDs: "List of variant IDs to select" + keepOriginalAC: "Store the original AC, AF, and 
AN values after subsetting" + keepOriginalDP: "Store the original DP value after subsetting" + maxFilteredGenotypes: "Maximum number of samples filtered at the genotype level" + maxFractionFilteredGenotypes: "Maximum fraction of samples filtered at the genotype level" + maxIndelSize: "Maximum size of indels to include" + maxNOCALLfraction: "Maximum fraction of samples with no-call genotypes" + maxNOCALLnumber: "Maximum number of samples with no-call genotypes" + mendelianViolation: "Output mendelian violation sites only" + mendelianViolationQualThreshold: "Minimum GQ score for each trio member to accept a site as a violation" + minFilteredGenotypes: "Minimum number of samples filtered at the genotype level" + minFractionFilteredGenotypes: "Maximum fraction of samples filtered at the genotype level" + minIndelSize: "Minimum size of indels to include" + out: "File to which variants should be written" + preserveAlleles: "Preserve original alleles, do not trim" + remove_fraction_genotypes: "Select a fraction of genotypes at random from the input and sets them to no-call" + removeUnusedAlternates: "Remove alternate alleles not present in any genotypes" + restrictAllelesTo: "Select only variants of a particular allelicity" + sample_expressions: "Regular expression to select multiple samples" + sample_file: "File containing a list of samples to include" + sample_name: "Include genotypes from this sample" + select_random_fraction: "Select a fraction of variants at random from the input" + selectexpressions: "One or more criteria to use when selecting the data" + selectTypeToExclude: "Do not select certain type of variants from the input file" + selectTypeToInclude: "Select only a certain type of variants from the input file" + setFilteredGtToNocall: "Set filtered genotypes to no-call" + variant: "Input VCF file" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow SelectVariantsWf { + call SelectVariants +} diff --git 
a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/SimulateReadsForVariants_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/SimulateReadsForVariants_3.6.wdl new file mode 100644 index 0000000..1349386 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/SimulateReadsForVariants_3.6.wdl @@ -0,0 +1,66 @@ +# -------------------------------------------------------------------------------------------- +# This SimulateReadsForVariants WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Generate simulated reads for variants +# -------------------------------------------------------------------------------------------- + +task SimulateReadsForVariants { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Int ? errorRate + String out + Int ? readDepth + Int ? readLength + String ? readSamplingMode + String ? 
rgPlatform + String variant + + command { + java -jar ${gatk} \ + -T SimulateReadsForVariants \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -ER ${default="20" errorRate} \ + -o ${out} \ + -DP ${default="20" readDepth} \ + -RL ${default="101" readLength} \ + -RSM ${default="CONSTANT" readSamplingMode} \ + -RGPL ${default="ILLUMINA" rgPlatform} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + errorRate: "Base error rate (Phred-scaled)" + out: "Reads corresponding to variants" + readDepth: "Read depth to generate" + readLength: "Read lengths (bp)" + readSamplingMode: "Sampling mode" + rgPlatform: "Sequencing platform" + variant: "Input VCF file" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow SimulateReadsForVariantsWf { + call SimulateReadsForVariants +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/SplitNCigarReads_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/SplitNCigarReads_3.6.wdl new file mode 100644 index 0000000..7fd8138 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/SplitNCigarReads_3.6.wdl @@ -0,0 +1,66 @@ +# -------------------------------------------------------------------------------------------- +# This SplitNCigarReads WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# 
Task Summary: Splits reads that contain Ns in their CIGAR string +# -------------------------------------------------------------------------------------------- + +task SplitNCigarReads { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] intervals + String unsafe + Boolean ? doNotFixOverhangs + Int ? maxBasesInOverhang + Int ? maxMismatchesInOverhang + Int ? maxReadsInMemory + String ? out + + command { + java -jar ${gatk} \ + -T SplitNCigarReads \ + -R ${ref} \ + --input_file ${input_file} \ + --intervals ${intervals} \ + --unsafe ${unsafe} \ + -doNotFixOverhangs ${default="false" doNotFixOverhangs} \ + -maxOverhang ${default="40" maxBasesInOverhang} \ + -maxMismatches ${default="1" maxMismatchesInOverhang} \ + -maxInMemory ${default="150000" maxReadsInMemory} \ + -o ${default="stdout" out} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + doNotFixOverhangs: "do not have the walker hard-clip overhanging sections of the reads" + maxBasesInOverhang: "max number of bases allowed in the overhang" + maxMismatchesInOverhang: "max number of mismatches allowed in the overhang" + maxReadsInMemory: "max reads allowed to be kept in memory at a time by the BAM writer" + out: "Write output to this BAM filename instead of STDOUT" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or 
more genomic intervals over which to operate" + unsafe: "Enable unsafe operations: nothing will be checked at runtime" + } +} + +workflow SplitNCigarReadsWf { + call SplitNCigarReads +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/SplitSamFile_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/SplitSamFile_3.6.wdl new file mode 100644 index 0000000..7e9d43c --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/SplitSamFile_3.6.wdl @@ -0,0 +1,51 @@ +# -------------------------------------------------------------------------------------------- +# This SplitSamFile WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Split a BAM file by sample +# -------------------------------------------------------------------------------------------- + +task SplitSamFile { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + String ? 
outputRoot + + command { + java -jar ${gatk} \ + -T SplitSamFile \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="" "--outputRoot " + outputRoot} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${outputRoot}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + outputRoot: "output BAM file" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow SplitSamFileWf { + call SplitSamFile +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/UnifiedGenotyper_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/UnifiedGenotyper_3.6.wdl new file mode 100644 index 0000000..d6c48db --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/UnifiedGenotyper_3.6.wdl @@ -0,0 +1,151 @@ +# -------------------------------------------------------------------------------------------- +# This UnifiedGenotyper WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Call SNPs and indels on a per-locus basis +# -------------------------------------------------------------------------------------------- + +task UnifiedGenotyper { + File gatk + File ref + File refIndex + File refDict + String ? 
userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] input_file + Array[String] ? intervals + File ? BQSR + Int ? nctVal + Int ? ntVal + String ? alleles + Boolean ? allSitePLs + Boolean ? annotateNDA + Array[String] ? annotation + Array[String] ? comp + Boolean ? computeSLOD + File ? contamination_fraction_per_sample_file + Float ? contamination_fraction_to_filter + String ? dbsnp + Array[String] ? excludeAnnotation + String ? genotype_likelihoods_model + String ? genotyping_mode + String ? group + Float ? heterozygosity + Float ? indel_heterozygosity + String ? indelGapContinuationPenalty + String ? indelGapOpenPenalty + Array[Float] ? input_prior + Int ? max_alternate_alleles + Float ? max_deletion_fraction + Int ? max_num_PL_values + Int ? min_base_quality_score + Int ? min_indel_count_for_genotyping + Float ? min_indel_fraction_per_sample + String ? onlyEmitSamples + String ? out + String ? output_mode + String ? pair_hmm_implementation + Float ? pcr_error_rate + Int ? sample_ploidy + Float ? standard_min_confidence_threshold_for_calling + Float ? 
standard_min_confidence_threshold_for_emitting + + command { + java -jar ${gatk} \ + -T UnifiedGenotyper \ + -R ${ref} \ + --input_file ${input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="" "--BQSR " + BQSR} \ + ${default="" "-nct " + nctVal} \ + ${default="" "-nt " + ntVal} \ + ${default="" "-alleles " + alleles} \ + -allSitePLs ${default="false" allSitePLs} \ + -nda ${default="false" annotateNDA} \ + -A ${default="[]" annotation} \ + -comp ${default="[]" comp} \ + -slod ${default="false" computeSLOD} \ + ${default="" "-contaminationFile " + contamination_fraction_per_sample_file} \ + -contamination ${default="0.0" contamination_fraction_to_filter} \ + ${default="" "-D " + dbsnp} \ + -XA ${default="[]" excludeAnnotation} \ + -glm ${default="SNP" genotype_likelihoods_model} \ + -gt_mode ${default="DISCOVERY" genotyping_mode} \ + -G ${default="[Standard, StandardUG]" group} \ + -hets ${default="0.001" heterozygosity} \ + -indelHeterozygosity ${default="1.25E-4" indel_heterozygosity} \ + -indelGCP ${default="10" indelGapContinuationPenalty} \ + -indelGOP ${default="45" indelGapOpenPenalty} \ + -inputPrior ${default="[]" input_prior} \ + -maxAltAlleles ${default="6" max_alternate_alleles} \ + -deletions ${default="0.05" max_deletion_fraction} \ + -maxNumPLValues ${default="100" max_num_PL_values} \ + -mbq ${default="17" min_base_quality_score} \ + -minIndelCnt ${default="5" min_indel_count_for_genotyping} \ + -minIndelFrac ${default="0.25" min_indel_fraction_per_sample} \ + -onlyEmitSamples ${default="[]" onlyEmitSamples} \ + -o ${default="stdout" out} \ + -out_mode ${default="EMIT_VARIANTS_ONLY" output_mode} \ + -pairHMM ${default="LOGLESS_CACHING" pair_hmm_implementation} \ + -pcr_error ${default="1.0E-4" pcr_error_rate} \ + -ploidy ${default="2" sample_ploidy} \ + -stand_call_conf ${default="30.0" standard_min_confidence_threshold_for_calling} \ + -stand_emit_conf ${default="30.0" standard_min_confidence_threshold_for_emitting} \ + 
${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + alleles: "The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES" + allSitePLs: "Annotate all sites with PLs" + annotateNDA: "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site" + annotation: "One or more specific annotations to apply to variant calls" + comp: "Comparison VCF file" + computeSLOD: "If provided, we will calculate the SLOD (SB annotation)" + contamination_fraction_per_sample_file: "Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be (Contamination is double) per line; No header." + contamination_fraction_to_filter: "Fraction of contamination in sequencing data (for all samples) to aggressively remove" + dbsnp: "dbSNP file" + excludeAnnotation: "One or more specific annotations to exclude" + genotype_likelihoods_model: "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together" + genotyping_mode: "Specifies how to determine the alternate alleles to use for genotyping" + group: "One or more classes/groups of annotations to apply to variant calls. 
The single value 'none' removes the default group" + heterozygosity: "Heterozygosity value used to compute prior likelihoods for any locus" + indel_heterozygosity: "Heterozygosity for indel calling" + indelGapContinuationPenalty: "Indel gap continuation penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10" + indelGapOpenPenalty: "Indel gap open penalty, as Phred-scaled probability. I.e., 30 => 10^-30/10" + input_prior: "Input prior for calls" + max_alternate_alleles: "Maximum number of alternate alleles to genotype" + max_deletion_fraction: "Maximum fraction of reads with deletions spanning this locus for it to be callable" + max_num_PL_values: "Maximum number of PL values to output" + min_base_quality_score: "Minimum base quality required to consider a base for calling" + min_indel_count_for_genotyping: "Minimum number of consensus indels required to trigger genotyping run" + min_indel_fraction_per_sample: "Minimum fraction of all reads at a locus that must contain an indel (of any allele) for that sample to contribute to the indel count for alleles" + onlyEmitSamples: "If provided, only these samples will be emitted into the VCF, regardless of which samples are present in the BAM file" + out: "File to which variants should be written" + output_mode: "Specifies which type of calls we should output" + pair_hmm_implementation: "The PairHMM implementation to use for -glm INDEL genotype likelihood calculations" + pcr_error_rate: "The PCR error rate to be used for computing fragment-based likelihoods" + sample_ploidy: "Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)." 
+ standard_min_confidence_threshold_for_calling: "The minimum phred-scaled confidence threshold at which variants should be called" + standard_min_confidence_threshold_for_emitting: "The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold)" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + BQSR: "Input covariates table file for on-the-fly base quality score recalibration" + } +} + +workflow UnifiedGenotyperWf { + call UnifiedGenotyper +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ValidateVariants_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ValidateVariants_3.6.wdl new file mode 100644 index 0000000..69db401 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ValidateVariants_3.6.wdl @@ -0,0 +1,63 @@ +# -------------------------------------------------------------------------------------------- +# This ValidateVariants WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Validate a VCF file with an extra strict set of criteria +# -------------------------------------------------------------------------------------------- + +task ValidateVariants { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + String ? dbsnp + Boolean ? doNotValidateFilteredRecords + Boolean ? validateGVCF + Array[String] ? validationTypeToExclude + String variant + Boolean ? 
warnOnErrors + + command { + java -jar ${gatk} \ + -T ValidateVariants \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-D " + dbsnp} \ + -doNotValidateFilteredRecords ${default="false" doNotValidateFilteredRecords} \ + -gvcf ${default="false" validateGVCF} \ + -Xtype ${default="[]" validationTypeToExclude} \ + -V ${variant} \ + -warnOnErrors ${default="false" warnOnErrors} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + dbsnp: "dbSNP file" + doNotValidateFilteredRecords: "skip validation on filtered records" + validateGVCF: "Validate this file as a GVCF" + validationTypeToExclude: "which validation type to exclude from a full strict validation" + variant: "Input VCF file" + warnOnErrors: "just emit warnings on errors instead of terminating the run at the first instance" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow ValidateVariantsWf { + call ValidateVariants +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ValidationSiteSelector_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ValidationSiteSelector_3.6.wdl new file mode 100644 index 0000000..74b6174 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/ValidationSiteSelector_3.6.wdl @@ -0,0 +1,84 @@ +# -------------------------------------------------------------------------------------------- +# This ValidationSiteSelector WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using 
this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Randomly select variant records according to specified options +# -------------------------------------------------------------------------------------------- + +task ValidationSiteSelector { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + String ? frequencySelectionMode + Boolean ? ignoreGenotypes + Boolean ? ignorePolymorphicStatus + Boolean ? includeFilteredSites + Int numValidationSites + String ? out + String ? sample_expressions + String ? sample_file + String ? sample_name + String ? sampleMode + Float ? samplePNonref + Array[String] ? selectTypeToInclude + Array[String] variant + + command { + java -jar ${gatk} \ + -T ValidationSiteSelector \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -freqMode ${default="KEEP_AF_SPECTRUM" frequencySelectionMode} \ + -ignoreGenotypes ${default="false" ignoreGenotypes} \ + -ignorePolymorphicStatus ${default="false" ignorePolymorphicStatus} \ + -ifs ${default="false" includeFilteredSites} \ + -numSites ${numValidationSites} \ + -o ${default="stdout" out} \ + ${default="" "-se " + sample_expressions} \ + ${default="" "-sf " + sample_file} \ + -sn ${default="[]" sample_name} \ + -sampleMode ${default="NONE" sampleMode} \ + -samplePNonref ${default="0.99" samplePNonref} \ + -selectType ${default="[]" selectTypeToInclude} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of 
reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + frequencySelectionMode: "Allele Frequency selection mode" + ignoreGenotypes: "If true, will ignore genotypes in VCF, will take AC,AF from annotations and will make no sample selection" + ignorePolymorphicStatus: "If true, will ignore polymorphic status in VCF, and will take VCF record directly without pre-selection" + includeFilteredSites: "If true, will include filtered sites in set to choose variants from" + numValidationSites: "Number of output validation sites" + out: "File to which variants should be written" + sample_expressions: "Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times" + sample_file: "File containing a list of samples (one per line) to include. Can be specified multiple times" + sample_name: "Include genotypes from this sample. Can be specified multiple times" + sampleMode: "Sample selection mode" + samplePNonref: "GL-based selection mode only: the probability that a site is non-reference in the samples for which to include the site" + selectTypeToInclude: "Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. 
Can be specified multiple times" + variant: "Input VCF file, can be specified multiple times" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow ValidationSiteSelectorWf { + call ValidationSiteSelector +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantAnnotator_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantAnnotator_3.6.wdl new file mode 100644 index 0000000..d05a7fb --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantAnnotator_3.6.wdl @@ -0,0 +1,95 @@ +# -------------------------------------------------------------------------------------------- +# This VariantAnnotator WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Annotate variant calls with context information +# -------------------------------------------------------------------------------------------- + +task VariantAnnotator { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? input_file + Array[String] ? intervals + Int ? ntVal + Boolean ? alwaysAppendDbsnpId + Array[String] ? annotation + Array[String] ? comp + String ? dbsnp + Array[String] ? excludeAnnotation + String ? expression + Array[String] ? group + Boolean ? list + Float ? MendelViolationGenotypeQualityThreshold + String ? out + Array[String] ? resource + Boolean ? resourceAlleleConcordance + String ? snpEffFile + Boolean ? 
useAllAnnotations + String variant + + command { + java -jar ${gatk} \ + -T VariantAnnotator \ + -R ${ref} \ + ${default="" "--input_file " + input_file} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nt " + ntVal} \ + -alwaysAppendDbsnpId ${default="false" alwaysAppendDbsnpId} \ + -A ${default="[]" annotation} \ + -comp ${default="[]" comp} \ + ${default="" "-D " + dbsnp} \ + -XA ${default="[]" excludeAnnotation} \ + -E ${default="{}" expression} \ + -G ${default="[]" group} \ + -ls ${default="false" list} \ + -mvq ${default="0.0" MendelViolationGenotypeQualityThreshold} \ + -o ${default="stdout" out} \ + -resource ${default="[]" resource} \ + -rac ${default="false" resourceAlleleConcordance} \ + ${default="" "-snpEffFile " + snpEffFile} \ + -all ${default="false" useAllAnnotations} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + alwaysAppendDbsnpId: "Add dbSNP ID even if one is already present" + annotation: "One or more specific annotations to apply to variant calls" + comp: "Comparison VCF file" + dbsnp: "dbSNP file" + excludeAnnotation: "One or more specific annotations to exclude" + expression: "One or more specific expressions to apply to variant calls" + group: "One or more classes/groups of annotations to apply to variant calls" + list: "List the available annotations and exit" + MendelViolationGenotypeQualityThreshold: "GQ threshold for annotating MV ratio" + out: "File to which variants should be written" + resource: 
"External resource VCF file" + resourceAlleleConcordance: "Check for allele concordances when using an external resource VCF file" + snpEffFile: "SnpEff file from which to get annotations" + useAllAnnotations: "Use all possible annotations (not for the faint of heart)" + variant: "Input VCF file" + input_file: "Input file containing sequence data (BAM or CRAM)" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow VariantAnnotatorWf { + call VariantAnnotator +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantEval_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantEval_3.6.wdl new file mode 100644 index 0000000..b192bf7 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantEval_3.6.wdl @@ -0,0 +1,116 @@ +# -------------------------------------------------------------------------------------------- +# This VariantEval WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ti/Tv ratios, and a lot more) +# -------------------------------------------------------------------------------------------- + +task VariantEval { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Int ? ntVal + File ? ancestralAlignments + Array[String] ? comp + String ? dbsnp + Boolean ? doNotUseAllStandardModules + Boolean ? doNotUseAllStandardStratifications + Array[String] eval + String ? evalModule + String ? goldStandard + Boolean ? keepAC0 + String ? known_names + String ? knownCNVs + Boolean ? list + Float ? mendelianViolationQualThreshold + Boolean ? mergeEvals + Float ? minPhaseQuality + String ? 
out + Boolean ? requireStrictAlleleMatch + String ? sample + Int ? samplePloidy + Array[String] ? select_exps + Array[String] ? select_names + String ? stratificationModule + String ? stratIntervals + + command { + java -jar ${gatk} \ + -T VariantEval \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nt " + ntVal} \ + ${default="" "-aa " + ancestralAlignments} \ + -comp ${default="[]" comp} \ + ${default="" "-D " + dbsnp} \ + -noEV ${default="false" doNotUseAllStandardModules} \ + -noST ${default="false" doNotUseAllStandardStratifications} \ + -eval ${eval} \ + -EV ${default="[]" evalModule} \ + ${default="" "-gold " + goldStandard} \ + -keepAC0 ${default="false" keepAC0} \ + -knownName ${default="[]" known_names} \ + ${default="" "-knownCNVs " + knownCNVs} \ + -ls ${default="false" list} \ + -mvq ${default="50.0" mendelianViolationQualThreshold} \ + -mergeEvals ${default="false" mergeEvals} \ + -mpq ${default="10.0" minPhaseQuality} \ + -o ${default="stdout" out} \ + -strict ${default="false" requireStrictAlleleMatch} \ + ${default="" "-sn " + sample} \ + -ploidy ${default="2" samplePloidy} \ + -select ${default="[]" select_exps} \ + -selectName ${default="[]" select_names} \ + -ST ${default="[]" stratificationModule} \ + ${default="" "-stratIntervals " + stratIntervals} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + ancestralAlignments: "Fasta file with ancestral alleles" + comp: "Input comparison file(s)" + dbsnp: "dbSNP file" + 
doNotUseAllStandardModules: "Do not use the standard modules by default (instead, only those that are specified with the -EV option)" + doNotUseAllStandardStratifications: "Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)" + eval: "Input evaluation file(s)" + evalModule: "One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noEV is specified)" + goldStandard: "Evaluations that count calls at sites of true variation (e.g., indel calls) will use this argument as their gold standard for comparison" + keepAC0: "If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes" + known_names: "Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets" + knownCNVs: "File containing tribble-readable features describing a known list of copy number variants" + list: "List the available eval modules and exit" + mendelianViolationQualThreshold: "Minimum genotype QUAL score for each trio member required to accept a site as a violation. Default is 50." + mergeEvals: "If provided, all -eval tracks will be merged into a single eval track" + minPhaseQuality: "Minimum phasing quality" + out: "An output file created by the walker. 
Will overwrite contents if file exists" + requireStrictAlleleMatch: "If provided only comp and eval tracks with exactly matching reference and alternate alleles will be counted as overlapping" + sample: "Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context" + samplePloidy: "Per-sample ploidy (number of chromosomes per sample)" + select_exps: "One or more stratifications to use when evaluating the data" + select_names: "Names to use for the list of stratifications (must be a 1-to-1 mapping)" + stratificationModule: "One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noS is specified)" + stratIntervals: "File containing tribble-readable features for the IntervalStratificiation" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow VariantEvalWf { + call VariantEval +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantFiltration_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantFiltration_3.6.wdl new file mode 100644 index 0000000..5cf5f12 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantFiltration_3.6.wdl @@ -0,0 +1,96 @@ +# -------------------------------------------------------------------------------------------- +# This VariantFiltration WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Filter variant calls based on INFO and FORMAT annotations +# -------------------------------------------------------------------------------------------- + +task VariantFiltration { + File gatk + File ref + File refIndex + File refDict + String ? 
userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Int ? clusterSize + Int ? clusterWindowSize + Array[String] ? filterExpression + Array[String] ? filterName + Boolean ? filterNotInMask + Array[String] ? genotypeFilterExpression + Array[String] ? genotypeFilterName + Boolean ? invalidatePreviousFilters + Boolean ? invertFilterExpression + Boolean ? invertGenotypeFilterExpression + String ? mask + Int ? maskExtension + String ? maskName + Boolean ? missingValuesInExpressionsShouldEvaluateAsFailing + String ? out + Boolean ? setFilteredGtToNocall + String variant + + command { + java -jar ${gatk} \ + -T VariantFiltration \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -cluster ${default="3" clusterSize} \ + -window ${default="0" clusterWindowSize} \ + -filter ${default="[]" filterExpression} \ + -filterName ${default="[]" filterName} \ + ${true="-filterNotInMask" false="" filterNotInMask} \ + -G_filter ${default="[]" genotypeFilterExpression} \ + -G_filterName ${default="[]" genotypeFilterName} \ + ${true="--invalidatePreviousFilters" false="" invalidatePreviousFilters} \ + ${true="-invfilter" false="" invertFilterExpression} \ + ${true="-invG_filter" false="" invertGenotypeFilterExpression} \ + ${default="" "-mask " + mask} \ + -maskExtend ${default="0" maskExtension} \ + -maskName ${default="Mask" maskName} \ + ${true="--missingValuesInExpressionsShouldEvaluateAsFailing" false="" missingValuesInExpressionsShouldEvaluateAsFailing} \ + -o ${default="stdout" out} \ + ${true="--setFilteredGtToNocall" false="" setFilteredGtToNocall} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" #Echoes the out input as a string + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta
file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + clusterSize: "The number of SNPs which make up a cluster" + clusterWindowSize: "The window size (in bases) in which to evaluate clustered SNPs" + filterExpression: "One or more expression used with INFO fields to filter" + filterName: "Names to use for the list of filters" + filterNotInMask: "Filter records NOT in given input mask." + genotypeFilterExpression: "One or more expression used with FORMAT (sample/genotype-level) fields to filter (see documentation guide for more info)" + genotypeFilterName: "Names to use for the list of sample/genotype filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered" + invalidatePreviousFilters: "Remove previous filters applied to the VCF" + invertFilterExpression: "Invert the selection criteria for --filterExpression" + invertGenotypeFilterExpression: "Invert the selection criteria for --genotypeFilterExpression" + mask: "Input ROD mask" + maskExtension: "How many bases beyond records from a provided 'mask' rod should variants be filtered" + maskName: "The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call" + missingValuesInExpressionsShouldEvaluateAsFailing: "When evaluating the JEXL expressions, missing values should be considered failing the expression" + out: "File to which variants should be written" + setFilteredGtToNocall: "Set filtered genotypes to no-call" + variant: "Input VCF file" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow VariantFiltrationWf { + call VariantFiltration +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantRecalibrator_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantRecalibrator_3.6.wdl new file mode 100644 index 
0000000..524e914 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantRecalibrator_3.6.wdl @@ -0,0 +1,134 @@ +# -------------------------------------------------------------------------------------------- +# This VariantRecalibrator WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Build a recalibration model to score variant quality for filtering purposes +# -------------------------------------------------------------------------------------------- + +task VariantRecalibrator { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Int ? ntVal + Array[String] ? aggregate + Float ? badLodCutoff + Float ? dirichlet + Boolean ? ignore_all_filters + Array[String] ? ignore_filter + Array[String] task_input + Int ? max_attempts + Int ? maxGaussians + Int ? maxIterations + Int ? maxNegativeGaussians + Int ? maxNumTrainingData + Int ? minNumBadVariants + String mode + String ? model_file + Int ? MQCapForLogitJitterTransform + Int ? numKMeans + Boolean ? output_model + Float ? priorCounts + String recal_file + Array[String] resource + File ? rscript_file + Float ? shrinkage + Float ? stdThreshold + Float ? target_titv + File tranches_file + Boolean ? trustAllPolymorphic + Array[Float] ? TStranche + Array[String] use_annotation + Boolean ? 
useAlleleSpecificAnnotations + + command { + java -jar ${gatk} \ + -T VariantRecalibrator \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-nt " + ntVal} \ + ${default="" "-aggregate " + aggregate} \ + -badLodCutoff ${default="-5.0" badLodCutoff} \ + -dirichlet ${default="0.001" dirichlet} \ + ${true="-ignoreAllFilters" false="" ignore_all_filters} \ + -ignoreFilter ${default="[]" ignore_filter} \ + -input ${sep=" -input " task_input} \ + -max_attempts ${default="1" max_attempts} \ + -mG ${default="8" maxGaussians} \ + -mI ${default="150" maxIterations} \ + -mNG ${default="2" maxNegativeGaussians} \ + -maxNumTrainingData ${default="2500000" maxNumTrainingData} \ + -minNumBad ${default="1000" minNumBadVariants} \ + -mode ${mode} \ + -modelFile ${default="stdout" model_file} \ + -MQCap ${default="0" MQCapForLogitJitterTransform} \ + -nKM ${default="100" numKMeans} \ + ${true="-outputModel" false="" output_model} \ + -priorCounts ${default="20.0" priorCounts} \ + -recalFile ${recal_file} \ + -resource ${sep=" -resource " resource} \ + ${default="" "-rscriptFile " + rscript_file} \ + -shrinkage ${default="1.0" shrinkage} \ + -std ${default="10.0" stdThreshold} \ + -titv ${default="2.15" target_titv} \ + -tranchesFile ${tranches_file} \ + ${true="-allPoly" false="" trustAllPolymorphic} \ + -tranche ${default="[100.0, 99.9, 99.0, 90.0]" TStranche} \ + -an ${sep=" -an " use_annotation} \ + ${true="-AS" false="" useAlleleSpecificAnnotations} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${recal_file}" #Path given to -recalFile; this task declares no out input + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" +
aggregate: "Additional raw input variants to be used in building the model" + badLodCutoff: "LOD score cutoff for selecting bad variants" + dirichlet: "The dirichlet parameter in the variational Bayes algorithm." + ignore_all_filters: "If specified, the variant recalibrator will ignore all input filters. Useful to rerun the VQSR from a filtered output file." + ignore_filter: "If specified, the variant recalibrator will also use variants marked as filtered by the specified filter name in the input VCF file" + task_input: "One or more VCFs of raw input variants to be recalibrated" + max_attempts: "Number of attempts to build a model before failing" + maxGaussians: "Max number of Gaussians for the positive model" + maxIterations: "Maximum number of VBEM iterations" + maxNegativeGaussians: "Max number of Gaussians for the negative model" + maxNumTrainingData: "Maximum number of training data" + minNumBadVariants: "Minimum number of bad variants" + mode: "Recalibration mode to employ" + model_file: "A GATKReport containing the positive and negative model fits" + MQCapForLogitJitterTransform: "Apply logit transform and jitter to MQ values" + numKMeans: "Number of k-means iterations" + output_model: "If specified, the variant recalibrator will output the VQSR model fit to the file specified by -modelFile or to stdout" + priorCounts: "The number of prior counts to use in the variational Bayes algorithm." + recal_file: "The output recal file used by ApplyRecalibration" + resource: "A list of sites for which to apply a prior probability of being correct but which aren't used by the algorithm (training and truth sets are required to run)" + rscript_file: "The output rscript file generated by the VQSR to aid in visualization of the input data and learned model" + shrinkage: "The shrinkage parameter in the variational Bayes algorithm." 
+ stdThreshold: "Annotation value divergence threshold (number of standard deviations from the means) " + target_titv: "The expected novel Ti/Tv ratio to use when calculating FDR tranches and for display on the optimization curve output figures. (approx 2.15 for whole genome experiments). ONLY USED FOR PLOTTING PURPOSES!" + tranches_file: "The output tranches file used by ApplyRecalibration" + trustAllPolymorphic: "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation." + TStranche: "The levels of truth sensitivity at which to slice the data. (in percent, that is 1.0 for 1 percent)" + use_annotation: "The names of the annotations which should used for calculations" + useAlleleSpecificAnnotations: "If specified, the variant recalibrator will attempt to use the allele-specific versions of the specified annotations." + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow VariantRecalibratorWf { + call VariantRecalibrator +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToAllelicPrimitives_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToAllelicPrimitives_3.6.wdl new file mode 100644 index 0000000..013e655 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToAllelicPrimitives_3.6.wdl @@ -0,0 +1,51 @@ +# -------------------------------------------------------------------------------------------- +# This VariantsToAllelicPrimitives WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Simplify multi-nucleotide variants (MNPs) into more basic/primitive alleles. 
+# -------------------------------------------------------------------------------------------- + +task VariantsToAllelicPrimitives { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals #Optional; appended after "--intervals " in the command, empty string otherwise + String ? out #Optional; the command passes the literal "stdout" to -o when this is unset + String variant #Required; passed to -V + + command { + java -jar ${gatk} \ + -T VariantsToAllelicPrimitives \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -o ${default="stdout" out} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" #Echoes the out input as a string + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + out: "File to which variants should be written" + variant: "Input VCF file" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow VariantsToAllelicPrimitivesWf { + call VariantsToAllelicPrimitives +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToBinaryPed_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToBinaryPed_3.6.wdl new file mode 100644 index 0000000..612c6a6 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToBinaryPed_3.6.wdl @@ -0,0 +1,75 @@ +# -------------------------------------------------------------------------------------------- +# This VariantsToBinaryPed WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md
+# Task Summary: Convert VCF to binary pedigree file +# -------------------------------------------------------------------------------------------- + +task VariantsToBinaryPed { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + String bed + String bim + Boolean ? checkAlternateAlleles #Flag; emitted as --checkAlternateAlleles only when true + String ? dbsnp + String fam + Boolean ? majorAlleleFirst #Flag; emitted as --majorAlleleFirst only when true + File metaData + Int minGenotypeQuality + String ? outputMode + String variant + + command { + java -jar ${gatk} \ + -T VariantsToBinaryPed \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + -bed ${bed} \ + -bim ${bim} \ + ${true="--checkAlternateAlleles" false="" checkAlternateAlleles} \ + ${default="" "-D " + dbsnp} \ + -fam ${fam} \ + ${true="--majorAlleleFirst" false="" majorAlleleFirst} \ + -m ${metaData} \ + -mgq ${minGenotypeQuality} \ + -mode ${default="INDIVIDUAL_MAJOR" outputMode} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${bed}" #Path given to -bed; this task declares no out input + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + bed: "output bed file" + bim: "output map file" + checkAlternateAlleles: "Checks that alternate alleles actually appear in samples, erroring out if they do not" + dbsnp: "dbSNP file" + fam: "output fam file" + majorAlleleFirst: "Sets the major allele to be 'reference' for the bim file, rather than the ref allele" + metaData: "Sample metadata file" + minGenotypeQuality: "If genotype quality is lower than this value,
output NO_CALL" + outputMode: "The output file mode (SNP major or individual major)" + variant: "Input VCF file" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow VariantsToBinaryPedWf { + call VariantsToBinaryPed +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToTable_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToTable_3.6.wdl new file mode 100644 index 0000000..f64f662 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToTable_3.6.wdl @@ -0,0 +1,72 @@ +# -------------------------------------------------------------------------------------------- +# This VariantsToTable WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Extract specific fields from a VCF file to a tab-delimited table +# -------------------------------------------------------------------------------------------- + +task VariantsToTable { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals + Boolean ? allowMissingData + Array[String] ? fields + Array[String] ? genotypeFields + Int ? maxRecords + Boolean ? moltenize + String ? out + Boolean ? showFiltered + Boolean ? 
splitMultiAllelic + Array[String] variant #Required; each element is emitted with its own -V + + command { + java -jar ${gatk} \ + -T VariantsToTable \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + ${true="-AMD" false="" allowMissingData} \ + -F ${default="[]" fields} \ + -GF ${default="[]" genotypeFields} \ + -M ${default="-1" maxRecords} \ + ${true="-moltenize" false="" moltenize} \ + -o ${default="stdout" out} \ + ${true="-raw" false="" showFiltered} \ + ${true="-SMA" false="" splitMultiAllelic} \ + -V ${sep=" -V " variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + allowMissingData: "If provided, we will not require every record to contain every field" + fields: "The name of each field to capture for output in the table" + genotypeFields: "The name of each genotype field to capture for output in the table" + maxRecords: "If provided, we will emit at most maxRecord records to the table" + moltenize: "If provided, we will produce molten output" + out: "File to which results should be written" + showFiltered: "If provided, field values from filtered records will be included in the output" + splitMultiAllelic: "If provided, we will split multi-allelic records into multiple lines of output" + variant: "Input VCF file" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow VariantsToTableWf { + call VariantsToTable +} diff --git a/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToVCF_3.6.wdl b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToVCF_3.6.wdl new file mode
100644 index 0000000..9e4ece2 --- /dev/null +++ b/scripts/wrappers/gatk/GATKToolWorkflows_3.6/VariantsToVCF_3.6.wdl @@ -0,0 +1,57 @@ +# -------------------------------------------------------------------------------------------- +# This VariantsToVCF WDL task was generated on 10/04/16 for use with GATK version 3.6 +# For more information on using this wrapper, please see the WDL repository at +# https://github.com/broadinstitute/wdl/tree/develop/scripts/wrappers/gatk/README.md +# Task Summary: Convert variants from other file formats to VCF format +# -------------------------------------------------------------------------------------------- + +task VariantsToVCF { + File gatk + File ref + File refIndex + File refDict + String ? userString #If a parameter you'd like to use is missing from this task, use this term to add your own string + Array[String] ? intervals #Optional; appended after "--intervals " in the command, empty string otherwise + String ? dbsnp #Optional; appended after "-D " in the command, empty string otherwise + String ? out #Optional; the command passes the literal "stdout" to -o when this is unset + String ? sample #Optional; appended after "-sample " in the command, empty string otherwise + String variant #Required; passed to -V + + command { + java -jar ${gatk} \ + -T VariantsToVCF \ + -R ${ref} \ + ${default="" "--intervals " + intervals} \ + ${default="" "-D " + dbsnp} \ + -o ${default="stdout" out} \ + ${default="" "-sample " + sample} \ + -V ${variant} \ + ${default="\n" userString} + } + + output { + #To track additional outputs from your task, please manually add them below + String taskOut = "${out}" #Echoes the out input as a string + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.2-1466113830" + } + + parameter_meta { + gatk: "Executable jar for the GenomeAnalysisTK" + ref: "fasta file of reference genome" + refIndex: "Index file of reference genome" + refDict: "dict file of reference genome" + userString: "An optional parameter which allows the user to specify additions to the command line at run time" + dbsnp: "dbSNP file" + out: "File to which variants should be written" + sample: "The sample name represented by the variant rod" + variant: "Input variant file" + intervals: "One or more genomic intervals over which to operate" + } +} + +workflow VariantsToVCFWf { +
call VariantsToVCF +}