Permalink
...
Checking mergeability…
Don’t worry, you can still create the pull request.
Comparing changes
Open a pull request
- 4 commits
- 18 files changed
- 0 commit comments
- 1 contributor
Unified
Split
Showing
with
2,069 additions
and 0 deletions.
- +7 −0 scripts/other/GrabSamHeaderFromBams_160901.inputs.json
- +63 −0 scripts/other/GrabSamHeaderFromBams_160901.wdl
- +7 −0 scripts/other/PairedFastQFromBams_160901.inputs.json
- +78 −0 scripts/other/PairedFastQFromBams_160901.wdl
- +9 −0 scripts/other/ValidateBams_160902.inputs.json
- +69 −0 scripts/other/ValidateBams_160902.wdl
- +71 −0 scripts/other/WGS_HC_GVCF_160901.inputs.json
- +133 −0 scripts/other/WGS_HC_GVCF_160901.wdl
- +70 −0 scripts/other/WGS_Joint_Analysis_160909.inputs.json
- +182 −0 scripts/other/WGS_Joint_Analysis_160909.wdl
- +98 −0 scripts/other/WGS_PE_SingleSample_LegacyRef_160901.inputs.json
- +746 −0 scripts/other/WGS_PE_SingleSample_LegacyRef_160901.wdl
- +47 −0 scripts/other/WGS_VQSR_160909.inputs.json
- +214 −0 scripts/other/WGS_VQSR_160909.wdl
- +6 −0 scripts/other/generic.options.json
- +30 −0 scripts/other/uBamFromPairedFastQ_160902.inputs.json
- +90 −0 scripts/other/uBamFromPairedFastQ_160902.wdl
- +149 −0 scripts/utilities/create_scatter_intervals.py
View
7
scripts/other/GrabSamHeaderFromBams_160901.inputs.json
| @@ -0,0 +1,7 @@ | ||
| +{ | ||
| + "GrabSamHeaderFromBams.bam_list": [ | ||
| + "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.1.ATCACGAT.20k_reads.bam", | ||
| + "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.2.ATCACGAT.20k_reads.bam", | ||
| + "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06JUADXX130110.1.ATCACGAT.20k_reads.bam" | ||
| + ] | ||
| +} |
View
63
scripts/other/GrabSamHeaderFromBams_160901.wdl
| @@ -0,0 +1,63 @@ | ||
| +## Copyright Broad Institute, 2016 | ||
| +## | ||
| +## This WDL grabs the headers from a list of BAMs | ||
| +## | ||
| +## Requirements/expectations : | ||
| +## - List of valid BAM files | ||
| +## | ||
| +## Runtime parameters are optimized for Broad's Google Cloud Platform implementation. | ||
| +## For program versions, see docker containers. | ||
| +## | ||
| +## LICENSING : | ||
| +## This script is released under the WDL source code license (BSD-3) (see LICENSE in | ||
| +## https://github.com/broadinstitute/wdl). Note however that the programs it calls may | ||
| +## be subject to different licenses. Users are responsible for checking that they are | ||
| +## authorized to run all programs before running this script. Please see the docker | ||
| +## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed | ||
| +## licensing information pertaining to the included programs. | ||
| + | ||
| +# TASK DEFINITIONS | ||
| + | ||
| +# Write only the SAM header of one BAM to a text file, using `samtools view -H` | ||
| +task GrabSAMHeader { | ||
| +  # BAM whose header is extracted | ||
| +  File bam_file | ||
| +  # Output file name without the ".txt" extension | ||
| +  String output_basename | ||
| + | ||
| +  command { | ||
| +    samtools view -H ${bam_file} > ${output_basename}.txt | ||
| +  } | ||
| +  runtime { | ||
| +    docker: "broadinstitute/genomes-in-the-cloud:2.2.3-1469027018" | ||
| +    memory: "1 GB" | ||
| +    cpu: "1" | ||
| +    disks: "local-disk 200 HDD" | ||
| +  } | ||
| +  output { | ||
| +    # Despite its name, this output is the header text file written by the command above | ||
| +    File output_bam = output_basename + ".txt" | ||
| +  } | ||
| +} | ||
| + | ||
| +# WORKFLOW DEFINITION | ||
| +workflow GrabSamHeaderFromBams { | ||
| +  # BAM files whose headers are extracted | ||
| +  Array[File] bam_list | ||
| + | ||
| +  # Handle every BAM in the list in parallel | ||
| +  scatter (input_bam in bam_list) { | ||
| + | ||
| +    # Regex patterns that remove the gs:// directory prefix and the trailing .bam | ||
| +    String sub_strip_path = "gs://.*/" | ||
| +    String sub_strip_suffix = ".bam$" | ||
| +    String bam_basename = sub(sub(input_bam, sub_strip_path, ""), sub_strip_suffix, "") | ||
| + | ||
| +    # Extract the header of this BAM; the task appends ".txt" to output_basename | ||
| +    call GrabSAMHeader { | ||
| +      input: | ||
| +        bam_file = input_bam, | ||
| +        output_basename = bam_basename + ".header" | ||
| +    } | ||
| +  } | ||
| + | ||
| +  # Outputs that will be retained when execution is complete | ||
| +  output { | ||
| +    GrabSAMHeader.* | ||
| +  } | ||
| +} | ||
| + |
View
7
scripts/other/PairedFastQFromBams_160901.inputs.json
| @@ -0,0 +1,7 @@ | ||
| +{ | ||
| + "PairedFastQFromBams.bam_list": [ | ||
| + "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.1.ATCACGAT.20k_reads.bam", | ||
| + "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.2.ATCACGAT.20k_reads.bam", | ||
| + "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06JUADXX130110.1.ATCACGAT.20k_reads.bam" | ||
| + ] | ||
| +} |
View
78
scripts/other/PairedFastQFromBams_160901.wdl
| @@ -0,0 +1,78 @@ | ||
| +## Copyright Broad Institute, 2016 | ||
| +## | ||
| +## This WDL converts a list of BAMs to pairs of FASTQs | ||
| +## | ||
| +## Requirements/expectations : | ||
| +## - List of valid BAM files | ||
| +## - Max one read group per BAM file. If there are more, the distinctions will be lost. | ||
| +## | ||
| +## Runtime parameters are optimized for Broad's Google Cloud Platform implementation. | ||
| +## For program versions, see docker containers. | ||
| +## | ||
| +## LICENSING : | ||
| +## This script is released under the WDL source code license (BSD-3) (see LICENSE in | ||
| +## https://github.com/broadinstitute/wdl). Note however that the programs it calls may | ||
| +## be subject to different licenses. Users are responsible for checking that they are | ||
| +## authorized to run all programs before running this script. Please see the docker | ||
| +## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed | ||
| +## licensing information pertaining to the included programs. | ||
| + | ||
| +# TASK DEFINITIONS | ||
| + | ||
| +# Convert one BAM into FASTQ files with Picard SamToFastq | ||
| +task PairedFastQFromBam { | ||
| +  # BAM to convert | ||
| +  File bam_file | ||
| +  # File name passed to FASTQ= | ||
| +  String fastq_1 | ||
| +  # File name passed to SECOND_END_FASTQ= | ||
| +  String fastq_2 | ||
| +  # File name passed to UNPAIRED_FASTQ= | ||
| +  String unpaired | ||
| + | ||
| +  command { | ||
| +    java -Xmx3000m -jar /usr/gitc/picard.jar \ | ||
| +      SamToFastq \ | ||
| +      I=${bam_file} \ | ||
| +      FASTQ=${fastq_1} \ | ||
| +      SECOND_END_FASTQ=${fastq_2} \ | ||
| +      UNPAIRED_FASTQ=${unpaired} \ | ||
| +      INCLUDE_NON_PRIMARY_ALIGNMENTS=true \ | ||
| +      INCLUDE_NON_PF_READS=true | ||
| +  } | ||
| +  runtime { | ||
| +    docker: "broadinstitute/genomes-in-the-cloud:2.2.3-1469027018" | ||
| +    memory: "3500 MB" | ||
| +    cpu: "1" | ||
| +    disks: "local-disk 200 HDD" | ||
| +  } | ||
| +  output { | ||
| +    # The three files named by the inputs above | ||
| +    File out_fastq_1 = fastq_1 | ||
| +    File out_fastq_2 = fastq_2 | ||
| +    File out_unpaired = unpaired | ||
| +  } | ||
| +} | ||
| + | ||
| +# WORKFLOW DEFINITION | ||
| +workflow PairedFastQFromBams { | ||
| +  # BAM files to convert to FASTQ | ||
| +  Array[File] bam_list | ||
| + | ||
| +  # Convert every BAM in the list in parallel | ||
| +  scatter (input_bam in bam_list) { | ||
| + | ||
| +    # Regex patterns that remove the gs:// directory prefix and the trailing .bam | ||
| +    String sub_strip_path = "gs://.*/" | ||
| +    String sub_strip_suffix = ".bam$" | ||
| +    # A bare name used only to build output file names, so it is a String, not a File | ||
| +    String output_basename = sub(sub(input_bam, sub_strip_path, ""), sub_strip_suffix, "") | ||
| + | ||
| +    # Convert this BAM to first-end, second-end and unpaired FASTQ files | ||
| +    call PairedFastQFromBam { | ||
| +      input: | ||
| +        bam_file = input_bam, | ||
| +        fastq_1 = output_basename + "_1.fastq", | ||
| +        fastq_2 = output_basename + "_2.fastq", | ||
| +        unpaired = output_basename + "_up.fastq" | ||
| +    } | ||
| +  } | ||
| + | ||
| +  # Outputs that will be retained when execution is complete | ||
| +  output { | ||
| +    PairedFastQFromBam.* | ||
| +  } | ||
| +} | ||
| + |
View
9
scripts/other/ValidateBams_160902.inputs.json
| @@ -0,0 +1,9 @@ | ||
| +{ | ||
| + "ValidateBAMs.bam_list": [ | ||
| + "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.1.ATCACGAT.20k_reads.bam", | ||
| + "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.2.ATCACGAT.20k_reads.bam", | ||
| + "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06JUADXX130110.1.ATCACGAT.20k_reads.bam" | ||
| + ], | ||
| + | ||
| + "ValidateBAMs.disk_size": 200 | ||
| +} |
View
69
scripts/other/ValidateBams_160902.wdl
| @@ -0,0 +1,69 @@ | ||
| +## Copyright Broad Institute, 2016 | ||
| +## | ||
| +## This WDL validates a list of BAMs in SUMMARY mode | ||
| +## | ||
| +## Requirements/expectations : | ||
| +## - List of BAM files to validate | ||
| +## | ||
| +## Runtime parameters are optimized for Broad's Google Cloud Platform implementation. | ||
| +## For program versions, see docker containers. | ||
| +## | ||
| +## LICENSING : | ||
| +## This script is released under the WDL source code license (BSD-3) (see LICENSE in | ||
| +## https://github.com/broadinstitute/wdl). Note however that the programs it calls may | ||
| +## be subject to different licenses. Users are responsible for checking that they are | ||
| +## authorized to run all programs before running this script. Please see the docker | ||
| +## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed | ||
| +## licensing information pertaining to the included programs. | ||
| + | ||
| +# TASK DEFINITIONS | ||
| + | ||
| +# Validate one BAM with Picard ValidateSamFile in SUMMARY mode | ||
| +task ValidateBAM { | ||
| +  # BAM to validate | ||
| +  File bam_file | ||
| +  # Name of the report file without the ".txt" extension | ||
| +  String output_basename | ||
| +  # Size of the local disk requested in the runtime block | ||
| +  Int disk_size | ||
| + | ||
| +  command { | ||
| +    java -Xmx3000m -jar /usr/gitc/picard.jar \ | ||
| +      ValidateSamFile \ | ||
| +      I=${bam_file} \ | ||
| +      OUTPUT=${output_basename}.txt \ | ||
| +      MODE=SUMMARY | ||
| +  } | ||
| +  runtime { | ||
| +    docker: "broadinstitute/genomes-in-the-cloud:2.2.3-1469027018" | ||
| +    # Must be larger than the 3000 MB JVM heap requested by -Xmx3000m in the command | ||
| +    memory: "3500 MB" | ||
| +    cpu: "1" | ||
| +    disks: "local-disk " + disk_size + " HDD" | ||
| +  } | ||
| +  output { | ||
| +    # Despite its name, this output is the validation report written to OUTPUT= | ||
| +    File output_bam = "${output_basename}.txt" | ||
| +  } | ||
| +} | ||
| + | ||
| +# WORKFLOW DEFINITION | ||
| +workflow ValidateBAMs { | ||
| +  # BAM files to validate | ||
| +  Array[File] bam_list | ||
| +  # Disk size handed to every ValidateBAM call | ||
| +  Int disk_size | ||
| + | ||
| +  # Validate every BAM in the list in parallel | ||
| +  scatter (input_bam in bam_list) { | ||
| + | ||
| +    # Regex patterns that remove the gs:// directory prefix and the trailing .bam | ||
| +    String sub_strip_path = "gs://.*/" | ||
| +    String sub_strip_suffix = ".bam$" | ||
| + | ||
| +    # Validate this BAM; the task appends ".txt" to output_basename | ||
| +    call ValidateBAM { | ||
| +      input: | ||
| +        bam_file = input_bam, | ||
| +        output_basename = sub(sub(input_bam, sub_strip_path, ""), sub_strip_suffix, "") + ".validation", | ||
| +        # The task declares disk_size without a default, so forward the workflow-level value | ||
| +        disk_size = disk_size | ||
| +    } | ||
| +  } | ||
| + | ||
| +  # Outputs that will be retained when execution is complete | ||
| +  output { | ||
| +    ValidateBAM.* | ||
| +  } | ||
| +} | ||
| + |
View
71
scripts/other/WGS_HC_GVCF_160901.inputs.json
| @@ -0,0 +1,71 @@ | ||
| +{ | ||
| + "ScatterHaplotypeCaller.sample_basename": "NA12878", | ||
| + "ScatterHaplotypeCaller.input_bam": "gs://dsde-comms-resources/bams_wgs/NA12878.bam", | ||
| + "ScatterHaplotypeCaller.input_bam_index": "gs://dsde-comms-resources/bams_wgs/NA12878.bai", | ||
| + | ||
| + "ScatterHaplotypeCaller.ref_dict": "gs://dsde-comms-resources/legacy_bundles/b37/human_g1k_v37_decoy.dict", | ||
| + "ScatterHaplotypeCaller.ref_fasta": "gs://dsde-comms-resources/legacy_bundles/b37/human_g1k_v37_decoy.fasta", | ||
| + "ScatterHaplotypeCaller.ref_fasta_index": "gs://dsde-comms-resources/legacy_bundles/b37/human_g1k_v37_decoy.fasta.fai", | ||
| + | ||
| + "ScatterHaplotypeCaller.scattered_intervals": [ | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0001_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0002_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0003_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0004_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0005_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0006_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0007_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0008_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0009_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0010_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0011_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0012_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0013_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0014_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0015_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0016_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0017_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0018_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0019_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0020_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0021_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0022_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0023_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0024_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0025_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0026_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0027_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0028_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0029_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0030_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0031_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0032_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0033_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0034_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0035_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0036_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0037_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0038_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0039_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0040_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0041_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0042_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0043_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0044_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0045_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0046_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0047_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0048_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0049_of_50/scattered.interval_list", | ||
| + "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0050_of_50/scattered.interval_list" | ||
| + ], | ||
| + | ||
| + "##_COMMENT5": "DISK SIZES + PREEMPTIBLES", | ||
| + "ScatterHaplotypeCaller.agg_small_disk": 200, | ||
| + "ScatterHaplotypeCaller.agg_medium_disk": 300, | ||
| + "ScatterHaplotypeCaller.agg_large_disk": 400, | ||
| + "ScatterHaplotypeCaller.agg_preemptible_tries": 0, | ||
| + "ScatterHaplotypeCaller.flowcell_small_disk": 200, | ||
| + "ScatterHaplotypeCaller.flowcell_medium_disk": 300, | ||
| + "ScatterHaplotypeCaller.preemptible_tries": 0 | ||
| +} |
Oops, something went wrong.