Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
...
Checking mergeability… Don’t worry, you can still create the pull request.
  • 4 commits
  • 18 files changed
  • 0 commit comments
  • 1 contributor
@@ -0,0 +1,7 @@
+{
+ "GrabSamHeaderFromBams.bam_list": [
+ "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.1.ATCACGAT.20k_reads.bam",
+ "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.2.ATCACGAT.20k_reads.bam",
+ "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06JUADXX130110.1.ATCACGAT.20k_reads.bam"
+ ]
+}
@@ -0,0 +1,63 @@
+## Copyright Broad Institute, 2016
+##
+## This WDL grabs the headers from a list of BAMs
+##
+## Requirements/expectations :
+## - List of valid BAM files
+##
+## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
+## For program versions, see docker containers.
+##
+## LICENSING :
+## This script is released under the WDL source code license (BSD-3) (see LICENSE in
+## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
+## be subject to different licenses. Users are responsible for checking that they are
+## authorized to run all programs before running this script. Please see the docker
+## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
+## licensing information pertaining to the included programs.
+
+# TASK DEFINITIONS
+
+# Write the SAM header of a single BAM to <output_basename>.txt (samtools view -H)
+task GrabSAMHeader {
+ File bam_file
+ String output_basename
+
+ command {
+ samtools view -H ${bam_file} > ${output_basename}.txt
+ }
+ runtime {
+ docker: "broadinstitute/genomes-in-the-cloud:2.2.3-1469027018"
+ cpu: "1"
+ memory: "1 GB"
+ disks: "local-disk 200 HDD"
+ }
+ output {
+ # Despite the name, this is the header text file written by the command above
+ File output_bam = output_basename + ".txt"
+ }
+}
+
+# WORKFLOW DEFINITION
+workflow GrabSamHeaderFromBams {
+ Array[File] bam_list
+
+ # Extract the header of every BAM in bam_list, one scatter shard per BAM
+ scatter (input_bam in bam_list) {
+
+ String sub_strip_path = "gs://.*/"
+ String sub_strip_suffix = ".bam$"
+
+ # Output name = BAM path with the gs:// directory prefix and the .bam suffix stripped, plus ".header"
+ call GrabSAMHeader {
+ input:
+ bam_file = input_bam,
+ output_basename = sub(sub(input_bam, sub_strip_path, ""), sub_strip_suffix, "") + ".header"
+ }
+ }
+
+ # Outputs that will be retained when execution is complete (all GrabSAMHeader outputs)
+ output {
+ GrabSAMHeader.*
+ }
+}
+
@@ -0,0 +1,7 @@
+{
+ "PairedFastQFromBams.bam_list": [
+ "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.1.ATCACGAT.20k_reads.bam",
+ "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.2.ATCACGAT.20k_reads.bam",
+ "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06JUADXX130110.1.ATCACGAT.20k_reads.bam"
+ ]
+}
@@ -0,0 +1,78 @@
+## Copyright Broad Institute, 2016
+##
+## This WDL converts a list of BAMs to pairs of FASTQs
+##
+## Requirements/expectations :
+## - List of valid BAM files
+## - Max one read group per BAM file. If there are more, the distinctions will be lost.
+##
+## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
+## For program versions, see docker containers.
+##
+## LICENSING :
+## This script is released under the WDL source code license (BSD-3) (see LICENSE in
+## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
+## be subject to different licenses. Users are responsible for checking that they are
+## authorized to run all programs before running this script. Please see the docker
+## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
+## licensing information pertaining to the included programs.
+
+# TASK DEFINITIONS
+
+# Convert one BAM back to FASTQ with Picard SamToFastq:
+# first-of-pair, second-of-pair and unpaired reads each go to their own file
+task PairedFastQFromBam {
+ File bam_file
+ String fastq_1
+ String fastq_2
+ String unpaired
+
+ command {
+ java -Xmx3000m -jar /usr/gitc/picard.jar \
+ SamToFastq \
+ I=${bam_file} \
+ FASTQ=${fastq_1} \
+ SECOND_END_FASTQ=${fastq_2} \
+ UNPAIRED_FASTQ=${unpaired} \
+ INCLUDE_NON_PRIMARY_ALIGNMENTS=true \
+ INCLUDE_NON_PF_READS=true
+ }
+ runtime {
+ docker: "broadinstitute/genomes-in-the-cloud:2.2.3-1469027018"
+ cpu: "1"
+ # Leaves headroom above the 3000 MB JVM heap requested in the command
+ memory: "3500 MB"
+ disks: "local-disk 200 HDD"
+ }
+ output {
+ # The output files are exactly the names handed to SamToFastq
+ File out_fastq_1 = fastq_1
+ File out_fastq_2 = fastq_2
+ File out_unpaired = unpaired
+ }
+}
+
+# WORKFLOW DEFINITION
+workflow PairedFastQFromBams {
+ Array[File] bam_list
+
+ # Convert every BAM in bam_list to FASTQs, one scatter shard per BAM
+ scatter (input_bam in bam_list) {
+
+ # Regexes that reduce the gs:// path to a bare name stem
+ String sub_strip_path = "gs://.*/"
+ String sub_strip_suffix = ".bam$"
+ # String, not File: this is only a name stem for the outputs, not a path to an existing file
+ String output_basename = sub(sub(input_bam, sub_strip_path, ""), sub_strip_suffix, "")
+
+ # Revert the BAM to a pair of FASTQs plus a FASTQ of unpaired reads
+ call PairedFastQFromBam {
+ input:
+ bam_file = input_bam,
+ fastq_1 = output_basename + "_1.fastq",
+ fastq_2 = output_basename + "_2.fastq",
+ unpaired = output_basename + "_up.fastq"
+ }
+ }
+
+ # Outputs that will be retained when execution is complete (all PairedFastQFromBam outputs)
+ output {
+ PairedFastQFromBam.*
+ }
+}
+
@@ -0,0 +1,9 @@
+{
+ "ValidateBAMs.bam_list": [
+ "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.1.ATCACGAT.20k_reads.bam",
+ "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06HDADXX130110.2.ATCACGAT.20k_reads.bam",
+ "gs://genomics-public-data/test-data/dna/wgs/hiseq2500/NA12878/H06JUADXX130110.1.ATCACGAT.20k_reads.bam"
+ ],
+
+ "ValidateBAMs.disk_size": 200
+}
@@ -0,0 +1,69 @@
+## Copyright Broad Institute, 2016
+##
+## This WDL validates a list of BAMs in SUMMARY mode
+##
+## Requirements/expectations :
+## - List of BAM files to validate
+##
+## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
+## For program versions, see docker containers.
+##
+## LICENSING :
+## This script is released under the WDL source code license (BSD-3) (see LICENSE in
+## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
+## be subject to different licenses. Users are responsible for checking that they are
+## authorized to run all programs before running this script. Please see the docker
+## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
+## licensing information pertaining to the included programs.
+
+# TASK DEFINITIONS
+
+# Validate one BAM with Picard ValidateSamFile in SUMMARY mode,
+# writing the report to <output_basename>.txt
+task ValidateBAM {
+ File bam_file
+ String output_basename
+ Int disk_size
+
+ command {
+ java -Xmx3000m -jar /usr/gitc/picard.jar \
+ ValidateSamFile \
+ I=${bam_file} \
+ OUTPUT=${output_basename}.txt \
+ MODE=SUMMARY
+ }
+ runtime {
+ docker: "broadinstitute/genomes-in-the-cloud:2.2.3-1469027018"
+ # Must exceed the 3000 MB JVM heap requested in the command; "1 GB" risked an out-of-memory kill
+ memory: "3500 MB"
+ cpu: "1"
+ # Local disk size in GB, supplied by the caller
+ disks: "local-disk " + disk_size + " HDD"
+ }
+ output {
+ # Despite the name, this is the text validation report written by the command above
+ File output_bam = "${output_basename}.txt"
+ }
+}
+
+# WORKFLOW DEFINITION
+workflow ValidateBAMs {
+ Array[File] bam_list
+ Int disk_size
+
+ # Validate every BAM in bam_list, one scatter shard per BAM
+ scatter (input_bam in bam_list) {
+
+ # Regexes that reduce the gs:// path to a bare name stem
+ String sub_strip_path = "gs://.*/"
+ String sub_strip_suffix = ".bam$"
+
+ # Report name = BAM path with the gs:// directory prefix and the .bam suffix stripped, plus ".validation"
+ call ValidateBAM {
+ input:
+ bam_file = input_bam,
+ output_basename = sub(sub(input_bam, sub_strip_path, ""), sub_strip_suffix, "") + ".validation",
+ # Forward the workflow-level disk size; without this the task's required input is never set
+ disk_size = disk_size
+ }
+ }
+
+ # Outputs that will be retained when execution is complete (all ValidateBAM outputs)
+ output {
+ ValidateBAM.*
+ }
+}
+
@@ -0,0 +1,71 @@
+{
+ "ScatterHaplotypeCaller.sample_basename": "NA12878",
+ "ScatterHaplotypeCaller.input_bam": "gs://dsde-comms-resources/bams_wgs/NA12878.bam",
+ "ScatterHaplotypeCaller.input_bam_index": "gs://dsde-comms-resources/bams_wgs/NA12878.bai",
+
+ "ScatterHaplotypeCaller.ref_dict": "gs://dsde-comms-resources/legacy_bundles/b37/human_g1k_v37_decoy.dict",
+ "ScatterHaplotypeCaller.ref_fasta": "gs://dsde-comms-resources/legacy_bundles/b37/human_g1k_v37_decoy.fasta",
+ "ScatterHaplotypeCaller.ref_fasta_index": "gs://dsde-comms-resources/legacy_bundles/b37/human_g1k_v37_decoy.fasta.fai",
+
+ "ScatterHaplotypeCaller.scattered_intervals": [
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0001_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0002_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0003_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0004_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0005_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0006_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0007_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0008_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0009_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0010_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0011_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0012_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0013_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0014_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0015_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0016_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0017_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0018_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0019_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0020_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0021_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0022_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0023_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0024_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0025_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0026_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0027_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0028_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0029_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0030_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0031_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0032_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0033_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0034_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0035_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0036_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0037_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0038_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0039_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0040_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0041_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0042_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0043_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0044_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0045_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0046_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0047_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0048_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0049_of_50/scattered.interval_list",
+ "gs://dsde-comms-resources/legacy_bundles/b37/scattered_wgs_intervals_b37/temp_0050_of_50/scattered.interval_list"
+ ],
+
+ "##_COMMENT5": "DISK SIZES + PREEMPTIBLES",
+ "ScatterHaplotypeCaller.agg_small_disk": 200,
+ "ScatterHaplotypeCaller.agg_medium_disk": 300,
+ "ScatterHaplotypeCaller.agg_large_disk": 400,
+ "ScatterHaplotypeCaller.agg_preemptible_tries": 0,
+ "ScatterHaplotypeCaller.flowcell_small_disk": 200,
+ "ScatterHaplotypeCaller.flowcell_medium_disk": 300,
+ "ScatterHaplotypeCaller.preemptible_tries": 0
+}
Oops, something went wrong.

No commit comments for this range