diff --git a/scripts/broad_dsde_workflows/RevertBamToUnmappedRGBamsWf_170107.inputs.json b/scripts/broad_dsde_workflows/RevertBamToUnmappedRGBamsWf_170107.inputs.json
deleted file mode 100644
index 5511207..0000000
--- a/scripts/broad_dsde_workflows/RevertBamToUnmappedRGBamsWf_170107.inputs.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-
-  "RevertBamToUnmappedRGBamsWf.ref_fasta": "gs://gatk-legacy-bundles/b37/human_g1k_v37_decoy.fasta",
-  "RevertBamToUnmappedRGBamsWf.ref_fasta_index": "gs://gatk-legacy-bundles/b37/human_g1k_v37_decoy.fasta.fai",
-
-  "RevertBamToUnmappedRGBamsWf.input_bam": "gs://gatk-test-data/wgs_bam/NA12878_20k_b37/NA12878.bam",
-
-  "RevertBamToUnmappedRGBamsWf.output_dir": ".",
-
-  "RevertBamToUnmappedRGBamsWf.RevertBamToUnmappedRGBams.max_discard_pct": 0.01,
-
-  "RevertBamToUnmappedRGBamsWf.RevertBamToUnmappedRGBams.disk_size": 10,
-  "RevertBamToUnmappedRGBamsWf.RevertBamToUnmappedRGBams.mem_size": "1 GB",
-  "RevertBamToUnmappedRGBamsWf.SortBamByQueryname.disk_size": 10,
-  "RevertBamToUnmappedRGBamsWf.SortBamByQueryname.mem_size": "3500 MB"
-}
diff --git a/scripts/broad_dsde_workflows/RevertBamToUnmappedRGBamsWf_170306.inputs.json b/scripts/broad_dsde_workflows/RevertBamToUnmappedRGBamsWf_170306.inputs.json
new file mode 100644
index 0000000..51d8fc1
--- /dev/null
+++ b/scripts/broad_dsde_workflows/RevertBamToUnmappedRGBamsWf_170306.inputs.json
@@ -0,0 +1,16 @@
+{
+
+  "RevertBamToUnmappedRGBamsWf.ref_fasta": "gs://gatk-legacy-bundles/b37/human_g1k_v37_decoy.fasta",
+  "RevertBamToUnmappedRGBamsWf.ref_fasta_index": "gs://gatk-legacy-bundles/b37/human_g1k_v37_decoy.fasta.fai",
+
+  "RevertBamToUnmappedRGBamsWf.input_bam": "gs://gatk-test-data/wgs_bam/NA12878_20k_b37/NA12878.bam",
+  "RevertBamToUnmappedRGBamsWf.SplitReadsByRG.input_bam_index": "gs://gatk-test-data/wgs_bam/NA12878_20k_b37/NA12878.bai",
+
+  "RevertBamToUnmappedRGBamsWf.RevertBamToUnmapped.max_discard_pct": 0.01,
+
+  "RevertBamToUnmappedRGBamsWf.SplitReadsByRG.disk_size": 10,
+  "RevertBamToUnmappedRGBamsWf.SplitReadsByRG.mem_size": "4500 MB",
+
+  "RevertBamToUnmappedRGBamsWf.RevertBamToUnmapped.disk_size": 10,
+  "RevertBamToUnmappedRGBamsWf.RevertBamToUnmapped.mem_size": "4500 MB"
+}
diff --git a/scripts/broad_dsde_workflows/RevertBamToUnmappedRGBamsWf_170107.wdl b/scripts/broad_dsde_workflows/RevertBamToUnmappedRGBamsWf_170306.wdl
similarity index 55%
rename from scripts/broad_dsde_workflows/RevertBamToUnmappedRGBamsWf_170107.wdl
rename to scripts/broad_dsde_workflows/RevertBamToUnmappedRGBamsWf_170306.wdl
index b75682b..48327d8 100644
--- a/scripts/broad_dsde_workflows/RevertBamToUnmappedRGBamsWf_170107.wdl
+++ b/scripts/broad_dsde_workflows/RevertBamToUnmappedRGBamsWf_170306.wdl
@@ -26,20 +26,46 @@
 
 # TASK DEFINITIONS
 
-# Revert a BAM to uBAMs, one per readgroup
-task RevertBamToUnmappedRGBams {
+# Split sample BAM into per-readgroup BAMs, written to the task working directory and collected with glob("*.bam"); mem_size must be larger than the 4000m JVM heap requested in the command
+task SplitReadsByRG {
   File input_bam
-  String output_dir
+  File input_bam_index
+  Int disk_size
+  String mem_size
+
+  command {
+    java -Xmx4000m -jar /usr/gitc/GATK4.jar \
+      SplitReads \
+      -I ${input_bam} \
+      -O . \
+      -RG
+  }
+  runtime {
+    docker: "broadinstitute/genomes-in-the-cloud:2.2.5-1486412288"
+    disks: "local-disk " + disk_size + " HDD"
+    memory: mem_size
+  }
+  output {
+    Array[File] readgroup_bams = glob("*.bam")
+  }
+}
+
+# Revert a BAM to a single uBAM named "<output_basename>.bam"; mem_size must be larger than the 4000m JVM heap requested in the command
+task RevertBamToUnmapped {
+  File input_bam
+  String output_basename
   Float? max_discard_pct
   Int disk_size
   String mem_size
 
+  String output_name = "${output_basename}.bam"
+
   command {
-    java -Xmx1000m -jar /usr/gitc/picard.jar \
+    java -Xmx4000m -jar /usr/gitc/picard.jar \
       RevertSam \
       INPUT=${input_bam} \
-      O=${output_dir} \
-      OUTPUT_BY_READGROUP=true \
+      O=${output_name} \
+      OUTPUT_BY_READGROUP=false \
       VALIDATION_STRINGENCY=LENIENT \
       SANITIZE=TRUE \
       MAX_DISCARD_FRACTION=${max_discard_pct} \
@@ -47,12 +73,12 @@ task RevertBamToUnmappedRGBams {
       SORT_ORDER=queryname
   }
   runtime {
-    docker: "broadinstitute/genomes-in-the-cloud:2.2.3-1469027018"
+    docker: "broadinstitute/genomes-in-the-cloud:2.2.5-1486412288"
     disks: "local-disk " + disk_size + " HDD"
     memory: mem_size
   }
   output {
-    Array[File] unmapped_bams = glob("*.bam")
+    File unmapped_bam = "${output_name}"
   }
 }
 
@@ -61,17 +87,28 @@ workflow RevertBamToUnmappedRGBamsWf {
   File input_bam
   File ref_fasta
   File ref_fasta_index
-  String output_dir
 
-  # Revert inputs to unmapped
-  call RevertBamToUnmappedRGBams {
-    input:
-      input_bam = input_bam,
-      output_dir = output_dir
+  # Split input BAM by readgroup (input_bam_index is supplied directly to the task through the inputs JSON)
+  call SplitReadsByRG {
+    input:
+      input_bam = input_bam
+  }
+
+  scatter (readgroup_bam in SplitReadsByRG.readgroup_bams) {
+
+    String sub_strip_path = "gs://.*/"
+    String sub_strip_suffix = ".bam$"
+
+    # Revert readgroup BAMs to unmapped; the basename is derived from the shard's own readgroup_bam (not the workflow-level input_bam) so each shard gets a distinct output name
+    call RevertBamToUnmapped {
+      input:
+        input_bam = readgroup_bam,
+        output_basename = sub(sub(readgroup_bam, sub_strip_path, ""), sub_strip_suffix, "") + ".unmapped"
+    }
   }
 
   # Outputs that will be retained when execution is complete
   output {
-    Array[File] unmapped_bams_output=RevertBamToUnmappedRGBams.unmapped_bams
+    Array[File] unmapped_bams_output=RevertBamToUnmapped.unmapped_bam
   }
 }