From 71ffa827c45cfa9cec62122607b3959b9adf14b7 Mon Sep 17 00:00:00 2001 From: bshifaw Date: Fri, 27 Apr 2018 15:01:38 +0000 Subject: [PATCH] Update to gatk4.0.4.0 --- cnv_common_tasks.wdl | 60 ++-- cnv_somatic_oncotator_workflow.wdl | 1 + cnv_somatic_pair_workflow.b37.inputs.json | 4 +- cnv_somatic_pair_workflow.wdl | 315 +++++++++++---------- cnv_somatic_panel_workflow.b37.inputs.json | 2 +- cnv_somatic_panel_workflow.wdl | 6 +- 6 files changed, 213 insertions(+), 175 deletions(-) diff --git a/cnv_common_tasks.wdl b/cnv_common_tasks.wdl index 0b462da..ee8df0b 100644 --- a/cnv_common_tasks.wdl +++ b/cnv_common_tasks.wdl @@ -120,7 +120,7 @@ task CollectCounts { set -e export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} - gatk --java-options "-Xmx${command_mem_mb}m" CollectFragmentCounts \ + gatk --java-options "-Xmx${command_mem_mb}m" CollectReadCounts \ -L ${intervals} \ --input ${bam} \ --reference ${ref_fasta} \ @@ -235,8 +235,12 @@ task ScatterIntervals { task PostprocessGermlineCNVCalls { String entity_id - Array[File] chunk_path_tars - String sample_index + Array[File] gcnv_calls_tars + Array[File] gcnv_model_tars + File contig_ploidy_calls_tar + Array[String]? allosomal_contigs + Int ref_copy_number_autosomal_contigs + Int sample_index File? gatk4_jar_override # Runtime parameters @@ -250,8 +254,9 @@ task PostprocessGermlineCNVCalls { Int machine_mem_mb = select_first([mem_gb, 7]) * 1000 Int command_mem_mb = machine_mem_mb - 1000 - String sample_directory = "SAMPLE_${sample_index}" #this is a hardcoded convention in gcnvkernel - String vcf_filename = "${entity_id}.vcf.gz" + String genotyped_intervals_vcf_filename = "genotyped-intervals-${entity_id}.vcf.gz" + String genotyped_segments_vcf_filename = "genotyped-segments-${entity_id}.vcf.gz" + Boolean allosomal_contigs_specified = defined(allosomal_contigs) && length(select_first([allosomal_contigs, []])) > 0 String dollar = "$" #WDL workaround for using array[@], see https://github.com/broadinstitute/cromwell/issues/1819 @@ -259,20 +264,40 @@ task PostprocessGermlineCNVCalls { set -e export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} - #untar chunk_path_tars to CHUNK_0, CHUNK_1, etc. directories and build chunk_paths_command_line="--chunk_path CHUNK_0 ..." 
- chunk_path_array=(${sep=" " chunk_path_tars}) - chunk_paths_command_line="" - for index in ${dollar}{!chunk_path_array[@]}; do - chunk_path_tar=${dollar}{chunk_path_array[$index]} - mkdir CHUNK_$index - tar xzf $chunk_path_tar -C CHUNK_$index - chunk_paths_command_line="$chunk_paths_command_line --chunk-path CHUNK_$index" + # untar calls to CALLS_0, CALLS_1, etc directories and build the command line + gcnv_calls_tar_array=(${sep=" " gcnv_calls_tars}) + calls_args="" + for index in ${dollar}{!gcnv_calls_tar_array[@]}; do + gcnv_calls_tar=${dollar}{gcnv_calls_tar_array[$index]} + mkdir CALLS_$index + tar xzf $gcnv_calls_tar -C CALLS_$index + calls_args="$calls_args --calls-shard-path CALLS_$index" done + # untar models to MODEL_0, MODEL_1, etc directories and build the command line + gcnv_model_tar_array=(${sep=" " gcnv_model_tars}) + model_args="" + for index in ${dollar}{!gcnv_model_tar_array[@]}; do + gcnv_model_tar=${dollar}{gcnv_model_tar_array[$index]} + mkdir MODEL_$index + tar xzf $gcnv_model_tar -C MODEL_$index + model_args="$model_args --model-shard-path MODEL_$index" + done + + mkdir extracted-contig-ploidy-calls + tar xzf ${contig_ploidy_calls_tar} -C extracted-contig-ploidy-calls + + allosomal_contigs_args="--allosomal-contig ${sep=" --allosomal-contig " allosomal_contigs}" + gatk --java-options "-Xmx${command_mem_mb}m" PostprocessGermlineCNVCalls \ - $chunk_paths_command_line \ - --sample-directory ${sample_directory} \ - --output ${vcf_filename} + $calls_args \ + $model_args \ + ${true="$allosomal_contigs_args" false="" allosomal_contigs_specified} \ + --autosomal-ref-copy-number ${ref_copy_number_autosomal_contigs} \ + --contig-ploidy-calls extracted-contig-ploidy-calls \ + --sample-index ${sample_index} \ + --output-genotyped-intervals ${genotyped_intervals_vcf_filename} \ + --output-genotyped-segments ${genotyped_segments_vcf_filename} >>> runtime { @@ -284,6 +309,7 @@ task PostprocessGermlineCNVCalls { } output { - File vcf = vcf_filename + File genotyped_intervals_vcf = genotyped_intervals_vcf_filename + File genotyped_segments_vcf = genotyped_segments_vcf_filename } } diff --git a/cnv_somatic_oncotator_workflow.wdl b/cnv_somatic_oncotator_workflow.wdl index 34c2c7b..7f0abac 100644 --- a/cnv_somatic_oncotator_workflow.wdl +++ b/cnv_somatic_oncotator_workflow.wdl @@ -77,3 +77,4 @@ task OncotateSegments { File oncotated_called_gene_list_file = "${basename_called_file}.gene_list.txt" } } + diff --git a/cnv_somatic_pair_workflow.b37.inputs.json b/cnv_somatic_pair_workflow.b37.inputs.json index bb7c984..4bfb140 100644 --- a/cnv_somatic_pair_workflow.b37.inputs.json +++ b/cnv_somatic_pair_workflow.b37.inputs.json @@ -14,7 +14,7 @@ "CNVSomaticPairWorkflow.intervals": "gs://gatk-test-data/cnv/somatic/ice_targets.tsv.interval_list", "##_COMMENT3": "Docker", - "CNVSomaticPairWorkflow.gatk_docker": "broadinstitute/gatk:4.0.1.2", + "CNVSomaticPairWorkflow.gatk_docker": "broadinstitute/gatk:4.0.4.0", "##CNVSomaticPairWorkflow.oncotator_docker": "(optional) String?", "##_COMMENT4": "Memory Optional", @@ -64,7 +64,7 @@ "##_COMMENT7": "Misc Optional", "##CNVSomaticPairWorkflow.additional_args_for_oncotator": "(optional) String?", - "##CNVSomaticPairWorkflow.is_run_oncotator": "(optional) Boolean?", + "CNVSomaticPairWorkflow.is_run_oncotator": true, "##CNVSomaticPairWorkflow.ModelSegmentsNormal.normal_allelic_counts": "(optional) File?", "##CNVSomaticPairWorkflow.num_smoothing_iterations_per_fit": "(optional) Int?", "##CNVSomaticPairWorkflow.calling_copy_ratio_z_score_threshold": 
"(optional) Float?", diff --git a/cnv_somatic_pair_workflow.wdl b/cnv_somatic_pair_workflow.wdl index 791ff83..565fce6 100644 --- a/cnv_somatic_pair_workflow.wdl +++ b/cnv_somatic_pair_workflow.wdl @@ -15,8 +15,7 @@ # - Example invocation: # # java -jar cromwell.jar run cnv_somatic_pair_workflow.wdl -i my_parameters.json -# -#gatk4.0.1.2 release +# ############# import "cnv_common_tasks.wdl" as CNVTasks @@ -31,8 +30,8 @@ workflow CNVSomaticPairWorkflow { File intervals File tumor_bam File tumor_bam_idx - File normal_bam - File normal_bam_idx + File? normal_bam + File? normal_bam_idx File read_count_pon File ref_fasta_dict File ref_fasta_fai @@ -123,12 +122,15 @@ workflow CNVSomaticPairWorkflow { Int ref_size = ceil(size(ref_fasta, "GB") + size(ref_fasta_dict, "GB") + size(ref_fasta_fai, "GB")) Int read_count_pon_size = ceil(size(read_count_pon, "GB")) Int tumor_bam_size = ceil(size(tumor_bam, "GB") + size(tumor_bam_idx, "GB")) - Int normal_bam_size = ceil(size(normal_bam, "GB") + size(normal_bam_idx, "GB")) + Int normal_bam_size = if defined(normal_bam) then ceil(size(normal_bam, "GB") + size(normal_bam_idx, "GB")) else 0 Int gatk4_override_size = if defined(gatk4_jar_override) then ceil(size(gatk4_jar_override, "GB")) else 0 # This is added to every task as padding, should increase if systematically you need more disk for every call Int disk_pad = 20 + ceil(size(intervals, "GB")) + ceil(size(common_sites, "GB")) + gatk4_override_size + select_first([emergency_extra_disk,0]) + File final_normal_bam = select_first([normal_bam, "null"]) + File final_normal_bam_idx = select_first([normal_bam_idx, "null"]) + Int preprocess_intervals_disk = ref_size + disk_pad call CNVTasks.PreprocessIntervals { input: @@ -162,23 +164,6 @@ workflow CNVSomaticPairWorkflow { preemptible_attempts = preemptible_attempts } - Int collect_counts_normal_disk = normal_bam_size + ceil(size(PreprocessIntervals.preprocessed_intervals, "GB")) + disk_pad - call CNVTasks.CollectCounts as CollectCountsNormal { - input: - intervals = PreprocessIntervals.preprocessed_intervals, - bam = normal_bam, - bam_idx = normal_bam_idx, - ref_fasta = ref_fasta, - ref_fasta_fai = ref_fasta_fai, - ref_fasta_dict = ref_fasta_dict, - format = format, - gatk4_jar_override = gatk4_jar_override, - gatk_docker = gatk_docker, - mem_gb = mem_gb_for_collect_counts, - disk_space_gb = collect_counts_normal_disk, - preemptible_attempts = preemptible_attempts - } - Int collect_allelic_counts_tumor_disk = tumor_bam_size + ref_size + disk_pad call CNVTasks.CollectAllelicCounts as CollectAllelicCountsTumor { input: @@ -196,23 +181,6 @@ workflow CNVSomaticPairWorkflow { preemptible_attempts = preemptible_attempts } - Int collect_allelic_counts_normal_disk = normal_bam_size + ref_size + disk_pad - call CNVTasks.CollectAllelicCounts as CollectAllelicCountsNormal { - input: - common_sites = common_sites, - bam = normal_bam, - bam_idx = normal_bam_idx, - ref_fasta = ref_fasta, - ref_fasta_dict = ref_fasta_dict, - ref_fasta_fai = ref_fasta_fai, - minimum_base_quality = minimum_base_quality, - gatk4_jar_override = gatk4_jar_override, - gatk_docker = gatk_docker, - mem_gb = mem_gb_for_collect_allelic_counts, - disk_space_gb = collect_allelic_counts_normal_disk, - preemptible_attempts = preemptible_attempts - } - Int denoise_read_counts_tumor_disk = read_count_pon_size + ceil(size(CollectCountsTumor.counts, "GB")) + disk_pad call DenoiseReadCounts as DenoiseReadCountsTumor { input: @@ -227,21 +195,8 @@ workflow CNVSomaticPairWorkflow { preemptible_attempts = 
preemptible_attempts } - Int denoise_read_counts_normal_disk = read_count_pon_size + ceil(size(CollectCountsNormal.counts, "GB")) + disk_pad - call DenoiseReadCounts as DenoiseReadCountsNormal { - input: - entity_id = CollectCountsNormal.entity_id, - read_counts = CollectCountsNormal.counts, - read_count_pon = read_count_pon, - number_of_eigensamples = number_of_eigensamples, - gatk4_jar_override = gatk4_jar_override, - gatk_docker = gatk_docker, - mem_gb = mem_gb_for_denoise_read_counts, - disk_space_gb = denoise_read_counts_normal_disk, - preemptible_attempts = preemptible_attempts - } - - Int model_segments_disk = ceil(size(DenoiseReadCountsTumor.denoised_copy_ratios, "GB")) + ceil(size(CollectAllelicCountsTumor.allelic_counts, "GB")) + ceil(size(CollectAllelicCountsNormal.allelic_counts, "GB")) + disk_pad + Int model_segments_normal_portion = if defined(normal_bam) then ceil(size(CollectAllelicCountsNormal.allelic_counts, "GB")) else 0 + Int model_segments_tumor_disk = ceil(size(DenoiseReadCountsTumor.denoised_copy_ratios, "GB")) + ceil(size(CollectAllelicCountsTumor.allelic_counts, "GB")) + model_segments_normal_portion + disk_pad call ModelSegments as ModelSegmentsTumor { input: entity_id = CollectCountsTumor.entity_id, @@ -270,38 +225,7 @@ workflow CNVSomaticPairWorkflow { gatk4_jar_override = gatk4_jar_override, gatk_docker = gatk_docker, mem_gb = mem_gb_for_model_segments, - disk_space_gb = model_segments_disk, - preemptible_attempts = preemptible_attempts - } - - call ModelSegments as ModelSegmentsNormal { - input: - entity_id = CollectCountsNormal.entity_id, - denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios, - allelic_counts = CollectAllelicCountsNormal.allelic_counts, - max_num_segments_per_chromosome = max_num_segments_per_chromosome, - min_total_allele_count = min_total_allele_count, - genotyping_homozygous_log_ratio_threshold = genotyping_homozygous_log_ratio_threshold, - genotyping_base_error_rate = genotyping_base_error_rate, - kernel_variance_copy_ratio = kernel_variance_copy_ratio, - kernel_variance_allele_fraction = kernel_variance_allele_fraction, - kernel_scaling_allele_fraction = kernel_scaling_allele_fraction, - kernel_approximation_dimension = kernel_approximation_dimension, - window_sizes = window_sizes, - num_changepoints_penalty_factor = num_changepoints_penalty_factor, - minor_allele_fraction_prior_alpha = minor_allele_fraction_prior_alpha, - num_samples_copy_ratio = num_samples_copy_ratio, - num_burn_in_copy_ratio = num_burn_in_copy_ratio, - num_samples_allele_fraction = num_samples_allele_fraction, - num_burn_in_allele_fraction = num_burn_in_allele_fraction, - smoothing_threshold_copy_ratio = smoothing_threshold_copy_ratio, - smoothing_threshold_allele_fraction = smoothing_threshold_allele_fraction, - max_num_smoothing_iterations = max_num_smoothing_iterations, - num_smoothing_iterations_per_fit = num_smoothing_iterations_per_fit, - gatk4_jar_override = gatk4_jar_override, - gatk_docker = gatk_docker, - mem_gb = mem_gb_for_model_segments, - disk_space_gb = model_segments_disk, + disk_space_gb = model_segments_tumor_disk, preemptible_attempts = preemptible_attempts } @@ -321,22 +245,6 @@ workflow CNVSomaticPairWorkflow { preemptible_attempts = preemptible_attempts } - Int copy_ratio_segments_normal_disk = ceil(size(DenoiseReadCountsNormal.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsNormal.copy_ratio_only_segments, "GB")) + disk_pad - call CallCopyRatioSegments as CallCopyRatioSegmentsNormal { - input: - entity_id = 
CollectCountsNormal.entity_id,
-            copy_ratio_segments = ModelSegmentsNormal.copy_ratio_only_segments,
-            neutral_segment_copy_ratio_lower_bound = neutral_segment_copy_ratio_lower_bound,
-            neutral_segment_copy_ratio_upper_bound = neutral_segment_copy_ratio_upper_bound,
-            outlier_neutral_segment_copy_ratio_z_score_threshold = outlier_neutral_segment_copy_ratio_z_score_threshold,
-            calling_copy_ratio_z_score_threshold = calling_copy_ratio_z_score_threshold,
-            gatk4_jar_override = gatk4_jar_override,
-            gatk_docker = gatk_docker,
-            mem_gb = mem_gb_for_call_copy_ratio_segments,
-            disk_space_gb = copy_ratio_segments_normal_disk,
-            preemptible_attempts = preemptible_attempts
-    }
-
     # The files from other tasks are small enough to just combine into one disk variable and pass to the tumor plotting tasks
     Int plot_tumor_disk = ref_size + ceil(size(DenoiseReadCountsTumor.standardized_copy_ratios, "GB")) + ceil(size(DenoiseReadCountsTumor.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsTumor.het_allelic_counts, "GB")) + ceil(size(ModelSegmentsTumor.modeled_segments, "GB")) + disk_pad
     call PlotDenoisedCopyRatios as PlotDenoisedCopyRatiosTumor {
@@ -353,22 +261,6 @@ workflow CNVSomaticPairWorkflow {
             preemptible_attempts = preemptible_attempts
     }

-    # The files from other tasks are small enough to just combine into one disk variable and pass to the normal plotting tasks
-    Int plot_normal_disk = ref_size + ceil(size(DenoiseReadCountsNormal.standardized_copy_ratios, "GB")) + ceil(size(DenoiseReadCountsNormal.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsNormal.het_allelic_counts, "GB")) + ceil(size(ModelSegmentsNormal.modeled_segments, "GB")) + disk_pad
-    call PlotDenoisedCopyRatios as PlotDenoisedCopyRatiosNormal {
-        input:
-            entity_id = CollectCountsNormal.entity_id,
-            standardized_copy_ratios = DenoiseReadCountsNormal.standardized_copy_ratios,
-            denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios,
-            ref_fasta_dict = ref_fasta_dict,
-            minimum_contig_length = minimum_contig_length,
-            gatk4_jar_override = gatk4_jar_override,
-            gatk_docker = gatk_docker,
-            mem_gb = mem_gb_for_plotting,
-            disk_space_gb = plot_normal_disk,
-            preemptible_attempts = preemptible_attempts
-    }
-
     call PlotModeledSegments as PlotModeledSegmentsTumor {
         input:
             entity_id = CollectCountsTumor.entity_id,
@@ -384,19 +276,133 @@ workflow CNVSomaticPairWorkflow {
             preemptible_attempts = preemptible_attempts
     }

-    call PlotModeledSegments as PlotModeledSegmentsNormal {
-        input:
-            entity_id = CollectCountsNormal.entity_id,
-            denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios,
-            het_allelic_counts = ModelSegmentsNormal.het_allelic_counts,
-            modeled_segments = ModelSegmentsNormal.modeled_segments,
-            ref_fasta_dict = ref_fasta_dict,
-            minimum_contig_length = minimum_contig_length,
-            gatk4_jar_override = gatk4_jar_override,
-            gatk_docker = gatk_docker,
-            mem_gb = mem_gb_for_plotting,
-            disk_space_gb = plot_normal_disk,
-            preemptible_attempts = preemptible_attempts
+    Int collect_counts_normal_disk = normal_bam_size + ceil(size(PreprocessIntervals.preprocessed_intervals, "GB")) + disk_pad
+    if (defined(normal_bam)) {
+        call CNVTasks.CollectCounts as CollectCountsNormal {
+            input:
+                intervals = PreprocessIntervals.preprocessed_intervals,
+                bam = final_normal_bam,
+                bam_idx = final_normal_bam_idx,
+                ref_fasta = ref_fasta,
+                ref_fasta_fai = ref_fasta_fai,
+                ref_fasta_dict = ref_fasta_dict,
+                format = format,
+                gatk4_jar_override = gatk4_jar_override,
+                gatk_docker = gatk_docker,
+                mem_gb = 
mem_gb_for_collect_counts, + disk_space_gb = collect_counts_normal_disk, + preemptible_attempts = preemptible_attempts + } + + Int collect_allelic_counts_normal_disk = normal_bam_size + ref_size + disk_pad + call CNVTasks.CollectAllelicCounts as CollectAllelicCountsNormal { + input: + common_sites = common_sites, + bam = final_normal_bam, + bam_idx = final_normal_bam_idx, + ref_fasta = ref_fasta, + ref_fasta_dict = ref_fasta_dict, + ref_fasta_fai = ref_fasta_fai, + minimum_base_quality = minimum_base_quality, + gatk4_jar_override = gatk4_jar_override, + gatk_docker = gatk_docker, + mem_gb = mem_gb_for_collect_allelic_counts, + disk_space_gb = collect_allelic_counts_normal_disk, + preemptible_attempts = preemptible_attempts + } + + Int denoise_read_counts_normal_disk = read_count_pon_size + ceil(size(CollectCountsNormal.counts, "GB")) + disk_pad + call DenoiseReadCounts as DenoiseReadCountsNormal { + input: + entity_id = CollectCountsNormal.entity_id, + read_counts = CollectCountsNormal.counts, + read_count_pon = read_count_pon, + number_of_eigensamples = number_of_eigensamples, + gatk4_jar_override = gatk4_jar_override, + gatk_docker = gatk_docker, + mem_gb = mem_gb_for_denoise_read_counts, + disk_space_gb = denoise_read_counts_normal_disk, + preemptible_attempts = preemptible_attempts + } + + Int model_segments_normal_disk = ceil(size(DenoiseReadCountsNormal.denoised_copy_ratios, "GB")) + ceil(size(CollectAllelicCountsNormal.allelic_counts, "GB")) + disk_pad + call ModelSegments as ModelSegmentsNormal { + input: + entity_id = CollectCountsNormal.entity_id, + denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios, + allelic_counts = CollectAllelicCountsNormal.allelic_counts, + max_num_segments_per_chromosome = max_num_segments_per_chromosome, + min_total_allele_count = min_total_allele_count, + genotyping_homozygous_log_ratio_threshold = genotyping_homozygous_log_ratio_threshold, + genotyping_base_error_rate = genotyping_base_error_rate, + kernel_variance_copy_ratio = kernel_variance_copy_ratio, + kernel_variance_allele_fraction = kernel_variance_allele_fraction, + kernel_scaling_allele_fraction = kernel_scaling_allele_fraction, + kernel_approximation_dimension = kernel_approximation_dimension, + window_sizes = window_sizes, + num_changepoints_penalty_factor = num_changepoints_penalty_factor, + minor_allele_fraction_prior_alpha = minor_allele_fraction_prior_alpha, + num_samples_copy_ratio = num_samples_copy_ratio, + num_burn_in_copy_ratio = num_burn_in_copy_ratio, + num_samples_allele_fraction = num_samples_allele_fraction, + num_burn_in_allele_fraction = num_burn_in_allele_fraction, + smoothing_threshold_copy_ratio = smoothing_threshold_copy_ratio, + smoothing_threshold_allele_fraction = smoothing_threshold_allele_fraction, + max_num_smoothing_iterations = max_num_smoothing_iterations, + num_smoothing_iterations_per_fit = num_smoothing_iterations_per_fit, + gatk4_jar_override = gatk4_jar_override, + gatk_docker = gatk_docker, + mem_gb = mem_gb_for_model_segments, + disk_space_gb = model_segments_normal_disk, + preemptible_attempts = preemptible_attempts + } + + Int copy_ratio_segments_normal_disk = ceil(size(DenoiseReadCountsNormal.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsNormal.copy_ratio_only_segments, "GB")) + disk_pad + call CallCopyRatioSegments as CallCopyRatioSegmentsNormal { + input: + entity_id = CollectCountsNormal.entity_id, + copy_ratio_segments = ModelSegmentsNormal.copy_ratio_only_segments, + neutral_segment_copy_ratio_lower_bound = 
neutral_segment_copy_ratio_lower_bound, + neutral_segment_copy_ratio_upper_bound = neutral_segment_copy_ratio_upper_bound, + outlier_neutral_segment_copy_ratio_z_score_threshold = outlier_neutral_segment_copy_ratio_z_score_threshold, + calling_copy_ratio_z_score_threshold = calling_copy_ratio_z_score_threshold, + gatk4_jar_override = gatk4_jar_override, + gatk_docker = gatk_docker, + mem_gb = mem_gb_for_call_copy_ratio_segments, + disk_space_gb = copy_ratio_segments_normal_disk, + preemptible_attempts = preemptible_attempts + } + + # The files from other tasks are small enough to just combine into one disk variable and pass to the normal plotting tasks + Int plot_normal_disk = ref_size + ceil(size(DenoiseReadCountsNormal.standardized_copy_ratios, "GB")) + ceil(size(DenoiseReadCountsNormal.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsNormal.het_allelic_counts, "GB")) + ceil(size(ModelSegmentsNormal.modeled_segments, "GB")) + disk_pad + call PlotDenoisedCopyRatios as PlotDenoisedCopyRatiosNormal { + input: + entity_id = CollectCountsNormal.entity_id, + standardized_copy_ratios = DenoiseReadCountsNormal.standardized_copy_ratios, + denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios, + ref_fasta_dict = ref_fasta_dict, + minimum_contig_length = minimum_contig_length, + gatk4_jar_override = gatk4_jar_override, + gatk_docker = gatk_docker, + mem_gb = mem_gb_for_plotting, + disk_space_gb = plot_normal_disk, + preemptible_attempts = preemptible_attempts + } + + call PlotModeledSegments as PlotModeledSegmentsNormal { + input: + entity_id = CollectCountsNormal.entity_id, + denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios, + het_allelic_counts = ModelSegmentsNormal.het_allelic_counts, + modeled_segments = ModelSegmentsNormal.modeled_segments, + ref_fasta_dict = ref_fasta_dict, + minimum_contig_length = minimum_contig_length, + gatk4_jar_override = gatk4_jar_override, + gatk_docker = gatk_docker, + mem_gb = mem_gb_for_plotting, + disk_space_gb = plot_normal_disk, + preemptible_attempts = preemptible_attempts + } } if (select_first([is_run_oncotator, false])) { @@ -437,29 +443,29 @@ workflow CNVSomaticPairWorkflow { File scaled_delta_MAD_tumor = PlotDenoisedCopyRatiosTumor.scaled_delta_MAD File modeled_segments_plot_tumor = PlotModeledSegmentsTumor.modeled_segments_plot - File read_counts_entity_id_normal = CollectCountsNormal.entity_id - File read_counts_normal = CollectCountsNormal.counts - File allelic_counts_entity_id_normal = CollectAllelicCountsNormal.entity_id - File allelic_counts_normal = CollectAllelicCountsNormal.allelic_counts - File denoised_copy_ratios_normal = DenoiseReadCountsNormal.denoised_copy_ratios - File standardized_copy_ratios_normal = DenoiseReadCountsNormal.standardized_copy_ratios - File het_allelic_counts_normal = ModelSegmentsNormal.het_allelic_counts - File normal_het_allelic_counts_normal = ModelSegmentsNormal.normal_het_allelic_counts - File copy_ratio_only_segments_normal = ModelSegmentsNormal.copy_ratio_only_segments - File modeled_segments_begin_normal = ModelSegmentsNormal.modeled_segments_begin - File copy_ratio_parameters_begin_normal = ModelSegmentsNormal.copy_ratio_parameters_begin - File allele_fraction_parameters_begin_normal = ModelSegmentsNormal.allele_fraction_parameters_begin - File modeled_segments_normal = ModelSegmentsNormal.modeled_segments - File copy_ratio_parameters_normal = ModelSegmentsNormal.copy_ratio_parameters - File allele_fraction_parameters_normal = ModelSegmentsNormal.allele_fraction_parameters - 
File called_copy_ratio_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_segments - File denoised_copy_ratios_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_plot - File denoised_copy_ratios_lim_4_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_lim_4_plot - File standardized_MAD_normal = PlotDenoisedCopyRatiosNormal.standardized_MAD - File denoised_MAD_normal = PlotDenoisedCopyRatiosNormal.denoised_MAD - File delta_MAD_normal = PlotDenoisedCopyRatiosNormal.delta_MAD - File scaled_delta_MAD_normal = PlotDenoisedCopyRatiosNormal.scaled_delta_MAD - File modeled_segments_plot_normal = PlotModeledSegmentsNormal.modeled_segments_plot + File? read_counts_entity_id_normal = CollectCountsNormal.entity_id + File? read_counts_normal = CollectCountsNormal.counts + File? allelic_counts_entity_id_normal = CollectAllelicCountsNormal.entity_id + File? allelic_counts_normal = CollectAllelicCountsNormal.allelic_counts + File? denoised_copy_ratios_normal = DenoiseReadCountsNormal.denoised_copy_ratios + File? standardized_copy_ratios_normal = DenoiseReadCountsNormal.standardized_copy_ratios + File? het_allelic_counts_normal = ModelSegmentsNormal.het_allelic_counts + File? normal_het_allelic_counts_normal = ModelSegmentsNormal.normal_het_allelic_counts + File? copy_ratio_only_segments_normal = ModelSegmentsNormal.copy_ratio_only_segments + File? modeled_segments_begin_normal = ModelSegmentsNormal.modeled_segments_begin + File? copy_ratio_parameters_begin_normal = ModelSegmentsNormal.copy_ratio_parameters_begin + File? allele_fraction_parameters_begin_normal = ModelSegmentsNormal.allele_fraction_parameters_begin + File? modeled_segments_normal = ModelSegmentsNormal.modeled_segments + File? copy_ratio_parameters_normal = ModelSegmentsNormal.copy_ratio_parameters + File? allele_fraction_parameters_normal = ModelSegmentsNormal.allele_fraction_parameters + File? called_copy_ratio_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_segments + File? denoised_copy_ratios_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_plot + File? denoised_copy_ratios_lim_4_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_lim_4_plot + File? standardized_MAD_normal = PlotDenoisedCopyRatiosNormal.standardized_MAD + File? denoised_MAD_normal = PlotDenoisedCopyRatiosNormal.denoised_MAD + File? delta_MAD_normal = PlotDenoisedCopyRatiosNormal.delta_MAD + File? scaled_delta_MAD_normal = PlotDenoisedCopyRatiosNormal.scaled_delta_MAD + File? 
modeled_segments_plot_normal = PlotModeledSegmentsNormal.modeled_segments_plot File oncotated_called_file_tumor = select_first([CNVOncotatorWorkflow.oncotated_called_file, "null"]) File oncotated_called_gene_list_file_tumor = select_first([CNVOncotatorWorkflow.oncotated_called_gene_list_file, "null"]) @@ -761,3 +767,4 @@ task PlotModeledSegments { File modeled_segments_plot = "${output_dir_}/${entity_id}.modeled.png" } } + diff --git a/cnv_somatic_panel_workflow.b37.inputs.json b/cnv_somatic_panel_workflow.b37.inputs.json index ac80969..21b818b 100644 --- a/cnv_somatic_panel_workflow.b37.inputs.json +++ b/cnv_somatic_panel_workflow.b37.inputs.json @@ -11,7 +11,7 @@ "CNVSomaticPanelWorkflow.intervals": "gs://gatk-test-data/cnv/somatic/ice_targets.tsv.interval_list", "##_COMMENT3": "Docker", - "CNVSomaticPanelWorkflow.gatk_docker": "broadinstitute/gatk:4.0.1.2", + "CNVSomaticPanelWorkflow.gatk_docker": "broadinstitute/gatk:4.0.4.0", "##_COMMENT4": "Disk Size Optional", "##CNVSomaticPanelWorkflow.AnnotateIntervals.disk_space_gb": "(optional) Int?", diff --git a/cnv_somatic_panel_workflow.wdl b/cnv_somatic_panel_workflow.wdl index dec38a1..f17d27a 100644 --- a/cnv_somatic_panel_workflow.wdl +++ b/cnv_somatic_panel_workflow.wdl @@ -13,7 +13,6 @@ # # java -jar cromwell.jar run cnv_somatic_panel_workflow.wdl -i my_parameters.json # -# gatk4.0.1.2 tag release ############# import "cnv_common_tasks.wdl" as CNVTasks @@ -69,6 +68,7 @@ workflow CNVSomaticPanelWorkflow { Boolean? do_impute_zeros Float? extreme_outlier_truncation_percentile Int? number_of_eigensamples + Int? maximum_chunk_size Int? mem_gb_for_create_read_count_pon Array[Pair[String, String]] normal_bams_and_bais = zip(normal_bams, normal_bais) @@ -129,6 +129,7 @@ workflow CNVSomaticPanelWorkflow { do_impute_zeros = do_impute_zeros, extreme_outlier_truncation_percentile = extreme_outlier_truncation_percentile, number_of_eigensamples = number_of_eigensamples, + maximum_chunk_size = maximum_chunk_size, annotated_intervals = AnnotateIntervals.annotated_intervals, gatk4_jar_override = gatk4_jar_override, gatk_docker = gatk_docker, @@ -154,6 +155,7 @@ task CreateReadCountPanelOfNormals { Boolean? do_impute_zeros Float? extreme_outlier_truncation_percentile Int? number_of_eigensamples + Int? maximum_chunk_size File? annotated_intervals #do not perform explicit GC correction by default File? gatk4_jar_override @@ -181,6 +183,7 @@ task CreateReadCountPanelOfNormals { --do-impute-zeros ${default="true" do_impute_zeros} \ --extreme-outlier-truncation-percentile ${default="0.1" extreme_outlier_truncation_percentile} \ --number-of-eigensamples ${default="20" number_of_eigensamples} \ + --maximum-chunk-size ${default="16777216" maximum_chunk_size} \ ${"--annotated-intervals " + annotated_intervals} \ --output ${pon_entity_id}.pon.hdf5 >>> @@ -197,3 +200,4 @@ task CreateReadCountPanelOfNormals { File read_count_pon = "${pon_entity_id}.pon.hdf5" } } +
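
Reviewer notes (the sketches below are illustrative only and are not part of the patch):

On the `String dollar = "$"` idiom in PostprocessGermlineCNVCalls: Cromwell draft-2 placeholders use the same `${...}` syntax as bash, so the task stores a literal `$` in a WDL String and writes `${dollar}{!array[@]}` to render the bash construct `${!array[@]}` (see the cromwell issue linked in the task). A minimal, self-contained sketch of the same untar-and-accumulate pattern used above to build the repeated `--calls-shard-path` / `--model-shard-path` flags; the task name, the SHARD_* directory layout, and the echo-to-file output are assumptions for illustration:

    task BuildShardArgs {
        Array[File] shard_tars    # e.g. the gcnv_calls_tars passed to PostprocessGermlineCNVCalls

        String dollar = "$"    # WDL workaround for emitting a literal bash ${...}

        command <<<
            set -e
            # untar each shard into SHARD_0, SHARD_1, ... and accumulate one
            # repeated --calls-shard-path flag per shard
            shard_tar_array=(${sep=" " shard_tars})
            shard_args=""
            for index in ${dollar}{!shard_tar_array[@]}; do
                shard_tar=${dollar}{shard_tar_array[$index]}
                mkdir SHARD_$index
                tar xzf $shard_tar -C SHARD_$index
                shard_args="$shard_args --calls-shard-path SHARD_$index"
            done
            echo "$shard_args" > shard_args.txt
        >>>

        output {
            String rendered_args = read_string("shard_args.txt")
        }

        runtime {
            docker: "ubuntu:16.04"
        }
    }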
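On the tumor-only mode added to cnv_somatic_pair_workflow.wdl: it rests on a draft-2 pattern worth spelling out. Declare the input as `File? normal_bam`, coerce it once with `select_first([normal_bam, "null"])` (the intent is that the "null" placeholder is never localized, because every consumer sits inside the `if (defined(normal_bam))` block), and surface the outputs of the conditional calls as `File?`. A minimal sketch under those assumptions; `ProcessBam` and its byte-count command are hypothetical stand-ins, not tasks from this patch:

    workflow OptionalNormalSketch {
        File tumor_bam
        File? normal_bam    # omit this input to run in tumor-only mode

        # Draft-2 WDL cannot pass a File? where a task expects File, so coerce
        # with select_first; the "null" fallback is safe because the only
        # consumer is guarded by defined(normal_bam) below.
        File final_normal_bam = select_first([normal_bam, "null"])

        call ProcessBam as ProcessTumor {
            input: bam = tumor_bam
        }

        if (defined(normal_bam)) {
            call ProcessBam as ProcessNormal {
                input: bam = final_normal_bam
            }
        }

        output {
            File tumor_out = ProcessTumor.out
            # outputs of a conditional call must be declared optional
            File? normal_out = ProcessNormal.out
        }
    }

    task ProcessBam {
        File bam
        command {
            # stand-in work: record the input size in bytes
            wc -c < ${bam} > out.txt
        }
        output {
            File out = "out.txt"
        }
        runtime {
            docker: "ubuntu:16.04"
        }
    }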
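On the new `maximum_chunk_size` plumbing in cnv_somatic_panel_workflow.wdl: it relies on draft-2 `${default=...}` interpolation, where an unset `Int?` renders as the stated default, so CreateReadCountPanelOfNormals always receives a concrete `--maximum-chunk-size` value (16777216 unless overridden). A compact illustration; the `EchoArgs` task is hypothetical:

    task EchoArgs {
        Int? maximum_chunk_size    # left unset unless the caller overrides it

        command <<<
            # renders as "--maximum-chunk-size 16777216" when the input is unset
            echo --maximum-chunk-size ${default="16777216" maximum_chunk_size} > args.txt
        >>>

        output {
            String rendered = read_string("args.txt")
        }

        runtime {
            docker: "ubuntu:16.04"
        }
    }

Passing the optional through the workflow unchanged, as the patch does, keeps the default in exactly one place: the task's command line.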