From 075d82d97f509b6431be49f75207ce2a73404f58 Mon Sep 17 00:00:00 2001 From: Jonn Smith Date: Thu, 23 May 2024 13:10:24 -0400 Subject: [PATCH] Cleaned up some outputs and wdl linting warnings. --- wdl/pipelines/ILMN/Alignment/SRFlowcell.wdl | 2 +- .../ILMN/VariantCalling/SRWholeGenome.wdl | 555 ++++++++---------- .../TechAgnostic/Utility/TrainCnnFilters.wdl | 4 +- .../TechAgnostic/VariantCalling/LRCNVs.wdl | 2 +- .../SRJointCallGVCFsWithGenomicsDB.wdl | 21 +- wdl/tasks/QC/AlignedMetrics.wdl | 4 +- wdl/tasks/QC/FastQC.wdl | 2 +- wdl/tasks/Utility/SRUtils.wdl | 6 +- wdl/tasks/Utility/Utils.wdl | 4 +- wdl/tasks/Utility/VariantUtils.wdl | 46 +- wdl/tasks/VariantCalling/HaplotypeCaller.wdl | 4 +- .../VariantCalling/SRJointGenotyping.wdl | 4 +- wdl/tasks/Visualization/NanoPlot.wdl | 6 +- .../Pf_Niare_HaplotypeCaller.wdl | 18 +- 14 files changed, 289 insertions(+), 389 deletions(-) diff --git a/wdl/pipelines/ILMN/Alignment/SRFlowcell.wdl b/wdl/pipelines/ILMN/Alignment/SRFlowcell.wdl index 6af33fe47..064692914 100644 --- a/wdl/pipelines/ILMN/Alignment/SRFlowcell.wdl +++ b/wdl/pipelines/ILMN/Alignment/SRFlowcell.wdl @@ -158,7 +158,7 @@ workflow SRFlowcell { } } - File merged_bam = select_first([t_005_AlignReads.bam, t_006_MergeBamAlignment.bam]) + File merged_bam = select_first([t_006_MergeBamAlignment.bam, t_005_AlignReads.bam]) # Mark Duplicates call SRUTIL.MarkDuplicates as t_007_MarkDuplicates { diff --git a/wdl/pipelines/ILMN/VariantCalling/SRWholeGenome.wdl b/wdl/pipelines/ILMN/VariantCalling/SRWholeGenome.wdl index 20f720d1d..65e780d19 100644 --- a/wdl/pipelines/ILMN/VariantCalling/SRWholeGenome.wdl +++ b/wdl/pipelines/ILMN/VariantCalling/SRWholeGenome.wdl @@ -3,7 +3,7 @@ version 1.0 import "../../../tasks/Utility/Utils.wdl" as Utils import "../../../tasks/Utility/SRUtils.wdl" as SRUTIL import "../../../tasks/Utility/VariantUtils.wdl" as VARUTIL -import "../../../tasks/QC/FastQC.wdl" as FastQC +import "../../../tasks/QC/FastQC.wdl" as FASTQC import 
"../../../tasks/Utility/Finalize.wdl" as FF import "../../../tasks/QC/AlignedMetrics.wdl" as AM import "../../../tasks/VariantCalling/CallVariantsIllumina.wdl" as VAR @@ -23,12 +23,7 @@ workflow SRWholeGenome { ref_map_file: "Reference map file indicating reference sequence and auxillary file locations" participant_name: "The unique identifier of this sample being processed." - call_small_variants: "If true, will call small variants with DeepVariant." - run_HC_analysis: "If true, will run HaplotypeCaller to produce variant calls. Either this or `run_dv_pepper_analysis` are required." - run_dv_pepper_analysis: "If true, will run DeepVariant to produce variant calls. Either this or `run_HC_analysis` are required." enable_hc_pileup_mode: "If true, will enable `pileup mode` in HaplotypeCaller." - dvp_threads: "Number of threads to use for DeepVariant." - dvp_memory: "Amount of memory (Gb) to use for DeepVariant." ploidy: "Ploidy of the species being variant called." heterozygosity: "HaplotypeCaller Parameter - Heterozygosity value used to compute prior likelihoods for any locus. See the GATKDocs for full details on the meaning of this population genetics concept" heterozygosity_stdev: "HaplotypeCaller Parameter - Standard deviation of heterozygosity for SNP and indel calling." @@ -72,23 +67,14 @@ workflow SRWholeGenome { String? 
gcs_out_root_dir - Boolean call_small_variants = true - - Boolean run_HC_analysis = true - Boolean run_dv_pepper_analysis = true - Boolean enable_hc_pileup_mode = true - Int dvp_threads = 32 - Int dvp_memory = 128 - Int ploidy = 2 Float heterozygosity = 0.001 Float heterozygosity_stdev = 0.01 Float indel_heterozygosity = 0.000125 - Float snp_calibration_sensitivity = 0.99 Int snp_max_unlabeled_variants = 0 Array[String] snp_recalibration_annotation_values = [ "BaseQRankSum", "ExcessHet", "FS", "HAPCOMP", "HAPDOM", "HEC", "MQ", "MQRankSum", "QD", "ReadPosRankSum", "SOR", "DP" ] @@ -135,7 +121,7 @@ workflow SRWholeGenome { if (length(aligned_bams) > 1) { # Collect sample-level metrics: call AM.SamStatsMap as SamStats { input: bam = bam } - call FastQC.FastQC as FastQC { input: bam = bam, bai = bai } + call FASTQC.FastQC as FastQC { input: bam = bam, bai = bai } call Utils.ComputeGenomeLength as ComputeGenomeLength { input: fasta = ref_map['fasta'] } call SRUTIL.ComputeBamStats as ComputeBamStats { input: bam_file = bam } @@ -167,332 +153,282 @@ workflow SRWholeGenome { #################################################################################################### - # Some input handling: - if ((!run_dv_pepper_analysis) && (!run_HC_analysis)) { - call Utils.StopWorkflow as short_variant_caller_analysis_not_provided { - input: reason = "One of the following must be set to true: run_dv_pepper_analysis(~{run_dv_pepper_analysis}), run_HC_analysis(~{run_HC_analysis})" - } + # Now we handle HaplotypeCaller data: + call HC.CallVariantsWithHaplotypeCaller { + input: + bam = bam, + bai = bai, + sample_id = participant_name, + ref_fasta = ref_map['fasta'], + ref_fasta_fai = ref_map['fai'], + ref_dict = ref_map['dict'], + dbsnp_vcf = ref_map["known_sites_vcf"], + + ploidy = ploidy, + heterozygosity = heterozygosity, + heterozygosity_stdev = heterozygosity_stdev, + indel_heterozygosity = indel_heterozygosity, + + prefix = participant_name + ".haplotype_caller", + + 
enable_pileup_mode = enable_hc_pileup_mode, + + mito_contig = ref_map['mt_chr_name'], + contigs_names_to_ignore = contigs_names_to_ignore, } - # Handle DeepVariant First: - if (run_dv_pepper_analysis) { - - # Deep Variant runs better with raw base quals because it has already learned the error modes. - # We need to revert our recalibration before calling variants: - call SRUTIL.RevertBaseQualities as RevertBQSRQuals { - input: - bam = bam, - bai = bai, - prefix = basename(bam, ".bam") + ".reverted_base_quals" - } + # Make sure our sample name is correct: + call VARUTIL.RenameSingleSampleVcf as RenameRawHcVcf { + input: + vcf = CallVariantsWithHaplotypeCaller.output_vcf, + vcf_index = CallVariantsWithHaplotypeCaller.output_vcf_index, + prefix = participant_name + ".haplotype_caller.renamed", + new_sample_name = participant_name + } + call VARUTIL.RenameSingleSampleVcf as RenameRawHcGvcf { + input: + vcf = CallVariantsWithHaplotypeCaller.output_gvcf, + vcf_index = CallVariantsWithHaplotypeCaller.output_gvcf_index, + prefix = participant_name + ".haplotype_caller.renamed", + is_gvcf = true, + new_sample_name = participant_name + } - call VAR.CallVariants as CallVariantsWithDeepVariant { - input: - bam = RevertBQSRQuals.bam_out, - bai = RevertBQSRQuals.bai_out, - sample_id = participant_name, - ref_fasta = ref_map['fasta'], - ref_fasta_fai = ref_map['fai'], - ref_dict = ref_map['dict'], + ######################################################################## + # Call VETS: + call VARUTIL.ExtractVariantAnnotations as ExtractIndelVariantAnnotations { + input: + vcf = RenameRawHcVcf.new_sample_name_vcf, + vcf_index = RenameRawHcVcf.new_sample_name_vcf_index, - prefix = participant_name + ".deep_variant", + prefix = participant_name, + mode = "INDEL", - call_small_variants = call_small_variants, + recalibration_annotation_values = indel_recalibration_annotation_values, - run_dv_pepper_analysis = run_dv_pepper_analysis, - dvp_threads = dvp_threads, - dvp_memory = 
dvp_memory, + known_reference_variants = indel_known_reference_variants, + known_reference_variants_index = indel_known_reference_variants_index, + known_reference_variants_identifier = indel_known_reference_variants_identifier, + is_training = indel_is_training, + is_calibration = indel_is_calibration, - mito_contig = ref_map['mt_chr_name'], - contigs_names_to_ignore = contigs_names_to_ignore, - } + max_unlabeled_variants = indel_max_unlabeled_variants, } - # Now we handle HaplotypeCaller data: - if (run_HC_analysis) { - call HC.CallVariantsWithHaplotypeCaller { - input: - bam = bam, - bai = bai, - sample_id = participant_name, - ref_fasta = ref_map['fasta'], - ref_fasta_fai = ref_map['fai'], - ref_dict = ref_map['dict'], - dbsnp_vcf = ref_map["known_sites_vcf"], + call VARUTIL.ExtractVariantAnnotations as ExtractSnpVariantAnnotations { + input: + vcf = RenameRawHcVcf.new_sample_name_vcf, + vcf_index = RenameRawHcVcf.new_sample_name_vcf_index, - ploidy = ploidy, - heterozygosity = heterozygosity, - heterozygosity_stdev = heterozygosity_stdev, - indel_heterozygosity = indel_heterozygosity, + prefix = participant_name, + mode = "SNP", - prefix = participant_name + ".haplotype_caller", + recalibration_annotation_values = snp_recalibration_annotation_values, - enable_pileup_mode = enable_hc_pileup_mode, + known_reference_variants = snp_known_reference_variants, + known_reference_variants_index = snp_known_reference_variants_index, + known_reference_variants_identifier = snp_known_reference_variants_identifier, + is_training = snp_is_training, + is_calibration = snp_is_calibration, - mito_contig = ref_map['mt_chr_name'], - contigs_names_to_ignore = contigs_names_to_ignore, - } + max_unlabeled_variants = snp_max_unlabeled_variants, + } - # Make sure our sample name is correct: - call VARUTIL.RenameSingleSampleVcf as RenameRawHcVcf { - input: - vcf = CallVariantsWithHaplotypeCaller.output_vcf, - vcf_index = CallVariantsWithHaplotypeCaller.output_vcf_index, - prefix = 
participant_name + ".haplotype_caller.renamed", - new_sample_name = participant_name - } - call VARUTIL.RenameSingleSampleVcf as RenameRawHcGvcf { - input: - vcf = CallVariantsWithHaplotypeCaller.output_gvcf, - vcf_index = CallVariantsWithHaplotypeCaller.output_gvcf_index, - prefix = participant_name + ".haplotype_caller.renamed", - is_gvcf = true, - new_sample_name = participant_name - } + call VARUTIL.TrainVariantAnnotationsModel as TrainIndelVariantAnnotationsModel { + input: + annotation_hdf5 = ExtractIndelVariantAnnotations.annotation_hdf5, + mode = "INDEL", + prefix = participant_name, + } - ######################################################################## - # Call VETS / VQSR-lite: - call VARUTIL.ExtractVariantAnnotations as ExtractIndelVariantAnnotations { - input: - vcf = RenameRawHcVcf.new_sample_name_vcf, - vcf_index = RenameRawHcVcf.new_sample_name_vcf_index, + call VARUTIL.TrainVariantAnnotationsModel as TrainSnpVariantAnnotationsModel { + input: + annotation_hdf5 = ExtractSnpVariantAnnotations.annotation_hdf5, + mode = "SNP", + prefix = participant_name, + } - prefix = participant_name, - mode = "INDEL", + call VARUTIL.ScoreVariantAnnotations as ScoreSnpVariantAnnotations { + input: + vcf = RenameRawHcVcf.new_sample_name_vcf, + vcf_index = RenameRawHcVcf.new_sample_name_vcf_index, - recalibration_annotation_values = indel_recalibration_annotation_values, + sites_only_extracted_vcf = ExtractSnpVariantAnnotations.sites_only_vcf, + sites_only_extracted_vcf_index = ExtractSnpVariantAnnotations.sites_only_vcf_index, - known_reference_variants = indel_known_reference_variants, - known_reference_variants_index = indel_known_reference_variants_index, - known_reference_variants_identifier = indel_known_reference_variants_identifier, - is_training = indel_is_training, - is_calibration = indel_is_calibration, + model_prefix = participant_name + "_train_SNP", + model_files = flatten([[TrainSnpVariantAnnotationsModel.training_scores, 
TrainSnpVariantAnnotationsModel.positive_model_scorer_pickle], select_all([ + TrainSnpVariantAnnotationsModel.unlabeled_positive_model_scores, + TrainSnpVariantAnnotationsModel.calibration_set_scores, + TrainSnpVariantAnnotationsModel.negative_model_scorer_pickle + ])]), + prefix = participant_name + "_SNP", + mode = "SNP", - max_unlabeled_variants = indel_max_unlabeled_variants, - } + calibration_sensitivity_threshold = snp_calibration_sensitivity, - call VARUTIL.ExtractVariantAnnotations as ExtractSnpVariantAnnotations { - input: - vcf = RenameRawHcVcf.new_sample_name_vcf, - vcf_index = RenameRawHcVcf.new_sample_name_vcf_index, + recalibration_annotation_values = snp_recalibration_annotation_values, - prefix = participant_name, - mode = "SNP", + known_reference_variants = snp_known_reference_variants, + known_reference_variants_index = snp_known_reference_variants_index, + known_reference_variants_identifier = snp_known_reference_variants_identifier, + is_training = snp_is_training, + is_calibration = snp_is_calibration, + } - recalibration_annotation_values = snp_recalibration_annotation_values, + call VARUTIL.ScoreVariantAnnotations as ScoreIndelVariantAnnotations { + input: + vcf = ScoreSnpVariantAnnotations.scored_vcf, + vcf_index = ScoreSnpVariantAnnotations.scored_vcf_index, - known_reference_variants = snp_known_reference_variants, - known_reference_variants_index = snp_known_reference_variants_index, - known_reference_variants_identifier = snp_known_reference_variants_identifier, - is_training = snp_is_training, - is_calibration = snp_is_calibration, + sites_only_extracted_vcf = ExtractIndelVariantAnnotations.sites_only_vcf, + sites_only_extracted_vcf_index = ExtractIndelVariantAnnotations.sites_only_vcf_index, - max_unlabeled_variants = snp_max_unlabeled_variants, - } + model_prefix = participant_name + "_train_INDEL", + model_files = flatten([[TrainIndelVariantAnnotationsModel.training_scores, 
TrainIndelVariantAnnotationsModel.positive_model_scorer_pickle], select_all([ + TrainIndelVariantAnnotationsModel.unlabeled_positive_model_scores, + TrainIndelVariantAnnotationsModel.calibration_set_scores, + TrainIndelVariantAnnotationsModel.negative_model_scorer_pickle + ])]), + prefix = participant_name + "_ALL", + mode = "INDEL", - call VARUTIL.TrainVariantAnnotationsModel as TrainIndelVariantAnnotationsModel { - input: - annotation_hdf5 = ExtractIndelVariantAnnotations.annotation_hdf5, - mode = "INDEL", - prefix = participant_name, - } + calibration_sensitivity_threshold = indel_calibration_sensitivity, - call VARUTIL.TrainVariantAnnotationsModel as TrainSnpVariantAnnotationsModel { - input: - annotation_hdf5 = ExtractSnpVariantAnnotations.annotation_hdf5, - mode = "SNP", - prefix = participant_name, - } + recalibration_annotation_values = indel_recalibration_annotation_values, - call VARUTIL.ScoreVariantAnnotations as ScoreSnpVariantAnnotations { - input: - vcf = RenameRawHcVcf.new_sample_name_vcf, - vcf_index = RenameRawHcVcf.new_sample_name_vcf_index, - - sites_only_extracted_vcf = ExtractSnpVariantAnnotations.sites_only_vcf, - sites_only_extracted_vcf_index = ExtractSnpVariantAnnotations.sites_only_vcf_index, - - model_prefix = participant_name + "_train_SNP", - model_files = flatten([[TrainSnpVariantAnnotationsModel.training_scores, TrainSnpVariantAnnotationsModel.positive_model_scorer_pickle], select_all([ - TrainSnpVariantAnnotationsModel.unlabeled_positive_model_scores, - TrainSnpVariantAnnotationsModel.calibration_set_scores, - TrainSnpVariantAnnotationsModel.negative_model_scorer_pickle - ])]), - prefix = participant_name + "_SNP", - mode = "SNP", - - calibration_sensitivity_threshold = snp_calibration_sensitivity, - - recalibration_annotation_values = snp_recalibration_annotation_values, - - known_reference_variants = snp_known_reference_variants, - known_reference_variants_index = snp_known_reference_variants_index, - 
known_reference_variants_identifier = snp_known_reference_variants_identifier, - is_training = snp_is_training, - is_calibration = snp_is_calibration, - } + known_reference_variants = indel_known_reference_variants, + known_reference_variants_index = indel_known_reference_variants_index, + known_reference_variants_identifier = indel_known_reference_variants_identifier, + is_training = indel_is_training, + is_calibration = indel_is_calibration, + } + ######################################################################## - call VARUTIL.ScoreVariantAnnotations as ScoreIndelVariantAnnotations { + if (defined(fingerprint_haploytpe_db_file)) { + call VARUTIL.ExtractFingerprintAndBarcode as FingerprintAndBarcodeVcf { input: - vcf = ScoreSnpVariantAnnotations.scored_vcf, - vcf_index = ScoreSnpVariantAnnotations.scored_vcf_index, - - sites_only_extracted_vcf = ExtractIndelVariantAnnotations.sites_only_vcf, - sites_only_extracted_vcf_index = ExtractIndelVariantAnnotations.sites_only_vcf_index, - - model_prefix = participant_name + "_train_INDEL", - model_files = flatten([[TrainIndelVariantAnnotationsModel.training_scores, TrainIndelVariantAnnotationsModel.positive_model_scorer_pickle], select_all([ - TrainIndelVariantAnnotationsModel.unlabeled_positive_model_scores, - TrainIndelVariantAnnotationsModel.calibration_set_scores, - TrainIndelVariantAnnotationsModel.negative_model_scorer_pickle - ])]), - prefix = participant_name + "_ALL", - mode = "INDEL", - - calibration_sensitivity_threshold = indel_calibration_sensitivity, - - recalibration_annotation_values = indel_recalibration_annotation_values, - - known_reference_variants = indel_known_reference_variants, - known_reference_variants_index = indel_known_reference_variants_index, - known_reference_variants_identifier = indel_known_reference_variants_identifier, - is_training = indel_is_training, - is_calibration = indel_is_calibration, - } - ######################################################################## - - if 
(defined(fingerprint_haploytpe_db_file)) { - call VARUTIL.ExtractFingerprintAndBarcode as FingerprintAndBarcodeVcf { - input: - vcf = ScoreIndelVariantAnnotations.scored_vcf, - vcf_index = ScoreIndelVariantAnnotations.scored_vcf_index, - haplotype_database_file = select_first([fingerprint_haploytpe_db_file]), - ref_fasta = ref_map['fasta'], - ref_fasta_fai = ref_map['fai'], - ref_dict = ref_map['dict'], - prefix = participant_name - } + vcf = ScoreIndelVariantAnnotations.scored_vcf, + vcf_index = ScoreIndelVariantAnnotations.scored_vcf_index, + haplotype_database_file = select_first([fingerprint_haploytpe_db_file]), + ref_fasta = ref_map['fasta'], + ref_fasta_fai = ref_map['fai'], + ref_dict = ref_map['dict'], + prefix = participant_name } + } - if (defined(gcs_out_root_dir)) { - - String concrete_gcs_out_root_dir = select_first([gcs_out_root_dir]) + if (defined(gcs_out_root_dir)) { - String outdir = sub(concrete_gcs_out_root_dir, "/$", "") + "/SRWholeGenome/~{participant_name}" + String concrete_gcs_out_root_dir = select_first([gcs_out_root_dir]) - String bam_dir = outdir + "/alignments" - String metrics_dir = outdir + "/metrics" - String smalldir = outdir + "/variants/small" - String recalibration_dir = outdir + "/variants/recalibration_files" + String outdir = sub(concrete_gcs_out_root_dir, "/$", "") + "/SRWholeGenome/~{participant_name}" - # Create a Keyfile for finalization: - File keyfile = select_first([FingerprintAndBarcodeVcf.barcode_file, ScoreIndelVariantAnnotations.scored_vcf_index]) + String bam_dir = outdir + "/alignments" + String metrics_dir = outdir + "/metrics" + String smalldir = outdir + "/variants/small" + String recalibration_dir = outdir + "/variants/recalibration_files" - if (length(aligned_bams) > 1) { - call FF.FinalizeToFile as FinalizeBam { input: outdir = bam_dir, file = bam, name = "~{participant_name}.bam", keyfile=keyfile } - call FF.FinalizeToFile as FinalizeBai { input: outdir = bam_dir, file = bai, name = 
"~{participant_name}.bam.bai", keyfile=keyfile } + # Create a Keyfile for finalization: + File keyfile = select_first([FingerprintAndBarcodeVcf.barcode_file, ScoreIndelVariantAnnotations.scored_vcf_index]) - call FF.FinalizeToFile as FinalizeFastQCReport { - input: - outdir = metrics_dir, - file = select_first([ FastQC.report ]) - } + if (length(aligned_bams) > 1) { + call FF.FinalizeToFile as FinalizeBam { input: outdir = bam_dir, file = bam, name = "~{participant_name}.bam", keyfile=keyfile } + call FF.FinalizeToFile as FinalizeBai { input: outdir = bam_dir, file = bai, name = "~{participant_name}.bam.bai", keyfile=keyfile } - if (defined(bed_to_compute_coverage)) { - call FF.FinalizeToFile as FinalizeRegionalCoverage { input: outdir = bam_dir, file = select_first([RegionalCoverage.cov_summary]) } - } + call FF.FinalizeToFile as FinalizeFastQCReport { + input: + outdir = metrics_dir, + file = select_first([ FastQC.report ]) } - if (run_dv_pepper_analysis) { - call FF.FinalizeToFile as FinalizeDVPepperVcf { input: outdir = smalldir, file = select_first([CallVariantsWithDeepVariant.dvp_vcf]) } - call FF.FinalizeToFile as FinalizeDVPepperTbi { input: outdir = smalldir, file = select_first([CallVariantsWithDeepVariant.dvp_tbi]) } - call FF.FinalizeToFile as FinalizeDVPepperGVcf { input: outdir = smalldir, file = select_first([CallVariantsWithDeepVariant.dvp_g_vcf]) } - call FF.FinalizeToFile as FinalizeDVPepperGTbi { input: outdir = smalldir, file = select_first([CallVariantsWithDeepVariant.dvp_g_tbi]) } + if (defined(bed_to_compute_coverage)) { + call FF.FinalizeToFile as FinalizeRegionalCoverage { input: outdir = bam_dir, file = select_first([RegionalCoverage.cov_summary]) } } + } - # Finalize the raw Joint Calls: - call FF.FinalizeToFile as FinalizeHCVcf { input: outdir = smalldir, keyfile = keyfile, file = RenameRawHcVcf.new_sample_name_vcf } - call FF.FinalizeToFile as FinalizeHCTbi { input: outdir = smalldir, keyfile = keyfile, file = 
RenameRawHcVcf.new_sample_name_vcf_index } - call FF.FinalizeToFile as FinalizeHCGVcf { input: outdir = smalldir, keyfile = keyfile, file = RenameRawHcGvcf.new_sample_name_vcf } - call FF.FinalizeToFile as FinalizeHCGTbi { input: outdir = smalldir, keyfile = keyfile, file = RenameRawHcGvcf.new_sample_name_vcf_index } - call FF.FinalizeToFile as FinalizeHCBamOut { input: outdir = smalldir, keyfile = keyfile, file = CallVariantsWithHaplotypeCaller.bamout } - call FF.FinalizeToFile as FinalizeHCBaiOut { input: outdir = smalldir, keyfile = keyfile, file = CallVariantsWithHaplotypeCaller.bamout_index } - - # Finalize the reclibrated / filtered variants: - call FF.FinalizeToFile as FinalizeHCRescoredVcf { input: outdir = smalldir, keyfile = keyfile, file = ScoreIndelVariantAnnotations.scored_vcf } - call FF.FinalizeToFile as FinalizeHCRescoredTbi { input: outdir = smalldir, keyfile = keyfile, file = ScoreIndelVariantAnnotations.scored_vcf_index } - - # Finalize other outputs: - if (defined(fingerprint_haploytpe_db_file)) { - call FF.FinalizeToFile as FinalizeFingerprintVcf { input: outdir = smalldir, keyfile = keyfile, file = select_first([FingerprintAndBarcodeVcf.output_vcf]) } - } + # Finalize the raw Joint Calls: + call FF.FinalizeToFile as FinalizeHCGVcf { input: outdir = smalldir, keyfile = keyfile, file = RenameRawHcGvcf.new_sample_name_vcf } + call FF.FinalizeToFile as FinalizeHCGTbi { input: outdir = smalldir, keyfile = keyfile, file = RenameRawHcGvcf.new_sample_name_vcf_index } + call FF.FinalizeToFile as FinalizeHCBamOut { input: outdir = smalldir, keyfile = keyfile, file = CallVariantsWithHaplotypeCaller.bamout } + call FF.FinalizeToFile as FinalizeHCBaiOut { input: outdir = smalldir, keyfile = keyfile, file = CallVariantsWithHaplotypeCaller.bamout_index } - ################################ - # Finalize the VETS files: - ############ + # Finalize the reclibrated / filtered variants: + call FF.FinalizeToFile as FinalizeHCRescoredVcf { input: outdir = smalldir, 
keyfile = keyfile, file = ScoreIndelVariantAnnotations.scored_vcf } + call FF.FinalizeToFile as FinalizeHCRescoredTbi { input: outdir = smalldir, keyfile = keyfile, file = ScoreIndelVariantAnnotations.scored_vcf_index } - # ExtractVariantAnnotations: - call FF.FinalizeToFile as FinalizeSnpExtractedAnnotations { input: outdir = recalibration_dir, keyfile = keyfile, file = ExtractSnpVariantAnnotations.annotation_hdf5 } - call FF.FinalizeToFile as FinalizeSnpExtractedSitesOnlyVcf { input: outdir = recalibration_dir, keyfile = keyfile, file = ExtractSnpVariantAnnotations.sites_only_vcf } - call FF.FinalizeToFile as FinalizeSnpExtractedSitesOnlyVcfIndex { input: outdir = recalibration_dir, keyfile = keyfile, file = ExtractSnpVariantAnnotations.sites_only_vcf_index } - if (defined(ExtractSnpVariantAnnotations.unlabeled_annotation_hdf5)) { - call FF.FinalizeToFile as FinalizeSnpExtractedUnlabeledAnnotations { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([ExtractSnpVariantAnnotations.unlabeled_annotation_hdf5]) } - } - call FF.FinalizeToFile as FinalizeIndelExtractedAnnotations { input: outdir = recalibration_dir, keyfile = keyfile, file = ExtractIndelVariantAnnotations.annotation_hdf5 } - call FF.FinalizeToFile as FinalizeIndelExtractedSitesOnlyVcf { input: outdir = recalibration_dir, keyfile = keyfile, file = ExtractIndelVariantAnnotations.sites_only_vcf } - call FF.FinalizeToFile as FinalizeIndelExtractedSitesOnlyVcfIndex { input: outdir = recalibration_dir, keyfile = keyfile, file = ExtractIndelVariantAnnotations.sites_only_vcf_index } - if (defined(ExtractIndelVariantAnnotations.unlabeled_annotation_hdf5)) { - call FF.FinalizeToFile as FinalizeIndelExtractedUnlabeledAnnotations { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([ExtractIndelVariantAnnotations.unlabeled_annotation_hdf5]) } - } + # Finalize other outputs: + if (defined(fingerprint_haploytpe_db_file)) { + call FF.FinalizeToFile as 
FinalizeFingerprintVcf { input: outdir = smalldir, keyfile = keyfile, file = select_first([FingerprintAndBarcodeVcf.output_vcf]) } + } - # TrainVariantAnnotationsModel - call FF.FinalizeToFile as FinalizeSnpTrainVariantAnnotationsTrainingScores { input: outdir = recalibration_dir, keyfile = keyfile, file = TrainSnpVariantAnnotationsModel.training_scores } - call FF.FinalizeToFile as FinalizeSnpTrainVariantAnnotationsPositiveModelScorer { input: outdir = recalibration_dir, keyfile = keyfile, file = TrainSnpVariantAnnotationsModel.positive_model_scorer_pickle } - if (defined(TrainSnpVariantAnnotationsModel.unlabeled_positive_model_scores)) { - call FF.FinalizeToFile as FinalizeSnpTrainVariantAnnotationsUnlabeledPositiveModelScores { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([TrainSnpVariantAnnotationsModel.unlabeled_positive_model_scores]) } - } - if (defined(TrainSnpVariantAnnotationsModel.calibration_set_scores)) { - call FF.FinalizeToFile as FinalizeSnpTrainVariantAnnotationsCalibrationSetScores { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([TrainSnpVariantAnnotationsModel.calibration_set_scores]) } - } - if (defined(TrainSnpVariantAnnotationsModel.negative_model_scorer_pickle)) { - call FF.FinalizeToFile as FinalizeSnpTrainVariantAnnotationsNegativeModelScorer { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([TrainSnpVariantAnnotationsModel.negative_model_scorer_pickle]) } - } + ################################ + # Finalize the VETS files: + ############ - call FF.FinalizeToFile as FinalizeIndelTrainVariantAnnotationsTrainingScores { input: outdir = recalibration_dir, keyfile = keyfile, file = TrainIndelVariantAnnotationsModel.training_scores } - call FF.FinalizeToFile as FinalizeIndelTrainVariantAnnotationsPositiveModelScorer { input: outdir = recalibration_dir, keyfile = keyfile, file = TrainIndelVariantAnnotationsModel.positive_model_scorer_pickle } - if 
(defined(TrainIndelVariantAnnotationsModel.unlabeled_positive_model_scores)) { - call FF.FinalizeToFile as FinalizeIndelTrainVariantAnnotationsUnlabeledPositiveModelScores { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([TrainIndelVariantAnnotationsModel.unlabeled_positive_model_scores]) } - } - if (defined(TrainIndelVariantAnnotationsModel.calibration_set_scores)) { - call FF.FinalizeToFile as FinalizeIndelTrainVariantAnnotationsCalibrationSetScores { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([TrainIndelVariantAnnotationsModel.calibration_set_scores]) } - } - if (defined(TrainIndelVariantAnnotationsModel.negative_model_scorer_pickle)) { - call FF.FinalizeToFile as FinalizeIndelTrainVariantAnnotationsNegativeModelScorer { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([TrainIndelVariantAnnotationsModel.negative_model_scorer_pickle]) } - } + # ExtractVariantAnnotations: + call FF.FinalizeToFile as FinalizeSnpExtractedAnnotations { input: outdir = recalibration_dir, keyfile = keyfile, file = ExtractSnpVariantAnnotations.annotation_hdf5 } + call FF.FinalizeToFile as FinalizeSnpExtractedSitesOnlyVcf { input: outdir = recalibration_dir, keyfile = keyfile, file = ExtractSnpVariantAnnotations.sites_only_vcf } + call FF.FinalizeToFile as FinalizeSnpExtractedSitesOnlyVcfIndex { input: outdir = recalibration_dir, keyfile = keyfile, file = ExtractSnpVariantAnnotations.sites_only_vcf_index } + if (defined(ExtractSnpVariantAnnotations.unlabeled_annotation_hdf5)) { + call FF.FinalizeToFile as FinalizeSnpExtractedUnlabeledAnnotations { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([ExtractSnpVariantAnnotations.unlabeled_annotation_hdf5]) } + } + call FF.FinalizeToFile as FinalizeIndelExtractedAnnotations { input: outdir = recalibration_dir, keyfile = keyfile, file = ExtractIndelVariantAnnotations.annotation_hdf5 } + call FF.FinalizeToFile as 
FinalizeIndelExtractedSitesOnlyVcf { input: outdir = recalibration_dir, keyfile = keyfile, file = ExtractIndelVariantAnnotations.sites_only_vcf } + call FF.FinalizeToFile as FinalizeIndelExtractedSitesOnlyVcfIndex { input: outdir = recalibration_dir, keyfile = keyfile, file = ExtractIndelVariantAnnotations.sites_only_vcf_index } + if (defined(ExtractIndelVariantAnnotations.unlabeled_annotation_hdf5)) { + call FF.FinalizeToFile as FinalizeIndelExtractedUnlabeledAnnotations { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([ExtractIndelVariantAnnotations.unlabeled_annotation_hdf5]) } + } - # ScoreVariantAnnotations - call FF.FinalizeToFile as FinalizeScoreSnpVariantAnnotationsScoredVcf { input: outdir = recalibration_dir, keyfile = keyfile, file = ScoreSnpVariantAnnotations.scored_vcf } - call FF.FinalizeToFile as FinalizeScoreSnpVariantAnnotationsScoredVcfIndex { input: outdir = recalibration_dir, keyfile = keyfile, file = ScoreSnpVariantAnnotations.scored_vcf_index } - if (defined(ScoreSnpVariantAnnotations.annotations_hdf5)) { - call FF.FinalizeToFile as FinalizeScoreSnpVariantAnnotationsAnnotationsHdf5 { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([ScoreSnpVariantAnnotations.annotations_hdf5]) } - } - if (defined(ScoreSnpVariantAnnotations.scores_hdf5)) { - call FF.FinalizeToFile as FinalizeScoreSnpVariantAnnotationsScoresHdf5 { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([ScoreSnpVariantAnnotations.scores_hdf5]) } - } + # TrainVariantAnnotationsModel + call FF.FinalizeToFile as FinalizeSnpTrainVariantAnnotationsTrainingScores { input: outdir = recalibration_dir, keyfile = keyfile, file = TrainSnpVariantAnnotationsModel.training_scores } + call FF.FinalizeToFile as FinalizeSnpTrainVariantAnnotationsPositiveModelScorer { input: outdir = recalibration_dir, keyfile = keyfile, file = TrainSnpVariantAnnotationsModel.positive_model_scorer_pickle } + if 
(defined(TrainSnpVariantAnnotationsModel.unlabeled_positive_model_scores)) { + call FF.FinalizeToFile as FinalizeSnpTrainVariantAnnotationsUnlabeledPositiveModelScores { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([TrainSnpVariantAnnotationsModel.unlabeled_positive_model_scores]) } + } + if (defined(TrainSnpVariantAnnotationsModel.calibration_set_scores)) { + call FF.FinalizeToFile as FinalizeSnpTrainVariantAnnotationsCalibrationSetScores { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([TrainSnpVariantAnnotationsModel.calibration_set_scores]) } + } + if (defined(TrainSnpVariantAnnotationsModel.negative_model_scorer_pickle)) { + call FF.FinalizeToFile as FinalizeSnpTrainVariantAnnotationsNegativeModelScorer { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([TrainSnpVariantAnnotationsModel.negative_model_scorer_pickle]) } + } - call FF.FinalizeToFile as FinalizeScoreIndelVariantAnnotationsScoredVcf { input: outdir = recalibration_dir, keyfile = keyfile, file = ScoreIndelVariantAnnotations.scored_vcf } - call FF.FinalizeToFile as FinalizeScoreIndelVariantAnnotationsScoredVcfIndex { input: outdir = recalibration_dir, keyfile = keyfile, file = ScoreIndelVariantAnnotations.scored_vcf_index } - if (defined(ScoreIndelVariantAnnotations.annotations_hdf5)) { - call FF.FinalizeToFile as FinalizeScoreIndelVariantAnnotationsAnnotationsHdf5 { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([ScoreIndelVariantAnnotations.annotations_hdf5]) } - } - if (defined(ScoreIndelVariantAnnotations.scores_hdf5)) { - call FF.FinalizeToFile as FinalizeScoreIndelVariantAnnotationsScoresHdf5 { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([ScoreIndelVariantAnnotations.scores_hdf5]) } - } + call FF.FinalizeToFile as FinalizeIndelTrainVariantAnnotationsTrainingScores { input: outdir = recalibration_dir, keyfile = keyfile, file = 
TrainIndelVariantAnnotationsModel.training_scores } + call FF.FinalizeToFile as FinalizeIndelTrainVariantAnnotationsPositiveModelScorer { input: outdir = recalibration_dir, keyfile = keyfile, file = TrainIndelVariantAnnotationsModel.positive_model_scorer_pickle } + if (defined(TrainIndelVariantAnnotationsModel.unlabeled_positive_model_scores)) { + call FF.FinalizeToFile as FinalizeIndelTrainVariantAnnotationsUnlabeledPositiveModelScores { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([TrainIndelVariantAnnotationsModel.unlabeled_positive_model_scores]) } + } + if (defined(TrainIndelVariantAnnotationsModel.calibration_set_scores)) { + call FF.FinalizeToFile as FinalizeIndelTrainVariantAnnotationsCalibrationSetScores { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([TrainIndelVariantAnnotationsModel.calibration_set_scores]) } + } + if (defined(TrainIndelVariantAnnotationsModel.negative_model_scorer_pickle)) { + call FF.FinalizeToFile as FinalizeIndelTrainVariantAnnotationsNegativeModelScorer { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([TrainIndelVariantAnnotationsModel.negative_model_scorer_pickle]) } + } + + # ScoreVariantAnnotations + call FF.FinalizeToFile as FinalizeScoreSnpVariantAnnotationsScoredVcf { input: outdir = recalibration_dir, keyfile = keyfile, file = ScoreSnpVariantAnnotations.scored_vcf } + call FF.FinalizeToFile as FinalizeScoreSnpVariantAnnotationsScoredVcfIndex { input: outdir = recalibration_dir, keyfile = keyfile, file = ScoreSnpVariantAnnotations.scored_vcf_index } + if (defined(ScoreSnpVariantAnnotations.annotations_hdf5)) { + call FF.FinalizeToFile as FinalizeScoreSnpVariantAnnotationsAnnotationsHdf5 { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([ScoreSnpVariantAnnotations.annotations_hdf5]) } + } + if (defined(ScoreSnpVariantAnnotations.scores_hdf5)) { + call FF.FinalizeToFile as FinalizeScoreSnpVariantAnnotationsScoresHdf5 { 
input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([ScoreSnpVariantAnnotations.scores_hdf5]) } + } + + call FF.FinalizeToFile as FinalizeScoreIndelVariantAnnotationsScoredVcf { input: outdir = recalibration_dir, keyfile = keyfile, file = ScoreIndelVariantAnnotations.scored_vcf } + call FF.FinalizeToFile as FinalizeScoreIndelVariantAnnotationsScoredVcfIndex { input: outdir = recalibration_dir, keyfile = keyfile, file = ScoreIndelVariantAnnotations.scored_vcf_index } + if (defined(ScoreIndelVariantAnnotations.annotations_hdf5)) { + call FF.FinalizeToFile as FinalizeScoreIndelVariantAnnotationsAnnotationsHdf5 { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([ScoreIndelVariantAnnotations.annotations_hdf5]) } + } + if (defined(ScoreIndelVariantAnnotations.scores_hdf5)) { + call FF.FinalizeToFile as FinalizeScoreIndelVariantAnnotationsScoresHdf5 { input: outdir = recalibration_dir, keyfile = keyfile, file = select_first([ScoreIndelVariantAnnotations.scores_hdf5]) } } } # Set up output files here. - # We have to do this because of the options for HaplotypeCaller / DeepVariant / etc. + # We have to do this because of the options for HaplotypeCaller / etc. 
+ + # Only save the input bam files if we had to merge them: if (length(aligned_bams) > 1) { File final_aligned_bam = if defined(gcs_out_root_dir) then select_first([FinalizeBam.gcs_path]) else bam File final_aligned_bai = if defined(gcs_out_root_dir) then select_first([FinalizeBai.gcs_path]) else bai @@ -507,34 +443,24 @@ workflow SRWholeGenome { File final_fingerprint_vcf = if defined(gcs_out_root_dir) then select_first([FinalizeFingerprintVcf.gcs_path]) else select_first([FingerprintAndBarcodeVcf.output_vcf]) } - if (run_dv_pepper_analysis) { - File final_dvp_vcf = if defined(gcs_out_root_dir) then select_first([FinalizeDVPepperVcf.gcs_path]) else select_first([CallVariantsWithDeepVariant.dvp_vcf]) - File final_dvp_tbi = if defined(gcs_out_root_dir) then select_first([FinalizeDVPepperTbi.gcs_path]) else select_first([CallVariantsWithDeepVariant.dvp_tbi]) - File final_dvp_g_vcf = if defined(gcs_out_root_dir) then select_first([FinalizeDVPepperGVcf.gcs_path]) else select_first([CallVariantsWithDeepVariant.dvp_g_vcf]) - File final_dvp_g_tbi = if defined(gcs_out_root_dir) then select_first([FinalizeDVPepperGTbi.gcs_path]) else select_first([CallVariantsWithDeepVariant.dvp_g_tbi]) - } - output { + # Output files for multi-bam inputs: File? aligned_bam = final_aligned_bam File? aligned_bai = final_aligned_bai - + Float? average_identity = tmp_average_identity Float? aligned_num_reads = tmp_aligned_num_reads Float? aligned_num_bases = tmp_aligned_num_bases Float? aligned_frac_bases = tmp_aligned_frac_bases Float? aligned_est_fold_cov = tmp_aligned_est_fold_cov - Float? aligned_read_length_mean = tmp_aligned_read_length_mean - Float? insert_size_average = tmp_insert_size_average Float? insert_size_standard_deviation = tmp_insert_size_standard_deviation Float? pct_properly_paired_reads = tmp_pct_properly_paired_reads - Float? average_identity = tmp_average_identity - File? fastqc_report = final_fastqc_report - File? 
bed_cov_summary = final_regional_coverage_summary + # Standard output files: File? fingerprint_vcf = final_fingerprint_vcf String? barcode = FingerprintAndBarcodeVcf.barcode @@ -542,20 +468,11 @@ workflow SRWholeGenome { ######################################## - File? dvp_vcf = final_dvp_vcf - File? dvp_tbi = final_dvp_tbi - File? dvp_g_vcf = final_dvp_g_vcf - File? dvp_g_tbi = final_dvp_g_tbi - - ######################################## - - File? hc_g_vcf = select_first([FinalizeHCGVcf.gcs_path, RenameRawHcGvcf.new_sample_name_vcf]) - File? hc_g_tbi = select_first([FinalizeHCGTbi.gcs_path, RenameRawHcGvcf.new_sample_name_vcf_index]) - File? hc_bamout = select_first([FinalizeHCBamOut.gcs_path, CallVariantsWithHaplotypeCaller.bamout]) - File? hc_baiout = select_first([FinalizeHCBaiOut.gcs_path, CallVariantsWithHaplotypeCaller.bamout_index]) - File? hc_raw_vcf = select_first([FinalizeHCVcf.gcs_path, RenameRawHcVcf.new_sample_name_vcf]) - File? hc_raw_tbi = select_first([FinalizeHCTbi.gcs_path, RenameRawHcVcf.new_sample_name_vcf_index]) - File? hc_rescored_vcf = select_first([FinalizeHCRescoredVcf.gcs_path, ScoreIndelVariantAnnotations.scored_vcf]) - File? 
hc_rescored_tbi = select_first([FinalizeHCRescoredTbi.gcs_path, ScoreIndelVariantAnnotations.scored_vcf_index]) + File vcf = select_first([FinalizeHCRescoredVcf.gcs_path, ScoreIndelVariantAnnotations.scored_vcf]) + File tbi = select_first([FinalizeHCRescoredTbi.gcs_path, ScoreIndelVariantAnnotations.scored_vcf_index]) + File g_vcf = select_first([FinalizeHCGVcf.gcs_path, RenameRawHcGvcf.new_sample_name_vcf]) + File g_tbi = select_first([FinalizeHCGTbi.gcs_path, RenameRawHcGvcf.new_sample_name_vcf_index]) + File bamout = select_first([FinalizeHCBamOut.gcs_path, CallVariantsWithHaplotypeCaller.bamout]) + File baiout = select_first([FinalizeHCBaiOut.gcs_path, CallVariantsWithHaplotypeCaller.bamout_index]) } } diff --git a/wdl/pipelines/TechAgnostic/Utility/TrainCnnFilters.wdl b/wdl/pipelines/TechAgnostic/Utility/TrainCnnFilters.wdl index 396d9fab3..b854f3d5a 100644 --- a/wdl/pipelines/TechAgnostic/Utility/TrainCnnFilters.wdl +++ b/wdl/pipelines/TechAgnostic/Utility/TrainCnnFilters.wdl @@ -342,7 +342,7 @@ task TrainCnn { # Get the max number of threads to use: np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}') - let max_threads=${np}-1 + max_threads=$((np-1)) if [[ $max_threads -le 0 ]] ; then max_threads=1 fi @@ -366,7 +366,7 @@ task TrainCnn { tar --strip-components 1 -xf $f & # Update the number of active threads: - let num_active_threads=${num_active_threads}+1 + num_active_threads=$((num_active_threads+1)) done < ~{write_lines(tensor_tars)} # Wait for the rest of our background processes to finish: diff --git a/wdl/pipelines/TechAgnostic/VariantCalling/LRCNVs.wdl b/wdl/pipelines/TechAgnostic/VariantCalling/LRCNVs.wdl index 80c6e93f8..be9e04de0 100644 --- a/wdl/pipelines/TechAgnostic/VariantCalling/LRCNVs.wdl +++ b/wdl/pipelines/TechAgnostic/VariantCalling/LRCNVs.wdl @@ -578,7 +578,7 @@ task GermlineCNVCallerCohortMode { while [ $CURRENT_SAMPLE -lt $NUM_SAMPLES ]; do CURRENT_SAMPLE_WITH_LEADING_ZEROS=$(printf "%0${NUM_DIGITS}d" 
$CURRENT_SAMPLE) tar czf ~{cohort_entity_id}-gcnv-calls-shard-~{scatter_index}-sample-$CURRENT_SAMPLE_WITH_LEADING_ZEROS.tar.gz -C ~{output_dir_}/~{cohort_entity_id}-calls/SAMPLE_$CURRENT_SAMPLE . - let CURRENT_SAMPLE=CURRENT_SAMPLE+1 + CURRENT_SAMPLE=$((CURRENT_SAMPLE+1)) done rm -rf contig-ploidy-calls diff --git a/wdl/pipelines/TechAgnostic/VariantCalling/SRJointCallGVCFsWithGenomicsDB.wdl b/wdl/pipelines/TechAgnostic/VariantCalling/SRJointCallGVCFsWithGenomicsDB.wdl index 9449eeefd..fedc16d6b 100644 --- a/wdl/pipelines/TechAgnostic/VariantCalling/SRJointCallGVCFsWithGenomicsDB.wdl +++ b/wdl/pipelines/TechAgnostic/VariantCalling/SRJointCallGVCFsWithGenomicsDB.wdl @@ -338,14 +338,6 @@ workflow SRJointCallGVCFsWithGenomicsDB { File vcf_index_for_merging = select_first([FunctionallyAnnotate.annotated_vcf_index, recalibrated_vcf_index]) } - # Consolidate files: - call VARUTIL.GatherVcfs as GatherRawVcfs { - input: - input_vcfs = joint_vcf, - input_vcf_indices = joint_vcf_index, - prefix = prefix + ".raw.combined" - } - # Consolidate files: call VARUTIL.GatherVcfs as GatherRescoredVcfs { input: @@ -391,9 +383,6 @@ workflow SRJointCallGVCFsWithGenomicsDB { call FF.FinalizeToDir as FinalizeGenomicsDB { input: outdir = outdir + "/GenomicsDB", keyfile = keyfile, files = ImportGVCFsIntoGenomicsDB.output_genomicsdb } - call FF.FinalizeToFile as FinalizeRawVCF { input: outdir = outdir, keyfile = keyfile, file = GatherRawVcfs.output_vcf } - call FF.FinalizeToFile as FinalizeRawTBI { input: outdir = outdir, keyfile = keyfile, file = GatherRawVcfs.output_vcf_index } - call FF.FinalizeToFile as FinalizeVETSVCF { input: outdir = outdir, keyfile = keyfile, file = GatherRescoredVcfs.output_vcf } call FF.FinalizeToFile as FinalizeVETSTBI { input: outdir = outdir, keyfile = keyfile, file = GatherRescoredVcfs.output_vcf_index } @@ -495,18 +484,12 @@ workflow SRJointCallGVCFsWithGenomicsDB { output { Array[String] genomicsDB = select_first([final_genomicsdb_location, 
ImportGVCFsIntoGenomicsDB.output_genomicsdb]) - File raw_joint_vcf = select_first([FinalizeRawVCF.gcs_path, GatherRawVcfs.output_vcf]) - File raw_joint_vcf_tbi = select_first([FinalizeRawTBI.gcs_path, GatherRawVcfs.output_vcf_index]) - - File joint_recalibrated_vcf = select_first([FinalizeVETSVCF.gcs_path, GatherRescoredVcfs.output_vcf]) - File joint_recalibrated_vcf_tbi = select_first([FinalizeVETSTBI.gcs_path, GatherRescoredVcfs.output_vcf_index]) + File joint_vcf = select_first([FinalizeVETSVCF.gcs_path, GatherRescoredVcfs.output_vcf]) + File joint_vcf_tbi = select_first([FinalizeVETSTBI.gcs_path, GatherRescoredVcfs.output_vcf_index]) File joint_mt = select_first([FinalizeHailMatrixTable.gcs_path, CreateHailMatrixTable.mt_tar]) File joint_zarr = select_first([FinalizeZarr.gcs_path, ConvertToZarr.zarr]) - File? annotated_joint_vcf = annotated_vcf - File? annotated_joint_vcf_tbi = annotated_vcf_tbi - Array[String]? snpEff_summary = final_snpeff_summary Array[String]? snpEff_genes = final_snpEff_genes } diff --git a/wdl/tasks/QC/AlignedMetrics.wdl b/wdl/tasks/QC/AlignedMetrics.wdl index 5672af9db..19c12636c 100644 --- a/wdl/tasks/QC/AlignedMetrics.wdl +++ b/wdl/tasks/QC/AlignedMetrics.wdl @@ -363,7 +363,7 @@ task SamStats { command <<< set -euxo pipefail - np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}') + np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}') samtools stats -@${np} ~{bam} > ~{basename}.sam_stats.txt >>> @@ -407,7 +407,7 @@ task SamStatsMap { command <<< set -euxo pipefail - np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}') + np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}') samtools stats -@${np} ~{bam} > ~{basename}.sam_stats.txt diff --git a/wdl/tasks/QC/FastQC.wdl b/wdl/tasks/QC/FastQC.wdl index 2a89bdf4d..4a8f77ca3 100644 --- a/wdl/tasks/QC/FastQC.wdl +++ b/wdl/tasks/QC/FastQC.wdl @@ -17,7 +17,7 @@ task FastQC { command <<< set -euxo pipefail - num_core=$(cat
/proc/cpuinfo | awk '/^processor/{print $3}' | wc -l) + num_core=$(awk '/^processor/{print $3}' /proc/cpuinfo | wc -l) fastqc -t $num_core --extract ~{bam} diff --git a/wdl/tasks/Utility/SRUtils.wdl b/wdl/tasks/Utility/SRUtils.wdl index 869b9dc92..24652e236 100644 --- a/wdl/tasks/Utility/SRUtils.wdl +++ b/wdl/tasks/Utility/SRUtils.wdl @@ -183,7 +183,7 @@ task BwaMem2 { # Make sure we use all our proocesors: np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}') if [[ ${np} -gt 2 ]] ; then - let np=${np}-1 + np=$((np-1)) fi # Breakdown of the arguments: @@ -258,7 +258,7 @@ task MergeBamAlignment { # Make sure we use all our proocesors: np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}') - let np=${np}-1 + np=$((np-1)) java -Dsamjdk.compression_level=2 -Xms8192m -Xmx30768m -jar /usr/picard/picard.jar \ MergeBamAlignment \ @@ -341,7 +341,7 @@ task MarkDuplicates { command <<< tot_mem_mb=$(free -m | grep '^Mem' | awk '{print $2}') - let java_memory_size_mb=${tot_mem_mb}-5120 + java_memory_size_mb=$((tot_mem_mb-5120)) java -Dsamjdk.compression_level=~{compression_level} -Xms${java_memory_size_mb}m -jar /usr/picard/picard.jar \ MarkDuplicates \ diff --git a/wdl/tasks/Utility/Utils.wdl b/wdl/tasks/Utility/Utils.wdl index 5d2df34c3..22f0b13ca 100644 --- a/wdl/tasks/Utility/Utils.wdl +++ b/wdl/tasks/Utility/Utils.wdl @@ -284,7 +284,7 @@ task DownsampleSam { command <<< # Make sure we use all our proocesors: - np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}') + np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}') gatk DownsampleSam --VALIDATION_STRINGENCY SILENT --RANDOM_SEED ~{random_seed} -I ~{bam} -O ~{prefix}.bam -S ~{strategy} -P ~{probability} ~{extra_args} samtools index -@$np ~{prefix}.bam @@ -1571,7 +1571,7 @@ task GetReadsInBedFileRegions { export GCS_OAUTH_TOKEN=`gcloud auth application-default print-access-token` # Make sure we use all our proocesors: - np=$(cat /proc/cpuinfo | 
grep ^processor | tail -n1 | awk '{print $NF+1}') + np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}') samtools view -@${np} -b -h -L ~{regions_bed} ~{gcs_bam_path} | samtools sort - > ~{prefix}.bam samtools index -@${np} ~{prefix}.bam diff --git a/wdl/tasks/Utility/VariantUtils.wdl b/wdl/tasks/Utility/VariantUtils.wdl index c1d666644..d0867c880 100644 --- a/wdl/tasks/Utility/VariantUtils.wdl +++ b/wdl/tasks/Utility/VariantUtils.wdl @@ -688,8 +688,8 @@ task HardFilterVcf { # Get amount of memory to use: mem_available=$(free -m | grep '^Mem' | awk '{print $2}') - let mem_start=${mem_available}-1000 - let mem_max=${mem_available}-750 + mem_start=$((mem_available-1000)) + mem_max=$((mem_available-750)) gatk --java-options "-Xms${mem_start}m -Xmx${mem_max}m" \ VariantFiltration \ @@ -744,8 +744,8 @@ task MakeSitesOnlyVcf { # Get amount of memory to use: mem_available=$(free -m | grep '^Mem' | awk '{print $2}') - let mem_start=${mem_available}-1000 - let mem_max=${mem_available}-750 + mem_start=$((mem_available-1000)) + mem_max=$((mem_available-750)) gatk --java-options "-Xms${mem_start}m -Xmx${mem_max}m" \ MakeSitesOnlyVcf \ @@ -801,8 +801,8 @@ task AnnotateVcfWithBedRegions { # Get amount of memory to use: mem_available=$(free -m | grep '^Mem' | awk '{print $2}') - let mem_start=${mem_available}-1000 - let mem_max=${mem_available}-750 + mem_start=$((mem_available-1000)) + mem_max=$((mem_available-750)) # We need to generate argument strings from the input arrays. 
# First we check that the arrays are the same length: @@ -952,8 +952,8 @@ task IndelsVariantRecalibrator { # Get amount of memory to use: mem_available=$(free -g | grep '^Mem' | awk '{print $2}') - let mem_start=${mem_available}-2 - let mem_max=${mem_available}-1 + mem_start=$((mem_available-2)) + mem_max=$((mem_available-1)) gatk --java-options "-Xms${mem_start}g -Xmx${mem_max}g" \ VariantRecalibrator \ @@ -1078,8 +1078,8 @@ task SNPsVariantRecalibratorCreateModel { # Get amount of memory to use: mem_available=$(free -g | grep '^Mem' | awk '{print $2}') - let mem_start=${mem_available}-2 - let mem_max=${mem_available}-1 + mem_start=$((mem_available-2)) + mem_max=$((mem_available-1)) gatk --java-options "-Xms${mem_start}g -Xmx${mem_max}g" \ VariantRecalibrator \ @@ -1158,8 +1158,8 @@ task ApplyVqsr { # Get amount of memory to use: mem_available=$(free -m | grep '^Mem' | awk '{print $2}') - let mem_start=${mem_available}-2000 - let mem_max=${mem_available}-500 + mem_start=$((mem_available-2000)) + mem_max=$((mem_available-500)) gatk --java-options "-Xms${mem_start}m -Xmx${mem_max}m" \ ApplyVQSR \ @@ -1229,8 +1229,8 @@ task SelectVariants { # Get amount of memory to use: mem_available=$(free -m | grep '^Mem' | awk '{print $2}') - let mem_start=${mem_available}-2000 - let mem_max=${mem_available}-500 + mem_start=$((mem_available-2000)) + mem_max=$((mem_available-500)) gatk --java-options "-Xms${mem_start}m -Xmx${mem_max}m" \ SelectVariants \ @@ -1290,8 +1290,8 @@ task RenameSingleSampleVcf { # Get amount of memory to use: mem_available=$(free -m | grep '^Mem' | awk '{print $2}') - let mem_start=${mem_available}-1000 - let mem_max=${mem_available}-750 + mem_start=$((mem_available-1000)) + mem_max=$((mem_available-750)) gatk --java-options "-Xms${mem_start}m -Xmx${mem_max}m" \ RenameSampleInVcf \ @@ -1693,8 +1693,8 @@ task ExtractVariantAnnotations { # Get amount of memory to use: mem_available=$(free -g | grep '^Mem' | awk '{print $2}') - let 
mem_start=${mem_available}-2 - let mem_max=${mem_available}-2 + mem_start=$((mem_available-2)) + mem_max=$((mem_available-2)) gatk --java-options "-Xms${mem_start}g -Xmx${mem_max}g -DGATK_STACKTRACE_ON_USER_EXCEPTION=true" \ ExtractVariantAnnotations \ @@ -1771,8 +1771,8 @@ task TrainVariantAnnotationsModel { # Get amount of memory to use: mem_available=$(free -g | grep '^Mem' | awk '{print $2}') - let mem_start=${mem_available}-2 - let mem_max=${mem_available}-2 + mem_start=$((mem_available-2)) + mem_max=$((mem_available-2)) gatk --java-options "-Xms${mem_start}g -Xmx${mem_max}g -DGATK_STACKTRACE_ON_USER_EXCEPTION=true" \ TrainVariantAnnotationsModel \ @@ -1895,10 +1895,10 @@ task ScoreVariantAnnotations { # Get amount of memory to use: mem_available=$(free -g | grep '^Mem' | awk '{print $2}') - let mem_start=${mem_available}-2 - let mem_max=${mem_available}-2 + mem_start=$((mem_available-2)) + mem_max=$((mem_available-2)) - mode_lower=$(echo ~{mode} | tr 'A-Z' 'a-z') + mode_lower=$(echo ~{mode} | tr '[:upper:]' '[:lower:]') # Set up model files: mkdir model_files diff --git a/wdl/tasks/VariantCalling/HaplotypeCaller.wdl b/wdl/tasks/VariantCalling/HaplotypeCaller.wdl index 92fa42897..d557f72a8 100644 --- a/wdl/tasks/VariantCalling/HaplotypeCaller.wdl +++ b/wdl/tasks/VariantCalling/HaplotypeCaller.wdl @@ -191,7 +191,7 @@ task HaplotypeCaller_GATK4_VCF { # which do not rely on the output format of the `free` command. 
available_memory_mb=$(free -m | awk '/^Mem/ {print $2}') - let java_memory_size_mb=available_memory_mb-1024 + java_memory_size_mb=$((available_memory_mb-1024)) echo Total available memory: ${available_memory_mb} MB >&2 echo Memory reserved for Java: ${java_memory_size_mb} MB >&2 @@ -278,7 +278,7 @@ task MergeBamouts { # If the number of processors = 1, then `let` will return 1 here: # So we need to turn off `set -e` for this command: set +e - let mthreads=${np}-1 + mthreads=$((np-1)) set -e samtools merge -@${mthreads} ~{prefix}.bam ~{sep=" " bams} diff --git a/wdl/tasks/VariantCalling/SRJointGenotyping.wdl b/wdl/tasks/VariantCalling/SRJointGenotyping.wdl index e30d2ee77..b3f21e7bb 100644 --- a/wdl/tasks/VariantCalling/SRJointGenotyping.wdl +++ b/wdl/tasks/VariantCalling/SRJointGenotyping.wdl @@ -46,7 +46,7 @@ task CreateSampleNameMap { # Create a temporary file to store file sizes in: size_file=$(mktemp) - let i=1 + i=1 while read file_path ; do # Get our sample list from our file: @@ -64,7 +64,7 @@ task CreateSampleNameMap { # Add the file size to the size file: gsutil du -sac ${file_path} | tail -n1 | awk '{print $1}' >> ${size_file} - let i=$i+1 + i=$((i+1)) if [[ $i -gt ~{re_auth_interval} ]] ; then # Periodically we should update the token so we don't have problems with long file lists: export GCS_OAUTH_TOKEN=$(gcloud auth application-default print-access-token) diff --git a/wdl/tasks/Visualization/NanoPlot.wdl b/wdl/tasks/Visualization/NanoPlot.wdl index 4c487baf9..561f67595 100644 --- a/wdl/tasks/Visualization/NanoPlot.wdl +++ b/wdl/tasks/Visualization/NanoPlot.wdl @@ -24,7 +24,7 @@ task NanoPlotFromSummary { command <<< set -euxo pipefail - num_core=$(cat /proc/cpuinfo | awk '/^processor/{print $3}' | wc -l) + num_core=$(awk '/^processor/{print $3}' /proc/cpuinfo | wc -l) NanoPlot -t ${num_core} \ -c orangered \ @@ -120,7 +120,7 @@ task NanoPlotFromRichFastqs { command <<< set -euxo pipefail - num_core=$(cat /proc/cpuinfo | awk '/^processor/{print $3}'
| wc -l) + num_core=$(awk '/^processor/{print $3}' /proc/cpuinfo | wc -l) NanoPlot -t ${num_core} \ -c orangered \ @@ -205,7 +205,7 @@ task NanoPlotFromBam { touch ~{bai} # avoid the warning bai is older than bam - num_core=$(cat /proc/cpuinfo | awk '/^processor/{print $3}' | wc -l) + num_core=$(awk '/^processor/{print $3}' /proc/cpuinfo | wc -l) NanoPlot -t ${num_core} \ -c orangered \ diff --git a/wdl/tasks/Z_One_Off_Analyses/Pf_Niare_HaplotypeCaller.wdl b/wdl/tasks/Z_One_Off_Analyses/Pf_Niare_HaplotypeCaller.wdl index 29f7e4dd6..8158dc762 100644 --- a/wdl/tasks/Z_One_Off_Analyses/Pf_Niare_HaplotypeCaller.wdl +++ b/wdl/tasks/Z_One_Off_Analyses/Pf_Niare_HaplotypeCaller.wdl @@ -148,7 +148,7 @@ task HaplotypeCaller_NIARE_GATK4_VCF { # which do not rely on the output format of the `free` command. available_memory_mb=$(free -m | awk '/^Mem/ {print $2}') - let java_memory_size_mb=available_memory_mb-1024 + java_memory_size_mb=$((available_memory_mb-1024)) echo Total available memory: ${available_memory_mb} MB >&2 echo Memory reserved for Java: ${java_memory_size_mb} MB >&2 @@ -256,7 +256,7 @@ task MergeBamouts { # If the number of processors = 1, then `let` will return 1 here: # So we need to turn off `set -e` for this command: set +e - let mthreads=${np}-1 + mthreads=$((np-1)) set -e samtools merge -@${mthreads} ~{prefix}.bam ~{sep=" " bams} @@ -327,7 +327,7 @@ task GenomicsDbImport { # which do not rely on the output format of the `free` command. 
available_memory_mb=$(free -m | awk '/^Mem/ {print $2}') - let java_memory_size_mb=available_memory_mb-1024 + java_memory_size_mb=$((available_memory_mb-1024)) echo Total available memory: ${available_memory_mb} MB >&2 echo Memory reserved for Java: ${java_memory_size_mb} MB >&2 @@ -429,7 +429,7 @@ task GenotypeGVCFs { fi available_memory_mb=$(free -m | awk '/^Mem/ {print $2}') - let java_memory_size_mb=available_memory_mb-1024 + java_memory_size_mb=$((available_memory_mb-1024)) echo Total available memory: ${available_memory_mb} MB >&2 echo Memory reserved for Java: ${java_memory_size_mb} MB >&2 @@ -579,7 +579,7 @@ task VariantRecalibratorIndel { # which do not rely on the output format of the `free` command. available_memory_mb=$(free -m | awk '/^Mem/ {print $2}') - let java_memory_size_mb=available_memory_mb-1024 + java_memory_size_mb=$((available_memory_mb-1024)) echo Total available memory: ${available_memory_mb} MB >&2 echo Memory reserved for Java: ${java_memory_size_mb} MB >&2 @@ -663,7 +663,7 @@ task VariantRecalibratorSnp { # which do not rely on the output format of the `free` command. available_memory_mb=$(free -m | awk '/^Mem/ {print $2}') - let java_memory_size_mb=available_memory_mb-1024 + java_memory_size_mb=$((available_memory_mb-1024)) echo Total available memory: ${available_memory_mb} MB >&2 echo Memory reserved for Java: ${java_memory_size_mb} MB >&2 @@ -744,7 +744,7 @@ task ApplyVqsrIndel { # which do not rely on the output format of the `free` command. available_memory_mb=$(free -m | awk '/^Mem/ {print $2}') - let java_memory_size_mb=available_memory_mb-1024 + java_memory_size_mb=$((available_memory_mb-1024)) echo Total available memory: ${available_memory_mb} MB >&2 echo Memory reserved for Java: ${java_memory_size_mb} MB >&2 @@ -820,7 +820,7 @@ task ApplyVqsrSnp { # which do not rely on the output format of the `free` command. 
available_memory_mb=$(free -m | awk '/^Mem/ {print $2}') - let java_memory_size_mb=available_memory_mb-1024 + java_memory_size_mb=$((available_memory_mb-1024)) echo Total available memory: ${available_memory_mb} MB >&2 echo Memory reserved for Java: ${java_memory_size_mb} MB >&2 @@ -899,7 +899,7 @@ task MergeMultiAllelicSitesPostRecalibration { np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}') available_memory_mb=$(free -m | awk '/^Mem/ {print $2}') - let java_memory_size_mb=available_memory_mb-1024 + java_memory_size_mb=$((available_memory_mb-1024)) echo Total available memory: ${available_memory_mb} MB >&2 echo Memory reserved for Java: ${java_memory_size_mb} MB >&2