
Commit 075d82d
Cleaned up some outputs and wdl linting warnings.
jonn-smith committed May 23, 2024
1 parent d42cc23 commit 075d82d
Showing 14 changed files with 289 additions and 389 deletions.
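
Most of the hunks below repeat a small set of shell cleanups inside WDL command blocks: dropping a useless `cat` in front of `grep`, and replacing `let` arithmetic with POSIX arithmetic expansion. A minimal before/after sketch of that pattern, assuming the warnings come from a shellcheck-style shell linter (the commit message does not name the tool):

    #!/usr/bin/env bash
    set -euxo pipefail

    # Before (patterns that trip common shell linters):
    #   np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')   # useless use of cat
    #   let max_threads=${np}-1                                                      # `let` arithmetic

    # After (the style this commit converges on):
    np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}')
    max_threads=$((np - 1))

    echo "usable threads: ${max_threads}"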
2 changes: 1 addition & 1 deletion wdl/pipelines/ILMN/Alignment/SRFlowcell.wdl
@@ -158,7 +158,7 @@ workflow SRFlowcell {
}
}

File merged_bam = select_first([t_005_AlignReads.bam, t_006_MergeBamAlignment.bam])
File merged_bam = select_first([t_006_MergeBamAlignment.bam, t_005_AlignReads.bam])

# Mark Duplicates
call SRUTIL.MarkDuplicates as t_007_MarkDuplicates {
555 changes: 236 additions & 319 deletions wdl/pipelines/ILMN/VariantCalling/SRWholeGenome.wdl

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions wdl/pipelines/TechAgnostic/Utility/TrainCnnFilters.wdl
@@ -342,7 +342,7 @@ task TrainCnn {

# Get the max number of threads to use:
np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')
let max_threads=${np}-1
max_threads=$((np-1))
if [[ $max_threads -le 0 ]] ; then
max_threads=1
fi
@@ -366,7 +366,7 @@
tar --strip-components 1 -xf $f &

# Update the number of active threads:
let num_active_threads=${num_active_threads}+1
num_active_threads=$((num_active_threads+1))
done < ~{write_lines(tensor_tars)}

# Wait for the rest of our background processes to finish:
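
For context, the TrainCnn hunk above sits inside a bounded-parallelism loop: tarballs are unpacked in the background while a counter tracks how many jobs are active. The throttling logic itself falls outside the shown hunk, so the following is only a sketch of that general pattern; the file list name and the placement of `wait` are assumptions, not copied from the task:

    #!/usr/bin/env bash
    set -euo pipefail

    np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}')
    max_threads=$((np - 1))
    if [[ ${max_threads} -le 0 ]] ; then
        max_threads=1
    fi

    num_active_threads=0
    while read -r f ; do
        # Unpack each archive in the background:
        tar --strip-components 1 -xf "${f}" &

        # Update the number of active threads:
        num_active_threads=$((num_active_threads + 1))

        # Once we hit the cap, wait for the current batch before launching more:
        if [[ ${num_active_threads} -ge ${max_threads} ]] ; then
            wait
            num_active_threads=0
        fi
    done < tensor_tar_list.txt   # stand-in for ~{write_lines(tensor_tars)}

    # Wait for the rest of our background processes to finish:
    wait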
2 changes: 1 addition & 1 deletion wdl/pipelines/TechAgnostic/VariantCalling/LRCNVs.wdl
@@ -578,7 +578,7 @@ task GermlineCNVCallerCohortMode {
while [ $CURRENT_SAMPLE -lt $NUM_SAMPLES ]; do
CURRENT_SAMPLE_WITH_LEADING_ZEROS=$(printf "%0${NUM_DIGITS}d" $CURRENT_SAMPLE)
tar czf ~{cohort_entity_id}-gcnv-calls-shard-~{scatter_index}-sample-$CURRENT_SAMPLE_WITH_LEADING_ZEROS.tar.gz -C ~{output_dir_}/~{cohort_entity_id}-calls/SAMPLE_$CURRENT_SAMPLE .
let CURRENT_SAMPLE=CURRENT_SAMPLE+1
CURRENT_SAMPLE=$((CURRENT_SAMPLE+1))
done

rm -rf contig-ploidy-calls
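
The GermlineCNVCallerCohortMode loop above pairs a zero-padded `printf` counter with the same arithmetic-expansion style; a standalone sketch of that indexing pattern (the sample count and digit width below are made-up values):

    #!/usr/bin/env bash
    set -euo pipefail

    NUM_SAMPLES=12   # made-up value
    NUM_DIGITS=3     # made-up width; yields 000, 001, ...

    CURRENT_SAMPLE=0
    while [ ${CURRENT_SAMPLE} -lt ${NUM_SAMPLES} ]; do
        CURRENT_SAMPLE_WITH_LEADING_ZEROS=$(printf "%0${NUM_DIGITS}d" ${CURRENT_SAMPLE})
        echo "would archive SAMPLE_${CURRENT_SAMPLE} as sample-${CURRENT_SAMPLE_WITH_LEADING_ZEROS}.tar.gz"
        CURRENT_SAMPLE=$((CURRENT_SAMPLE + 1))
    done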
@@ -338,14 +338,6 @@ workflow SRJointCallGVCFsWithGenomicsDB {
File vcf_index_for_merging = select_first([FunctionallyAnnotate.annotated_vcf_index, recalibrated_vcf_index])
}

# Consolidate files:
call VARUTIL.GatherVcfs as GatherRawVcfs {
input:
input_vcfs = joint_vcf,
input_vcf_indices = joint_vcf_index,
prefix = prefix + ".raw.combined"
}

# Consolidate files:
call VARUTIL.GatherVcfs as GatherRescoredVcfs {
input:
@@ -391,9 +383,6 @@ workflow SRJointCallGVCFsWithGenomicsDB {

call FF.FinalizeToDir as FinalizeGenomicsDB { input: outdir = outdir + "/GenomicsDB", keyfile = keyfile, files = ImportGVCFsIntoGenomicsDB.output_genomicsdb }

call FF.FinalizeToFile as FinalizeRawVCF { input: outdir = outdir, keyfile = keyfile, file = GatherRawVcfs.output_vcf }
call FF.FinalizeToFile as FinalizeRawTBI { input: outdir = outdir, keyfile = keyfile, file = GatherRawVcfs.output_vcf_index }

call FF.FinalizeToFile as FinalizeVETSVCF { input: outdir = outdir, keyfile = keyfile, file = GatherRescoredVcfs.output_vcf }
call FF.FinalizeToFile as FinalizeVETSTBI { input: outdir = outdir, keyfile = keyfile, file = GatherRescoredVcfs.output_vcf_index }

@@ -495,18 +484,12 @@ workflow SRJointCallGVCFsWithGenomicsDB {
output {
Array[String] genomicsDB = select_first([final_genomicsdb_location, ImportGVCFsIntoGenomicsDB.output_genomicsdb])

File raw_joint_vcf = select_first([FinalizeRawVCF.gcs_path, GatherRawVcfs.output_vcf])
File raw_joint_vcf_tbi = select_first([FinalizeRawTBI.gcs_path, GatherRawVcfs.output_vcf_index])

File joint_recalibrated_vcf = select_first([FinalizeVETSVCF.gcs_path, GatherRescoredVcfs.output_vcf])
File joint_recalibrated_vcf_tbi = select_first([FinalizeVETSTBI.gcs_path, GatherRescoredVcfs.output_vcf_index])
File joint_vcf = select_first([FinalizeVETSVCF.gcs_path, GatherRescoredVcfs.output_vcf])
File joint__vcf_tbi = select_first([FinalizeVETSTBI.gcs_path, GatherRescoredVcfs.output_vcf_index])

File joint_mt = select_first([FinalizeHailMatrixTable.gcs_path, CreateHailMatrixTable.mt_tar])
File joint_zarr = select_first([FinalizeZarr.gcs_path, ConvertToZarr.zarr])

File? annotated_joint_vcf = annotated_vcf
File? annotated_joint_vcf_tbi = annotated_vcf_tbi

Array[String]? snpEff_summary = final_snpeff_summary
Array[String]? snpEff_genes = final_snpEff_genes
}
4 changes: 2 additions & 2 deletions wdl/tasks/QC/AlignedMetrics.wdl
@@ -363,7 +363,7 @@ task SamStats {
command <<<
set -euxo pipefail

np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')
np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}')

samtools stats -@${np} ~{bam} > ~{basename}.sam_stats.txt
>>>
@@ -407,7 +407,7 @@ task SamStatsMap {
command <<<
set -euxo pipefail

np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')
np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}')

samtools stats -@${np} ~{bam} > ~{basename}.sam_stats.txt

2 changes: 1 addition & 1 deletion wdl/tasks/QC/FastQC.wdl
@@ -17,7 +17,7 @@ task FastQC {
command <<<
set -euxo pipefail

num_core=$(cat /proc/cpuinfo | awk '/^processor/{print $3}' | wc -l)
num_core=$(awk '/^processor/{print $3}' /proc/cpuinfo | wc -l)

fastqc -t $num_core --extract ~{bam}

6 changes: 3 additions & 3 deletions wdl/tasks/Utility/SRUtils.wdl
@@ -183,7 +183,7 @@ task BwaMem2 {
# Make sure we use all our processors:
np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')
if [[ ${np} -gt 2 ]] ; then
let np=${np}-1
np=$((np-1))
fi

# Breakdown of the arguments:
@@ -258,7 +258,7 @@ task MergeBamAlignment {

# Make sure we use all our processors:
np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')
let np=${np}-1
np=$((np-1))

java -Dsamjdk.compression_level=2 -Xms8192m -Xmx30768m -jar /usr/picard/picard.jar \
MergeBamAlignment \
@@ -341,7 +341,7 @@ task MarkDuplicates {

command <<<
tot_mem_mb=$(free -m | grep '^Mem' | awk '{print $2}')
let java_memory_size_mb=${tot_mem_mb}-5120
java_memory_size_mb=$((tot_mem_mb-5120))

java -Dsamjdk.compression_level=~{compression_level} -Xms${java_memory_size_mb}m -jar /usr/picard/picard.jar \
MarkDuplicates \
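
Several tasks in this diff size the JVM heap from the machine's memory as reported by `free`, minus a fixed headroom; a minimal sketch of that calculation (the 5120 MB margin mirrors the MarkDuplicates task above, while other tasks in this commit reserve different amounts):

    #!/usr/bin/env bash
    set -euo pipefail

    # Total memory in MB, as reported by `free`:
    tot_mem_mb=$(free -m | grep '^Mem' | awk '{print $2}')

    # Leave headroom for the OS and non-heap JVM overhead:
    java_memory_size_mb=$((tot_mem_mb - 5120))

    echo "would run: java -Xmx${java_memory_size_mb}m -jar /usr/picard/picard.jar MarkDuplicates ..."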
4 changes: 2 additions & 2 deletions wdl/tasks/Utility/Utils.wdl
@@ -284,7 +284,7 @@ task DownsampleSam {
command <<<

# Make sure we use all our processors:
np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')
np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}')

gatk DownsampleSam --VALIDATION_STRINGENCY SILENT --RANDOM_SEED ~{random_seed} -I ~{bam} -O ~{prefix}.bam -S ~{strategy} -P ~{probability} ~{extra_args}
samtools index -@$np ~{prefix}.bam
@@ -1571,7 +1571,7 @@ task GetReadsInBedFileRegions {
export GCS_OAUTH_TOKEN=`gcloud auth application-default print-access-token`

# Make sure we use all our processors:
np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')
np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}')

samtools view -@${np} -b -h -L ~{regions_bed} ~{gcs_bam_path} | samtools sort - > ~{prefix}.bam
samtools index -@${np} ~{prefix}.bam
46 changes: 23 additions & 23 deletions wdl/tasks/Utility/VariantUtils.wdl
@@ -688,8 +688,8 @@ task HardFilterVcf {

# Get amount of memory to use:
mem_available=$(free -m | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-1000
let mem_max=${mem_available}-750
mem_start=$((mem_available-1000))
mem_max=$((mem_available-750))

gatk --java-options "-Xms${mem_start}m -Xmx${mem_max}m" \
VariantFiltration \
@@ -744,8 +744,8 @@ task MakeSitesOnlyVcf {

# Get amount of memory to use:
mem_available=$(free -m | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-1000
let mem_max=${mem_available}-750
mem_start=$((mem_available-1000))
mem_max=$((mem_available-750))

gatk --java-options "-Xms${mem_start}m -Xmx${mem_max}m" \
MakeSitesOnlyVcf \
@@ -801,8 +801,8 @@ task AnnotateVcfWithBedRegions {

# Get amount of memory to use:
mem_available=$(free -m | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-1000
let mem_max=${mem_available}-750
mem_start=$((mem_available-1000))
mem_max=$((mem_available-750))

# We need to generate argument strings from the input arrays.
# First we check that the arrays are the same length:
@@ -952,8 +952,8 @@ task IndelsVariantRecalibrator {

# Get amount of memory to use:
mem_available=$(free -g | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-2
let mem_max=${mem_available}-1
mem_start=$((mem_available-2))
mem_max=$((mem_available-1))

gatk --java-options "-Xms${mem_start}g -Xmx${mem_max}g" \
VariantRecalibrator \
@@ -1078,8 +1078,8 @@ task SNPsVariantRecalibratorCreateModel {

# Get amount of memory to use:
mem_available=$(free -g | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-2
let mem_max=${mem_available}-1
mem_start=$((mem_available-2))
mem_max=$((mem_available-1))

gatk --java-options "-Xms${mem_start}g -Xmx${mem_max}g" \
VariantRecalibrator \
@@ -1158,8 +1158,8 @@ task ApplyVqsr {

# Get amount of memory to use:
mem_available=$(free -m | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-2000
let mem_max=${mem_available}-500
mem_start=$((mem_available-2000))
mem_max=$((mem_available-500))

gatk --java-options "-Xms${mem_start}m -Xmx${mem_max}m" \
ApplyVQSR \
@@ -1229,8 +1229,8 @@ task SelectVariants {

# Get amount of memory to use:
mem_available=$(free -m | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-2000
let mem_max=${mem_available}-500
mem_start=$((mem_available-2000))
mem_max=$((mem_available-500))

gatk --java-options "-Xms${mem_start}m -Xmx${mem_max}m" \
SelectVariants \
@@ -1290,8 +1290,8 @@ task RenameSingleSampleVcf {

# Get amount of memory to use:
mem_available=$(free -m | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-1000
let mem_max=${mem_available}-750
mem_start=$((mem_available-1000))
mem_max=$((mem_available-750))

gatk --java-options "-Xms${mem_start}m -Xmx${mem_max}m" \
RenameSampleInVcf \
@@ -1693,8 +1693,8 @@ task ExtractVariantAnnotations {

# Get amount of memory to use:
mem_available=$(free -g | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-2
let mem_max=${mem_available}-2
mem_start=$((mem_available-2))
mem_max=$((mem_available-2))

gatk --java-options "-Xms${mem_start}g -Xmx${mem_max}g -DGATK_STACKTRACE_ON_USER_EXCEPTION=true" \
ExtractVariantAnnotations \
@@ -1771,8 +1771,8 @@ task TrainVariantAnnotationsModel {

# Get amount of memory to use:
mem_available=$(free -g | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-2
let mem_max=${mem_available}-2
mem_start=$((mem_available-2))
mem_max=$((mem_available-2))

gatk --java-options "-Xms${mem_start}g -Xmx${mem_max}g -DGATK_STACKTRACE_ON_USER_EXCEPTION=true" \
TrainVariantAnnotationsModel \
@@ -1895,10 +1895,10 @@ task ScoreVariantAnnotations {

# Get amount of memory to use:
mem_available=$(free -g | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-2
let mem_max=${mem_available}-2
mem_start=$((mem_available-2))
mem_max=$((mem_available-2))

mode_lower=$(echo ~{mode} | tr 'A-Z' 'a-z')
mode_lower=$(echo ~{mode} | tr '[:upper:]' '[:lower:]')

# Set up model files:
mkdir model_files
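
The ScoreVariantAnnotations change above swaps `tr 'A-Z' 'a-z'` for POSIX character classes. The range form is what shellcheck flags (SC2018/SC2019), since ranges can depend on locale collation in some `tr` implementations; the class form is the portable spelling. A small standalone comparison, with `mode` as a made-up value:

    #!/usr/bin/env bash
    set -euo pipefail

    mode="INDEL"   # made-up example value

    # Range form: flagged by the linter; may misbehave under unusual locales.
    range_lower=$(echo "${mode}" | tr 'A-Z' 'a-z')

    # Character-class form: portable, the spelling this commit adopts.
    class_lower=$(echo "${mode}" | tr '[:upper:]' '[:lower:]')

    echo "${range_lower} ${class_lower}"   # both print "indel" here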
4 changes: 2 additions & 2 deletions wdl/tasks/VariantCalling/HaplotypeCaller.wdl
@@ -191,7 +191,7 @@ task HaplotypeCaller_GATK4_VCF {
# which do not rely on the output format of the `free` command.

available_memory_mb=$(free -m | awk '/^Mem/ {print $2}')
let java_memory_size_mb=available_memory_mb-1024
let java_memory_size_mb=$((available_memory_mb-1024))
echo Total available memory: ${available_memory_mb} MB >&2
echo Memory reserved for Java: ${java_memory_size_mb} MB >&2

@@ -278,7 +278,7 @@ task MergeBamouts {
# If the number of processors = 1, then `let` will return 1 here:
# So we need to turn off `set -e` for this command:
set +e
let mthreads=${np}-1
mthreads=$((np-1))
set -e

samtools merge -@${mthreads} ~{prefix}.bam ~{sep=" " bams}
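
The `set +e` guard in MergeBamouts exists because `let` returns a non-zero exit status whenever its last expression evaluates to 0 (for example, np-1 on a single-core machine), which would abort a script running under `set -e`. A plain assignment through arithmetic expansion always exits 0, so the new form avoids that failure mode; a short standalone demonstration:

    #!/usr/bin/env bash

    np=1

    set -e
    # Assignment via arithmetic expansion: the result is 0, but the exit status is 0,
    # so this is safe under `set -e`:
    mthreads=$((np - 1))
    echo "assignment ok, mthreads=${mthreads}"

    set +e
    # `let` (and a bare `(( ... ))`) exit with status 1 when the expression evaluates to 0:
    let check=${np}-1
    echo "let exit status: $?"   # prints 1
    set -e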
4 changes: 2 additions & 2 deletions wdl/tasks/VariantCalling/SRJointGenotyping.wdl
@@ -46,7 +46,7 @@ task CreateSampleNameMap {
# Create a temporary file to store file sizes in:
size_file=$(mktemp)

let i=1
i=1
while read file_path ; do

# Get our sample list from our file:
@@ -64,7 +64,7 @@ task CreateSampleNameMap {
# Add the file size to the size file:
gsutil du -sac ${file_path} | tail -n1 | awk '{print $1}' >> ${size_file}

let i=$i+1
i=$((i+1))
if [[ $i -gt ~{re_auth_interval} ]] ; then
# Periodically we should update the token so we don't have problems with long file lists:
export GCS_OAUTH_TOKEN=$(gcloud auth application-default print-access-token)
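
The counter rewritten in CreateSampleNameMap above exists so the GCS OAuth token can be refreshed every `re_auth_interval` files, since very long file lists can outlive a single token. A hedged sketch of that refresh pattern; the interval, the per-file work, and the counter reset are placeholders or assumptions, not copied from the task:

    #!/usr/bin/env bash
    set -euo pipefail

    re_auth_interval=50   # placeholder; the real task takes this as an input

    export GCS_OAUTH_TOKEN=$(gcloud auth application-default print-access-token)

    i=1
    while read -r file_path ; do
        # ... per-file work, e.g. gsutil du -sac "${file_path}" ...

        i=$((i + 1))
        if [[ $i -gt ${re_auth_interval} ]] ; then
            # Periodically refresh the token so long file lists don't hit auth expiry:
            export GCS_OAUTH_TOKEN=$(gcloud auth application-default print-access-token)
            i=1   # assumed reset; the reset falls outside the shown hunk
        fi
    done < file_list.txt   # stand-in for the task's input list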
6 changes: 3 additions & 3 deletions wdl/tasks/Visualization/NanoPlot.wdl
@@ -24,7 +24,7 @@ task NanoPlotFromSummary {
command <<<
set -euxo pipefail

num_core=$(cat /proc/cpuinfo | awk '/^processor/{print $3}' | wc -l)
num_core=$(awk '/^processor/{print $3}' /proc/cpuinfo | wc -l)

NanoPlot -t ${num_core} \
-c orangered \
@@ -120,7 +120,7 @@ task NanoPlotFromRichFastqs {
command <<<
set -euxo pipefail

num_core=$(cat /proc/cpuinfo | awk '/^processor/{print $3}' | wc -l)
num_core=$(awk '/^processor/{print $3}' /proc/cpuinfo | wc -l)

NanoPlot -t ${num_core} \
-c orangered \
@@ -205,7 +205,7 @@ task NanoPlotFromBam {

touch ~{bai} # avoid the warning bai is older than bam

num_core=$(cat /proc/cpuinfo | awk '/^processor/{print $3}' | wc -l)
num_core=$(awk '/^processor/{print $3}' /proc/cpuinfo | wc -l)

NanoPlot -t ${num_core} \
-c orangered \
