
Commit 075d82d
Cleaned up some outputs and wdl linting warnings.
jonn-smith committed May 23, 2024
1 parent d42cc23 commit 075d82d
Showing 14 changed files with 289 additions and 389 deletions.
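
Most of the hunks below repeat a small set of shell cleanups inside WDL command blocks: dropping a useless `cat` in front of `grep`, and replacing `let` arithmetic with POSIX arithmetic expansion. A minimal before/after sketch of that pattern, assuming the warnings come from a shellcheck-style shell linter (the commit message does not name the tool):

    #!/usr/bin/env bash
    set -euxo pipefail

    # Before (patterns that trip common shell linters):
    #   np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')   # useless use of cat
    #   let max_threads=${np}-1                                                      # `let` arithmetic

    # After (the style this commit converges on):
    np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}')
    max_threads=$((np - 1))

    echo "usable threads: ${max_threads}"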
2 changes: 1 addition & 1 deletion wdl/pipelines/ILMN/Alignment/SRFlowcell.wdl
@@ -158,7 +158,7 @@ workflow SRFlowcell {
}
}

File merged_bam = select_first([t_005_AlignReads.bam, t_006_MergeBamAlignment.bam])
File merged_bam = select_first([t_006_MergeBamAlignment.bam, t_005_AlignReads.bam])

# Mark Duplicates
call SRUTIL.MarkDuplicates as t_007_MarkDuplicates {
555 changes: 236 additions & 319 deletions wdl/pipelines/ILMN/VariantCalling/SRWholeGenome.wdl

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions wdl/pipelines/TechAgnostic/Utility/TrainCnnFilters.wdl
@@ -342,7 +342,7 @@ task TrainCnn {

# Get the max number of threads to use:
np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')
let max_threads=${np}-1
max_threads=$((np-1))
if [[ $max_threads -le 0 ]] ; then
max_threads=1
fi
@@ -366,7 +366,7 @@
tar --strip-components 1 -xf $f &

# Update the number of active threads:
let num_active_threads=${num_active_threads}+1
num_active_threads=$((num_active_threads+1))
done < ~{write_lines(tensor_tars)}

# Wait for the rest of our background processes to finish:
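
For context, the TrainCnn hunk above sits inside a bounded-parallelism loop: tarballs are unpacked in the background while a counter tracks how many jobs are active. The throttling logic itself falls outside the shown hunk, so the following is only a sketch of that general pattern; the file list name and the placement of `wait` are assumptions, not copied from the task:

    #!/usr/bin/env bash
    set -euo pipefail

    np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}')
    max_threads=$((np - 1))
    if [[ ${max_threads} -le 0 ]] ; then
        max_threads=1
    fi

    num_active_threads=0
    while read -r f ; do
        # Unpack each archive in the background:
        tar --strip-components 1 -xf "${f}" &

        # Update the number of active threads:
        num_active_threads=$((num_active_threads + 1))

        # Once we hit the cap, wait for the current batch before launching more:
        if [[ ${num_active_threads} -ge ${max_threads} ]] ; then
            wait
            num_active_threads=0
        fi
    done < tensor_tar_list.txt   # stand-in for ~{write_lines(tensor_tars)}

    # Wait for the rest of our background processes to finish:
    wait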
2 changes: 1 addition & 1 deletion wdl/pipelines/TechAgnostic/VariantCalling/LRCNVs.wdl
@@ -578,7 +578,7 @@ task GermlineCNVCallerCohortMode {
while [ $CURRENT_SAMPLE -lt $NUM_SAMPLES ]; do
CURRENT_SAMPLE_WITH_LEADING_ZEROS=$(printf "%0${NUM_DIGITS}d" $CURRENT_SAMPLE)
tar czf ~{cohort_entity_id}-gcnv-calls-shard-~{scatter_index}-sample-$CURRENT_SAMPLE_WITH_LEADING_ZEROS.tar.gz -C ~{output_dir_}/~{cohort_entity_id}-calls/SAMPLE_$CURRENT_SAMPLE .
let CURRENT_SAMPLE=CURRENT_SAMPLE+1
CURRENT_SAMPLE=$((CURRENT_SAMPLE+1))
done

rm -rf contig-ploidy-calls
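
The GermlineCNVCallerCohortMode loop above pairs a zero-padded `printf` counter with the same arithmetic-expansion style; a standalone sketch of that indexing pattern (the sample count and digit width below are made-up values):

    #!/usr/bin/env bash
    set -euo pipefail

    NUM_SAMPLES=12   # made-up value
    NUM_DIGITS=3     # made-up width; yields 000, 001, ...

    CURRENT_SAMPLE=0
    while [ ${CURRENT_SAMPLE} -lt ${NUM_SAMPLES} ]; do
        CURRENT_SAMPLE_WITH_LEADING_ZEROS=$(printf "%0${NUM_DIGITS}d" ${CURRENT_SAMPLE})
        echo "would archive SAMPLE_${CURRENT_SAMPLE} as sample-${CURRENT_SAMPLE_WITH_LEADING_ZEROS}.tar.gz"
        CURRENT_SAMPLE=$((CURRENT_SAMPLE + 1))
    done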
@@ -338,14 +338,6 @@ workflow SRJointCallGVCFsWithGenomicsDB {
File vcf_index_for_merging = select_first([FunctionallyAnnotate.annotated_vcf_index, recalibrated_vcf_index])
}

# Consolidate files:
call VARUTIL.GatherVcfs as GatherRawVcfs {
input:
input_vcfs = joint_vcf,
input_vcf_indices = joint_vcf_index,
prefix = prefix + ".raw.combined"
}

# Consolidate files:
call VARUTIL.GatherVcfs as GatherRescoredVcfs {
input:
@@ -391,9 +383,6 @@ workflow SRJointCallGVCFsWithGenomicsDB {

call FF.FinalizeToDir as FinalizeGenomicsDB { input: outdir = outdir + "/GenomicsDB", keyfile = keyfile, files = ImportGVCFsIntoGenomicsDB.output_genomicsdb }

call FF.FinalizeToFile as FinalizeRawVCF { input: outdir = outdir, keyfile = keyfile, file = GatherRawVcfs.output_vcf }
call FF.FinalizeToFile as FinalizeRawTBI { input: outdir = outdir, keyfile = keyfile, file = GatherRawVcfs.output_vcf_index }

call FF.FinalizeToFile as FinalizeVETSVCF { input: outdir = outdir, keyfile = keyfile, file = GatherRescoredVcfs.output_vcf }
call FF.FinalizeToFile as FinalizeVETSTBI { input: outdir = outdir, keyfile = keyfile, file = GatherRescoredVcfs.output_vcf_index }

@@ -495,18 +484,12 @@ workflow SRJointCallGVCFsWithGenomicsDB {
output {
Array[String] genomicsDB = select_first([final_genomicsdb_location, ImportGVCFsIntoGenomicsDB.output_genomicsdb])

File raw_joint_vcf = select_first([FinalizeRawVCF.gcs_path, GatherRawVcfs.output_vcf])
File raw_joint_vcf_tbi = select_first([FinalizeRawTBI.gcs_path, GatherRawVcfs.output_vcf_index])

File joint_recalibrated_vcf = select_first([FinalizeVETSVCF.gcs_path, GatherRescoredVcfs.output_vcf])
File joint_recalibrated_vcf_tbi = select_first([FinalizeVETSTBI.gcs_path, GatherRescoredVcfs.output_vcf_index])
File joint_vcf = select_first([FinalizeVETSVCF.gcs_path, GatherRescoredVcfs.output_vcf])
File joint__vcf_tbi = select_first([FinalizeVETSTBI.gcs_path, GatherRescoredVcfs.output_vcf_index])

File joint_mt = select_first([FinalizeHailMatrixTable.gcs_path, CreateHailMatrixTable.mt_tar])
File joint_zarr = select_first([FinalizeZarr.gcs_path, ConvertToZarr.zarr])

File? annotated_joint_vcf = annotated_vcf
File? annotated_joint_vcf_tbi = annotated_vcf_tbi

Array[String]? snpEff_summary = final_snpeff_summary
Array[String]? snpEff_genes = final_snpEff_genes
}
4 changes: 2 additions & 2 deletions wdl/tasks/QC/AlignedMetrics.wdl
@@ -363,7 +363,7 @@ task SamStats {
command <<<
set -euxo pipefail

np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')
np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}')

samtools stats -@${np} ~{bam} > ~{basename}.sam_stats.txt
>>>
@@ -407,7 +407,7 @@ task SamStatsMap {
command <<<
set -euxo pipefail

np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')
np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}')

samtools stats -@${np} ~{bam} > ~{basename}.sam_stats.txt

2 changes: 1 addition & 1 deletion wdl/tasks/QC/FastQC.wdl
@@ -17,7 +17,7 @@ task FastQC {
command <<<
set -euxo pipefail

num_core=$(cat /proc/cpuinfo | awk '/^processor/{print $3}' | wc -l)
num_core=$(awk '/^processor/{print $3}' /proc/cpuinfo | wc -l)

fastqc -t $num_core --extract ~{bam}

6 changes: 3 additions & 3 deletions wdl/tasks/Utility/SRUtils.wdl
@@ -183,7 +183,7 @@ task BwaMem2 {
# Make sure we use all our processors:
np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')
if [[ ${np} -gt 2 ]] ; then
let np=${np}-1
np=$((np-1))
fi

# Breakdown of the arguments:
@@ -258,7 +258,7 @@ task MergeBamAlignment {

# Make sure we use all our processors:
np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')
let np=${np}-1
np=$((np-1))

java -Dsamjdk.compression_level=2 -Xms8192m -Xmx30768m -jar /usr/picard/picard.jar \
MergeBamAlignment \
@@ -341,7 +341,7 @@ task MarkDuplicates {

command <<<
tot_mem_mb=$(free -m | grep '^Mem' | awk '{print $2}')
let java_memory_size_mb=${tot_mem_mb}-5120
java_memory_size_mb=$((tot_mem_mb-5120))

java -Dsamjdk.compression_level=~{compression_level} -Xms${java_memory_size_mb}m -jar /usr/picard/picard.jar \
MarkDuplicates \
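
Several tasks in this diff size the JVM heap from the machine's memory as reported by `free`, minus a fixed headroom; a minimal sketch of that calculation (the 5120 MB margin mirrors the MarkDuplicates task above, while other tasks in this commit reserve different amounts):

    #!/usr/bin/env bash
    set -euo pipefail

    # Total memory in MB, as reported by `free`:
    tot_mem_mb=$(free -m | grep '^Mem' | awk '{print $2}')

    # Leave headroom for the OS and non-heap JVM overhead:
    java_memory_size_mb=$((tot_mem_mb - 5120))

    echo "would run: java -Xmx${java_memory_size_mb}m -jar /usr/picard/picard.jar MarkDuplicates ..."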
4 changes: 2 additions & 2 deletions wdl/tasks/Utility/Utils.wdl
@@ -284,7 +284,7 @@ task DownsampleSam {
command <<<

# Make sure we use all our processors:
np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')
np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}')

gatk DownsampleSam --VALIDATION_STRINGENCY SILENT --RANDOM_SEED ~{random_seed} -I ~{bam} -O ~{prefix}.bam -S ~{strategy} -P ~{probability} ~{extra_args}
samtools index -@$np ~{prefix}.bam
@@ -1571,7 +1571,7 @@ task GetReadsInBedFileRegions {
export GCS_OAUTH_TOKEN=`gcloud auth application-default print-access-token`

# Make sure we use all our processors:
np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')
np=$(grep ^processor /proc/cpuinfo | tail -n1 | awk '{print $NF+1}')

samtools view -@${np} -b -h -L ~{regions_bed} ~{gcs_bam_path} | samtools sort - > ~{prefix}.bam
samtools index -@${np} ~{prefix}.bam
46 changes: 23 additions & 23 deletions wdl/tasks/Utility/VariantUtils.wdl
@@ -688,8 +688,8 @@ task HardFilterVcf {

# Get amount of memory to use:
mem_available=$(free -m | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-1000
let mem_max=${mem_available}-750
mem_start=$((mem_available-1000))
mem_max=$((mem_available-750))

gatk --java-options "-Xms${mem_start}m -Xmx${mem_max}m" \
VariantFiltration \
@@ -744,8 +744,8 @@ task MakeSitesOnlyVcf {

# Get amount of memory to use:
mem_available=$(free -m | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-1000
let mem_max=${mem_available}-750
mem_start=$((mem_available-1000))
mem_max=$((mem_available-750))

gatk --java-options "-Xms${mem_start}m -Xmx${mem_max}m" \
MakeSitesOnlyVcf \
@@ -801,8 +801,8 @@ task AnnotateVcfWithBedRegions {

# Get amount of memory to use:
mem_available=$(free -m | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-1000
let mem_max=${mem_available}-750
mem_start=$((mem_available-1000))
mem_max=$((mem_available-750))

# We need to generate argument strings from the input arrays.
# First we check that the arrays are the same length:
@@ -952,8 +952,8 @@ task IndelsVariantRecalibrator {

# Get amount of memory to use:
mem_available=$(free -g | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-2
let mem_max=${mem_available}-1
mem_start=$((mem_available-2))
mem_max=$((mem_available-1))

gatk --java-options "-Xms${mem_start}g -Xmx${mem_max}g" \
VariantRecalibrator \
@@ -1078,8 +1078,8 @@ task SNPsVariantRecalibratorCreateModel {

# Get amount of memory to use:
mem_available=$(free -g | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-2
let mem_max=${mem_available}-1
mem_start=$((mem_available-2))
mem_max=$((mem_available-1))

gatk --java-options "-Xms${mem_start}g -Xmx${mem_max}g" \
VariantRecalibrator \
@@ -1158,8 +1158,8 @@ task ApplyVqsr {

# Get amount of memory to use:
mem_available=$(free -m | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-2000
let mem_max=${mem_available}-500
mem_start=$((mem_available-2000))
mem_max=$((mem_available-500))

gatk --java-options "-Xms${mem_start}m -Xmx${mem_max}m" \
ApplyVQSR \
@@ -1229,8 +1229,8 @@ task SelectVariants {

# Get amount of memory to use:
mem_available=$(free -m | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-2000
let mem_max=${mem_available}-500
mem_start=$((mem_available-2000))
mem_max=$((mem_available-500))

gatk --java-options "-Xms${mem_start}m -Xmx${mem_max}m" \
SelectVariants \
@@ -1290,8 +1290,8 @@ task RenameSingleSampleVcf {

# Get amount of memory to use:
mem_available=$(free -m | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-1000
let mem_max=${mem_available}-750
mem_start=$((mem_available-1000))
mem_max=$((mem_available-750))

gatk --java-options "-Xms${mem_start}m -Xmx${mem_max}m" \
RenameSampleInVcf \
@@ -1693,8 +1693,8 @@ task ExtractVariantAnnotations {

# Get amount of memory to use:
mem_available=$(free -g | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-2
let mem_max=${mem_available}-2
mem_start=$((mem_available-2))
mem_max=$((mem_available-2))

gatk --java-options "-Xms${mem_start}g -Xmx${mem_max}g -DGATK_STACKTRACE_ON_USER_EXCEPTION=true" \
ExtractVariantAnnotations \
@@ -1771,8 +1771,8 @@ task TrainVariantAnnotationsModel {

# Get amount of memory to use:
mem_available=$(free -g | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-2
let mem_max=${mem_available}-2
mem_start=$((mem_available-2))
mem_max=$((mem_available-2))

gatk --java-options "-Xms${mem_start}g -Xmx${mem_max}g -DGATK_STACKTRACE_ON_USER_EXCEPTION=true" \
TrainVariantAnnotationsModel \
@@ -1895,10 +1895,10 @@ task ScoreVariantAnnotations {

# Get amount of memory to use:
mem_available=$(free -g | grep '^Mem' | awk '{print $2}')
let mem_start=${mem_available}-2
let mem_max=${mem_available}-2
mem_start=$((mem_available-2))
mem_max=$((mem_available-2))

mode_lower=$(echo ~{mode} | tr 'A-Z' 'a-z')
mode_lower=$(echo ~{mode} | tr '[:upper:]' '[:lower:]')

# Set up model files:
mkdir model_files
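
The ScoreVariantAnnotations change above swaps `tr 'A-Z' 'a-z'` for POSIX character classes. The range form is what shellcheck flags (SC2018/SC2019), since ranges can depend on locale collation in some `tr` implementations; the class form is the portable spelling. A small standalone comparison, with `mode` as a made-up value:

    #!/usr/bin/env bash
    set -euo pipefail

    mode="INDEL"   # made-up example value

    # Range form: flagged by the linter; may misbehave under unusual locales.
    range_lower=$(echo "${mode}" | tr 'A-Z' 'a-z')

    # Character-class form: portable, the spelling this commit adopts.
    class_lower=$(echo "${mode}" | tr '[:upper:]' '[:lower:]')

    echo "${range_lower} ${class_lower}"   # both print "indel" here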
4 changes: 2 additions & 2 deletions wdl/tasks/VariantCalling/HaplotypeCaller.wdl
@@ -191,7 +191,7 @@ task HaplotypeCaller_GATK4_VCF {
# which do not rely on the output format of the `free` command.

available_memory_mb=$(free -m | awk '/^Mem/ {print $2}')
let java_memory_size_mb=available_memory_mb-1024
let java_memory_size_mb=$((available_memory_mb-1024))
echo Total available memory: ${available_memory_mb} MB >&2
echo Memory reserved for Java: ${java_memory_size_mb} MB >&2

@@ -278,7 +278,7 @@ task MergeBamouts {
# If the number of processors = 1, then `let` will return 1 here:
# So we need to turn off `set -e` for this command:
set +e
let mthreads=${np}-1
mthreads=$((np-1))
set -e

samtools merge -@${mthreads} ~{prefix}.bam ~{sep=" " bams}
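
The `set +e` guard in MergeBamouts exists because `let` returns a non-zero exit status whenever its last expression evaluates to 0 (for example, np-1 on a single-core machine), which would abort a script running under `set -e`. A plain assignment through arithmetic expansion always exits 0, so the new form avoids that failure mode; a short standalone demonstration:

    #!/usr/bin/env bash

    np=1

    set -e
    # Assignment via arithmetic expansion: the result is 0, but the exit status is 0,
    # so this is safe under `set -e`:
    mthreads=$((np - 1))
    echo "assignment ok, mthreads=${mthreads}"

    set +e
    # `let` (and a bare `(( ... ))`) exit with status 1 when the expression evaluates to 0:
    let check=${np}-1
    echo "let exit status: $?"   # prints 1
    set -e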
4 changes: 2 additions & 2 deletions wdl/tasks/VariantCalling/SRJointGenotyping.wdl
@@ -46,7 +46,7 @@ task CreateSampleNameMap {
# Create a temporary file to store file sizes in:
size_file=$(mktemp)

let i=1
i=1
while read file_path ; do

# Get our sample list from our file:
@@ -64,7 +64,7 @@ task CreateSampleNameMap {
# Add the file size to the size file:
gsutil du -sac ${file_path} | tail -n1 | awk '{print $1}' >> ${size_file}

let i=$i+1
i=$((i+1))
if [[ $i -gt ~{re_auth_interval} ]] ; then
# Periodically we should update the token so we don't have problems with long file lists:
export GCS_OAUTH_TOKEN=$(gcloud auth application-default print-access-token)
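
The counter rewritten in CreateSampleNameMap above exists so the GCS OAuth token can be refreshed every `re_auth_interval` files, since very long file lists can outlive a single token. A hedged sketch of that refresh pattern; the interval, the per-file work, and the counter reset are placeholders or assumptions, not copied from the task:

    #!/usr/bin/env bash
    set -euo pipefail

    re_auth_interval=50   # placeholder; the real task takes this as an input

    export GCS_OAUTH_TOKEN=$(gcloud auth application-default print-access-token)

    i=1
    while read -r file_path ; do
        # ... per-file work, e.g. gsutil du -sac "${file_path}" ...

        i=$((i + 1))
        if [[ $i -gt ${re_auth_interval} ]] ; then
            # Periodically refresh the token so long file lists don't hit auth expiry:
            export GCS_OAUTH_TOKEN=$(gcloud auth application-default print-access-token)
            i=1   # assumed reset; the reset falls outside the shown hunk
        fi
    done < file_list.txt   # stand-in for the task's input list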
6 changes: 3 additions & 3 deletions wdl/tasks/Visualization/NanoPlot.wdl
@@ -24,7 +24,7 @@ task NanoPlotFromSummary {
command <<<
set -euxo pipefail

num_core=$(cat /proc/cpuinfo | awk '/^processor/{print $3}' | wc -l)
num_core=$(awk '/^processor/{print $3}' /proc/cpuinfo | wc -l)

NanoPlot -t ${num_core} \
-c orangered \
@@ -120,7 +120,7 @@ task NanoPlotFromRichFastqs {
command <<<
set -euxo pipefail

num_core=$(cat /proc/cpuinfo | awk '/^processor/{print $3}' | wc -l)
num_core=$(awk '/^processor/{print $3}' /proc/cpuinfo | wc -l)

NanoPlot -t ${num_core} \
-c orangered \
@@ -205,7 +205,7 @@ task NanoPlotFromBam {

touch ~{bai} # avoid the warning bai is older than bam

num_core=$(cat /proc/cpuinfo | awk '/^processor/{print $3}' | wc -l)
num_core=$(awk '/^processor/{print $3}' /proc/cpuinfo | wc -l)

NanoPlot -t ${num_core} \
-c orangered \
