From 014f69fea1424f08e5878a7c1dfb7ca7a118bbf3 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Thu, 10 Aug 2023 14:51:25 -0400 Subject: [PATCH 1/3] bugfix output location of fasta in download_annotations; pass NCBI API key to tasks: download_fasta, download_annotations bugfix output location of fasta in download_annotations; pass NCBI API key to tasks: download_fasta, download_annotations --- pipes/WDL/tasks/tasks_ncbi.wdl | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/pipes/WDL/tasks/tasks_ncbi.wdl b/pipes/WDL/tasks/tasks_ncbi.wdl index bcd46bc5c..ba7f9c25f 100644 --- a/pipes/WDL/tasks/tasks_ncbi.wdl +++ b/pipes/WDL/tasks/tasks_ncbi.wdl @@ -5,17 +5,25 @@ task download_fasta { String out_prefix Array[String]+ accessions String emailAddress + String apiKeyNCBI String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2" } + parameter_meta { + out_prefix: { description: "basename of the output fasta file. Will contain multiple sequences if multiple accessions are specified" } + accessions: { description: "accessions of sequences to download" } + apiKeyNCBI: { description: "NCBI API key for more frequent requests; see: https://support.nlm.nih.gov/knowledgebase/article/KA-05317/en-us" } + } + command { ncbi.py --version | tee VERSION ncbi.py fetch_fastas \ + --combinedFilePrefix ${out_prefix} \ + ~{'--api_key ' + apiKeyNCBI} \ ${emailAddress} \ . \ - ${sep=' ' accessions} \ - --combinedFilePrefix ${out_prefix} \ + ${sep=' ' accessions} } output { @@ -36,31 +44,42 @@ task download_annotations { input { Array[String]+ accessions String emailAddress + String apiKey String combined_out_prefix String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2" } + parameter_meta { + combined_out_prefix: { description: "basename of the output fasta file. 
Will contain multiple sequences if multiple accessions are specified" } + accessions: { description: "accessions for which sequences and feature tables will be downloaded" } + apiKeyNCBI: { description: "NCBI API key for more frequent requests; see: https://support.nlm.nih.gov/knowledgebase/article/KA-05317/en-us" } + } + command <<< set -ex -o pipefail ncbi.py --version | tee VERSION ncbi.py fetch_feature_tables \ + ~{'--api_key ' + apiKeyNCBI} \ ~{emailAddress} \ ./ \ ~{sep=' ' accessions} \ --loglevel DEBUG mkdir -p combined + pushd combined ncbi.py fetch_fastas \ + --combinedFilePrefix "~{combined_out_prefix}" \ + ~{'--api_key ' + apiKeyNCBI} \ + --forceOverwrite \ ~{emailAddress} \ ./ \ ~{sep=' ' accessions} \ - --combinedFilePrefix "combined/~{combined_out_prefix}" \ - --forceOverwrite \ --loglevel DEBUG + popd >>> output { - File combined_fasta = "~{combined_out_prefix}.fasta" + File combined_fasta = "combined/~{combined_out_prefix}.fasta" Array[File] genomes_fasta = glob("*.fasta") Array[File] features_tbl = glob("*.tbl") String viralngs_version = read_string("VERSION") From 81c17a5772890c3b00819d5b512463dc8012a362 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Thu, 10 Aug 2023 14:56:17 -0400 Subject: [PATCH 2/3] write combined fasta as temp file, then move to final output path in subdir write combined fasta as temp file, then move to final output path in subdir to avoid potential collision in the case where the output prefix matches one of the accessions --- pipes/WDL/tasks/tasks_ncbi.wdl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pipes/WDL/tasks/tasks_ncbi.wdl b/pipes/WDL/tasks/tasks_ncbi.wdl index ba7f9c25f..66fbf7770 100644 --- a/pipes/WDL/tasks/tasks_ncbi.wdl +++ b/pipes/WDL/tasks/tasks_ncbi.wdl @@ -18,16 +18,18 @@ task download_fasta { command { ncbi.py --version | tee VERSION + mkdir -p combined ncbi.py fetch_fastas \ - --combinedFilePrefix ${out_prefix} \ + --combinedFilePrefix "tmp.${out_prefix}" \ 
~{'--api_key ' + apiKeyNCBI} \ ${emailAddress} \ . \ ${sep=' ' accessions} + mv "tmp.${out_prefix}.fasta" "combined/${out_prefix}.fasta" } output { - File sequences_fasta = "${out_prefix}.fasta" + File sequences_fasta = "combined/${out_prefix}.fasta" String viralngs_version = read_string("VERSION") } @@ -66,16 +68,15 @@ task download_annotations { ~{sep=' ' accessions} \ --loglevel DEBUG mkdir -p combined - pushd combined ncbi.py fetch_fastas \ - --combinedFilePrefix "~{combined_out_prefix}" \ + --combinedFilePrefix "temp.~{combined_out_prefix}" \ ~{'--api_key ' + apiKeyNCBI} \ --forceOverwrite \ ~{emailAddress} \ ./ \ ~{sep=' ' accessions} \ --loglevel DEBUG - popd + mv "temp.~{combined_out_prefix}.fasta" "combined/~{combined_out_prefix}.fasta" >>> output { From fe0e8601ebf11fde7ab70b35e34f4dbcc949988c Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Thu, 10 Aug 2023 16:39:56 -0400 Subject: [PATCH 3/3] s/apiKey/apiKeyNCBI/ --- pipes/WDL/tasks/tasks_ncbi.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_ncbi.wdl b/pipes/WDL/tasks/tasks_ncbi.wdl index 66fbf7770..c192f60de 100644 --- a/pipes/WDL/tasks/tasks_ncbi.wdl +++ b/pipes/WDL/tasks/tasks_ncbi.wdl @@ -46,7 +46,7 @@ task download_annotations { input { Array[String]+ accessions String emailAddress - String apiKey + String apiKeyNCBI String combined_out_prefix String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2"