Skip to content

Commit

Permalink
bugfix fetch_sra_to_bam (#542)
Browse files Browse the repository at this point in the history
* in fetch_sra_to_bam, make json parsing of metadata more flexible in the possible return values it can process

* add email address and ncbi api key to inputs for Fetch_SRA_to_BAM; pass terra email from workflow (if and only if running on terra)

* add braces

* select_first email

* array

* pass empty string

* add '-db sra' to efetch call

* bump pinned docker versions; temporarily skip enforcement of version pinning for viral-classify in tasks_megablast.wdl

* temporarily skip version pinning of viral-classify in tasks_megablast.wdl
  • Loading branch information
tomkinsc committed Jun 19, 2024
1 parent 519a6a1 commit 9f43dde
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 19 deletions.
6 changes: 3 additions & 3 deletions pipes/WDL/tasks/tasks_megablast.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ task trim_rmdup_subsamp {
Int machine_mem_gb = 128
Int cpu = 16
Int disk_size_gb = 100
String docker ="quay.io/broadinstitute/viral-assemble:2.3.1.3"
String docker ="quay.io/broadinstitute/viral-assemble:2.3.1.4"
}
parameter_meta {
inBam: {
Expand Down Expand Up @@ -167,7 +167,7 @@ task ChunkBlastHits {
Int machine_mem_gb = 64
Int cpu = 16
Int disk_size_gb = 300
String docker = "quay.io/broadinstitute/viral-classify:fn_blast"
String docker = "quay.io/broadinstitute/viral-classify:fn_blast" #skip-global-version-pin
}
String fasta_basename = basename(inFasta, ".fasta")
#setting current working directory as logging outputs
Expand Down Expand Up @@ -247,7 +247,7 @@ task blastoff {
Int machine_mem_gb = 64
Int cpu = 16
Int disk_size_gb = 300
String docker = "quay.io/broadinstitute/viral-classify:fn_blast"
String docker = "quay.io/broadinstitute/viral-classify:fn_blast" #skip-global-version-pin

}
#setting current working directory as logging outputs
Expand Down
16 changes: 8 additions & 8 deletions pipes/WDL/tasks/tasks_metagenomics.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ task kraken2 {
Int? min_base_qual

Int machine_mem_gb = 90
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.0"
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.2"
}

parameter_meta {
Expand Down Expand Up @@ -335,7 +335,7 @@ task report_primary_kraken_taxa {
File kraken_summary_report
String focal_taxon = "Viruses"

String docker = "quay.io/broadinstitute/viral-classify:2.2.4.0"
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.2"
}
String out_basename = basename(kraken_summary_report, '.txt')
Int disk_size = 50
Expand Down Expand Up @@ -386,7 +386,7 @@ task filter_refs_to_found_taxa {
File taxdump_tgz
Int min_read_count = 100

String docker = "quay.io/broadinstitute/viral-classify:2.2.4.0"
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.2"
}
String ref_basename = basename(taxid_to_ref_accessions_tsv, '.tsv')
String hits_basename = basename(focal_report_tsv, '.tsv')
Expand Down Expand Up @@ -437,7 +437,7 @@ task build_kraken2_db {
Int? zstd_compression_level

Int machine_mem_gb = 100
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.0"
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.2"
}

Int disk_size = 750
Expand Down Expand Up @@ -579,7 +579,7 @@ task blastx {
File krona_taxonomy_db_tgz

Int machine_mem_gb = 8
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.0"
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.2"
}

parameter_meta {
Expand Down Expand Up @@ -669,7 +669,7 @@ task krona {
Int? magnitude_column

Int machine_mem_gb = 3
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.0"
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.2"
}

Int disk_size = 50
Expand Down Expand Up @@ -776,7 +776,7 @@ task filter_bam_to_taxa {
String out_filename_suffix = "filtered"

Int machine_mem_gb = 26
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.0"
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.2"
}

String out_basename = basename(classified_bam, ".bam") + "." + out_filename_suffix
Expand Down Expand Up @@ -863,7 +863,7 @@ task kaiju {
File krona_taxonomy_db_tgz # taxonomy/taxonomy.tab

Int machine_mem_gb = 100
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.0"
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.2"
}

String input_basename = basename(reads_unmapped_bam, ".bam")
Expand Down
9 changes: 7 additions & 2 deletions pipes/WDL/tasks/tasks_ncbi_tools.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ task Fetch_SRA_to_BAM {
String SRA_ID

String? sample_name
String? email_address
String? ncbi_api_key
Int? machine_mem_gb
String docker = "quay.io/broadinstitute/ncbi-tools:2.10.7.10"
}
Expand All @@ -16,8 +18,11 @@ task Fetch_SRA_to_BAM {
}
command <<<
set -e
~{if defined(ncbi_api_key) then "export NCBI_API_KEY=~{ncbi_api_key}" else ""}

# fetch SRA metadata on this record
esearch -db sra -q "~{SRA_ID}" | efetch -mode json -json > SRA.json
esearch ~{if defined(email_address) then "-email ~{email_address}" else ""} -db sra -query "~{SRA_ID}" | efetch -db sra ~{if defined(email_address) then "-email ~{email_address}" else ""} -mode json -json > SRA.json

cp SRA.json "~{SRA_ID}.json"

# pull reads from SRA and make a fully annotated BAM -- must succeed
Expand All @@ -26,7 +31,7 @@ task Fetch_SRA_to_BAM {
MODEL=$(jq -r ".EXPERIMENT_PACKAGE_SET.EXPERIMENT_PACKAGE.EXPERIMENT.PLATFORM.$PLATFORM.INSTRUMENT_MODEL" SRA.json)
SAMPLE=$(jq -r '.EXPERIMENT_PACKAGE_SET.EXPERIMENT_PACKAGE.SAMPLE.IDENTIFIERS.EXTERNAL_ID|select(.namespace == "BioSample")|.content' SRA.json)
LIBRARY=$(jq -r .EXPERIMENT_PACKAGE_SET.EXPERIMENT_PACKAGE.EXPERIMENT.alias SRA.json)
RUNDATE=$(jq -r '.EXPERIMENT_PACKAGE_SET.EXPERIMENT_PACKAGE.RUN_SET.RUN.SRAFiles|if (.SRAFile|type) == "object" then .SRAFile.date else [.SRAFile[]|select(.supertype == "Original")][0].date end' SRA.json | cut -f 1 -d ' ')
RUNDATE=$(jq -r '(.EXPERIMENT_PACKAGE_SET.EXPERIMENT_PACKAGE.RUN_SET | (if (.RUN|type) == "object" then (.RUN) else (.RUN[] | select(any(.; .accession == "~{SRA_ID}"))) end) | .SRAFiles) | if (.SRAFile|type) == "object" then .SRAFile.date else [.SRAFile[]|select(.supertype == "Original" or .supertype=="Primary ETL")][0].date end' SRA.json | cut -f 1 -d ' ')

if [[ -n "~{sample_name}" ]]; then
SAMPLE="~{sample_name}"
Expand Down
2 changes: 1 addition & 1 deletion pipes/WDL/tasks/tasks_reports.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ task aggregate_metagenomics_reports {
String aggregate_taxlevel_focus = "species"
Int aggregate_top_N_hits = 5

String docker = "quay.io/broadinstitute/viral-classify:2.2.4.0"
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.2"
}

parameter_meta {
Expand Down
6 changes: 3 additions & 3 deletions pipes/WDL/tasks/tasks_taxon_filter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ task deplete_taxa {

Int cpu = 8
Int machine_mem_gb = 15
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.0"
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.2"
}

parameter_meta {
Expand Down Expand Up @@ -113,7 +113,7 @@ task filter_to_taxon {
String neg_control_prefixes_space_separated = "neg water NTC"

Int machine_mem_gb = 15
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.0"
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.2"
}

# do this in two steps in case the input doesn't actually have "cleaned" in the name
Expand Down Expand Up @@ -172,7 +172,7 @@ task build_lastal_db {
File sequences_fasta

Int machine_mem_gb = 7
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.0"
String docker = "quay.io/broadinstitute/viral-classify:2.2.4.2"
}

String db_name = basename(sequences_fasta, ".fasta")
Expand Down
13 changes: 12 additions & 1 deletion pipes/WDL/workflows/fetch_sra_to_bam.wdl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
version 1.0

import "../tasks/tasks_ncbi_tools.wdl" as ncbi_tools
import "../tasks/tasks_terra.wdl" as terra

workflow fetch_sra_to_bam {
meta {
Expand All @@ -10,7 +11,17 @@ workflow fetch_sra_to_bam {
allowNestedInputs: true
}

call ncbi_tools.Fetch_SRA_to_BAM
call terra.check_terra_env

#if(check_terra_env.is_running_on_terra) {
call ncbi_tools.Fetch_SRA_to_BAM {
input:
email_address = select_first([check_terra_env.user_email, ""])
}
#}
#if(!check_terra_env.is_running_on_terra) {
# call ncbi_tools.Fetch_SRA_to_BAM
#}

output {
File reads_ubam = Fetch_SRA_to_BAM.reads_ubam
Expand Down
2 changes: 1 addition & 1 deletion requirements-modules.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
broadinstitute/viral-core=2.3.1
broadinstitute/viral-assemble=2.3.1.4
broadinstitute/viral-classify=2.2.4.0
broadinstitute/viral-classify=2.2.4.2
broadinstitute/viral-phylo=2.1.20.2
broadinstitute/py3-bio=0.1.2
broadinstitute/beast-beagle-cuda=1.10.5pre
Expand Down

0 comments on commit 9f43dde

Please sign in to comment.