Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add check_terra_env task #497

Merged
merged 33 commits into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
f7effd4
add get_gcloud_env_info task for env config inspection
tomkinsc Oct 20, 2023
df8109e
add dump_gcloud_env_info workflow
tomkinsc Oct 20, 2023
c7c4112
conditional tee of optional command
tomkinsc Oct 20, 2023
ce3c206
evaluate optional input length in bash
tomkinsc Oct 20, 2023
09786bf
maybe cromwell sets WORKSPACE_NAMESPACE when invoking docker?
tomkinsc Oct 20, 2023
dd14346
can we obtain a gcloud bearer token?
tomkinsc Oct 20, 2023
5cb175d
WORKSPACE_NAMESPACE is not available from cromwell/rawls
tomkinsc Oct 20, 2023
6a68cd8
can we return current gcloud project?
tomkinsc Oct 20, 2023
2489cb9
try obtaining workspace info from google project resource labels
tomkinsc Oct 20, 2023
44c8397
try to reach leonardo server
tomkinsc Oct 20, 2023
60cfa93
list content of /cromwell_root
tomkinsc Oct 20, 2023
0d3ab84
try returning workspace info based on API return
tomkinsc Oct 20, 2023
ab29a5b
try passing in and resolving directly
tomkinsc Oct 20, 2023
fe82ebf
rename outputs
tomkinsc Oct 20, 2023
76a7a34
rename outputs
tomkinsc Oct 20, 2023
23912ed
make workspace inputs optional
tomkinsc Oct 20, 2023
40625a4
variable GOOGLE_PROJECT_ID
tomkinsc Oct 20, 2023
cbd3a14
quiet curl
tomkinsc Oct 20, 2023
f611820
remove additional command
tomkinsc Oct 20, 2023
e6efed4
remove cruft
tomkinsc Oct 20, 2023
e19c752
change docker image to ncbi-tools since it has jq and viral-baseimage…
tomkinsc Oct 20, 2023
5a125dd
use viral-core (w/ jq) for get_gcloud_env_info task
tomkinsc Oct 20, 2023
851f0b1
rename get_gcloud_env_info -> check_terra_env; emit booleans for exec…
tomkinsc Oct 26, 2023
5c6d5db
restore google_project_id.txt bits removed during prior commit's tidying
tomkinsc Oct 26, 2023
aaf0bf6
correction following refactor
tomkinsc Oct 26, 2023
8037292
include workspace bucket path among outputs of check_terra_env task
tomkinsc Oct 26, 2023
bfc8495
try API-less acquisition of workspace info; stub out create_or_update…
tomkinsc Oct 27, 2023
2625022
return input table name and row ID in check_terra_env task
tomkinsc Nov 1, 2023
5dd36ce
TOP_LEVEL_SUBMISSION_ID
tomkinsc Nov 1, 2023
4b0b560
handle non-table input, and single-row table input in check_terra_env
tomkinsc Nov 1, 2023
79b3951
in check_terra_env task, output top-level submission ID, and workspac…
tomkinsc Nov 3, 2023
ee7d99f
directly request workspace name and namespace from API
tomkinsc Nov 3, 2023
6062ea5
Merge branch 'master' into ct-terra-backend-env-exploration
tomkinsc Nov 3, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ workflows:
primaryDescriptorPath: /pipes/WDL/workflows/downsample.wdl
testParameterFiles:
- empty.json
- name: dump_gcloud_env_info
subclass: WDL
primaryDescriptorPath: /pipes/WDL/workflows/dump_gcloud_env_info.wdl
testParameterFiles:
- empty.json
- name: fastq_to_ubam
subclass: WDL
primaryDescriptorPath: /pipes/WDL/workflows/fastq_to_ubam.wdl
Expand Down
2 changes: 1 addition & 1 deletion github_actions_ci/version-wdl-runtimes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# skip this replacement for any version string line with the comment "#skip-global-version-pin"
#
# requires $MODULE_VERSIONS to be set to point to a text file with equal-sign-separated values
# export MODULE_VERSIONS="./requirements-modules.txt" && ./github_actions_ci/check-wdl-runtimes.sh
# export MODULE_VERSIONS="./requirements-modules.txt" && ./github_actions_ci/version-wdl-runtimes.sh

printf "Updating docker image tags in WDL files with those in ${MODULE_VERSIONS}\n\n"

Expand Down
194 changes: 194 additions & 0 deletions pipes/WDL/tasks/tasks_terra.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,163 @@ task gcs_copy {
}
}

task check_terra_env {
  # Inspects the execution backend and reports whether the task is running on GCP and/or Terra.
  # When both are true it also reports the Terra workspace (ID, name, namespace, bucket), the
  # top-level submission ID, and the data-table name / row ID(s) the submission was launched on.
  #
  # Inputs:
  #   docker  - image to run in; the command uses gcloud, curl, jq, and sed from it.
  # Outputs:
  #   booleans for Terra/GCP detection, string-valued workspace/submission details (empty strings
  #   when not applicable), and log files with the environment and gcloud configuration.
  input {
    String docker = "quay.io/broadinstitute/viral-core:2.2.2" #skip-global-version-pin
  }
  meta {
    description: "task for inspection of backend to determine whether the task is running on Terra and/or GCP"
  }
  command <<<
    set -ex

    # create gcloud-related output files
    touch gcloud_config_info.log
    touch google_project_id.txt

    # create Terra-related output files up front: every file read via read_string() in the
    # output section must exist even when the Terra+GCP branch below is not taken, otherwise
    # output evaluation fails
    touch workspace_id.txt
    touch workspace_name.txt
    touch workspace_namespace.txt
    touch workspace_bucket_path.txt
    touch input_table_name.txt
    touch input_row_id.txt
    touch top_level_submission_id.txt

    # write system environment variables to output file
    env | tee -a env_info.log

    GOOGLE_PROJECT_ID="$(gcloud config list --format='value(core.project)')"
    echo "$GOOGLE_PROJECT_ID" > google_project_id.txt

    # check whether gcloud project has a "terra-" prefix
    # to determine if running on Terra
    if case ${GOOGLE_PROJECT_ID} in terra-*) ;; *) false;; esac; then
      # (shell-portable prefix test; "case" patterns are globs, not regular expressions)
      echo "Job appears to be running on Terra (GCP project ID: ${GOOGLE_PROJECT_ID})"
      echo "true" > RUNNING_ON_TERRA
    else
      echo "NOT running on Terra"
      echo "false" > RUNNING_ON_TERRA
    fi

    # check if running on GCP: look for the Metadata-Flavor header in the metadata host's response
    if curl -s metadata.google.internal -i | grep -E 'Metadata-Flavor:\s+Google'; then
      echo "Cloud platform appears to be GCP";
      echo "true" > RUNNING_ON_GCP

      # write gcloud env info to output files
      gcloud info | tee -a gcloud_config_info.log
    else
      echo "NOT running on GCP";
      echo "false" > RUNNING_ON_GCP
    fi

    if grep "true" RUNNING_ON_GCP && grep "true" RUNNING_ON_TERRA; then
      echo "Running on Terra+GCP"

      # === Determine Terra workspace ID and submission ID for the workspace responsible for this job

      # Scrape various workflow / workspace info from the localization and delocalization scripts.
      #   from: https://github.com/broadinstitute/gatk/blob/ah_var_store/scripts/variantstore/wdl/GvsUtils.wdl#L35-L40
      # The workspace ID is the bucket name with its "fc-" / "fc-secure-" prefix removed.
      WORKSPACE_ID="$(sed -n -E 's!.*gs://fc-(secure-)?([^\/]+).*!\2!p' /cromwell_root/gcs_delocalization.sh | sort -u | tee workspace_id.txt)"
      echo "WORKSPACE_ID: ${WORKSPACE_ID}"

      # Workspace bucket path, scraped with its "fc-" / "fc-secure-" prefix intact.
      # Rebuilding it as gs://<WORKSPACE_ID> would drop the prefix that the expression above
      # strips and would therefore not name the bucket actually referenced by the script.
      WORKSPACE_BUCKET="$(sed -n -E 's!.*(gs://fc-(secure-)?[^\/]+).*!\1!p' /cromwell_root/gcs_delocalization.sh | sort -u)"
      echo "WORKSPACE_BUCKET: ${WORKSPACE_BUCKET}"

      # top-level submission ID
      TOP_LEVEL_SUBMISSION_ID="$(sed -n -E 's!.*gs://fc-(secure-)?([^\/]+)/submissions/([^\/]+).*!\3!p' /cromwell_root/gcs_delocalization.sh | sort -u | tee top_level_submission_id.txt)"
      echo "TOP_LEVEL_SUBMISSION_ID: ${TOP_LEVEL_SUBMISSION_ID}"

      # workflow job ID within submission
      #WORKFLOW_ID="$(sed -n -E 's!.*gs://fc-(secure-)?([^\/]+)/submissions/([^\/]+)/([^\/]+)/([^\/]+).*!\5!p' /cromwell_root/gcs_delocalization.sh | sort -u)"

      # other way to obtain Terra project ID, via scraping rather than from gcloud call used above
      #GOOGLE_PROJECT_ID="$(sed -n -E 's!.*(terra-[0-9a-f]+).*# project to use if requester pays$!\1!p' /cromwell_root/gcs_localization.sh | sort -u)"
      # =======================================

      # === request workspace name AND namespace from API, based on bucket path / ID ===
      # xtrace is switched off around the authenticated request so that "set -x" does not
      # echo the expanded bearer token into the task's stderr log
      set +x
      curl -s -X 'GET' \
        "https://api.firecloud.org/api/workspaces/id/${WORKSPACE_ID}?fields=workspace.name%2Cworkspace.namespace%2Cworkspace.googleProject" \
        -H 'accept: application/json' \
        -H "Authorization: Bearer $(gcloud auth print-access-token)" > workspace_info.json
      set -x

      # "select (.!=null)" yields an empty string rather than the text "null" if a field is absent
      WORKSPACE_NAME="$(jq -cr '.workspace.name | select (.!=null)' workspace_info.json)"
      # URL-encode the name since it is used as a path component of the submissions URL below
      WORKSPACE_NAME_URL_ENCODED="$(jq -rn --arg x "${WORKSPACE_NAME}" '$x|@uri')"
      WORKSPACE_NAMESPACE="$(jq -cr '.workspace.namespace | select (.!=null)' workspace_info.json)"

      echo "${WORKSPACE_NAME}"      | tee workspace_name.txt
      echo "${WORKSPACE_NAMESPACE}" | tee workspace_namespace.txt
      echo "${WORKSPACE_BUCKET}"    | tee workspace_bucket_path.txt

      # --- less direct way of obtaining workspace info by matching Terra project ID --
      #     preserved here for potential utility in obtaining workspace info for other projects/workspaces
      # get list of workspaces, limiting the output to only the fields we need
      #curl -s -X 'GET' \
      #'https://api.firecloud.org/api/workspaces?fields=workspace.name%2Cworkspace.namespace%2Cworkspace.bucketName%2Cworkspace.googleProject' \
      #-H 'accept: application/json' \
      #-H "Authorization: Bearer $(gcloud auth print-access-token)" > workspace_list.json

      # extract workspace name
      #WORKSPACE_NAME=$(jq -cr '.[] | select( .workspace.googleProject == "'${GOOGLE_PROJECT_ID}'" ).workspace | .name' workspace_list.json)

      # extract workspace namespace
      #WORKSPACE_NAMESPACE=$(jq -cr '.[] | select( .workspace.googleProject == "'${GOOGLE_PROJECT_ID}'" ).workspace | .namespace' workspace_list.json)
      #WORKSPACE_NAME_URL_ENCODED="$(jq -rn --arg x "${WORKSPACE_NAME}" '$x|@uri')"

      # extract workspace bucket
      #WORKSPACE_BUCKET=$(jq -cr '.[] | select( .workspace.googleProject == "'${GOOGLE_PROJECT_ID}'" ).workspace | .bucketName' workspace_list.json)
      # --- end less direct way of obtaining workspace info ---
      # =======================================


      # === obtain info on job submission inputs (table name, row ID)===
      touch submission_metadata.json
      # xtrace off again for the second authenticated request (see above)
      set +x
      curl -s -X 'GET' \
        "https://api.firecloud.org/api/workspaces/${WORKSPACE_NAMESPACE}/${WORKSPACE_NAME_URL_ENCODED}/submissions/${TOP_LEVEL_SUBMISSION_ID}" \
        -H 'accept: application/json' \
        -H "Authorization: Bearer $(gcloud auth print-access-token)" > submission_metadata.json
      set -x

      # Three cases are handled by the jq expressions below:
      #   no submissionEntity  -> empty string (submission was not launched on a table row)
      #   exactly one workflow -> the submission entity's type / name
      #   several workflows    -> comma-joined type / name of each workflow's entity
      INPUT_TABLE_NAME="$(jq -cr 'if .submissionEntity == null then "" elif (.workflows | length)==1 then .submissionEntity.entityType else [.workflows[].workflowEntity.entityType] | join(",") end' submission_metadata.json)"
      INPUT_ROW_ID="$(jq -cr 'if .submissionEntity == null then "" elif (.workflows | length)==1 then .submissionEntity.entityName else [.workflows[].workflowEntity.entityName] | join(",") end' submission_metadata.json)"

      echo "$INPUT_TABLE_NAME" | tee input_table_name.txt
      echo "$INPUT_ROW_ID"     | tee input_row_id.txt
      # =======================================
    else
      echo "Not running on Terra+GCP"
    fi

  >>>
  output {
    # backend detection flags, read from the marker files written by the command
    Boolean is_running_on_terra     = read_boolean("RUNNING_ON_TERRA")
    Boolean is_backed_by_gcp        = read_boolean("RUNNING_ON_GCP")

    String  google_project_id       = read_string("google_project_id.txt")

    # Terra workspace details; empty strings when not running on Terra+GCP
    String  workspace_id            = read_string("workspace_id.txt")
    String  workspace_name          = read_string("workspace_name.txt")
    String  workspace_namespace     = read_string("workspace_namespace.txt")
    String  workspace_bucket_path   = read_string("workspace_bucket_path.txt")

    # data-table inputs of the submission; empty strings when not applicable
    String  input_table_name        = read_string("input_table_name.txt")
    String  input_row_id            = read_string("input_row_id.txt")

    String  top_level_submission_id = read_string("top_level_submission_id.txt")

    # raw logs: full environment dump and "gcloud info" output (the latter is empty off GCP)
    File    env_info                = "env_info.log"
    File    gcloud_config_info      = "gcloud_config_info.log"
  }
  runtime {
    docker: docker
    memory: "1 GB"
    cpu: 1
    maxRetries: 1
  }
}

task upload_reads_assemblies_entities_tsv {
input {
String workspace_name
Expand Down Expand Up @@ -160,3 +317,40 @@ task download_entities_tsv {
File tsv_file = '~{outname}'
}
}

task create_or_update_sample_tables {
  # Stub: the embedded Python only binds the workspace inputs to local variables and performs
  # no further work yet. flowcell_run_id is accepted but not referenced by the command.
  input {
    String flowcell_run_id

    String workspace_namespace
    String workspace_name
    String workspace_bucket

    String docker = "quay.io/broadinstitute/viral-core:2.2.2" #skip-global-version-pin
  }

  meta {
    # NOTE(review): presumably set so the task is re-executed on every run rather than reused -- confirm
    volatile: true
  }

  command <<<
    python3 <<CODE

    # workspace coordinates interpolated from the task inputs
    workspace_project, workspace_name, workspace_bucket = (
        '~{workspace_namespace}',
        '~{workspace_name}',
        '~{workspace_bucket}',
    )
    # placeholder; no table is selected yet
    table_name = ''

    CODE
  >>>

  output {
    # captured standard streams of the command
    File stdout_log = stdout()
    File stderr_log = stderr()
  }

  runtime {
    docker: docker
    memory: "2 GB"
    cpu: 1
    maxRetries: 2
  }
}
35 changes: 35 additions & 0 deletions pipes/WDL/workflows/dump_gcloud_env_info.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
version 1.0

#DX_SKIP_WORKFLOW

import "../tasks/tasks_terra.wdl" as terra

workflow dump_gcloud_env_info {
  # Thin wrapper around the check_terra_env task: runs it once and re-exports each of its
  # outputs under the same name.
  meta {
    description: "Write system and gcloud environment info to output files."
    author: "Broad Viral Genomics"
    email: "viral-ngs@broadinstitute.org"
  }

  call terra.check_terra_env

  output {
    # --- log files ---
    File    env_info                = check_terra_env.env_info
    File    gcloud_config_info      = check_terra_env.gcloud_config_info

    # --- backend detection ---
    Boolean is_backed_by_gcp        = check_terra_env.is_backed_by_gcp
    Boolean is_running_on_terra     = check_terra_env.is_running_on_terra
    String  google_project_id       = check_terra_env.google_project_id

    # --- Terra workspace ---
    String  workspace_id            = check_terra_env.workspace_id
    String  workspace_namespace     = check_terra_env.workspace_namespace
    String  workspace_name          = check_terra_env.workspace_name
    String  workspace_bucket_path   = check_terra_env.workspace_bucket_path

    # --- submission and its data-table inputs ---
    String  top_level_submission_id = check_terra_env.top_level_submission_id
    String  input_table_name        = check_terra_env.input_table_name
    String  input_row_id            = check_terra_env.input_row_id
  }
}
Loading