In [1]:
import json
import os
import subprocess

import requests
from requests.auth import HTTPBasicAuth
from requests.exceptions import HTTPError

## Parameters

In [2]:
# this cell is tagged `parameters` and will be overridden by papermill
# key into the `names` lookup table, which yields the WDL workflow name
workflow_name = "asapseq"
# workflow ID; used in the metadata request URL and in the local output path
workflow_id = "dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6"

# JSON file read by get_secrets(); the "url", "username" and "password" keys are used
path_secrets_file = "/Users/chunj/keys/cromwell-secrets-aws-nvirginia.json"
# root directory under which download() creates <sample_name>/<workflow_id>/<subfolder>
path_base_data = "./asapseq"
# when True, download() returns immediately without copying anything
skip_download = False

In [3]:
# lookup table: lowercase name given as a parameter -> actual WDL workflow name
names = dict(
    hashtag="Hashtag",
    citeseq="CiteSeq",
    asapseq="AsapSeq",
    cellplex="Hashtag",
)

In [4]:
# Translate the lowercase parameter into the WDL workflow name.
# The guard keeps this cell idempotent: when `workflow_name` already holds a
# WDL name (the cell is re-run without restarting the kernel) it is left as is
# instead of failing with KeyError. Unknown names still raise KeyError.
if workflow_name not in names.values():
    workflow_name = names[workflow_name]
workflow_name

'AsapSeq'

## APIs to Cromwell

In [5]:
def get_secrets(path_secrets_file):
    """Read the secrets file and return its parsed JSON content.

    Args:
        path_secrets_file: path to a JSON file, opened in text mode.

    Returns:
        The deserialized JSON document.
    """
    with open(path_secrets_file, "rt") as handle:
        return json.load(handle)

def prep_api_call(secrets):
    """Build the base workflows URL and the credentials for API calls.

    Args:
        secrets: mapping providing the "url", "username" and "password" keys.

    Returns:
        A `(url, auth)` tuple: the `<url>/api/workflows/v1` prefix and an
        `HTTPBasicAuth` object built from the username and password.
    """
    api_version = "v1"
    endpoint = "{}/api/workflows/{}".format(secrets["url"], api_version)
    credentials = HTTPBasicAuth(secrets["username"], secrets["password"])

    return endpoint, credentials

def get_metadata(secrets, workflow_id):
    """Fetch the metadata of a workflow, with sub-workflows expanded.

    Args:
        secrets: mapping with the "url", "username" and "password" keys,
            forwarded to `prep_api_call`.
        workflow_id: ID of the workflow to look up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        HTTPError: if the server answers with a 4xx/5xx status. The error is
            printed before being re-raised so it shows up in the cell output.
    """
    base_url, auth = prep_api_call(secrets)

    try:
        # the metadata endpoint is a read-only lookup served over GET, not PATCH
        response = requests.get(
            url=f"{base_url}/{workflow_id}/metadata?expandSubWorkflows=true",
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json"
            },
            auth=auth
        )

        # requests does not raise HTTPError by itself; without this call the
        # handler below is unreachable and an error body would be returned
        # as if it were workflow metadata
        response.raise_for_status()

        return response.json()

    except HTTPError as err:
        print(err)
        # re-raise rather than implicitly returning None, which would only
        # surface later as an unrelated TypeError when the result is indexed
        raise

## Get Metadata

In [6]:
# load the credentials, then query the metadata of the target workflow
metadata = get_metadata(
    secrets=get_secrets(path_secrets_file),
    workflow_id=workflow_id,
)

In [7]:
# pretty-print the complete metadata document for inspection
print(json.dumps(metadata, indent=2))

{
  "workflowName": "AsapSeq",
  "workflowProcessingEvents": [
    {
      "cromwellId": "cromid-aa4a0ba",
      "description": "Finished",
      "timestamp": "2021-07-26T03:55:21.758Z",
      "cromwellVersion": "59"
    },
    {
      "cromwellId": "cromid-aa4a0ba",
      "description": "PickedUp",
      "timestamp": "2021-07-26T03:20:46.992Z",
      "cromwellVersion": "59"
    }
  ],
  "metadataSource": "Unarchived",
  "actualWorkflowLanguageVersion": "1.0",
  "submittedFiles": {
    "workflow": "version 1.0\n\nimport \"modules/Preprocess.wdl\" as Preprocess\nimport \"modules/HtoDemuxSeurat.wdl\" as HtoDemuxSeurat\nimport \"modules/HtoDemuxKMeans.wdl\" as HtoDemuxKMeans\nimport \"modules/ReformatFastq.wdl\" as ReformatFastq\nimport \"modules/AnnData.wdl\" as AnnData\n\nworkflow AsapSeq {\n\n    input {\n        Array[File] fastqR1\n        Array[File] fastqR2\n        Array[File] fastqR3\n\n        String conjugation = \"TotalSeqA\"\n        Boolean noReverseComplementR2 = false\n\n 

## Get Sample Name

In [8]:
# the sample name is read from the "sample" entry of the workflow labels
sample_name = metadata["labels"]["sample"]
sample_name

'asapseq-test'

## Get Counts

In [9]:
def download(sample_name, workflow_id, subfolder, items):
    """Copy one or more remote objects into the local output tree via `aws s3 cp`.

    Files are placed in
    `{path_base_data}/{sample_name}/{workflow_id}/{subfolder}`, which is
    created when missing. Nothing happens when the notebook-level
    `skip_download` flag is set.

    Args:
        sample_name: first directory level under `path_base_data`.
        workflow_id: second directory level.
        subfolder: third directory level; pass "" to use the workflow
            directory itself.
        items: a single source URI, or a list of source URIs.

    Raises:
        subprocess.CalledProcessError: if `aws s3 cp` exits with a non-zero
            status for any item.
    """
    if skip_download:
        return

    if not isinstance(items, list):
        items = [items]

    path_output = f"{path_base_data}/{sample_name}/{workflow_id}/{subfolder}"

    os.makedirs(path_output, exist_ok=True)

    for item in items:
        # argument list with no shell in between: spaces or shell
        # metacharacters in the sample name or URI can neither split the
        # command nor inject another one
        cmd_download = ["aws", "s3", "cp", str(item), path_output]
        # same text as the command line that used to be printed
        print(" ".join(cmd_download))
        # check=True turns a failed copy into an error instead of silently
        # continuing with missing files
        subprocess.run(cmd_download, check=True)

In [10]:
# drill down to the first CiteSeqCount call of the first Preprocess sub-workflow
meta_cite_seq_count = (
    metadata["calls"][f"{workflow_name}.Preprocess"][0]
    ["subWorkflowMetadata"]["calls"]["Preprocess.CiteSeqCount"][0]
)

In [11]:
# pretty-print the metadata of the CiteSeqCount call for inspection
print(json.dumps(meta_cite_seq_count, indent=2))

{
  "executionStatus": "Done",
  "stdout": "s3://dp-lab-gwf-core/cromwell-execution/AsapSeq/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/call-Preprocess/Preprocess/ce493bee-ed3a-4a8c-8753-d0acfe03f553/call-CiteSeqCount/CiteSeqCount-stdout.log",
  "backendStatus": "Succeeded",
  "compressedDockerSize": 383353488,
  "commandLine": "set -euo pipefail\n\n# preserve one core for the main process just in case\nlet n_threads=4-1\n\nCITE-seq-Count \\\n    -R1 /cromwell_root/dp-lab-gwf-core/cromwell-execution/AsapSeq/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/call-Preprocess/Preprocess/ce493bee-ed3a-4a8c-8753-d0acfe03f553/call-TrimR1/cacheCopy/R1.fastq.gz \\\n    -R2 /cromwell_root/dp-lab-gwf-core/cromwell-execution/AsapSeq/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/call-Preprocess/Preprocess/ce493bee-ed3a-4a8c-8753-d0acfe03f553/call-TrimR2/cacheCopy/R2.fastq.gz \\\n    --tags /cromwell_root/dp-lab-data/collaborators/sunj/AsapseqTest/ASAPseq_test_ATAC_HTO/Hashtag_results/tag-list.csv \\\n    -cbf 1 -cbl 16 \\\n   

### Download Read Count

In [12]:
# fetch the `outReadCount` output of CiteSeqCount into the "reads" subfolder
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="reads",
    items=meta_cite_seq_count["outputs"]["outReadCount"],
)

aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/AsapSeq/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/call-Preprocess/Preprocess/ce493bee-ed3a-4a8c-8753-d0acfe03f553/call-CiteSeqCount/glob-8e7f6a2dd9fb1323e5ebc5c1c063f6df/barcodes.tsv.gz ./asapseq/asapseq-test/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/reads
aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/AsapSeq/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/call-Preprocess/Preprocess/ce493bee-ed3a-4a8c-8753-d0acfe03f553/call-CiteSeqCount/glob-8e7f6a2dd9fb1323e5ebc5c1c063f6df/features.tsv.gz ./asapseq/asapseq-test/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/reads
aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/AsapSeq/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/call-Preprocess/Preprocess/ce493bee-ed3a-4a8c-8753-d0acfe03f553/call-CiteSeqCount/glob-8e7f6a2dd9fb1323e5ebc5c1c063f6df/matrix.mtx.gz ./asapseq/asapseq-test/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/reads


### Download UMI Count

In [13]:
# fetch the `outUmiCount` output of CiteSeqCount into the "umis" subfolder
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="umis",
    items=meta_cite_seq_count["outputs"]["outUmiCount"],
)

aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/AsapSeq/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/call-Preprocess/Preprocess/ce493bee-ed3a-4a8c-8753-d0acfe03f553/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/barcodes.tsv.gz ./asapseq/asapseq-test/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/umis
aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/AsapSeq/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/call-Preprocess/Preprocess/ce493bee-ed3a-4a8c-8753-d0acfe03f553/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/features.tsv.gz ./asapseq/asapseq-test/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/umis
aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/AsapSeq/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/call-Preprocess/Preprocess/ce493bee-ed3a-4a8c-8753-d0acfe03f553/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/matrix.mtx.gz ./asapseq/asapseq-test/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/umis


## Download Report

In [14]:
# fetch the `outReport` output of CiteSeqCount into the "report" subfolder
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="report",
    items=meta_cite_seq_count["outputs"]["outReport"],
)

aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/AsapSeq/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/call-Preprocess/Preprocess/ce493bee-ed3a-4a8c-8753-d0acfe03f553/call-CiteSeqCount/results/run_report.yaml ./asapseq/asapseq-test/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/report


## Get Demux Classification

In [15]:
# CITE-seq does not require demultiplexing, so there is nothing to fetch for it
if workflow_name != "CiteSeq":
    meta_demux = metadata["calls"][f"{workflow_name}.HtoDemuxKMeans"][0]
    download(
        sample_name=sample_name,
        workflow_id=workflow_id,
        subfolder="",
        items=meta_demux["outputs"]["outClass"],
    )

aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/AsapSeq/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/call-HtoDemuxKMeans/classification.tsv.gz ./asapseq/asapseq-test/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/


## Download AnnData

In [16]:
# because CITE-seq does not require demultiplexing, its final output is
# named `adata` rather than `adataFinal`
attr_adata = (
    f"{workflow_name}.adata"
    if workflow_name == "CiteSeq"
    else f"{workflow_name}.adataFinal"
)

download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="",
    items=metadata["outputs"][attr_adata],
)

aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/AsapSeq/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/call-UpdateAnnData/ASAPseq_test_HTO_IGO_11671_3.h5ad ./asapseq/asapseq-test/dbc6304d-26bf-42a1-b3bb-4fa65ffaf7a6/


## Output Sample Name

In [17]:
# NOTE(review): presumably whatever runs this notebook reads the sample name
# from the last printed line — confirm before adding or reordering cells
# 1. DO NOT DELETE
# 2. MUST BE THE FINAL LINE
print(sample_name)

asapseq-test
