In [1]:
import json
import os
import subprocess

import requests
from requests.auth import HTTPBasicAuth
from requests.exceptions import HTTPError

## Parameters

In [2]:
# this cell is tagged `parameters` and will be overridden by papermill

# ID of the Cromwell workflow whose metadata is fetched; also used as a folder name by download()
workflow_id = "bb49f3ef-64cf-434c-89d9-eaf202c46a76"

# JSON file loaded by get_secrets(); prep_api_call() reads its "url", "username" and "password" keys
path_secrets_file = "/Users/chunj/keys/cromwell-secrets-aws-nvirginia.json"
# root directory under which download() creates <sample name>/<workflow id>/<subfolder>
path_base_data = "./citeseq"
# when True, download() returns immediately without copying anything
skip_download = False

## APIs to Cromwell

In [3]:
def get_secrets(path_secrets_file):
    """Read the secrets file and return its parsed JSON content.

    Args:
        path_secrets_file: path to a JSON document on disk.

    Returns:
        The deserialized JSON value stored in the file.
    """
    with open(path_secrets_file, "rt") as handle:
        return json.load(handle)

def prep_api_call(secrets):
    """Build the base workflows URL and the basic-auth object for API calls.

    Args:
        secrets: mapping providing the "url", "username" and "password" keys.

    Returns:
        A `(url, auth)` tuple: the server URL extended with
        `/api/workflows/v1`, and an `HTTPBasicAuth` built from the
        username and password.
    """
    api_version = "v1"
    endpoint = "{}/api/workflows/{}".format(secrets["url"], api_version)
    credentials = HTTPBasicAuth(secrets["username"], secrets["password"])
    return endpoint, credentials

def get_metadata(secrets, workflow_id):
    """Fetch the metadata of a workflow, with sub-workflows expanded.

    Args:
        secrets: mapping passed to `prep_api_call` to obtain the base URL
            and the authentication object.
        workflow_id: ID of the workflow to look up.

    Returns:
        The parsed JSON response body, or `None` when the server answers
        with an HTTP error status (the error is printed in that case).
    """
    base_url, auth = prep_api_call(secrets)

    try:
        # The metadata endpoint is a read-only resource and must be queried
        # with GET, not PATCH.
        response = requests.get(
            url=f"{base_url}/{workflow_id}/metadata?expandSubWorkflows=true",
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json"
            },
            auth=auth
        )

        # requests does not raise on 4xx/5xx responses by itself; without
        # this call the except clause below could never trigger and an error
        # body would be returned as if it were metadata.
        response.raise_for_status()

        return response.json()

    except HTTPError as err:
        print(err)
        return None

## Get Metadata

In [4]:
# load the credentials from the secrets file and pull the metadata for this workflow
metadata = get_metadata(get_secrets(path_secrets_file), workflow_id)

In [5]:
# pretty-print the complete metadata document for inspection
print(json.dumps(metadata, indent=2))

{
  "workflowName": "CiteSeq",
  "workflowProcessingEvents": [
    {
      "cromwellId": "cromid-aa4a0ba",
      "description": "Finished",
      "timestamp": "2021-07-26T04:20:12.908Z",
      "cromwellVersion": "59"
    },
    {
      "cromwellId": "cromid-aa4a0ba",
      "description": "PickedUp",
      "timestamp": "2021-07-26T03:20:46.992Z",
      "cromwellVersion": "59"
    }
  ],
  "metadataSource": "Unarchived",
  "actualWorkflowLanguageVersion": "1.0",
  "submittedFiles": {
    "workflow": "version 1.0\n\nimport \"modules/Preprocess.wdl\" as Preprocess\n\nworkflow CiteSeq {\n\n    input {\n        Array[File] uriFastqR1\n        Array[File] uriFastqR2\n\n        Int lengthR1\n        Int lengthR2\n\n        String sampleName\n\n        File cellBarcodeWhitelistUri\n        String cellBarcodeWhiteListMethod\n\n        # set to false if TotalSeq-A is used\n        # set to true if TotalSeq-B or C is used\n        Boolean translate10XBarcodes\n\n        String scRnaSeqPlatform = \

## Get Sample Name

In [6]:
# the sample name is taken from the "sample" entry of the workflow's labels
sample_name = metadata["labels"]["sample"]
# bare expression so the notebook displays the value
sample_name

'2090_CS1429a_T_1_CD45pos_citeseq_1_CITE'

## Get Counts

In [7]:
def download(sample_name, workflow_id, subfolder, items):
    """Copy one or more S3 objects into the local folder for this sample.

    Files are placed under
    `{path_base_data}/{sample_name}/{workflow_id}/{subfolder}`, which is
    created if it does not exist. Nothing happens when the notebook-level
    `skip_download` flag is true.

    Args:
        sample_name: name of the sample; used as a folder name.
        workflow_id: ID of the workflow; used as a folder name.
        subfolder: last component of the destination folder (may be empty).
        items: a single source location or a list of them.

    Raises:
        subprocess.CalledProcessError: if `aws s3 cp` exits with a non-zero
            status, so that a failed copy is not silently ignored.
    """
    if skip_download:
        return

    if not isinstance(items, list):
        items = [items]

    path_output = f"{path_base_data}/{sample_name}/{workflow_id}/{subfolder}"

    os.makedirs(path_output, exist_ok=True)

    for item in items:
        # log the command in the same copy-pasteable form as before
        print(f"aws s3 cp {item} {path_output}")
        # Pass the arguments as a list (no shell) so that spaces or shell
        # metacharacters in the source or destination cannot break or alter
        # the command; check=True surfaces a failed copy as an exception.
        subprocess.run(["aws", "s3", "cp", str(item), path_output], check=True)

In [8]:
# first CiteSeqCount call inside the first Preprocess sub-workflow call
meta_cite_seq_count = (
    metadata["calls"]["CiteSeq.Preprocess"][0]
    ["subWorkflowMetadata"]["calls"]["Preprocess.CiteSeqCount"][0]
)

In [9]:
# pretty-print the metadata of the CiteSeqCount call for inspection
print(json.dumps(meta_cite_seq_count, indent=2))

{
  "executionStatus": "Done",
  "stdout": "s3://dp-lab-gwf-core/cromwell-execution/CiteSeq/bb49f3ef-64cf-434c-89d9-eaf202c46a76/call-Preprocess/Preprocess/09c7a69b-5bf3-48a4-90a9-0c84d254102d/call-CiteSeqCount/CiteSeqCount-stdout.log",
  "backendStatus": "Succeeded",
  "commandLine": "set -euo pipefail\n\n# preserve one core for the main process just in case\nlet n_threads=4-1\n\nCITE-seq-Count \\\n    -R1 /cromwell_root/dp-lab-gwf-core/cromwell-execution/CiteSeq/bb49f3ef-64cf-434c-89d9-eaf202c46a76/call-Preprocess/Preprocess/09c7a69b-5bf3-48a4-90a9-0c84d254102d/call-TrimR1/R1.fastq.gz \\\n    -R2 /cromwell_root/dp-lab-gwf-core/cromwell-execution/CiteSeq/bb49f3ef-64cf-434c-89d9-eaf202c46a76/call-Preprocess/Preprocess/09c7a69b-5bf3-48a4-90a9-0c84d254102d/call-TrimR2/R2.fastq.gz \\\n    --tags /cromwell_root/dp-lab-data/SCRI_Projects/HTAN_CITEseq/CS1429a_T_1_CD45pos_citeseq_1_CITE/Hashtag_results/tag-list.csv \\\n    -cbf 1 -cbl 16 \\\n    -umif 17 -umil 28 \\\n    --bc_collapsing_dist 

### Download Read Count

In [10]:
# copy the files listed in the call's `outReadCount` output into the "reads" subfolder
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="reads",
    items=meta_cite_seq_count["outputs"]["outReadCount"],
)

aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/CiteSeq/bb49f3ef-64cf-434c-89d9-eaf202c46a76/call-Preprocess/Preprocess/09c7a69b-5bf3-48a4-90a9-0c84d254102d/call-CiteSeqCount/glob-8e7f6a2dd9fb1323e5ebc5c1c063f6df/barcodes.tsv.gz ./citeseq/2090_CS1429a_T_1_CD45pos_citeseq_1_CITE/bb49f3ef-64cf-434c-89d9-eaf202c46a76/reads
aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/CiteSeq/bb49f3ef-64cf-434c-89d9-eaf202c46a76/call-Preprocess/Preprocess/09c7a69b-5bf3-48a4-90a9-0c84d254102d/call-CiteSeqCount/glob-8e7f6a2dd9fb1323e5ebc5c1c063f6df/features.tsv.gz ./citeseq/2090_CS1429a_T_1_CD45pos_citeseq_1_CITE/bb49f3ef-64cf-434c-89d9-eaf202c46a76/reads
aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/CiteSeq/bb49f3ef-64cf-434c-89d9-eaf202c46a76/call-Preprocess/Preprocess/09c7a69b-5bf3-48a4-90a9-0c84d254102d/call-CiteSeqCount/glob-8e7f6a2dd9fb1323e5ebc5c1c063f6df/matrix.mtx.gz ./citeseq/2090_CS1429a_T_1_CD45pos_citeseq_1_CITE/bb49f3ef-64cf-434c-89d9-eaf202c46a76/reads


### Download UMI Count

In [11]:
# copy the files listed in the call's `outUmiCount` output into the "umis" subfolder
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="umis",
    items=meta_cite_seq_count["outputs"]["outUmiCount"],
)

aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/CiteSeq/bb49f3ef-64cf-434c-89d9-eaf202c46a76/call-Preprocess/Preprocess/09c7a69b-5bf3-48a4-90a9-0c84d254102d/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/barcodes.tsv.gz ./citeseq/2090_CS1429a_T_1_CD45pos_citeseq_1_CITE/bb49f3ef-64cf-434c-89d9-eaf202c46a76/umis
aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/CiteSeq/bb49f3ef-64cf-434c-89d9-eaf202c46a76/call-Preprocess/Preprocess/09c7a69b-5bf3-48a4-90a9-0c84d254102d/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/features.tsv.gz ./citeseq/2090_CS1429a_T_1_CD45pos_citeseq_1_CITE/bb49f3ef-64cf-434c-89d9-eaf202c46a76/umis
aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/CiteSeq/bb49f3ef-64cf-434c-89d9-eaf202c46a76/call-Preprocess/Preprocess/09c7a69b-5bf3-48a4-90a9-0c84d254102d/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/matrix.mtx.gz ./citeseq/2090_CS1429a_T_1_CD45pos_citeseq_1_CITE/bb49f3ef-64cf-434c-89d9-eaf202c46a76/umis


## Download Report

In [12]:
# copy the call's `outReport` output into the "report" subfolder
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="report",
    items=meta_cite_seq_count["outputs"]["outReport"],
)

aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/CiteSeq/bb49f3ef-64cf-434c-89d9-eaf202c46a76/call-Preprocess/Preprocess/09c7a69b-5bf3-48a4-90a9-0c84d254102d/call-CiteSeqCount/results/run_report.yaml ./citeseq/2090_CS1429a_T_1_CD45pos_citeseq_1_CITE/bb49f3ef-64cf-434c-89d9-eaf202c46a76/report


## Download AnnData

In [13]:
# copy the workflow-level `CiteSeq.adata` output directly into the workflow folder (no subfolder)
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="",
    items=metadata["outputs"]["CiteSeq.adata"],
)

aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/CiteSeq/bb49f3ef-64cf-434c-89d9-eaf202c46a76/call-Preprocess/Preprocess/09c7a69b-5bf3-48a4-90a9-0c84d254102d/call-ToAnnData/2090_CS1429a_T_1_CD45pos_citeseq_1_CITE.h5ad ./citeseq/2090_CS1429a_T_1_CD45pos_citeseq_1_CITE/bb49f3ef-64cf-434c-89d9-eaf202c46a76/


## Output Sample Name

In [14]:
# 1. DO NOT DELETE
# 2. MUST BE THE FINAL LINE
# NOTE(review): presumably whatever runs this notebook reads the sample name
# from the last line of its output - confirm before changing anything here
print(sample_name)

2090_CS1429a_T_1_CD45pos_citeseq_1_CITE
