In [2]:
import os
import json

import requests
from requests.exceptions import HTTPError
from requests.auth import HTTPBasicAuth

## Parameters

In [3]:
# this cell is tagged `parameters` and will be overridden by papermill

# ID of the Cromwell workflow whose metadata and outputs are fetched below
workflow_id = "9c72e274-712a-4137-811c-4f1d6cc2eeb1"

# JSON file read by get_secrets(); prep_api_call() reads its "url", "username" and "password" keys
# NOTE(review): this default is a machine-specific absolute path; pass your own value when running
path_secrets_file = "/Users/chunj/keys/secrets-aws.json"
# root folder under which download() creates <sample_name>/<workflow_id>/<subfolder>
path_base_data = "./citeseq"
# when True, download() returns immediately without creating folders or copying files
skip_download = False

## APIs to Cromwell

In [4]:
def get_secrets(path_secrets_file):
    """Read the secrets file and return its decoded JSON content.

    Parameters
    ----------
    path_secrets_file : str
        Path to a JSON file; it is opened in text mode.

    Returns
    -------
    The parsed JSON document, i.e. whatever top-level value the file holds.
    """
    with open(path_secrets_file, mode="rt") as handle:
        # json.load reads the whole stream and parses it in one step
        return json.load(handle)

def prep_api_call(secrets):
    """Build the workflows API base URL and the basic-auth object.

    Parameters
    ----------
    secrets : dict
        Must provide the keys "url", "username" and "password".

    Returns
    -------
    tuple
        (base_url, auth) where base_url is "<url>/api/workflows/v1" and
        auth is an HTTPBasicAuth built from the username and password.

    Raises
    ------
    KeyError
        If any of the required keys is missing from `secrets`.
    """
    api_version = "v1"

    # A server URL configured with a trailing slash ("https://host/") would
    # otherwise produce "https://host//api/...", so strip it first.
    server = secrets["url"].rstrip("/")
    url = f"{server}/api/workflows/{api_version}"

    auth = HTTPBasicAuth(secrets["username"], secrets["password"])

    return url, auth

def get_metadata(secrets, workflow_id):
    """Fetch a workflow's metadata from Cromwell, with sub-workflows expanded.

    Parameters
    ----------
    secrets : dict
        Connection settings; passed to prep_api_call() to obtain the API
        base URL and the authentication object.
    workflow_id : str
        ID of the workflow to look up.

    Returns
    -------
    The decoded JSON body of the response, or None when the server answered
    with an HTTP error status (the error is printed in that case).
    """
    base_url, auth = prep_api_call(secrets)

    try:
        # Reading metadata is a retrieval, so the request must be a GET
        # (PATCH is meant for partial updates of a resource).
        response = requests.get(
            url=f"{base_url}/{workflow_id}/metadata?expandSubWorkflows=true",
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json"
            },
            auth=auth
        )

        # Convert 4xx/5xx answers into HTTPError; without this call the
        # handler below can never run and an error body would be returned
        # as if it were workflow metadata.
        response.raise_for_status()

        return response.json()

    except HTTPError as err:
        print(err)
        return None

## Get Metadata

In [5]:
# load the connection settings from the secrets file and fetch this workflow's metadata
metadata = get_metadata(
    get_secrets(path_secrets_file),
    workflow_id
)

In [6]:
# pretty-print the whole metadata document (2-space indent) for inspection
print(json.dumps(metadata, indent=2))

{
  "workflowName": "CiteSeq",
  "workflowProcessingEvents": [
    {
      "cromwellId": "cromid-553c754",
      "description": "Finished",
      "timestamp": "2020-10-27T04:16:04.433Z",
      "cromwellVersion": "51"
    },
    {
      "cromwellId": "cromid-553c754",
      "description": "PickedUp",
      "timestamp": "2020-10-27T03:27:26.635Z",
      "cromwellVersion": "51"
    }
  ],
  "metadataSource": "Unarchived",
  "actualWorkflowLanguageVersion": "1.0",
  "submittedFiles": {
    "workflow": "version 1.0\n\nimport \"modules/Preprocess.wdl\" as Preprocess\nimport \"modules/ToAnnData.wdl\" as ToAnnData\n\nworkflow CiteSeq {\n\n    input {\n        Array[File] uriFastqR1\n        Array[File] uriFastqR2\n\n        Int lengthR1\n        Int lengthR2\n\n        String sampleName\n\n        File cellBarcodeWhitelistUri\n        String cellBarcodeWhiteListMethod\n\n        # set to false if TotalSeq-A is used\n        # set to true if TotalSeq-B or C is used\n        Boolean translate10X

## Get Sample Name

In [7]:
# the sample name is taken from the `sample` label stored in the workflow metadata
sample_name = metadata["labels"]["sample"]
sample_name

'2091_CS1429a_T_1_CD45pos_citeseq_2_CITE'

## Get Counts

In [8]:
def download(sample_name, workflow_id, subfolder, items):
    """Copy one or more S3 objects into a local per-workflow folder.

    Files are placed in
    `{path_base_data}/{sample_name}/{workflow_id}/{subfolder}`, which is
    created if missing. Nothing happens when the notebook-level
    `skip_download` flag is true.

    Parameters
    ----------
    sample_name : str
        Name of the sample; used as the first folder level.
    workflow_id : str
        ID of the workflow; used as the second folder level.
    subfolder : str
        Last folder level; may be empty.
    items : list or single value
        Source location(s) handed to `aws s3 cp`. A non-list value is
        treated as a single item.

    Returns
    -------
    None. A failed copy is reported on stdout; it does not raise.
    """
    # local import: this function is the only user of subprocess
    import subprocess

    if skip_download:
        return

    if not isinstance(items, list):
        items = [items]

    path_output = f"{path_base_data}/{sample_name}/{workflow_id}/{subfolder}"

    os.makedirs(path_output, exist_ok=True)

    for item in items:
        # printed for the record only; the command itself is not run through a shell
        cmd_download = f"aws s3 cp {item} {path_output}"
        print(cmd_download)

        # An argument list (no shell) keeps spaces and shell metacharacters in
        # the source or destination from being split or interpreted.
        argv = ["aws", "s3", "cp", str(item), path_output]
        try:
            completed = subprocess.run(argv)
        except OSError as err:
            # e.g. the aws executable is not installed / not on PATH
            print(f"could not run aws cli: {err}")
            continue

        # os.system() used to discard the exit status, hiding failed copies
        if completed.returncode != 0:
            print(f"download failed (exit code {completed.returncode}): {item}")

In [9]:
# first Preprocess.CiteSeqCount call inside the first CiteSeq.Preprocess sub-workflow call
meta_cite_seq_count = metadata["calls"]["CiteSeq.Preprocess"][0]["subWorkflowMetadata"]["calls"]["Preprocess.CiteSeqCount"][0]

In [10]:
# pretty-print the metadata of the selected CiteSeqCount call for inspection
print(json.dumps(meta_cite_seq_count, indent=2))

{
  "executionStatus": "Done",
  "stdout": "s3://dp-lab-batch/cromwell-execution/CiteSeq/9c72e274-712a-4137-811c-4f1d6cc2eeb1/call-Preprocess/Preprocess/5820c268-1c20-45a3-b6c6-e032cc764a1e/call-CiteSeqCount/CiteSeqCount-stdout.log",
  "backendStatus": "Succeeded",
  "compressedDockerSize": 392564039,
  "commandLine": "set -euo pipefail\n\n# preserve one core for the main process just in case\nlet n_threads=4-1\n\nCITE-seq-Count \\\n    -R1 /cromwell_root/dp-lab-batch/cromwell-execution/CiteSeq/9c72e274-712a-4137-811c-4f1d6cc2eeb1/call-Preprocess/Preprocess/5820c268-1c20-45a3-b6c6-e032cc764a1e/call-TrimR1/R1.fastq.gz \\\n    -R2 /cromwell_root/dp-lab-batch/cromwell-execution/CiteSeq/9c72e274-712a-4137-811c-4f1d6cc2eeb1/call-Preprocess/Preprocess/5820c268-1c20-45a3-b6c6-e032cc764a1e/call-TrimR2/R2.fastq.gz \\\n    --tags /cromwell_root/dp-lab-data/sc-seq/Project_11244/CS1429a_T_1_CD45pos_citeseq_2_CITE/Hashtag_results/tag-list.csv \\\n    -cbf 1 -cbl 16 \\\n    -umif 17 -umil 28 \\\n   

### Download Read Count

In [11]:
# copy everything listed under the `outReadCount` output into the "reads" subfolder
download(
    sample_name, workflow_id,
    "reads",
    meta_cite_seq_count["outputs"]["outReadCount"]
)

aws s3 cp s3://dp-lab-batch/cromwell-execution/CiteSeq/9c72e274-712a-4137-811c-4f1d6cc2eeb1/call-Preprocess/Preprocess/5820c268-1c20-45a3-b6c6-e032cc764a1e/call-CiteSeqCount/glob-8e7f6a2dd9fb1323e5ebc5c1c063f6df/barcodes.tsv.gz ./citeseq/2091_CS1429a_T_1_CD45pos_citeseq_2_CITE/9c72e274-712a-4137-811c-4f1d6cc2eeb1/reads
aws s3 cp s3://dp-lab-batch/cromwell-execution/CiteSeq/9c72e274-712a-4137-811c-4f1d6cc2eeb1/call-Preprocess/Preprocess/5820c268-1c20-45a3-b6c6-e032cc764a1e/call-CiteSeqCount/glob-8e7f6a2dd9fb1323e5ebc5c1c063f6df/features.tsv.gz ./citeseq/2091_CS1429a_T_1_CD45pos_citeseq_2_CITE/9c72e274-712a-4137-811c-4f1d6cc2eeb1/reads
aws s3 cp s3://dp-lab-batch/cromwell-execution/CiteSeq/9c72e274-712a-4137-811c-4f1d6cc2eeb1/call-Preprocess/Preprocess/5820c268-1c20-45a3-b6c6-e032cc764a1e/call-CiteSeqCount/glob-8e7f6a2dd9fb1323e5ebc5c1c063f6df/matrix.mtx.gz ./citeseq/2091_CS1429a_T_1_CD45pos_citeseq_2_CITE/9c72e274-712a-4137-811c-4f1d6cc2eeb1/reads


### Download UMI Count

In [12]:
# copy everything listed under the `outUmiCount` output into the "umis" subfolder
download(
    sample_name, workflow_id,
    "umis",
    meta_cite_seq_count["outputs"]["outUmiCount"]
)

aws s3 cp s3://dp-lab-batch/cromwell-execution/CiteSeq/9c72e274-712a-4137-811c-4f1d6cc2eeb1/call-Preprocess/Preprocess/5820c268-1c20-45a3-b6c6-e032cc764a1e/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/barcodes.tsv.gz ./citeseq/2091_CS1429a_T_1_CD45pos_citeseq_2_CITE/9c72e274-712a-4137-811c-4f1d6cc2eeb1/umis
aws s3 cp s3://dp-lab-batch/cromwell-execution/CiteSeq/9c72e274-712a-4137-811c-4f1d6cc2eeb1/call-Preprocess/Preprocess/5820c268-1c20-45a3-b6c6-e032cc764a1e/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/features.tsv.gz ./citeseq/2091_CS1429a_T_1_CD45pos_citeseq_2_CITE/9c72e274-712a-4137-811c-4f1d6cc2eeb1/umis
aws s3 cp s3://dp-lab-batch/cromwell-execution/CiteSeq/9c72e274-712a-4137-811c-4f1d6cc2eeb1/call-Preprocess/Preprocess/5820c268-1c20-45a3-b6c6-e032cc764a1e/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/matrix.mtx.gz ./citeseq/2091_CS1429a_T_1_CD45pos_citeseq_2_CITE/9c72e274-712a-4137-811c-4f1d6cc2eeb1/umis


## Download Report

In [13]:
# copy the `outReport` output into the "report" subfolder
download(
    sample_name, workflow_id,
    "report",
    meta_cite_seq_count["outputs"]["outReport"]
)

aws s3 cp s3://dp-lab-batch/cromwell-execution/CiteSeq/9c72e274-712a-4137-811c-4f1d6cc2eeb1/call-Preprocess/Preprocess/5820c268-1c20-45a3-b6c6-e032cc764a1e/call-CiteSeqCount/results/run_report.yaml ./citeseq/2091_CS1429a_T_1_CD45pos_citeseq_2_CITE/9c72e274-712a-4137-811c-4f1d6cc2eeb1/report


## Download h5ad

In [14]:
# first CiteSeq.CiteSeqToAnnData call of the top-level workflow
meta_adata = metadata["calls"]["CiteSeq.CiteSeqToAnnData"][0]

In [15]:
# display the call metadata for inspection
meta_adata

{'executionStatus': 'Done',
 'stdout': 's3://dp-lab-batch/cromwell-execution/CiteSeq/9c72e274-712a-4137-811c-4f1d6cc2eeb1/call-CiteSeqToAnnData/CiteSeqToAnnData-stdout.log',
 'backendStatus': 'Succeeded',
 'compressedDockerSize': 459883916,
 'commandLine': 'set -euo pipefail\n\nmkdir umi-counts\ncp /cromwell_root/dp-lab-batch/cromwell-execution/CiteSeq/9c72e274-712a-4137-811c-4f1d6cc2eeb1/call-Preprocess/Preprocess/5820c268-1c20-45a3-b6c6-e032cc764a1e/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/barcodes.tsv.gz /cromwell_root/dp-lab-batch/cromwell-execution/CiteSeq/9c72e274-712a-4137-811c-4f1d6cc2eeb1/call-Preprocess/Preprocess/5820c268-1c20-45a3-b6c6-e032cc764a1e/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/features.tsv.gz /cromwell_root/dp-lab-batch/cromwell-execution/CiteSeq/9c72e274-712a-4137-811c-4f1d6cc2eeb1/call-Preprocess/Preprocess/5820c268-1c20-45a3-b6c6-e032cc764a1e/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/matrix.mtx.gz ./umi-counts/\n\nm

In [16]:
# subfolder is empty, so the `outAdata` file lands directly in the
# <sample_name>/<workflow_id>/ folder rather than in a nested one
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="",
    items=meta_adata["outputs"]["outAdata"]
)

aws s3 cp s3://dp-lab-batch/cromwell-execution/CiteSeq/9c72e274-712a-4137-811c-4f1d6cc2eeb1/call-CiteSeqToAnnData/2091_CS1429a_T_1_CD45pos_citeseq_2_CITE.CITE-seq.h5ad ./citeseq/2091_CS1429a_T_1_CD45pos_citeseq_2_CITE/9c72e274-712a-4137-811c-4f1d6cc2eeb1/


## Output Sample Name

In [17]:
# 1. DO NOT DELETE
# 2. MUST BE THE FINAL LINE
# NOTE(review): presumably whatever executes this notebook reads the sample
# name from the last line of output; confirm against the caller
print(sample_name)

2091_CS1429a_T_1_CD45pos_citeseq_2_CITE
