In [1]:
import json
import os
import shlex
import subprocess

import requests
from requests.auth import HTTPBasicAuth
from requests.exceptions import HTTPError

## Parameters

In [2]:
# this cell is tagged `parameters` and will be overridden by papermill

# ID of the workflow whose metadata and outputs are fetched below
workflow_id = "c3a941bf-62ed-469e-8ee6-d7448cdee457"

# JSON file read by get_secrets(); prep_api_call() uses its "url", "username" and "password" entries
# NOTE(review): absolute, user-specific default path — only works on the author's machine unless overridden
path_secrets_file = "/Users/chunj/keys/cromwell-secrets-aws-nvirginia.json"
# root folder under which download() creates <sample_name>/<workflow_id>/<subfolder>
path_base_data = "./hashtag"
# when True, download() returns immediately without copying anything
skip_download = False

## APIs to Cromwell

In [3]:
def get_secrets(path_secrets_file):
    """Read the secrets file and return its parsed JSON content.

    Args:
        path_secrets_file: path to a JSON file.

    Returns:
        Whatever the JSON document decodes to (a dict for a JSON object).
    """
    with open(path_secrets_file, "rt") as handle:
        return json.load(handle)

def prep_api_call(secrets):
    """Build the workflows API base URL and the basic-auth credentials.

    Args:
        secrets: mapping providing the "url", "username" and "password" entries.

    Returns:
        A ``(base_url, auth)`` tuple, where ``base_url`` is
        ``<url>/api/workflows/v1`` and ``auth`` is an ``HTTPBasicAuth`` object.
    """
    api_version = "v1"
    base_url = "{}/api/workflows/{}".format(secrets["url"], api_version)
    credentials = HTTPBasicAuth(secrets["username"], secrets["password"])
    return base_url, credentials

def get_metadata(secrets, workflow_id):
    """Fetch a workflow's metadata, with sub-workflow metadata expanded.

    Args:
        secrets: mapping with the "url", "username" and "password" entries
            consumed by prep_api_call().
        workflow_id: ID of the workflow to look up.

    Returns:
        The decoded JSON response body, or None when the server answered
        with an HTTP error status (the error is printed).
    """
    base_url, auth = prep_api_call(secrets)

    try:
        # the metadata endpoint is read-only, so it must be queried with GET
        response = requests.get(
            url=f"{base_url}/{workflow_id}/metadata",
            params={"expandSubWorkflows": "true"},
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json"
            },
            auth=auth
        )

        # requests does not raise on 4xx/5xx by itself; without this call the
        # HTTPError handler below is unreachable and an error payload would be
        # returned as if it were workflow metadata
        response.raise_for_status()

        return response.json()

    except HTTPError as err:
        print(err)
        return None

## Get Metadata

In [4]:
# load the credentials and pull the workflow metadata in one go
metadata = get_metadata(
    secrets=get_secrets(path_secrets_file),
    workflow_id=workflow_id,
)

In [6]:
# pretty-print the whole metadata document for inspection (the output can be very long)
print(json.dumps(metadata, indent=2))

{
  "workflowName": "Hashtag",
  "workflowProcessingEvents": [
    {
      "cromwellId": "cromid-aa4a0ba",
      "description": "PickedUp",
      "timestamp": "2021-07-26T01:44:57.333Z",
      "cromwellVersion": "59"
    },
    {
      "cromwellId": "cromid-aa4a0ba",
      "description": "Finished",
      "timestamp": "2021-07-26T01:57:04.657Z",
      "cromwellVersion": "59"
    }
  ],
  "metadataSource": "Unarchived",
  "actualWorkflowLanguageVersion": "1.0",
  "submittedFiles": {
    "workflow": "version 1.0\n\nimport \"modules/Preprocess.wdl\" as Preprocess\nimport \"modules/HtoDemuxSeurat.wdl\" as HtoDemuxSeurat\nimport \"modules/HtoDemuxKMeans.wdl\" as HtoDemuxKMeans\nimport \"modules/Combine.wdl\" as Combine\nimport \"modules/AnnData.wdl\" as AnnData\n\nworkflow Hashtag {\n\n    input {\n        Array[File] uriFastqR1\n        Array[File] uriFastqR2\n\n        Int lengthR1\n        Int lengthR2\n\n        String sampleName\n\n        File cellBarcodeWhitelistUri\n        String c

## Get Sample Name

In [7]:
# the sample name is taken from the workflow's `sample` label
sample_name = metadata["labels"]["sample"]
# bare expression: displayed as the cell output
sample_name

'1973_HD1915_7xNK_FB_HTO'

## Get Counts

In [10]:
def download(sample_name, workflow_id, subfolder, items):
    """Copy one or more remote objects into the local data folder via ``aws s3 cp``.

    Files are placed in ``{path_base_data}/{sample_name}/{workflow_id}/{subfolder}``,
    which is created when missing. Nothing is done when the notebook-level
    ``skip_download`` flag is true. A failed copy is reported and the
    remaining items are still attempted.

    Args:
        sample_name: first path component under ``path_base_data``.
        workflow_id: second path component.
        subfolder: last path component; "" puts files directly in the
            workflow folder.
        items: a single source URI, or a list/tuple of them.
    """
    if skip_download:
        return

    # accept a single URI as well as a sequence of URIs
    if not isinstance(items, (list, tuple)):
        items = [items]

    path_output = f"{path_base_data}/{sample_name}/{workflow_id}/{subfolder}"

    os.makedirs(path_output, exist_ok=True)

    for item in items:
        # pass an argument list instead of a shell string so that spaces or
        # shell metacharacters in a URI or path cannot alter the command
        cmd_download = ["aws", "s3", "cp", str(item), path_output]
        print(" ".join(shlex.quote(arg) for arg in cmd_download))

        try:
            exit_code = subprocess.run(cmd_download).returncode
        except OSError as err:
            # e.g. the aws executable is not installed or not on PATH
            print(f"download failed ({err}): {item}")
            continue

        # os.system() used to drop the exit status silently; surface failures
        if exit_code != 0:
            print(f"download failed (exit code {exit_code}): {item}")

In [11]:
# Locate the CiteSeqCount call metadata. Its position in the metadata differs
# between workflow releases, so each known layout is tried in turn. Only lookup
# failures trigger the fallback; anything else (e.g. KeyboardInterrupt)
# propagates instead of being swallowed by a bare `except`.
try:
    # sharp v0.0.4 (before 2020-10-26)
    workflow_name = "Sharp"
    meta_cite_seq_count = metadata["calls"]["Sharp.CiteSeqCount"][0]
except (KeyError, IndexError, TypeError):

    try:
        # sharp v0.0.5
        workflow_name = "Sharp"
        meta_cite_seq_count = metadata["calls"]["Sharp.Preprocess"][0]["subWorkflowMetadata"]["calls"]["Preprocess.CiteSeqCount"][0]
    except (KeyError, IndexError, TypeError):
        # sharp v0.0.9 (calls are keyed under the "Hashtag" workflow name)
        workflow_name = "Hashtag"
        meta_cite_seq_count = metadata["calls"]["Hashtag.Preprocess"][0]["subWorkflowMetadata"]["calls"]["Preprocess.CiteSeqCount"][0]

In [12]:
# pretty-print the CiteSeqCount call metadata; its "outputs" entry is used by the download cells
print(json.dumps(meta_cite_seq_count, indent=2))

{
  "executionStatus": "Done",
  "stdout": "s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-Preprocess/Preprocess/744288fb-47e7-49ef-83e1-d79443b5b9d3/call-CiteSeqCount/CiteSeqCount-stdout.log",
  "compressedDockerSize": 383353488,
  "shardIndex": -1,
  "outputs": {
    "outUncorrected": "s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-Preprocess/Preprocess/744288fb-47e7-49ef-83e1-d79443b5b9d3/call-CiteSeqCount/cacheCopy/results/uncorrected_cells/dense_umis.tsv",
    "outReadCount": [
      "s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-Preprocess/Preprocess/744288fb-47e7-49ef-83e1-d79443b5b9d3/call-CiteSeqCount/cacheCopy/glob-8e7f6a2dd9fb1323e5ebc5c1c063f6df/barcodes.tsv.gz",
      "s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-Preprocess/Preprocess/744288fb-47e7-49ef-83e1-d79443b5b9d3/call-CiteSeqCount/cacheCopy/gl

### Download Read Count

In [13]:
# read-count files go into the "reads" subfolder
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="reads",
    items=meta_cite_seq_count["outputs"]["outReadCount"],
)

aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-Preprocess/Preprocess/744288fb-47e7-49ef-83e1-d79443b5b9d3/call-CiteSeqCount/cacheCopy/glob-8e7f6a2dd9fb1323e5ebc5c1c063f6df/barcodes.tsv.gz ./hashtag/1973_HD1915_7xNK_FB_HTO/c3a941bf-62ed-469e-8ee6-d7448cdee457/reads
aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-Preprocess/Preprocess/744288fb-47e7-49ef-83e1-d79443b5b9d3/call-CiteSeqCount/cacheCopy/glob-8e7f6a2dd9fb1323e5ebc5c1c063f6df/features.tsv.gz ./hashtag/1973_HD1915_7xNK_FB_HTO/c3a941bf-62ed-469e-8ee6-d7448cdee457/reads
aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-Preprocess/Preprocess/744288fb-47e7-49ef-83e1-d79443b5b9d3/call-CiteSeqCount/cacheCopy/glob-8e7f6a2dd9fb1323e5ebc5c1c063f6df/matrix.mtx.gz ./hashtag/1973_HD1915_7xNK_FB_HTO/c3a941bf-62ed-469e-8ee6-d7448cdee457/reads


### Download UMI Count

In [14]:
# UMI-count files go into the "umis" subfolder
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="umis",
    items=meta_cite_seq_count["outputs"]["outUmiCount"],
)

aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-Preprocess/Preprocess/744288fb-47e7-49ef-83e1-d79443b5b9d3/call-CiteSeqCount/cacheCopy/glob-5b2373ebac80816456a7726e786fc4d4/barcodes.tsv.gz ./hashtag/1973_HD1915_7xNK_FB_HTO/c3a941bf-62ed-469e-8ee6-d7448cdee457/umis
aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-Preprocess/Preprocess/744288fb-47e7-49ef-83e1-d79443b5b9d3/call-CiteSeqCount/cacheCopy/glob-5b2373ebac80816456a7726e786fc4d4/features.tsv.gz ./hashtag/1973_HD1915_7xNK_FB_HTO/c3a941bf-62ed-469e-8ee6-d7448cdee457/umis
aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-Preprocess/Preprocess/744288fb-47e7-49ef-83e1-d79443b5b9d3/call-CiteSeqCount/cacheCopy/glob-5b2373ebac80816456a7726e786fc4d4/matrix.mtx.gz ./hashtag/1973_HD1915_7xNK_FB_HTO/c3a941bf-62ed-469e-8ee6-d7448cdee457/umis


## Download Report

In [15]:
# the CiteSeqCount report goes into the "report" subfolder
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="report",
    items=meta_cite_seq_count["outputs"]["outReport"],
)

aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-Preprocess/Preprocess/744288fb-47e7-49ef-83e1-d79443b5b9d3/call-CiteSeqCount/cacheCopy/results/run_report.yaml ./hashtag/1973_HD1915_7xNK_FB_HTO/c3a941bf-62ed-469e-8ee6-d7448cdee457/report


## Get Demux Classification

In [16]:
# first entry of the HtoDemuxKMeans call, keyed by the workflow name detected above
meta_demux = metadata["calls"][f"{workflow_name}.HtoDemuxKMeans"][0]

In [17]:
# pretty-print the demux call metadata; its "outputs" entry holds the classification file location
print(json.dumps(meta_demux, indent=2))

{
  "executionStatus": "Done",
  "stdout": "s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-HtoDemuxKMeans/HtoDemuxKMeans-stdout.log",
  "compressedDockerSize": 331772747,
  "shardIndex": -1,
  "outputs": {
    "outLog": "s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-HtoDemuxKMeans/cacheCopy/demux_kmeans.log",
    "outStats": "s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-HtoDemuxKMeans/cacheCopy/stats.yml",
    "outClass": "s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-HtoDemuxKMeans/cacheCopy/classification.tsv.gz"
  },
  "runtimeAttributes": {
    "failOnStderr": "false",
    "queueArn": "arn:aws:batch:us-east-1:583643567512:job-queue/priority-gwf-core",
    "disks": "local-disk /cromwell_root",
    "continueOnReturnCode": "0",
    "docker": "hisplan/cromwell-hto-demux-kmeans:0.5.0",
    "scriptBucketName": "dp-

In [18]:
# an empty subfolder places the classification file directly in the workflow folder
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="",
    items=meta_demux["outputs"]["outClass"],
)

aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-HtoDemuxKMeans/cacheCopy/classification.tsv.gz ./hashtag/1973_HD1915_7xNK_FB_HTO/c3a941bf-62ed-469e-8ee6-d7448cdee457/


## Download Final AnnData

In [19]:
# Build the output key from the detected workflow name (as the demux lookup
# does) instead of hard-coding "Hashtag", so runs detected as "Sharp" do not
# fail on a key that cannot exist for them.
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="",
    items=metadata["outputs"][f"{workflow_name}.adataFinal"]
)

aws s3 cp s3://dp-lab-gwf-core/cromwell-execution/Hashtag/c3a941bf-62ed-469e-8ee6-d7448cdee457/call-UpdateAnnData/1973_HD1915_7xNK_FB_HTO.h5ad ./hashtag/1973_HD1915_7xNK_FB_HTO/c3a941bf-62ed-469e-8ee6-d7448cdee457/


## Output Sample Name

In [20]:
# 1. DO NOT DELETE
# 2. MUST BE THE FINAL LINE
# NOTE(review): presumably whatever executes this notebook reads the sample
# name from the last printed line — confirm before changing this cell
print(sample_name)

1973_HD1915_7xNK_FB_HTO
