In [1]:
import os
import json

import requests
from requests.exceptions import HTTPError
from requests.auth import HTTPBasicAuth

## Parameters

In [18]:
# this cell is tagged `parameters` and will be overridden by papermill

# ID of the Cromwell workflow whose metadata is fetched; it is also used as a
# directory level in the download destination path.
workflow_id = "af1fa66d-943c-4343-84c7-8d3f476f34c1"
# workflow_id = "36faf89a-e615-4baa-84a3-3de42994ff9f"

# JSON file read by get_secrets(); prep_api_call() reads its "url",
# "username" and "password" keys.
path_secrets_file = "/Users/chunj/keys/secrets-aws.json"
# Root of the download destination:
# {path_base_data}/{sample_name}/{workflow_id}/{subfolder}
path_base_data = "./hashtag"
# When True, download() returns immediately without copying anything.
skip_download = True

## APIs to Cromwell

In [19]:
def get_secrets(path_secrets_file):
    """Read the secrets file and return its parsed JSON content.

    Args:
        path_secrets_file: path to a JSON file.

    Returns:
        The decoded JSON document.
    """
    with open(path_secrets_file, "rt") as handle:
        return json.load(handle)

def prep_api_call(secrets):
    """Build what every workflows-API request needs.

    Args:
        secrets: mapping providing the "url", "username" and "password" keys.

    Returns:
        A ``(base_url, auth)`` tuple: the versioned workflows endpoint under
        ``secrets["url"]`` and an HTTP basic-auth object for the credentials.
    """
    api_version = "v1"
    endpoint = "{}/api/workflows/{}".format(secrets["url"], api_version)
    credentials = HTTPBasicAuth(secrets["username"], secrets["password"])

    return endpoint, credentials

def get_metadata(secrets, workflow_id):
    """Fetch the metadata of a workflow, with sub-workflows expanded.

    Args:
        secrets: mapping with the "url", "username" and "password" keys
            consumed by prep_api_call().
        workflow_id: ID of the workflow to look up.

    Returns:
        The decoded JSON body of the response, or None when the server
        answered with an HTTP error status (the error is printed).
    """
    base_url, auth = prep_api_call(secrets)

    try:
        # Reading metadata is a plain lookup, so issue a GET; the original
        # PATCH is a modifying verb and not what the metadata endpoint expects.
        response = requests.get(
            url=f"{base_url}/{workflow_id}/metadata?expandSubWorkflows=true",
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json"
            },
            auth=auth
        )

        # requests does not raise on 4xx/5xx by itself; without this call the
        # HTTPError handler below is unreachable and an error body would be
        # returned as if it were workflow metadata.
        response.raise_for_status()

        return response.json()

    except HTTPError as err:
        print(err)
        return None

## Get Metadata

In [20]:
# Load the credentials and query Cromwell for this workflow's metadata.
metadata = get_metadata(
    secrets=get_secrets(path_secrets_file),
    workflow_id=workflow_id,
)

In [21]:
# Pretty-print the complete metadata document for inspection.
print(json.dumps(metadata, indent=2))

{
  "workflowName": "Sharp",
  "workflowProcessingEvents": [
    {
      "cromwellId": "cromid-7c5523d",
      "description": "Finished",
      "timestamp": "2020-01-11T04:04:17.363Z",
      "cromwellVersion": "45.1"
    },
    {
      "cromwellId": "cromid-7c5523d",
      "description": "PickedUp",
      "timestamp": "2020-01-11T02:20:04.917Z",
      "cromwellVersion": "45.1"
    }
  ],
  "metadataSource": "Unarchived",
  "actualWorkflowLanguageVersion": "1.0",
  "submittedFiles": {
    "workflow": "version 1.0\n\nimport \"modules/MergeFastq.wdl\" as MergeFastq\nimport \"modules/FastQC.wdl\" as FastQC\nimport \"modules/Cutadapt.wdl\" as Cutadapt\nimport \"modules/CutInDropSpacer.wdl\" as CutInDropSpacer\nimport \"modules/PrepCBWhitelist.wdl\" as PrepCBWhitelist\nimport \"modules/Count.wdl\" as Count\nimport \"modules/HtoDemuxSeurat.wdl\" as HtoDemuxSeurat\nimport \"modules/HtoDemuxKMeans.wdl\" as HtoDemuxKMeans\nimport \"modules/Combine.wdl\" as Combine\n\nworkflow Sharp {\n\n    inpu

## Get Sample Name

In [22]:
# The sample name is read from the "sample" label of the workflow metadata.
sample_name = metadata["labels"]["sample"]
sample_name

'1713_A_mScarlet_HTO'

## Get Counts

In [23]:
def download(sample_name, workflow_id, subfolder, items):
    """Copy one or more S3 objects into the local data directory.

    Files are written to
    ``{path_base_data}/{sample_name}/{workflow_id}/{subfolder}``, which is
    created if missing. Nothing happens when the notebook-level
    ``skip_download`` flag is true.

    Args:
        sample_name: sample name, used as a directory level.
        workflow_id: workflow ID, used as a directory level.
        subfolder: final directory level; may be an empty string.
        items: a single S3 URI or a list/tuple of S3 URIs.

    Returns:
        None. A copy that fails is reported on stdout and does not stop the
        remaining copies.
    """
    # Local import: keeps this cell self-contained without touching the
    # notebook's import cell.
    import subprocess

    if skip_download:
        return

    if not isinstance(items, (list, tuple)):
        items = [items]

    path_output = f"{path_base_data}/{sample_name}/{workflow_id}/{subfolder}"

    os.makedirs(path_output, exist_ok=True)

    for item in items:
        # An argument list (no shell) keeps spaces and shell metacharacters in
        # the URI or the destination path from being reinterpreted.
        cmd_download = ["aws", "s3", "cp", str(item), path_output]
        print(" ".join(cmd_download))

        try:
            completed = subprocess.run(cmd_download, check=False)
        except OSError as err:
            # Raised when the command cannot be started at all, e.g. the
            # aws executable is not on PATH.
            print(f"download could not be started: {err}")
            continue

        # os.system() discarded the exit status, so failed copies went
        # unnoticed; report them while staying best-effort.
        if completed.returncode != 0:
            print(f"download failed (exit code {completed.returncode}): {item}")

In [24]:
# The location of the CiteSeqCount call depends on the sharp version that
# produced the metadata, so try the older layout first and fall back.
try:
    # sharp v0.0.4 (before 2020-10-26)
    meta_cite_seq_count = metadata["calls"]["Sharp.CiteSeqCount"][0]
except (KeyError, IndexError):
    # Only a missing or empty call entry means "different layout"; anything
    # else (including KeyboardInterrupt) must not be swallowed here.
    # sharp v0.0.5
    meta_cite_seq_count = metadata["calls"]["Sharp.Preprocess"][0]["subWorkflowMetadata"]["calls"]["Preprocess.CiteSeqCount"][0]

In [25]:
# Pretty-print the metadata of the CiteSeqCount call for inspection.
print(json.dumps(meta_cite_seq_count, indent=2))

{
  "executionStatus": "Done",
  "stdout": "s3://dp-lab-batch/cromwell-execution/Sharp/af1fa66d-943c-4343-84c7-8d3f476f34c1/call-CiteSeqCount/CiteSeqCount-stdout.log",
  "backendStatus": "Succeeded",
  "compressedDockerSize": 392564039,
  "commandLine": "set -euo pipefail\n\nCITE-seq-Count \\\n    -R1 /cromwell_root/dp-lab-batch/cromwell-execution/Sharp/af1fa66d-943c-4343-84c7-8d3f476f34c1/call-TrimR1/R1.fastq.gz \\\n    -R2 /cromwell_root/dp-lab-batch/cromwell-execution/Sharp/af1fa66d-943c-4343-84c7-8d3f476f34c1/call-TrimR2/R2.fastq.gz \\\n    --tags /cromwell_root/dp-lab-data/sc-seq/Project_10477/A_mScarlet_HTO/Hashtag_results/tag-list.csv \\\n    -cbf 1 -cbl 16 \\\n    -umif 17 -umil 28 \\\n    --bc_collapsing_dist 1 \\\n    --umi_collapsing_dist 1 \\\n    --start-trim 10  \\\n    --expected_cells 0 \\\n    --whitelist /cromwell_root/dp-lab-batch/cromwell-execution/Sharp/af1fa66d-943c-4343-84c7-8d3f476f34c1/call-WhitelistFromSeqcDenseMatrix/cb-whitelist.txt \\\n    --output results 

### Download Read Count

In [26]:
# Fetch the read-count output of CiteSeqCount into the "reads" subfolder.
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="reads",
    items=meta_cite_seq_count["outputs"]["outReadCount"],
)

### Download UMI Count

In [12]:
# Fetch the UMI-count output of CiteSeqCount into the "umis" subfolder.
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="umis",
    items=meta_cite_seq_count["outputs"]["outUmiCount"],
)

aws s3 cp s3://dp-lab-batch/cromwell-execution/Sharp/36faf89a-e615-4baa-84a3-3de42994ff9f/call-Preprocess/Preprocess/3db70e37-6b61-4983-90da-d0ece27e59c5/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/barcodes.tsv.gz ./data/2092_Adeno_fixed_InDrop2_2_HTO/36faf89a-e615-4baa-84a3-3de42994ff9f/umis
aws s3 cp s3://dp-lab-batch/cromwell-execution/Sharp/36faf89a-e615-4baa-84a3-3de42994ff9f/call-Preprocess/Preprocess/3db70e37-6b61-4983-90da-d0ece27e59c5/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/features.tsv.gz ./data/2092_Adeno_fixed_InDrop2_2_HTO/36faf89a-e615-4baa-84a3-3de42994ff9f/umis
aws s3 cp s3://dp-lab-batch/cromwell-execution/Sharp/36faf89a-e615-4baa-84a3-3de42994ff9f/call-Preprocess/Preprocess/3db70e37-6b61-4983-90da-d0ece27e59c5/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/matrix.mtx.gz ./data/2092_Adeno_fixed_InDrop2_2_HTO/36faf89a-e615-4baa-84a3-3de42994ff9f/umis


### Download Report

In [13]:
# Fetch the report output of CiteSeqCount into the "report" subfolder.
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="report",
    items=meta_cite_seq_count["outputs"]["outReport"],
)

aws s3 cp s3://dp-lab-batch/cromwell-execution/Sharp/36faf89a-e615-4baa-84a3-3de42994ff9f/call-Preprocess/Preprocess/3db70e37-6b61-4983-90da-d0ece27e59c5/call-CiteSeqCount/results/run_report.yaml ./data/2092_Adeno_fixed_InDrop2_2_HTO/36faf89a-e615-4baa-84a3-3de42994ff9f/report


## Get Demux Classification

In [14]:
# Take the first entry of the top-level HtoDemuxKMeans call.
meta_demux = metadata["calls"]["Sharp.HtoDemuxKMeans"][0]

In [15]:
# Pretty-print the metadata of the HtoDemuxKMeans call for inspection.
print(json.dumps(meta_demux, indent=2))

{
  "executionStatus": "Done",
  "stdout": "s3://dp-lab-batch/cromwell-execution/Sharp/36faf89a-e615-4baa-84a3-3de42994ff9f/call-HtoDemuxKMeans/HtoDemuxKMeans-stdout.log",
  "backendStatus": "Succeeded",
  "compressedDockerSize": 336074657,
  "commandLine": "set -euo pipefail\n\nmkdir inputs\n\ncp /cromwell_root/dp-lab-batch/cromwell-execution/Sharp/36faf89a-e615-4baa-84a3-3de42994ff9f/call-Preprocess/Preprocess/3db70e37-6b61-4983-90da-d0ece27e59c5/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/barcodes.tsv.gz /cromwell_root/dp-lab-batch/cromwell-execution/Sharp/36faf89a-e615-4baa-84a3-3de42994ff9f/call-Preprocess/Preprocess/3db70e37-6b61-4983-90da-d0ece27e59c5/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/features.tsv.gz /cromwell_root/dp-lab-batch/cromwell-execution/Sharp/36faf89a-e615-4baa-84a3-3de42994ff9f/call-Preprocess/Preprocess/3db70e37-6b61-4983-90da-d0ece27e59c5/call-CiteSeqCount/glob-5b2373ebac80816456a7726e786fc4d4/matrix.mtx.gz ./inputs/\n\n# --hto-umi-

In [16]:
# Fetch the demux classification; the empty subfolder places it directly
# under the per-workflow directory.
download(
    sample_name=sample_name,
    workflow_id=workflow_id,
    subfolder="",
    items=meta_demux["outputs"]["outClass"],
)

aws s3 cp s3://dp-lab-batch/cromwell-execution/Sharp/36faf89a-e615-4baa-84a3-3de42994ff9f/call-HtoDemuxKMeans/classification.tsv.gz ./data/2092_Adeno_fixed_InDrop2_2_HTO/36faf89a-e615-4baa-84a3-3de42994ff9f/


## Output Sample Name

In [17]:
# 1. DO NOT DELETE
# 2. MUST BE THE FINAL LINE
# NOTE(review): presumably whatever executes this notebook reads the sample
# name from this last printed line -- confirm against the caller.
print(sample_name)

2092_Adeno_fixed_InDrop2_2_HTO
