In [1]:
import re, subprocess, boto3, json, shlex, mysql, os, urllib, logging
import pandas as pd
import numpy as np
from s3path import S3Path
from pathlib import Path
from tqdm.notebook import tqdm
from packaging import version
pd.set_option("display.max_colwidth", 40)

# Define Helper Functions

In [2]:
# FASTQ reads/indices required for each workflow
# Keys are workflow names; the notebook later indexes this mapping with `prefix`.
# Values are the FASTQ file ids (e.g. I1, R1, R2) passed to `get_fastqs`.
fastq_map = {
    'Hashtag': ['R1','R2'],
    'CiteSeq': ['R1','R2'],
    'AsapSeq': ['R1','R2','R3'],
    'CellRangerATAC': ['I1','R1','R2','R3'],
    'CellRangerGex': ['I1','R1','R2'],
}

In [3]:
# Get fastq file paths on S3 for each file id
# Returns dictionary from id to list of s3 paths
# Logs a warning and returns None if FASTQs don't exist for any id
def get_fastqs(
    path: str, # path to directory containing FASTQ files
    fastq_file_ids: list, # FASTQ file ids needed for this run type (e.g. I1, R1, R2, etc.)
    folder: str = "",
):
    """Collect the S3 URIs of the FASTQ files for each requested file id.

    Args:
        path: S3 URI of the sample directory (``s3://bucket/prefix``).
        fastq_file_ids: file ids to look for (e.g. ``["I1", "R1", "R2"]``).
        folder: sub-folder of ``path`` that holds the FASTQ files.

    Returns:
        dict mapping each file id to the list of matching ``s3://`` URIs, or
        ``None`` (after logging a warning) as soon as one id has no match.
    """
    # Imported here so the submodule is guaranteed to be loaded; a bare
    # `import urllib` does not by itself make `urllib.parse` available.
    from urllib.parse import urlsplit

    fastqs_by_id = dict()
    _, bucket, key, _, _ = urlsplit(f"{path}/{folder}")
    key_prefix = key.lstrip("/")
    for fid in fastq_file_ids:
        # Raw string with escaped dots: `.fastq.gz` must match literally.
        pattern = re.compile(rf"_{re.escape(fid)}_\d{{3}}\.fastq\.gz$")
        files = get_s3_objects(bucket, key_prefix, pattern)
        if not files:
            # Explicit check instead of `assert`, which is removed under `python -O`.
            logging.warning(
                "%s\n\t %s", f"AssertionError: Missing `{fid}` archives!", path
            )
            return None
        # Build URIs by formatting; os.path.join is for filesystem paths.
        fastqs_by_id[fid] = [f"s3://{bucket}/{f}" for f in files]
    return fastqs_by_id

In [4]:
# from SCRIdb
def get_s3_objects(bucket, key, pattern, full_uri=False):
    """List the object keys under an S3 prefix that match a compiled regex.

    Args:
        bucket: bucket name.
        key: key prefix passed to the bucket's object filter.
        pattern: compiled regular expression; a key is kept when
            ``pattern.search(key)`` finds a match.
        full_uri: when true, return ``s3://bucket/key`` strings instead of keys.

    Returns:
        list of matching keys (or full URIs), in listing order.
    """
    s3_bucket = boto3.resource("s3").Bucket(bucket)
    matched = [
        item.key
        for item in s3_bucket.objects.filter(Prefix=key)
        if pattern.search(item.key)
    ]
    if not full_uri:
        return matched
    return [f"s3://{bucket}/{k}" for k in matched]

In [5]:
# Extract FASTQ sample name from list of files
# Note: FASTQ name is the file name before the `_S<n>_L<lane>_<read>_<nnn>.fastq.gz` suffix
def get_fastqs_name(fastqs):
    """Return the single sample name shared by a collection of FASTQ paths.

    Args:
        fastqs: iterable of FASTQ paths/URIs (a list or a numpy array).

    Returns:
        The part of the file name preceding ``_S<n>_L<lane>_<read>_<nnn>.fastq.gz``.

    Raises:
        ValueError: a path does not follow the expected naming scheme.
        AssertionError: the paths do not all share exactly one sample name
            (this includes an empty input).
    """
    # Dots are escaped so that `.fastq.gz` is matched literally.
    fastq_name_re = re.compile(r".*/(.*)_S\d+_L\d{3}_[A-Za-z]\d_\d{3}\.fastq\.gz$")
    fastq_names = []
    for path in fastqs:
        hit = fastq_name_re.match(path)
        if hit is None:
            raise ValueError(f"Unrecognized FASTQ file name: {path}")
        fastq_names.append(hit[1])
    unique_names = set(fastq_names)
    # Raised explicitly (not via `assert`) so the check survives `python -O`.
    if len(unique_names) != 1:
        raise AssertionError(
            f"Expected exactly one FASTQ sample name, found {sorted(unique_names)}"
        )
    return fastq_names[0]

In [6]:
# Numpy encoder for JSON from pandas series
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts numpy scalars and arrays to built-in types.

    Pass as ``cls=NpEncoder`` to ``json.dump``/``json.dumps``.
    """

    def default(self, obj):
        """Convert numpy values the stock encoder rejects; defer everything else."""
        # np.bool_ is not a Python bool, so the stock encoder would raise on it.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # Base implementation raises TypeError for unsupported objects.
        return super().default(obj)

In [7]:
def execute_query(query, user, password, params=None):
    """Run one SQL statement against the lab database and return all rows.

    Args:
        query: SQL text. Values can be bound with ``%s`` placeholders plus
            ``params`` instead of being formatted into the string.
        user: database user name.
        password: database password.
        params: optional sequence or dict of values bound to the placeholders
            by the driver. ``None`` (default) sends ``query`` unchanged.

    Returns:
        The list returned by ``cursor.fetchall()``.

    Note:
        ``connect`` is not defined in this cell; it is imported from
        ``mysql.connector`` in a later cell, which must run before the first call.
    """
    with connect(
        host="peer-lab-db.cggxmlwgzzpw.us-east-1.rds.amazonaws.com",
        database="peer_lab_db",
        user=user,
        password=password,
    ) as connection:
        with connection.cursor(buffered=True) as cursor:
            if params is None:
                cursor.execute(query)
            else:
                # Driver-side binding avoids quoting/injection issues.
                cursor.execute(query, params)
            return cursor.fetchall()

In [8]:
# Get species from database for given sample
from mysql.connector import connect, Error

def get_species(sample_id, user, password):
    """Look up the species recorded for a sample.

    Joins species -> genome_index -> sample_data and returns the first column
    of the first row. On a mysql.connector ``Error`` the message is printed
    and ``None`` is returned. A query that yields no rows makes the ``[0][0]``
    lookup raise.
    """
    samples_tbl = "peer_lab_db.sample_data"
    species_tbl = "peer_lab_db.species"
    genome_tbl = "peer_lab_db.genome_index"
    sql = f"""
        SELECT {species_tbl}.Species
        FROM {species_tbl}
        LEFT JOIN {genome_tbl}
        ON {species_tbl}.id = {genome_tbl}.species_id
        LEFT JOIN {samples_tbl}
        ON {genome_tbl}.id = {samples_tbl}.genomeIndex_id
        WHERE {samples_tbl}.id = {sample_id}
        """
    try:
        rows = execute_query(sql, user, password)
        return rows[0][0]
    except Error as e:
        print(f"Error: {e}")
    return None

In [9]:
# Get single-cell technology from database for given sample
from mysql.connector import connect, Error

def get_sc_tech(sample_id, user, password):
    """Look up the single-cell technology (``sc_Tech``) recorded for a sample.

    Joins sc_tech -> genome_index -> sample_data and returns the first column
    of the first row. On a mysql.connector ``Error`` the message is printed
    and ``None`` is returned. A query that yields no rows makes the ``[0][0]``
    lookup raise.
    """
    samples_tbl = "peer_lab_db.sample_data"
    tech_tbl = "peer_lab_db.sc_tech"
    genome_tbl = "peer_lab_db.genome_index"
    sql = f"""
        SELECT {tech_tbl}.sc_Tech
        FROM {tech_tbl}
        LEFT JOIN {genome_tbl}
        ON {tech_tbl}.id = {genome_tbl}.scTech_id
        LEFT JOIN {samples_tbl}
        ON {genome_tbl}.id = {samples_tbl}.genomeIndex_id
        WHERE {samples_tbl}.id = {sample_id}
        """
    try:
        rows = execute_query(sql, user, password)
        return rows[0][0]
    except Error as e:
        print(f"Error: {e}")
    return None

In [10]:
# Get database id for given sample name
from mysql.connector import connect, Error

def get_sample_id(sample_name, user, password):
    """Return the ``sample_data.id`` of the first row whose ``Sample`` equals ``sample_name``.

    On a mysql.connector ``Error`` the message is printed and ``None`` is
    returned. A query that yields no rows makes the ``[0][0]`` lookup raise.

    NOTE: a later cell of this notebook defines
    ``get_sample_id(query, query_col, creds)``, which replaces this
    definition once that cell has run.
    """
    samples_tbl = "peer_lab_db.sample_data"
    sql = f"""
        SELECT {samples_tbl}.id
        FROM {samples_tbl}
        WHERE {samples_tbl}.Sample="{sample_name}"
        """
    try:
        rows = execute_query(sql, user, password)
        return rows[0][0]
    except Error as e:
        print(f"Error: {e}")
    return None

In [11]:
# Get project name from database for given sample
from mysql.connector import connect, Error

def get_project_id(sample_id, user, password):
    """Return the ``projectName`` of the project a sample belongs to.

    Despite the function name, the selected column is the project name.
    On a mysql.connector ``Error`` the message is printed and ``None`` is
    returned. A query that yields no rows makes the ``[0][0]`` lookup raise.

    NOTE: a later cell of this notebook defines
    ``get_project_id(sample_id, creds)``, which replaces this definition
    once that cell has run.
    """
    samples_tbl = "peer_lab_db.sample_data"
    projects_tbl = "peer_lab_db.project_data"
    sql = f"""
        SELECT {projects_tbl}.projectName
        FROM {projects_tbl}
        LEFT JOIN {samples_tbl}
        ON {projects_tbl}.id = {samples_tbl}.projectData_id
        WHERE {samples_tbl}.id = {sample_id}
        """
    try:
        rows = execute_query(sql, user, password)
        return rows[0][0]
    except Error as e:
        print(f"Error: {e}")
    return None

In [12]:
def get_SEQC_version(loc):
    """Read the SEQC version from a sample's ``seqc_log.txt`` on S3.

    Streams ``{loc}/seqc-results/seqc_log.txt`` to stdout with ``aws s3 cp``
    and returns the first ``SEQC=v<major>.<minor>.<patch>`` found in the log.

    Args:
        loc: S3 URI of the sample directory.

    Returns:
        The version string without the leading ``v``, or ``"N/A"`` when the
        log cannot be read or contains no version.
    """
    try:
        # Argument list (no shell-style splitting) keeps paths with spaces intact.
        cmd = ["aws", "s3", "cp", f"{loc}/seqc-results/seqc_log.txt", "-"]
        log_text = subprocess.run(
            cmd, universal_newlines=True, capture_output=True
        ).stdout
        # `search` scans the whole log; `match` with a leading `.*` stops at
        # the first newline and therefore only inspected the first line.
        hit = re.search(r"SEQC=v(\d+\.\d+\.\d+)", log_text)
        return hit[1] if hit else "N/A"
    except Exception:
        # Best effort: any failure (e.g. aws CLI missing) yields "N/A".
        return "N/A"

In [13]:
def get_file_prefix(loc):
    """Derive the SEQC output file prefix from the aligned BAM in a sample folder.

    Lists ``{loc}/seqc-results/`` with ``aws s3 ls`` and returns the part of
    the first ``*_Aligned.out.bam`` entry that precedes ``_Aligned.out.bam``.

    Args:
        loc: S3 URI of the sample directory.

    Raises:
        ValueError: the listing failed or contains no aligned BAM file.
    """
    # Note: I'm expecting the aligned bam file to be in loc
    bam_pattern = re.compile(r"(.*)_Aligned\.out\.bam$")
    try:
        # Argument list (no shell-style splitting) keeps paths with spaces intact.
        cmd = ["aws", "s3", "ls", f"{loc}/seqc-results/"]
        listing = subprocess.run(
            cmd, universal_newlines=True, capture_output=True
        ).stdout
        # The listing is split on whitespace and each token is tested.
        for token in listing.split():
            hit = bam_pattern.match(token)
            if hit:
                return hit[1]
    except Exception as err:
        # Keep the single error type callers see, but preserve the cause.
        raise ValueError(f"BAM file not found in {loc}") from err
    raise ValueError(f"BAM file not found in {loc}")

In [14]:
# Reference archive URL for each Cell Ranger workflow and species
# Keys: workflow name (same values as `prefix`) -> species name -> URL
# Should replace with JSON file
cr_reference_map = {
    'CellRangerArc':
    {
        'Human': "https://cf.10xgenomics.com/supp/cell-arc/refdata-cellranger-arc-GRCh38-2020-A.tar.gz",
        'Mouse': "https://cf.10xgenomics.com/supp/cell-arc/refdata-cellranger-arc-mm10-2020-A-2.0.0.tar.gz",
    },
    'CellRangerATAC':
    {
        'Human': "https://cf.10xgenomics.com/supp/cell-atac/refdata-cellranger-arc-GRCh38-2020-A-2.0.0.tar.gz",
        'Mouse': "https://cf.10xgenomics.com/supp/cell-atac/refdata-cellranger-arc-mm10-2020-A-2.0.0.tar.gz",
    },
    'CellRangerGex':
    {
        'Human': "https://cf.10xgenomics.com/supp/cell-exp/refdata-gex-GRCh38-2020-A.tar.gz",
        'Mouse': "https://cf.10xgenomics.com/supp/cell-exp/refdata-gex-mm10-2020-A.tar.gz",
    },
}

def get_cr_reference(sample_id, prefix, user, password):
    """Return the reference URL for a sample's species under a given workflow.

    Args:
        sample_id: database id of the sample, used to look up its species.
        prefix: workflow name; must be a key of ``cr_reference_map``.
        user: database user name.
        password: database password.

    Raises:
        ValueError: ``prefix`` is not a known workflow, or the species found
            for the sample has no reference under that workflow.
    """
    # Get species from database to decide reference
    species = get_species(sample_id, user, password)

    # Map to reference locations
    references = cr_reference_map.get(prefix)
    if references is None:
        # Previously reported as "Unknown Species", hiding the real cause.
        raise ValueError(f"Unknown Workflow: {prefix}")
    try:
        return references[species]
    except (KeyError, TypeError):
        raise ValueError(f"Unknown Species: {species}")

In [15]:
def get_bc_whitelist(sample_id):
    """Pick the barcode whitelist file matching a sample's single-cell technology.

    Database credentials are read from the module-level ``creds`` dict, which
    is loaded in a later cell of this notebook; that cell must run first.

    Args:
        sample_id: database id of the sample.

    Returns:
        S3 URI of the whitelist for technologies whose name contains
        ``"V3"`` or ``"V2"``.

    Raises:
        ValueError: the technology is missing or is neither V3 nor V2.
    """
    # Get version from database to decide whitelist
    sc_tech = get_sc_tech(sample_id, creds["user"], creds["password"])

    # get_sc_tech returns None after a database error; treat that as an
    # unknown technology instead of failing with TypeError on `in`.
    tech = sc_tech or ""

    # Map to reference locations
    if "V3" in tech:
        return "s3://seqc-public/barcodes/ten_x_v3/flat/3M-february-2018.txt"
    if "V2" in tech:
        return "s3://seqc-public/barcodes/ten_x_v2/flat/737K-august-2016.txt"
    raise ValueError(f"Unknown Technology: {sc_tech}")

In [16]:
def run(
    workflow_path: str,
    execp: str,
    secrets: str,
    inputs: str,
    labels: str,
    options: str,
):
    """Execute the pipeline submit script from inside the workflow directory.

    Runs ``{workflow_path}/{execp} -k secrets -i inputs -l labels -o options``
    with ``workflow_path`` as the child's working directory.

    Args:
        workflow_path: directory of the pipeline package.
        execp: executable name relative to ``workflow_path``.
        secrets: path passed with ``-k``.
        inputs: path passed with ``-i``.
        labels: path passed with ``-l``.
        options: path passed with ``-o``.

    Returns:
        dict with the keys ``args``, ``returncode``, ``stdout`` and ``stderr``
        of the completed process.
    """
    # Argument list instead of splitting a formatted string, so paths that
    # contain spaces stay single arguments.
    cmd = [
        f"{workflow_path}/{execp}",
        "-k", str(secrets),
        "-i", str(inputs),
        "-l", str(labels),
        "-o", str(options),
    ]

    # `cwd=` scopes the directory change to the child process. The notebook's
    # own working directory is never touched, so nothing needs restoring if
    # the launch raises.
    completed = subprocess.run(
        cmd, cwd=workflow_path, universal_newlines=True, capture_output=True
    )

    return {
        "args": completed.args,
        "returncode": completed.returncode,
        "stdout": completed.stdout,
        "stderr": completed.stderr,
    }

## Additional functions

### DB query

In [17]:
# Common query col: id, request_id, Sample
def _select_sample_data_column(column, query, query_col, creds):
    """Return ``column`` for every sample_data row where ``query_col`` equals ``query``.

    Shared implementation of get_sample_name / get_aws_path / get_sample_id.
    Returns a list with one value per matching row (empty when nothing
    matches). On a mysql.connector ``Error`` the message is printed and
    ``None`` is returned.

    NOTE: ``query_col`` and ``query`` are formatted directly into the SQL
    text; only pass trusted values.
    """
    user = creds['user']
    password = creds['password']

    table_sample_data = "peer_lab_db.sample_data"
    sql = f"""
        SELECT {table_sample_data}.{column}
        FROM {table_sample_data}
        WHERE {table_sample_data}.{query_col}="{query}"
        """
    try:
        rows = execute_query(sql, user, password)
    except Error as e:
        print(f"Error: {e}")
        return None
    return [row[0] for row in rows]


def get_sample_name(query, query_col, creds):
    """Return the ``Sample`` names of all rows where ``query_col`` equals ``query``."""
    return _select_sample_data_column("Sample", query, query_col, creds)


def get_aws_path(query, query_col, creds):
    """Return the ``AWS_storage`` paths of all rows where ``query_col`` equals ``query``."""
    return _select_sample_data_column("AWS_storage", query, query_col, creds)


def get_sample_id(query, query_col, creds):
    """Return the ``id`` values of all rows where ``query_col`` equals ``query``."""
    return _select_sample_data_column("id", query, query_col, creds)
        
def format_sample_aws(querys, query_col, creds):
    """Build the sample table (S3 path and database id) for a list of query values.

    Args:
        querys: values to look up, e.g. request ids.
        query_col: sample_data column the values are matched against
            (commonly ``id``, ``request_id`` or ``Sample``).
        creds: dict with the database ``user`` and ``password``.

    Returns:
        DataFrame indexed by sample name with string columns ``S3_Path`` and
        ``Sample_ID``, one row per matching sample.
    """
    sample_names = []
    sample_paths = []
    sample_ids = []

    for query in querys:
        sample_names += get_sample_name(query, query_col, creds)
        sample_paths += get_aws_path(query, query_col, creds)
        sample_ids += get_sample_id(query, query_col, creds)

    # Remove trailing slashes only; `strip` would also eat leading ones.
    sample_paths = [s.rstrip('/') for s in sample_paths]

    samples = pd.DataFrame(
        dict(S3_Path=sample_paths, Sample_ID=sample_ids),
        index=sample_names,
        dtype=str,
    )
    return samples

In [18]:
def _select_last_value(table, column, query, query_col, creds):
    """Return ``column`` from the last row of ``table`` where ``query_col`` equals ``query``.

    Shared implementation of the single-value lookups below. Returns ``None``
    when no row matches (previously an UnboundLocalError) and also after a
    mysql.connector ``Error``, whose message is printed.

    NOTE: ``query_col`` and ``query`` are formatted directly into the SQL
    text; only pass trusted values.
    """
    user = creds['user']
    password = creds['password']

    sql = f"""
        SELECT {table}.{column}
        FROM {table}
        WHERE {table}.{query_col}="{query}"
        """
    try:
        rows = execute_query(sql, user, password)
    except Error as e:
        print(f"Error: {e}")
        return None
    if not rows:
        return None
    # The original loops kept the value of the final row.
    return rows[-1][0]


def get_genomeIndex_id(query, query_col, creds):
    """Return ``sample_data.genomeIndex_id`` for the matching sample row."""
    return _select_last_value(
        "peer_lab_db.sample_data", "genomeIndex_id", query, query_col, creds
    )


def get_scTech_id(query, query_col, creds):
    """Return ``genome_index.scTech_id`` for the matching genome index row."""
    return _select_last_value(
        "peer_lab_db.genome_index", "scTech_id", query, query_col, creds
    )


def get_index(query, query_col, creds):
    """Return ``genome_index.gIndex`` for the matching genome index row."""
    return _select_last_value(
        "peer_lab_db.genome_index", "gIndex", query, query_col, creds
    )


def get_assay(query, query_col, creds):
    """Return ``sc_tech.Run_name`` for the matching technology row."""
    return _select_last_value(
        "peer_lab_db.sc_tech", "Run_name", query, query_col, creds
    )


def get_barcode(query, query_col, creds):
    """Return ``sc_tech.barcodes`` for the matching technology row."""
    return _select_last_value(
        "peer_lab_db.sc_tech", "barcodes", query, query_col, creds
    )
        
        
def format_assay_barcode(samples, creds):
    """Add ``Reference`` and ``assay`` columns to the sample table.

    For each row, ``Sample_ID`` is resolved to its genome index and
    technology records; the stripped ``gIndex`` value becomes ``Reference``
    and the stripped ``Run_name`` value becomes ``assay``.

    Args:
        samples: DataFrame with a ``Sample_ID`` column; modified in place.
        creds: dict with the database ``user`` and ``password``.

    Returns:
        The same ``samples`` object with the two columns added. A lookup
        that yields ``None`` is stored as NaN so later ``notna`` checks
        catch it.
    """
    def _clean(value):
        # None marks a failed lookup; keep it visible as a missing value.
        return np.nan if value is None else value.strip()

    references = []
    assays = []

    for _, row in samples.iterrows():
        sample_id = row['Sample_ID']

        genomeIndex_id = get_genomeIndex_id(sample_id, 'id', creds)
        index = get_index(genomeIndex_id, 'id', creds)
        scTech_id = get_scTech_id(genomeIndex_id, 'id', creds)
        assay = get_assay(scTech_id, 'id', creds)
        # The barcode lookup was dropped: its result was never stored.

        references.append(_clean(index))
        assays.append(_clean(assay))

    # Assign whole columns at once. Pre-filling with float NaN and then
    # writing strings cell by cell triggers pandas' incompatible-dtype warning.
    samples['Reference'] = references
    samples['assay'] = assays

    return samples

In [19]:
def get_project_id(sample_id, creds):
    """Return the ``projectName`` of the project a sample belongs to.

    Despite the function name, the selected column is the project name.
    ``creds`` must provide ``user`` and ``password``. On a mysql.connector
    ``Error`` the message is printed and ``None`` is returned. A query that
    yields no rows makes the ``[0][0]`` lookup raise.
    """
    db_user, db_password = creds['user'], creds['password']

    samples_tbl = "peer_lab_db.sample_data"
    projects_tbl = "peer_lab_db.project_data"
    sql = f"""
        SELECT {projects_tbl}.projectName
        FROM {projects_tbl}
        LEFT JOIN {samples_tbl}
        ON {projects_tbl}.id = {samples_tbl}.projectData_id
        WHERE {samples_tbl}.id = {sample_id}
        """
    try:
        rows = execute_query(sql, db_user, db_password)
        return rows[0][0]
    except Error as e:
        print(f"Error: {e}")
    return None

# Process Samples

## Setup

In [20]:
# Location of docker files
# Written to the `<prefix>.dockerRegistry` input field of every sample.
common_docker_registry = "quay.io/hisplan"

# `prefix` selects the entry of `fastq_map` and is the namespace of every
# input field name (e.g. `CellRangerGex.sampleName`).
prefix = "CellRangerGex" # Workflow to run; also .wdl filename prefix
pipeline_type = prefix # field in *.labels.json
# Folder name appended to each sample's S3 path to form the label `destination`.
output_dirname = "cr-gex-results"

# If need to add comment, put here
comment = "sohailn"

In [21]:
# Locations of workflow-related directories and files
# All paths are built from the current user's home directory; adjust the
# lines marked CHANGE THIS for your own checkout.
path_to_cromwell_secrets = f"{Path.home()}/.cromwell/cromwell-secrets.json" # CHANGE THIS
workflow_dir = f"{Path.home()}/scing/bin/cellranger-gex-6.1.2" # CHANGE THIS
path_to_exec = f"{workflow_dir}/submit.sh" # CHANGE THIS FOR SHARP
# config_dir holds template.inputs.json / template.labels.json and receives
# the per-sample *.inputs.json / *.labels.json files written before submission.
config_dir = f"{workflow_dir}/configs"
path_to_options = f"{workflow_dir}/{prefix}.options.aws.json"

# Other file locations
db_credentials_path = f"{Path.home()}/.config.json" # CHANGE THIS

In [22]:
# Set credentials based on SCRIdb CLI config file
# `creds` must provide at least the keys 'user' and 'password', which the
# database helper functions read.
with open(db_credentials_path) as f:
    creds = json.load(f)

## Get sample Information

In [25]:
# Common query col: id, request_id, Sample
# Values to look up in the column named in the call below; every sample_data
# row matching one of them becomes a row of `samples`.
request_ids = ['YM-1704', ]

samples = format_sample_aws(request_ids, 'request_id', creds)
samples

Unnamed: 0,S3_Path,Sample_ID
YM-1704_Krt4eGFP,s3://dp-lab-data/collaborators/white...,3845
YM-1704_Krt4TWIST,s3://dp-lab-data/collaborators/white...,3846


In [26]:
# Add the `Reference` and `assay` columns looked up from the database.
# format_assay_barcode modifies `samples` in place and returns the same object.
samples = format_assay_barcode(samples, creds)
samples

Unnamed: 0,S3_Path,Sample_ID,Reference,assay
YM-1704_Krt4eGFP,s3://dp-lab-data/collaborators/white...,3845,s3://dp-lab-data/collaborators/white...,CR
YM-1704_Krt4TWIST,s3://dp-lab-data/collaborators/white...,3846,s3://dp-lab-data/collaborators/white...,CR


In [27]:
# Get FASTQ paths from S3
# Note: Uses same FASTQ file ids for all samples
# Each cell of `FASTQs` is a dict from file id to list of S3 URIs, or None
# when get_fastqs found no file for one of the ids (a warning is logged).
fastq_file_ids = fastq_map[prefix]
samples["FASTQs"] = samples["S3_Path"].apply(lambda x: get_fastqs(x, fastq_file_ids, "FASTQ"))
samples

Unnamed: 0,S3_Path,Sample_ID,Reference,assay,FASTQs
YM-1704_Krt4eGFP,s3://dp-lab-data/collaborators/white...,3845,s3://dp-lab-data/collaborators/white...,CR,{'I1': ['s3://dp-lab-data/collaborat...
YM-1704_Krt4TWIST,s3://dp-lab-data/collaborators/white...,3846,s3://dp-lab-data/collaborators/white...,CR,{'I1': ['s3://dp-lab-data/collaborat...


In [28]:
# Show the full, untruncated reference locations (the table view clips long strings).
samples['Reference'].values

array(['s3://dp-lab-data/collaborators/whiter/YM-1704_transgene_reference/refdata-cellranger/',
       's3://dp-lab-data/collaborators/whiter/YM-1704_transgene_reference/refdata-cellranger/'],
      dtype=object)

In [29]:
# Set path to transgene reference S3
# Note: This is an exceptional case
# path_to_reference = f"{common_dir}/transgene_reference/refdata-cellranger/3PS19_SNSEQ-GRCm38-Ensembl-87-transgenes.tar.gz"
# path_to_reference = "https://cf.10xgenomics.com/supp/cell-exp/refdata-gex-mm10-2020-A.tar.gz"

## Inputs

In [30]:
# Load minimum inputs and labels fields from templates
# Only the template's key names are used; its values are discarded.
with open(f"{config_dir}/template.inputs.json") as f:
    std_inputs_fields = list(json.load(f).keys())
    
# Annotate all samples with workflow inputs and labels
# Start from an all-empty frame: one row per sample, one column per template field.
inputs = pd.DataFrame(index=samples.index, columns=std_inputs_fields,)

inputs

Unnamed: 0,CellRangerGex.sampleName,CellRangerGex.fastqName,CellRangerGex.inputFastq,CellRangerGex.referenceUrl,CellRangerGex.includeIntrons,CellRangerGex.expectCells,CellRangerGex.memory,CellRangerGex.dockerRegistry
YM-1704_Krt4eGFP,,,,,,,,
YM-1704_Krt4TWIST,,,,,,,,


In [31]:
# Annotate inputs
# get_fastqs returns None for a sample with missing FASTQ files; stop here
# with a clear message instead of failing later on `None.values()`.
missing_fastqs = samples.index[samples["FASTQs"].isna()]
assert missing_fastqs.empty, f"FASTQs missing for: {list(missing_fastqs)}"

inputs[f"{prefix}.sampleName"] = inputs.index # may need to change
# Flatten the per-read lists into one list per sample. A plain comprehension
# also works when the reads have different numbers of files, where np.ravel
# on a ragged list fails or yields nested lists.
inputs[f"{prefix}.inputFastq"] = samples["FASTQs"].apply(
    lambda by_id: [uri for uris in by_id.values() for uri in uris]
)
inputs[f"{prefix}.fastqName"] = inputs[f"{prefix}.inputFastq"].apply(get_fastqs_name)
inputs[f"{prefix}.referenceUrl"] = samples["Reference"]
inputs[f"{prefix}.includeIntrons"] = False
inputs[f"{prefix}.expectCells"] = 5000
inputs[f"{prefix}.memory"] = 256
inputs[f"{prefix}.dockerRegistry"] = common_docker_registry

inputs

Unnamed: 0,CellRangerGex.sampleName,CellRangerGex.fastqName,CellRangerGex.inputFastq,CellRangerGex.referenceUrl,CellRangerGex.includeIntrons,CellRangerGex.expectCells,CellRangerGex.memory,CellRangerGex.dockerRegistry
YM-1704_Krt4eGFP,YM-1704_Krt4eGFP,3845_YM-1704_Krt4eGFP_IGO_12437_AO_37,[s3://dp-lab-data/collaborators/whit...,s3://dp-lab-data/collaborators/white...,False,5000,256,quay.io/hisplan
YM-1704_Krt4TWIST,YM-1704_Krt4TWIST,3846_YM-1704_Krt4TWIST_IGO_12437_AO_38,[s3://dp-lab-data/collaborators/whit...,s3://dp-lab-data/collaborators/white...,False,5000,256,quay.io/hisplan


## Labels

In [32]:
# Load the label field names from the template; its values are discarded.
with open(f"{config_dir}/template.labels.json") as f:
    std_labels_fields = list(json.load(f).keys())
    
# Annotate all samples with workflow inputs and labels
# Start from an all-empty frame: one row per sample, one column per template field.
labels = pd.DataFrame(index=samples.index, columns=std_labels_fields,)
labels

Unnamed: 0,pipelineType,project,sample,owner,destination,transfer,comment
YM-1704_Krt4eGFP,,,,,,,
YM-1704_Krt4TWIST,,,,,,,


In [33]:
# Fill in the label fields for every sample.
labels["pipelineType"] = pipeline_type
# One database lookup per sample; the stored value is the project name.
labels["project"] = samples["Sample_ID"].apply(lambda x: get_project_id(x, creds))
labels["sample"] = labels.index
labels["owner"] = creds["user"]
labels["destination"] = samples['S3_Path'] + "/" + output_dirname
labels["transfer"] = "-"
# Use the free-text `comment` from the setup cell; the user name was written
# here before, which left that setting without effect.
labels["comment"] = comment

labels

Unnamed: 0,pipelineType,project,sample,owner,destination,transfer,comment
YM-1704_Krt4eGFP,CellRangerGex,Zebrafish KC-MC,YM-1704_Krt4eGFP,sohailn,s3://dp-lab-data/collaborators/white...,-,sohailn
YM-1704_Krt4TWIST,CellRangerGex,Zebrafish KC-MC,YM-1704_Krt4TWIST,sohailn,s3://dp-lab-data/collaborators/white...,-,sohailn


## Execution

In [34]:
# Final review of the inputs table before submission.
inputs

Unnamed: 0,CellRangerGex.sampleName,CellRangerGex.fastqName,CellRangerGex.inputFastq,CellRangerGex.referenceUrl,CellRangerGex.includeIntrons,CellRangerGex.expectCells,CellRangerGex.memory,CellRangerGex.dockerRegistry
YM-1704_Krt4eGFP,YM-1704_Krt4eGFP,3845_YM-1704_Krt4eGFP_IGO_12437_AO_37,[s3://dp-lab-data/collaborators/whit...,s3://dp-lab-data/collaborators/white...,False,5000,256,quay.io/hisplan
YM-1704_Krt4TWIST,YM-1704_Krt4TWIST,3846_YM-1704_Krt4TWIST_IGO_12437_AO_38,[s3://dp-lab-data/collaborators/whit...,s3://dp-lab-data/collaborators/white...,False,5000,256,quay.io/hisplan


In [35]:
# Final review of the labels table before submission.
labels

Unnamed: 0,pipelineType,project,sample,owner,destination,transfer,comment
YM-1704_Krt4eGFP,CellRangerGex,Zebrafish KC-MC,YM-1704_Krt4eGFP,sohailn,s3://dp-lab-data/collaborators/white...,-,sohailn
YM-1704_Krt4TWIST,CellRangerGex,Zebrafish KC-MC,YM-1704_Krt4TWIST,sohailn,s3://dp-lab-data/collaborators/white...,-,sohailn


In [36]:
# Validate the tables against the templates before writing or submitting
# anything. Separate asserts with messages show which check failed.
assert std_inputs_fields == list(inputs.columns), "inputs columns differ from template.inputs.json"
assert inputs.notna().values.all(), "inputs contains missing values"
assert std_labels_fields == list(labels.columns), "labels columns differ from template.labels.json"
assert labels.notna().values.all(), "labels contains missing values"

stdouts = [] # to store all outputs
process = True # set to False to only write the JSON files without submitting

# Honor the `path_to_exec` setting instead of a hard-coded script name.
execp = os.path.relpath(path_to_exec, workflow_dir)

with tqdm(samples.index) as t:

    for sample_name in t:

        # Write inputs and labels to file
        path_to_inputs = f"{config_dir}/{sample_name}.inputs.json"
        with open(path_to_inputs, "w") as f_inputs:
            json.dump(inputs.loc[sample_name].to_dict(), f_inputs, indent=4, cls=NpEncoder)

        path_to_labels = f"{config_dir}/{sample_name}.labels.json"
        with open(path_to_labels, "w") as f_labels:
            json.dump(labels.loc[sample_name].to_dict(), f_labels, indent=4, cls=NpEncoder)

        if process:
            result = run(
                workflow_path = workflow_dir,
                execp = execp,
                secrets = path_to_cromwell_secrets,
                inputs = path_to_inputs,
                labels = path_to_labels,
                options = path_to_options,
            )
            # Surface failed submissions immediately; the full result is still kept.
            if result["returncode"] != 0:
                logging.warning(
                    "Submission failed for %s (exit code %s): %s",
                    sample_name, result["returncode"], result["stderr"],
                )
            stdouts.append(result)

  0%|          | 0/2 [00:00<?, ?it/s]

In [37]:
# Paths written for the last sample processed by the submission loop.
print(path_to_inputs)
print(path_to_labels)

/Users/sohailn/scing/bin/cellranger-gex-6.1.2/configs/YM-1704_Krt4TWIST.inputs.json
/Users/sohailn/scing/bin/cellranger-gex-6.1.2/configs/YM-1704_Krt4TWIST.labels.json


In [38]:
# One dict per submitted sample with the command's args, returncode, stdout and stderr.
stdouts

[{'args': ['/Users/sohailn/scing/bin/cellranger-gex-6.1.2/submit.sh',
   '-k',
   '/Users/sohailn/.cromwell/cromwell-secrets.json',
   '-i',
   '/Users/sohailn/scing/bin/cellranger-gex-6.1.2/configs/YM-1704_Krt4eGFP.inputs.json',
   '-l',
   '/Users/sohailn/scing/bin/cellranger-gex-6.1.2/configs/YM-1704_Krt4eGFP.labels.json',
   '-o',
   '/Users/sohailn/scing/bin/cellranger-gex-6.1.2/CellRangerGex.options.aws.json'],
  'returncode': 0,
  'stdout': '{"id":"73ae5fde-6551-48f7-8a1e-c9278953d334","status":"Submitted"}\n',
  'stderr': ''},
 {'args': ['/Users/sohailn/scing/bin/cellranger-gex-6.1.2/submit.sh',
   '-k',
   '/Users/sohailn/.cromwell/cromwell-secrets.json',
   '-i',
   '/Users/sohailn/scing/bin/cellranger-gex-6.1.2/configs/YM-1704_Krt4TWIST.inputs.json',
   '-l',
   '/Users/sohailn/scing/bin/cellranger-gex-6.1.2/configs/YM-1704_Krt4TWIST.labels.json',
   '-o',
   '/Users/sohailn/scing/bin/cellranger-gex-6.1.2/CellRangerGex.options.aws.json'],
  'returncode': 0,
  'stdout': '{"id