In [44]:
from __future__ import print_function
import os.path
import dalmatian as dm
import pandas as pd
import sys
pathtoJK = "../JKBio"
sys.path.insert(0, pathtoJK)
import TerraFunction as terra
from Helper import *
import numpy as np
from gsheets import Sheets
#%load_ext autoreload
#%autoreload 2
# https://github.com/jkobject/JKBIO

"""
Log into the Google Developers Console with the Google account whose spreadsheets you want to access.
Create (or select) a project and enable the Drive API and Sheets API (under Google Apps APIs).

https://console.developers.google.com/

Go to the Credentials for your project and create New credentials > OAuth client ID > of type Other.
In the list of your OAuth 2.0 client IDs click Download JSON for the Client ID you just created.
Save the file as client_secrets.json in your home directory (user directory).
Another file, named storage.json in this example, will be created after successful authorization
to cache OAuth data.

On you first usage of gsheets with this file (holding the client secrets),
your webbrowser will be opened, asking you to log in with your Google account to authorize
this client read access to all its Google Drive files and Google Sheets.
"""
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
replace = {'T': 'Tumor', 'N': 'Normal', 'm': 'Unknown', 'L': 'Unknown'}

# Initialization

In [45]:
samplesetname="trial1"
date="2019"
data_namespace="broad-genomics-delivery"
data_workspace="Cancer_Cell_Line_Factory_CCLF_PanCancer_PanelSeq"
proc_namespace="nci-mimoun-bi-org"
proc_workspace="PANCAN_TWIST copy"
source="CCLF"
site="HT33MBCX2"
tsca_id="TSCA45"
TSCA_version="TSCA Rapid Cancer Detection Panel v2"
picard_aggregation_type_validation="PCR"
forcekeep=[]
cohorts2id="https://docs.google.com/spreadsheets/d/1R97pgzoX0YClGDr5nmQYQwimnKXxDBGnGzg7YPlhZJU"
gsheeturllist=["https://docs.google.com/spreadsheets/d/1LR8OFylVClxf0kmZpAdlVjrn3RBcfZKpNoDYtKdnHB8", "https://docs.google.com/spreadsheets/d/128dkFhL1A0GqTjmR7iMvBZE8j6ymO8krBL9WX-wUAn4"]
wfrom = dm.WorkspaceManager(data_namespace, data_workspace)
wto = dm.WorkspaceManager(proc_namespace, proc_workspace)

# Getting the samples

- we load the samples from data workspace and load the metadata files
- we remove data that has already been processed
- we create the final ids

In [25]:
# we look at all the samples we already have
refsamples = wto.get_samples()
refids = refsamples.index
metadata = pd.concat([sheets.get(url).sheets[0].to_frame() for url in gsheeturllist])


2019-09-10 02:23:38::INFO  URL being requested: GET https://sheets.googleapis.com/v4/spreadsheets/1LR8OFylVClxf0kmZpAdlVjrn3RBcfZKpNoDYtKdnHB8?alt=json
2019-09-10 02:23:39::INFO  URL being requested: GET https://sheets.googleapis.com/v4/spreadsheets/1LR8OFylVClxf0kmZpAdlVjrn3RBcfZKpNoDYtKdnHB8/values:batchGet?majorDimension=ROWS&valueRenderOption=UNFORMATTED_VALUE&dateTimeRenderOption=FORMATTED_STRING&ranges=Plate+1&ranges=Sheet6&ranges=TSCA36_Results&ranges=Fingerprint+Results&ranges=Sheet3&ranges=Sheet2&ranges=Changelog&alt=json
2019-09-10 02:23:39::INFO  URL being requested: GET https://sheets.googleapis.com/v4/spreadsheets/128dkFhL1A0GqTjmR7iMvBZE8j6ymO8krBL9WX-wUAn4?alt=json
2019-09-10 02:23:39::INFO  URL being requested: GET https://sheets.googleapis.com/v4/spreadsheets/128dkFhL1A0GqTjmR7iMvBZE8j6ymO8krBL9WX-wUAn4/values:batchGet?majorDimension=ROWS&valueRenderOption=UNFORMATTED_VALUE&dateTimeRenderOption=FORMATTED_STRING&ranges=Plate+1&ranges=Sheet5&ranges=Sheet3&ranges=Sheet2&r

In [None]:
# we look at all the samples we already have
refsamples = wto.get_samples()
refids = refsamples.index
cohorts = sheets.get(cohorts2id).sheets[0].to_frame()
# we use this gsheet package to get all the sheets into one dataframe
metadata = pd.concat([sheets.get(url).sheets[0].to_frame() for url in gsheeturllist])

# we do some corrections just in case
samples1 = wfrom.get_samples().replace(np.nan, '', regex=True)

# creating sample_id (like in processing workspace) for metadata and samples1
metadata = metadata.dropna(0, subset=['Collaborator Sample ID'])
ttype = [i for i in metadata["Sample Type"]]
metadata['sample_id'] = [val['Collaborator Participant ID'] + '-' + val['Sample Type'] + '-' + val['Exported DNA SM-ID'] for i, val in metadata.iterrows()]


sample_id = [val["individual_alias"] + '-' + val['sample_type'] + '-' + i.split('_')[2] for i, val in samples1.iterrows()]
samples1.index = sample_id

# filtering on what already exists in the processing workspace (refids)
newsamples = samples1[(~samples1.index.isin(refids)) | samples1.index.isin(forcekeep)]
tokeep = set(metadata.index) & set(newsamples.index)

# usefull to merge the two df, sm-id is one of the only unique id here
if len(newsamples[~newsamples.index.isin(tokeep)]) > 0:
    print('we could not add these as we dont have metadata for them:' + str(newsamples[~newsamples.index.isin(tokeep)]))
newsamples = newsamples[newsamples.index.isin(tokeep)]
newmetadata = metadata[metadata.index.isin(tokeep)]

In [92]:
newsamples = samples1
newmetadata = metadata

# Creating the sample information dataframe

In [99]:
print('creating new df')
df = pd.concat([newmetadata, newsamples], axis=1, sort=True)
# from this new set we create a dataframe which will get uploaded to terra
sample_info = df[['crai_or_bai_path', 'cram_or_bam_path']]
sample_info['individual_id'] = df['Collaborator Participant ID']
sample_info['reference_id'] = df['Exported DNA SM-ID']
sample_info['participant'] = df['Collaborator Participant ID']
sample_info['aggregation_product_name_validation'] = [TSCA_version] * sample_info.shape[0]
# here we add this number as the reference id might be present many times already for different samples
# in the processing workspace
sample_info['external_id_validation'] = [i +'_'+ str(refsamples[refsamples['external_id_validation'] == i].shape[1]) if refsamples[refsamples['external_id_validation'] == i].shape[0] > 0 else i for i in sample_info['reference_id']]
sample_info['bsp_sample_id_validation'] = df.index
sample_info['stock_sample_id_validation'] = df['Stock DNA SM-ID']
sample_info['sample_type'] = df['Sample Type']
sample_info['picard_aggregation_type_validation'] = [picard_aggregation_type_validation] * sample_info.shape[0]
sample_info['tumor_subtype'] = df['Tumor Type']
sample_info['squid_sample_id_validation'] = sample_info['external_id_validation']
sample_info['source_subtype_validation'] = df['Original Material Type']
sample_info['processed_subtype_validation'] = df['Material Type']
sample_info['primary_disease'] = df['Primary Disease']
sample_info['media'] = df['Media on Tube']
sample_info['Collection'] = df['Collection']
# match collection data and error out
cohortlist = []
for k, val in sample_info['Collection'].iteritems():
    res = cohorts[cohorts['Name'] == val]
    if len(res) == 0:
        raise "we do not have a correponsding cohort for this collection"
    cohortlist.append(res['ID'].values[0])
sample_info['cohorts'] = cohortlist

sample_info['tissue_site'] = df['Tissue Site']
sample_info['source'] = [source] * sample_info.shape[0]
sample_info['sample_id'] = df.index

sample_info = sample_info.set_index('sample_id')

creating new df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus

In [102]:
sample_info['sample_type']

sample_id
CCLF_AB1065-Tumor-SM-J1OYT        Tumor
CCLF_AB1097-Tumor-SM-J1OZG        Tumor
CCLF_BU1013-Tumor-SM-J1OYY        Tumor
CCLF_BU1017-Normal-SM-J1OYR      Normal
CCLF_CY1006-Tumor-SM-J1OZK        Tumor
CCLF_CY1006-Tumor-SM-J1OZR        Tumor
CCLF_CY1007-Tumor-SM-J1OZM        Tumor
CCLF_CY1007-Tumor-SM-J1OZU        Tumor
CCLF_CY1015-Tumor-SM-J1OZW        Tumor
CCLF_CY1015-Tumor-SM-J1OZZ        Tumor
CCLF_KL1235-Tumor-SM-J1OZF        Tumor
CCLF_KL1283-Tumor-SM-J1OYV        Tumor
CCLF_KL1288-Tumor-SM-J1OYU        Tumor
CCLF_KL1292-Tumor-SM-J1OZT        Tumor
CCLF_KL1294-Tumor-SM-J1OZE        Tumor
CCLF_PEDS1064-Normal-SM-J1OZJ    Normal
CCLF_PEDS1140-Tumor-SM-J1OZ5      Tumor
CCLF_PEDS1143-Tumor-SM-J1OZP      Tumor
CCLF_PEDS1154-Tumor-SM-J1OZY      Tumor
CCLF_PEDS1155-Tumor-SM-J1P11      Tumor
CCLF_PEDS1156-Tumor-SM-J1OZV      Tumor
CCLF_RCRF1064-Tumor-SM-J1OZI      Tumor
CCLF_RCRF1070-Tumor-SM-J1OZH      Tumor
CCLF_RCRF1097-Tumor-SM-J1OZ4      Tumor
CCLF_RCRF1099-Tumor-SM-J1OZX  

# Creating the sample_sets

In [None]:
normals = [r["participant"] for i, r in sample_info.iterrows() if r['sample_type'] == "Normal"]
normalsid = [i for i, r in sample_info.iterrows() if r['sample_type'] == "Normal"]
tumors = [r["participant"] for i, r in sample_info.iterrows() if r['sample_type'] == "Tumor"]
tumorsid = [i for i, r in sample_info.iterrows() if r['sample_type'] == "Tumor"]
prevtumors = [val["participant"] for k, val in refsamples.iterrows() if val.sample_type == "Tumor"]
prevnormals = [val["participant"] for k, val in refsamples.iterrows() if val.sample_type == "Normal"]

print("creating new pairs")
# do we have new tumors/normals for our previous ones
newpairs = {'pair_id': [], 'case_sample': [], 'control_sample': [], 'participant': []}

toreprocess_normals = set(tumors) & set(prevnormals)
for val in toreprocess_normals:
    for tumor_id in sample_info[sample_info['participant'] == val][sample_info[
            'sample_type'] == 'Tumor'].index.tolist():
        normal_id = refsamples[refsamples['participant'] == val][refsamples[
          'sample_type'] == 'Normal'].index.tolist()[0]
        newpairs['pair_id'].append(tumor_id + '_' + normal_id)
        newpairs['case_sample'].append(tumor_id)
        newpairs['control_sample'].append(normal_id)
        newpairs['participant'].append(val)

paired = set(tumors) & set(normals)
for val in set(tumors) - toreprocess_normals:
    for tumor_id in sample_info[sample_info['participant'] == val][sample_info[
            'sample_type'] == 'Tumor'].index.tolist():
        normal_id = sample_info[(sample_info['participant'] == val) & (sample_info[
          'sample_type'] == 'Normal')].index.tolist()[0] if val in paired else 'NA'
        newpairs['pair_id'].append(tumor_id + "_" + normal_id)
        newpairs['case_sample'].append(tumor_id)
        newpairs['control_sample'].append(normal_id)
        newpairs['participant'].append(val)

newpairs = pd.DataFrame(newpairs).set_index('pair_id')

# Uploads to Terra

## all the entities need to exist! Else it will raise an error and block further uploads to Terra

In [None]:
print("all the entities need to exist! Else it will raise an error and block further uploads to Terra")
print("uploading new samples")
wto.upload_samples(sample_info)
if not "NA" in wto.get_samples().index.tolist():
    wto.upload_samples(pd.DataFrame({'sample_id':['NA'], 'participant_id':['NA']}).set_index('sample_id'))
    
print("creating pairs and pairsets")
wto.upload_entities('pair', newpairs)
wto.update_pair_set(samplesetname+'_pairs', newpairs.index)
cohorts_in_batch = []
cohorts_with_pairs = []
for val in cohorts['ID'].values:
    cohortsamples=sample_info[sample_info["cohorts"] == val].index.tolist()
    tumorsamplesincohort = sample_info[sample_info["cohorts"] == val][sample_info['sample_type']=="Tumor"].index.tolist()
    pairsamples=newpairs[newpairs['case_sample'].isin(tumorsamplesincohort)].index.tolist()
    if len(cohortsamples)>0:
        cohorts_in_batch.append(val)
        try:
            terra.addToSampleSet(wto, val, cohortsamples)
        except KeyError: # we may not have this set yet
            wto.update_sample_set(val, cohortsamples)
    if len(pairsamples)>0:
        cohorts_with_pairs.append(val)
        try:
            terra.addToPairSet(wto,val, pairsamples)
        except KeyError: # we may not have this set yet
            wto.update_pair_set(val, pairsamples)
print("creating a sample set")
wto.update_sample_set(sample_set_id=samplesetname + "_all", sample_ids=sample_info.index.tolist())
wto.update_sample_set(sample_set_id=samplesetname + "_tumors", sample_ids=tumorsid)
wto.update_sample_set(sample_set_id=samplesetname + "_normals", sample_ids=normalsid)
normalsid.extend([k for k, val in refsamples.iterrows() if val.sample_type == "Normal"])
# Same as cum pon but better
wto.update_sample_set(sample_set_id="All_normals", sample_ids=normalsid)
all_samples = wto.get_samples()..index.tolist()
all_samples.remove('NA')
wto.update_sample_set(sample_set_id="All_samples", sample_ids=all_samples)

# Running Terra Worlflows

In [None]:
print("Creating Terra submissions: remember you can only cancel \
    or interact with terra submissions from the Terra website. \
    https://app.terra.bio/#workspaces/"+proc_namespace.replace(" ", "%20")+"/"+proc_workspace.replace(" ", "%20")+"/job_history")

RenameBAM_TWIST = wto.create_submission("RenameBAM_TWIST", samplesetname + "_all", 'sample_set', expression='this.samples')
print("waiting for 'Rename'")
terra.waitForSubmission(wto, [RenameBAM_TWIST])

CalculateTargetCoverage_PANCAN = wto.create_submission('CalculateTargetCoverage_PANCAN', samplesetname + "_all", 'sample_set', expression='this.samples')
DepthOfCov_PANCAN = wto.create_submission('DepthOfCov_PANCAN', samplesetname + "_all", 'sample_set', expression='this.samples')
print("waiting for 'CalculateTargetCoverage_PANCAN' & 'DepthOfCov_PANCAN'")
terra.waitForSubmission(wto, [CalculateTargetCoverage_PANCAN, DepthOfCov_PANCAN])

In [3]:
CalculateTargetCoverage_PANCAN = wto.create_submission('CalculateTargetCoverage_PANCAN', samplesetname + "_all", 'sample_set', expression='this.samples')
DepthOfCov_PANCAN = wto.create_submission('DepthOfCov_PANCAN', samplesetname + "_all", 'sample_set', expression='this.samples')
print("waiting for 'CalculateTargetCoverage_PANCAN' & 'DepthOfCov_PANCAN'")
terra.waitForSubmission(wto, [CalculateTargetCoverage_PANCAN, DepthOfCov_PANCAN])

Successfully created submission 93f8e086-92e4-4772-bd86-3393a7e7b58c.
Successfully created submission a798dded-d5c9-49bc-89db-f2fce483703c.
waiting for 'CalculateTargetCoverage_PANCAN' & 'DepthOfCov_PANCAN'
1.0 of jobs Succeeded in submission 0.sion 0. 9 mn elapsed..
1.0 of jobs Succeeded in submission 1.sion 1. 24 mn elapsed..


[]

In [None]:
CreatePanelOfNormalsGATK_PANCAN = wto.create_submission('CreatePanelOfNormalsGATK_PANCAN', 'All_normals')
DepthOfCovQC_PANCAN = wto.create_submission('DepthOfCovQC_PANCAN', samplesetname + "_all", 'sample_set', expression='this.samples')
print("waiting for 'DepthOfCovQC_PANCAN' & 'CNV_CreatePoNForCNV'")
terra.waitForSubmission(wto, [DepthOfCovQC_PANCAN, CreatePanelOfNormalsGATK_PANCAN])

In [None]:
CallSomaticCNV_PANCAN = wto.create_submission('CallSomaticCNV_PANCAN', samplesetname + "_all", 'sample_set', expression='this.samples')
print("waiting for 'CallSomaticCNV_PANCAN'")
terra.waitForSubmission(wto, [CallSomaticCNV_PANCAN])

In [None]:
MutationCalling_Normals_TWIST = wto.create_submission("MutationCalling_Normals_TWIST", samplesetname + "_normals", 'sample_set', expression='this.samples')
print("waiting for 'MutationCalling_Normals_TWIST'")
terra.waitForSubmission(wto, [MutationCalling_Normals_TWIST])

In [9]:
FilterGermlineVariants_NormalSample_TWIST = wto.create_submission('FilterGermlineVariants_NormalSample_TWIST', samplesetname + "_normals", 'sample_set', expression='this.samples')
print("waiting for 'SNV_FilterGermline'")
terra.waitForSubmission(wto, [FilterGermlineVariants_NormalSample_TWIST])


Successfully created submission 355f3b30-fa62-4ca7-8270-b246b8c3ef46.
waiting for 'SNV_FilterGermline'
1.0 of jobs Succeeded in submission 0.sion 0. 7 mn elapsed.


ConfigNotFound: 'No such config CreatePoN_SNV_Mutect1 in this workspace'

In [7]:
CreatePoNSNV_Mutect1 = wto.create_submission('CreatePoNSNV_Mutect1', "All_normals")
CreatePoN_SNV_MuTect2 = wto.create_submission('CreatePoN_SNV_MuTect2', "All_normals")
print("waiting for 'CreatePoN_SNV_MuTect2' & 'CreatePoNSNV_Mutect1'")
terra.waitForSubmission(wto, [CreatePoNSNV_Mutect1, CreatePoN_SNV_MuTect2])

Successfully created submission d7493b45-3727-40d7-87dd-4b5e3eacc23d.
Successfully created submission 8503e14b-ce9c-4961-864e-4d07171efa04.
waiting for 'CreatePoN_SNV_MuTect2' & 'CreatePoNSNV_Mutect1'
All_normalsFailed for 0 jobs in submission 0. 2 mn elapsed.
0.0 of jobs Succeeded in submission 0.
1.0 of jobs Succeeded in submission 1.sion 1. 5 mn elapsed.


TypeError: exceptions must derive from BaseException

In [None]:


Process more chipseq data
get dropbox access
get MAX's list
see that I have everything
run pipeline

generate a merge peak set, only of good quality chip data.

create a document for processing decision. (why this pipeline, how)
create a document for score decision and merging decision. (why this score, why do we merge, why this way)

Compare overlaps of flag data vs regular data (check that it contains most of regular data)

create a presentation about what is new in rose2.

In [10]:
PlotSomaticCNVMaps_PANCAN = wto.create_submission('PlotSomaticCNVMaps_PANCAN', samplesetname + "_all")
for val in cohorts_in_batch:
    wto.create_submission("PlotSomaticCNVMaps_PANCAN", val)
print("submitted final jobs for CNV pipeline")

Successfully created submission 8dcd4fba-da3a-44a1-ba32-0a10ea23e719.
Successfully created submission e38c17cf-64d8-4f4c-9b98-022a54e777b0.
Successfully created submission 5a101571-de71-43a7-a95c-719fd1490438.
Successfully created submission 7d137f4f-3f5c-41b2-916a-292de58c1588.
Successfully created submission 2f8c8bfc-26c9-42aa-9889-51da70afe314.
Successfully created submission 64e9826e-b9d4-4e94-b723-55a585e30d53.
Successfully created submission bdee4a8a-f38e-49fa-b186-41cf95a1492c.
submitted final jobs for CNV pipeline
Successfully created submission 13269ea6-89f0-4b6e-ae3a-045855e3092e.
Successfully created submission b870de97-13ac-4bcc-b207-59634e47f0cd.
waiting for 'SNV_PostProcessing' & 'MutationCalling_Tumors_TWIST'
1.0 of jobs Succeeded in submission 0.sion 0. 6 mn elapsed.
CCLF_AB1065-Tumor-SM-J1OYT_NA
CCLF_AB1097-Tumor-SM-J1OZG_NA
CCLF_BU1013-Tumor-SM-J1OYY_NA
CCLF_CY1006-Tumor-SM-J1OZK_NA
CCLF_CY1006-Tumor-SM-J1OZR_NA
CCLF_CY1007-Tumor-SM-J1OZM_NA
CCLF_CY1007-Tumor-SM-J1OZU

TypeError: unsupported operand type(s) for +: 'int' and 'str'

In [4]:
SNV_PostProcessing_Normals = wto.create_submission('SNV_PostProcessing_Normals', samplesetname + "_normals")
MutationCalling_Tumors_TWIST = wto.create_submission('MutationCalling_Tumors_TWIST', samplesetname+'_pairs', 'pair_set', expression='this.pairs')
print("waiting for 'SNV_PostProcessing' & 'MutationCalling_Tumors_TWIST'")
terra.waitForSubmission(wto, [SNV_PostProcessing_Normals, MutationCalling_Tumors_TWIST])

Successfully created submission cfeb746a-ba8a-4dad-8ae7-577c7641dec8.
waiting for 'SNV_PostProcessing' & 'MutationCalling_Tumors_TWIST'
status is: Failed for 0 jobs in submission 0. 46 mn elapsed.

ConnectionError: ('Connection aborted.', OSError("(60, 'ETIMEDOUT')",))

## two cohorts have not worked because they contained just one acceptable cell line (the workflow needs cohorts with at least 2 acceptable CL, here both had one rejected one)

In [42]:
FilterGermlineEvents_TumorSample = wto.create_submission('FilterGermlineEvents_TumorSample', samplesetname+'_pairs', 'pair_set', expression='this.pairs')
print("waiting for 'FilterGermlineEvents_TumorSample'")
terra.waitForSubmission(wto, FilterGermlineEvents_TumorSample)

Successfully created submission e72382d2-8503-4736-bbd6-50c4e202e1d6.
waiting for 'FilterGermlineEvents_TumorSample'
CCLF_AB1065-Tumor-SM-J1OYT_NAin submission 0. 5 mn elapsed.
CCLF_AB1097-Tumor-SM-J1OZG_NA
CCLF_CY1006-Tumor-SM-J1OZK_NA
CCLF_CY1006-Tumor-SM-J1OZR_NA
CCLF_CY1007-Tumor-SM-J1OZU_NA
CCLF_CY1015-Tumor-SM-J1OZW_CCLF_CY1015-Tumor-SM-J1OZN
CCLF_CY1015-Tumor-SM-J1OZZ_CCLF_CY1015-Tumor-SM-J1OZN
CCLF_KL1235-Tumor-SM-J1OZF_NA
CCLF_KL1283-Tumor-SM-J1OYV_NA
CCLF_KL1288-Tumor-SM-J1OYU_NA
CCLF_KL1292-Tumor-SM-J1OZT_NA
CCLF_KL1294-Tumor-SM-J1OZE_NA
CCLF_PEDS1140-Tumor-SM-J1OZ5_NA
CCLF_PEDS1141-Normal-SM-J1OZO_CCLF_PEDS1141-Tumor-SM-J1OZQ
CCLF_PEDS1143-Normal-SM-J1OZP_NA
CCLF_PEDS1143-Tumor-SM-J1OZ3_NA
CCLF_PEDS1153-Tumor-SM-J1OZB_CCLF_PEDS1153-Normal-SM-J1OZ8
CCLF_PEDS1154-Tumor-SM-J1OZY_NA
CCLF_PEDS1156-Tumor-SM-J1OZV_NA
CCLF_RCRF1064-Tumor-SM-J1OZI_NA
CCLF_RCRF1070-Tumor-SM-J1OZH_NA
CCLF_RCRF1097-Tumor-SM-J1OZ4_NA
CCLF_RCRF1099-Tumor-SM-J1OZX_NA
CCLF_RCRF1102-Normal-SM-J1P12_NA
CCLF_

RuntimeError: 38 failed submission

In [48]:
SNVPostProcessing_TWIST = wto.create_submission('SNVPostProcessing_TWIST', samplesetname+'_pairs', "pair_set")
print("Submitted final jobs for SNV pipeline")

FNG_Compile_Pileup_Cnt = wto.create_submission("FNG_Compile_Pileup_Cnt", samplesetname + "_all", 'sample_set', expression='this.samples')
print("waiting for 'FNG_Compile_Pileup_Cnt'")
terra.waitForSubmission(wto, [FNG_Compile_Pileup_Cnt])

FNG_Compile_db_slow_download = wto.create_submission("FNG_Compile_db_slow_download", "All_samples")
print("waiting for 'FNG_Compile_db'")
terra.waitForSubmission(wto, [FNG_Compile_db_slow_download])

Successfully created submission 11f7e178-cb44-4521-a174-a01c375ba493.
Submitted final jobs for SNV pipeline
Successfully created submission b424bb18-5f35-4914-9172-1efa9c6f4b60.
waiting for 'FNG_Compile_Pileup_Cnt'
1.0 of jobs Succeeded in submission 0.sion 0. 8 mn elapsed..
Successfully created submission 0767609b-d381-4b52-bab8-1028e47c095a.
waiting for 'FNG_Compile_db'
1.0 of jobs Succeeded in submission 0.sion 0. 4 mn elapsed.


ValueError: expression and etype must BOTH be None or a string value

In [50]:
FNG_Query_db = wto.create_submission("FNG_Query_db", samplesetname + "_all")
print("Submitted final FNG Job")

print('Done')

Successfully created submission c3889bb2-a80f-4994-baa6-017ef512a9c7.
Submitted final FNG Job
Done


## Create and upload nice folders with data per sample or per cohort or per any list provided

In [106]:
workspace = "CCLF_TSCA_2_0_2"
namespace = "nci-mimoun-bi-org"
wm = dm.WorkspaceManager(namespace,workspace)
pathto_cnvpng='segmented_copy_ratio_img'
pathto_stats='sample_statistics'
is_from_pairs=True
pathto_snv='filtered_variants'
pathto_seg='cnv_calls'
datadir='gs://cclf_results/targeted/kim_sept/'
#specificlist= pd.read_csv("")[""].tolist() 
#specificlist= wm.get_sample_sets(), ...
specificlist=['CCLF_PEDS1012-Tumor-SM-E7S13',
'CCLF_PEDS1012-Tumor-SM-E7S1F',
'CCLF_PEDS1012-Tumor-SM-E7S1R',
'PEDS172-Tumor-SM-DB2K3',
'PEDS172-Tumor-SM-DB3R7',
'PEDS182-Tumor-SM-DHZ8V',
'PEDS196-Tumor-SM-DNUN4',
'PEDS196-Tumor-SM-DNUN5',
'PEDS204-Tumor-SM-DO3D5']


In [108]:
samples

Unnamed: 0_level_0,Collection,ID3,aggregation_product_name_validation,bai_filename,bam_filename,bsp_sample_id_validation,chromosome_coverage_dist,clean_bai_file_capture,clean_bam_file_capture,clear_snvs,...,stock_sample_id_validation,target_coverage,tissue_site,tn_decision_clean,tsca_id,tumor_ptn,tumor_seg,tumor_seg_for_plotting,tumor_subtype,tumor_tn
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AA24-Tumor-SM-DHZ8W,,,TSCA Rapid Cancer Detection Panel v2,2_AA24T_OPAC_p9_H5V3YBCXY.2.aligned.duplicates...,2_AA24T_OPAC_p9_H5V3YBCXY.2.aligned.duplicates...,SM-DHZ8W,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/a...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-DHTKX,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Pancreas,VT,TSCA15,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...,Primary,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...
AA24-Tumor-SM-HQ9AK,Cancer Cell Line Factory (CCLF) / Andy Aguirre...,,TSCA Rapid Cancer Detection Panel v2,2_AA24T_OPAC_p3_3D_HKGTMBCX2.2.aligned.bai,2_AA24T_OPAC_p3_3D_HKGTMBCX2.2.aligned.bam,SM-HQ9AK,,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-HO15Q,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/2...,Pancreas,VT,TSCA34,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/9...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/9...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/9...,Primary,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/9...
AA25-Tumor-SM-DHZ99,,,TSCA Rapid Cancer Detection Panel v2,2_AA25T_OPAC_p4_H5V3YBCXY.2.aligned.duplicates...,2_AA25T_OPAC_p4_H5V3YBCXY.2.aligned.duplicates...,SM-DHZ99,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/a...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-DHTKY,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Pancreas,VT,TSCA15,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...,Primary,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...
AA31-Tumor-SM-HADJU,Cancer Cell Line Factory (CCLF) / Andy Aguirre...,,TSCA Rapid Cancer Detection Panel v2,2_AA31T_OPAC_p11_HH22NBCX2.2.aligned.duplicate...,2_AA31T_OPAC_p11_HH22NBCX2.2.aligned.duplicate...,SM-HADJU,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/8...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-H5LWT,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Pancreas,VT,TSCA28,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...,Primary,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...
AA31-Tumor-SM-HAF5I,Cancer Cell Line Factory (CCLF) / Andy Aguirre...,,TSCA Rapid Cancer Detection Panel v2,1_AA31T_CMOPAC_p14_HH22NBCX2.1.aligned.duplica...,1_AA31T_CMOPAC_p14_HH22NBCX2.1.aligned.duplica...,SM-HAF5I,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/8...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-H5LWP,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Pancreas,,TSCA28,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...,Primary,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...
AA31-Tumor-SM-HAF5L,Cancer Cell Line Factory (CCLF) / Andy Aguirre...,,TSCA Rapid Cancer Detection Panel v2,2_AA31T_OPAC_p17_HH22NBCX2.2.aligned.duplicate...,2_AA31T_OPAC_p17_HH22NBCX2.2.aligned.duplicate...,SM-HAF5L,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/8...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-H8JE2,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Pancreas,VT,TSCA28,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...,Primary,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...
AA33-Normal-SM-D4L4D,,,TSCA Rapid Cancer Detection Panel v2,1_AA33N_3D_p5_HVLVWBCXX.1.aligned.duplicates_m...,1_AA33N_3D_p5_HVLVWBCXX.1.aligned.duplicates_m...,SM-D59KF,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/a...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,true,...,SM-D4L4D,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Pancreas,VT,TSCA1213,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/9...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/9...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/9...,,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/9...
AA33-Normal-SM-DHZ9L,,,TSCA Rapid Cancer Detection Panel v2,2_AA33N_OPAC_p5_H5V3YBCXY.2.aligned.duplicates...,2_AA33N_OPAC_p5_H5V3YBCXY.2.aligned.duplicates...,SM-DHZ9L,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/a...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,true,...,SM-DHTKZ,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Pancreas,VT,TSCA15,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...,,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...
AA33-Normal-SM-HAF5K,Cancer Cell Line Factory (CCLF) / Andy Aguirre...,,TSCA Rapid Cancer Detection Panel v2,2_AA33N_OPAC_p5_HH22NBCX2.2.aligned.duplicates...,2_AA33N_OPAC_p5_HH22NBCX2.2.aligned.duplicates...,SM-HAF5K,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/8...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,true,...,SM-H5LWR,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Pancreas,VN,TSCA28,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...,Primary,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/4...
AA33-Tumor-SM-DHZ9X,,,TSCA Rapid Cancer Detection Panel v2,2_AA33T_OPAC_p5_H5V3YBCXY.2.aligned.duplicates...,2_AA33T_OPAC_p5_H5V3YBCXY.2.aligned.duplicates...,SM-DHZ9X,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/a...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-DHTL1,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Pancreas,VT,TSCA15,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...,Primary,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...


In [110]:
samples = wm.get_samples()
samples = samples[samples.index.isin(specificlist)] 
if is_from_pairs:
    pairs = wm.get_pairs()
    pairs = pairs[pairs['case_sample'].isin(specificlist)] 
samples

Unnamed: 0_level_0,Collection,ID3,aggregation_product_name_validation,bai_filename,bam_filename,bsp_sample_id_validation,chromosome_coverage_dist,clean_bai_file_capture,clean_bam_file_capture,clear_snvs,...,stock_sample_id_validation,target_coverage,tissue_site,tn_decision_clean,tsca_id,tumor_ptn,tumor_seg,tumor_seg_for_plotting,tumor_subtype,tumor_tn
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CCLF_PEDS1012-Tumor-SM-E7S13,,,TSCA Rapid Cancer Detection Panel v2,2_CCLF_PEDS1012T_CM_p5_H57WVBCXY.2.aligned.dup...,2_CCLF_PEDS1012T_CM_p5_H57WVBCXY.2.aligned.dup...,SM-E7S13,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/a...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-E78BF,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Lung,VT,TSCA17,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/6...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/6...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/6...,Metastatic,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/6...
CCLF_PEDS1012-Tumor-SM-E7S1F,,,TSCA Rapid Cancer Detection Panel v2,2_CCLF_PEDS1012T_RETM_p5_H57WVBCXY.2.aligned.d...,2_CCLF_PEDS1012T_RETM_p5_H57WVBCXY.2.aligned.d...,SM-E7S1F,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/a...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-E77B3,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Lung,VT,TSCA17,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/6...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/6...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/6...,Metastatic,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/6...
CCLF_PEDS1012-Tumor-SM-E7S1R,,,TSCA Rapid Cancer Detection Panel v2,2_CCLF_PEDS1012T_WITP_p4_H57WVBCXY.2.aligned.d...,2_CCLF_PEDS1012T_WITP_p4_H57WVBCXY.2.aligned.d...,SM-E7S1R,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/a...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-E77BE,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Lung,VT,TSCA17,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/6...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/6...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/6...,Metastatic,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/6...
PEDS172-Tumor-SM-DB2K3,,,TSCA Rapid Cancer Detection Panel v2,1_PEDS172T_PF_AR5_p7_HYTLGBCXX.1.aligned.dupli...,1_PEDS172T_PF_AR5_p7_HYTLGBCXX.1.aligned.dupli...,SM-DB9LA,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/a...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-DB2K3,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Pleural Cavity,VT,TSCA14,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/7...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/7...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/7...,Metastatic,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/7...
PEDS172-Tumor-SM-DB3R7,,,TSCA Rapid Cancer Detection Panel v2,1_PEDS172T_PF_CM_p7_HYTLGBCXX.1.aligned.duplic...,1_PEDS172T_PF_CM_p7_HYTLGBCXX.1.aligned.duplic...,SM-DB9KX,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/a...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-DB3R7,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Pleural Cavity,VT,TSCA14,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/7...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/7...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/7...,Metastatic,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/7...
PEDS182-Tumor-SM-DHZ8V,,,TSCA Rapid Cancer Detection Panel v2,2_PEDS182T_SMGM_p6_H5V3YBCXY.2.aligned.duplica...,2_PEDS182T_SMGM_p6_H5V3YBCXY.2.aligned.duplica...,SM-DHZ8V,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/a...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-DHNAV,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Bone,VT,TSCA15,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...,Primary,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/1...
PEDS196-Tumor-SM-DNUN4,,,TSCA Rapid Cancer Detection Panel v2,2_PEDS196T_FM_p5_H5TTJBCXY.2.aligned.duplicate...,2_PEDS196T_FM_p5_H5TTJBCXY.2.aligned.duplicate...,SM-DOEL6,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/a...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-DNUN4,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Kidney,VT,TSCA16,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/c...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/c...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/c...,Primary,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/c...
PEDS196-Tumor-SM-DNUN5,,,TSCA Rapid Cancer Detection Panel v2,2_PEDS196T_CM_p5_H5TTJBCXY.2.aligned.duplicate...,2_PEDS196T_CM_p5_H5TTJBCXY.2.aligned.duplicate...,SM-DOEL7,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/a...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-DNUN5,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Kidney,VT,TSCA16,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/c...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/c...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/c...,Primary,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/c...
PEDS204-Tumor-SM-DO3D5,,,TSCA Rapid Cancer Detection Panel v2,2_PEDS204T_CM_p6_H5TTJBCXY.2.aligned.duplicate...,2_PEDS204T_CM_p6_H5TTJBCXY.2.aligned.duplicate...,SM-DOELH,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/a...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,gs://fc-35446f22-ea37-483a-bd6c-5e9fc56851ff/s...,,...,SM-DO3D5,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/5...,Kidney,VT,TSCA16,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/c...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/c...,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/c...,Primary,gs://fc-c23078b3-05b3-4158-ba8f-2b1eeb1bfa16/c...


In [143]:
for i, val in samples.iterrows():
    os.system('gsutil cp '+val[pathto_seg]+' '+datadir+i+'/')
    os.system('gsutil cp '+val[pathto_cnvpng]+' '+datadir+i+'/')
    os.system('gsutil cp '+val[pathto_stats]+' '+datadir+i+'/')
    if is_from_pairs:
        snvs = pairs[pairs["case_sample"]==i][pathto_snv]
        for snv in snvs:
            if snv is not np.nan:
                os.system('gsutil cp '+snv+' '+datadir+i+'/')
                break
    else:
        os.system('gsutil cp '+val[pathto_snv]+' '+datadir+i+'/')