In [None]:
from __future__ import print_function
import os.path
import subprocess
import dalmatian as dm
import pandas as pd
import sys
pathtoJK = "../../JKBio"
sys.path.insert(0, pathtoJK)
import TerraFunction as terra
from Helper import *
import numpy as np
from gsheets import Sheets

In [None]:
# Namespace and workspace names used to build the dalmatian workspace manager.
namespace1="broad-firecloud-ccle"
workspace1="DepMap_WES_PONS_hg38"
# `wm` is the handle every workspace call in the cells below goes through.
wm = dm.WorkspaceManager(namespace1,workspace1)

## creating sample sets

In [None]:
# Fetch the workspace's samples and participants through the manager; both are
# indexed by column name and filtered in the cells below.
samples = wm.get_samples()
participants = wm.get_participants()

In [None]:
# Split the samples according to the value of their 'type' column.
is_ice = samples['type'] == 'ice'
is_agilent = samples['type'] == 'agilent'

ice = samples[is_ice]
agilent = samples[is_agilent]

In [None]:
# Display the participants; their 'gender' column is what the next section filters on.
participants

## creating sample sets for each PoN (panel of normals)

In [None]:
# Participant ids grouped by the 'gender' column: 'F' -> xx, 'M' -> xy.
gender = participants['gender']
xx = participants[gender == 'F'].index.tolist()
xy = participants[gender == 'M'].index.tolist()

In [None]:
# For each sample-type subset, keep the rows whose participant is in xy / xx.
ice_in_xy = ice['participant'].isin(xy)
agilent_in_xy = agilent['participant'].isin(xy)
ice_in_xx = ice['participant'].isin(xx)
agilent_in_xx = agilent['participant'].isin(xx)

xy_ice = ice[ice_in_xy]
xy_agilent = agilent[agilent_in_xy]
xx_ice = ice[ice_in_xx]
xx_agilent = agilent[agilent_in_xx]

In [None]:
# Register one sample set per subset, named after the variable that holds it.
# The order of the calls is the same as the order of this table.
for set_name, subset in [
    ('xy_ice', xy_ice),
    ('xy_agilent', xy_agilent),
    ('xx_ice', xx_ice),
    ('xx_agilent', xx_agilent),
]:
    wm.update_sample_set(set_name, subset.index.tolist())

## unmapping hg19 bams

In [None]:
# Submit the unmapping configuration on both XY sample sets, then wait on both.
unmap_config = "BamToUnmappedRGBams_MC"
unmap1 = wm.create_submission(unmap_config, 'xy_agilent', 'sample_set', 'this.samples')
unmap2 = wm.create_submission(unmap_config, 'xy_ice', 'sample_set', 'this.samples')
terra.waitForSubmission(wm, [unmap1, unmap2])

In [None]:
# Submit the uBAM file-list configuration on both XY sample sets, then wait on both.
ubam_list_config = "Generate_uBAM_File_List"
ubamfilelist1 = wm.create_submission(ubam_list_config, 'xy_agilent', 'sample_set', 'this.samples')
ubamfilelist2 = wm.create_submission(ubam_list_config, 'xy_ice', 'sample_set', 'this.samples')
terra.waitForSubmission(wm, [ubamfilelist1, ubamfilelist2])

## realigning them to hg38

In [None]:
# Submit the realignment configuration on both XY sample sets, then wait on both.
realign_config = "Relalign_WES_GATK4"
realign1 = wm.create_submission(realign_config, 'xy_agilent', 'sample_set', 'this.samples')
realign2 = wm.create_submission(realign_config, 'xy_ice', 'sample_set', 'this.samples')
terra.waitForSubmission(wm, [realign1, realign2])

In [None]:
# Wait on a submission identified by a hard-coded id.
# NOTE(review): this id does not follow the 8-4-4-4-12 UUID layout (its first
# group has 14 characters), so it looks like a paste error — confirm the real
# submission id. A bare string is also passed here while the other calls in this
# notebook pass a list — TODO confirm waitForSubmission accepts both.
terra.waitForSubmission(wm,"e11159e111595f-9f14-4f01-81fd-74ee988a0032")

## config pon for Agilent data

In [None]:
# Sample ids used below to build the 'xy_agilent_retry' set and to filter
# 'xy_agilent' down to 'xy_agilent_notfailed'.
failed = [
    'AGILENT_10001', 'AGILENT_10172', 'AGILENT_10344', 'AGILENT_10372',
    'AGILENT_10516', 'AGILENT_10597', 'AGILENT_11543', 'AGILENT_11630',
    'AGILENT_11740', 'AGILENT_11800', 'AGILENT_11985', 'AGILENT_12068',
    'AGILENT_12161', 'AGILENT_12406', 'AGILENT_12585', 'AGILENT_12877',
    'AGILENT_12997', 'AGILENT_13051', 'AGILENT_13305', 'AGILENT_13522',
    'AGILENT_13536', 'AGILENT_13545', 'AGILENT_14010', 'AGILENT_14379',
    'AGILENT_24981', 'AGILENT_25020', 'AGILENT_25103', 'AGILENT_25177',
    'AGILENT_25209', 'AGILENT_25293', 'AGILENT_25412', 'AGILENT_25477',
    'AGILENT_25598', 'AGILENT_25688', 'AGILENT_25771', 'AGILENT_25807',
    'AGILENT_25873', 'AGILENT_25898', 'AGILENT_25906', 'AGILENT_26095',
    'AGILENT_26110', 'AGILENT_26255', 'AGILENT_26416', 'AGILENT_26426',
    'AGILENT_918992', 'AGILENT_A00131',
    'AGILENT_PT-7X7B', 'AGILENT_PT-7XGX', 'AGILENT_PT-7XHZ', 'AGILENT_PT-7XI7',
    'AGILENT_PT-7YVH', 'AGILENT_PT-7Z9C', 'AGILENT_PT-7ZI3', 'AGILENT_PT-7ZYL',
    'AGILENT_PT-7ZZ6', 'AGILENT_PT-811S',
]

In [None]:
# Put the failed sample ids in their own sample set so they can be resubmitted.
wm.update_sample_set('xy_agilent_retry',failed)

In [None]:
# Resubmit the realignment on the retry set only. This rebinds `realign1`, and no
# wait call follows in this cell.
realign1 = wm.create_submission("Relalign_WES_GATK4",'xy_agilent_retry','sample_set','this.samples')

## the retried samples seem to have failed for good, so we exclude them

In [None]:
# NOTE(review): `sset` is only assigned in the following cell, so this display
# raises a NameError on a fresh kernel when the cells are run top to bottom.
sset

In [None]:
# Take the sample list stored for the 'xy_agilent' set, drop every id listed in
# `failed`, and register the remainder as 'xy_agilent_notfailed'.
all_sample_sets = wm.get_sample_sets()
xy_agilent_row = all_sample_sets[all_sample_sets.index == 'xy_agilent']
sset = xy_agilent_row.samples.tolist()[0]

newset = [sample_id for sample_id in sset if sample_id not in failed]
wm.update_sample_set('xy_agilent_notfailed', newset)

In [None]:
# Load the existing XX Agilent configuration; the cells below edit this object in
# place and push it back under new names. The bare name displays it.
config_PON = wm.get_configuration("CNV_Somatic_Panel_Workflow_Agilent_XX")
config_PON

In [None]:
# Point the loaded configuration at the XY Agilent intervals and PoN entity id,
# then rename it so it is stored as a separate configuration.
config_PON['inputs'].update({
    'CNVSomaticPanelWorkflow.intervals': 'workspace.agilent_xy_intervals_no_pad',
    'CNVSomaticPanelWorkflow.pon_entity_id': '"hg38_agilent_pon_XY"',
})
config_PON['name'] = 'CNV_Somatic_Panel_Workflow_Agilent_XY'

In [None]:
# Push the edited configuration, then submit it on the filtered Agilent XY set.
wm.update_configuration(config_PON)
agilent_xy_config_name = config_PON['name']
pon_creation1 = wm.create_submission(agilent_xy_config_name, 'xy_agilent_notfailed')

## config pon for ice data

In [None]:
# Reuse the same configuration object for ICE: swap in the XY ICE intervals and
# PoN entity id, and rename it accordingly.
config_PON['inputs'].update({
    'CNVSomaticPanelWorkflow.intervals': 'workspace.ice_xy_intervals_no_pad',
    'CNVSomaticPanelWorkflow.pon_entity_id': '"hg38_ice_pon_XY"',
})
config_PON['name'] = 'CNV_Somatic_Panel_Workflow_ICE_XY'

In [None]:
# Push the ICE configuration, submit it on the XY ICE set, and wait on that
# submission only.
wm.update_configuration(config_PON)
ice_xy_config_name = config_PON['name']
pon_creation2 = wm.create_submission(ice_xy_config_name, 'xy_ice')
terra.waitForSubmission(wm, [pon_creation2])

# we now want to remove the intermediate files

In [None]:
# get all files above a certain size
# Runs `gsutil ls -l` over everything under the bucket, keeps the last field of
# each line whose first field is greater than 1000000000, and writes the result
# to ponwmdatamorethan1Gb.txt in the current working directory.
# NOTE(review): awk compares a non-numeric first field as a string, so a line that
# is not an object entry (e.g. a trailing summary line) may also pass this
# filter — TODO confirm against the actual listing.
! gsutil -m ls -l "gs://fc-secure-b82334d0-5e87-4e4d-8e0b-757b9a8aae6b/**" | awk '{if ($1 > 1000000000) print $NF}' > 'ponwmdatamorethan1Gb.txt'

# Listing written by the gsutil/awk command above. It lands in the notebook's
# working directory, so it is read back from there rather than from a
# user-specific absolute path.
listing_path = 'ponwmdatamorethan1Gb.txt'
batch_size = 40

# Keep only gs:// URLs: any other line in the listing is not an object path and
# must never be handed to `gsutil rm`.
with open(listing_path) as listing:
    file = [line.strip() for line in listing if line.strip().startswith('gs://')]
# not removing the hg38 version
file = [i for i in file if '.hg38.bam' not in i]

# removing files by bunch of 40: exactly one `gsutil rm` call per batch, with the
# paths passed as an argument list so they are never re-parsed by a shell.
for start in range(0, len(file), batch_size):
    batch = file[start:start + batch_size]
    status = subprocess.call(['gsutil', '-m', 'rm'] + batch)
    if status != 0:
        # Report and carry on, so one failing batch does not stop the cleanup.
        print('gsutil rm exited with status', status, 'for the batch starting at index', start)