# Neuroblastoma-specific processing: mutation calling for unprocessed NB cell lines

In particular, this notebook runs the mutation pipeline (or most of it) on the subset of neuroblastoma (NB) cell lines for which we have BAM and BAI files that have not yet been processed. Most of these are samples from Sanger.

In addition, a subset of the NB lines for which we have CCLE data was included in the CCLE2 paper, and those lines already have results from running ABSOLUTE.

In [1]:
from __future__ import print_function
import os.path
import dalmatian as dm
import pandas as pd
import sys
sys.path.insert(0, '../../JKBio/')
sys.path.insert(0, '../')
from src.CCLE_postp_function import *
import TerraFunction as terra
%load_ext autoreload
%autoreload 2
%load_ext rpy2.ipython
from taigapy import TaigaClient
from IPython.display import Image,display
tc = TaigaClient()
import numpy as np

In [9]:
## Import sound alert dependencies
from IPython.display import Audio, display

# play sound alert when function is called
def allDone(frequencies=(300, 240), play_time_seconds=1, framerate=4410):
    """Play a short audible alert in the notebook output.

    Intended to be called at the end of a long-running cell so that its
    completion can be heard. Called with no arguments it plays the same
    two-tone signal as before (300 Hz + 240 Hz for one second).

    Parameters
    ----------
    frequencies : iterable of float, optional
        Frequencies in Hz of the sine tones that are summed into the alert.
    play_time_seconds : float, optional
        Duration of the alert in seconds.
    framerate : int, optional
        Number of audio samples per second.
    """
    # np.linspace requires an integer sample count; rounding here lets
    # fractional durations (e.g. 0.5 s) work as well.
    n_samples = int(round(framerate * play_time_seconds))

    # endpoint=False keeps the sample spacing at exactly 1/framerate, so the
    # tones are played back at the requested frequencies.
    t = np.linspace(0, play_time_seconds, n_samples, endpoint=False)

    # Start from silence and add one sine wave per requested frequency.
    audio_data = np.zeros_like(t)
    for freq in frequencies:
        audio_data = audio_data + np.sin(2 * np.pi * freq * t)

    display(Audio(audio_data, rate=framerate, autoplay=True))

In [2]:
# Show the kernel's working directory; the relative paths used in this
# notebook (sys.path entries, the NB CSV) are resolved against it.
cwd = os.getcwd()
print(cwd)

/Users/gmiller/Documents/Work/GitHub/ccle_processing/ccle_tasks


In [11]:
# Terra coordinates used throughout the notebook: the namespace and workspace
# that host the mutation-calling pipeline, and the sample set to create/run.
refnamespace = "broad-firecloud-ccle"
refworkspace = "DepMap_Mutation_Calling_CGA_pipeline"
samplesetname = "NB_to_run"

# "<namespace>/<workspace>" form, passed to terra.waitForSubmission below.
refname_work = "/".join((refnamespace, refworkspace))

In [4]:
# dalmatian handle on the reference workspace; the cells below read and
# update sample sets, configurations and submissions through it.
refwm = dm.WorkspaceManager(refnamespace, refworkspace)

## Create a sample set for the NB lines to run

In [5]:
# List every sample that belongs to the "Sanger" sample set of the workspace.
all_sample_sets = refwm.get_sample_sets()
all_sample_sets["samples"]["Sanger"]

['ACH-001626_1',
 'ACH-000334_1',
 'ACH-000482_1',
 'ACH-000295_1',
 'ACH-002179_1',
 'ACH-000558_1',
 'ACH-000893_1',
 'ACH-000252_1',
 'ACH-000148_2',
 'ACH-000587_1',
 'ACH-000610_1',
 'ACH-001702_2',
 'ACH-000545_1',
 'ACH-001350_1',
 'ACH-000442_1',
 'ACH-000401_1',
 'ACH-002259_1',
 'ACH-002261_1',
 'ACH-000238_1',
 'ACH-000055_2',
 'ACH-002285_1',
 'ACH-001274_2',
 'ACH-000647_2',
 'ACH-001321_2',
 'ACH-000840_2',
 'ACH-000613_1',
 'ACH-000574_1',
 'ACH-000501_1',
 'ACH-000950_1',
 'ACH-000932_1',
 'ACH-002367_1',
 'ACH-002311_1',
 'ACH-000225_1',
 'ACH-000776_1',
 'ACH-000522_1',
 'ACH-000376_2',
 'ACH-000567_1',
 'ACH-001529_2',
 'ACH-000792_1',
 'ACH-002182_1',
 'ACH-000451_1',
 'ACH-000628_1',
 'ACH-000271_2',
 'ACH-000366_2',
 'ACH-000880_1',
 'ACH-000104_1',
 'ACH-000481_2',
 'ACH-000400_2',
 'ACH-000569_1',
 'ACH-000424_2',
 'ACH-000556_2',
 'ACH-000053_1',
 'ACH-000568_1',
 'ACH-000823_2',
 'ACH-000626_1',
 'ACH-002307_1',
 'ACH-000381_1',
 'ACH-000254_1',
 'ACH-002151_1

In [6]:
# Sample IDs of the NB lines that have not been run through the pipeline yet;
# these are the only ones we need to grab.
# The CSV was produced with an R script; the IDs are read from its "x" column.
nb_lines_to_run = (
    pd.read_csv("../../NB-copy-loss/data/NB_lines_to_run.csv")["x"]
    .to_list()
)
nb_lines_to_run

['sanger_ACH-000078',
 'sanger_ACH-000099',
 'sanger_ACH-000120',
 'sanger_ACH-000136',
 'sanger_ACH-000149',
 'sanger_ACH-000227',
 'sanger_ACH-000259',
 'sanger_ACH-000260',
 'sanger_ACH-000341',
 'sanger_ACH-000345',
 'sanger_ACH-000366',
 'sanger_ACH-000804',
 'sanger_ACH-001338',
 'sanger_ACH-001344',
 'sanger_ACH-001355',
 'sanger_ACH-001603',
 'sanger_ACH-001674',
 'sanger_ACH-001716',
 'sanger_ACH-002083',
 'sanger_ACH-002217',
 'sanger_ACH-002248',
 'sanger_ACH-002261',
 'sanger_ACH-002277',
 'sanger_ACH-002278',
 'sanger_ACH-002279',
 'sanger_ACH-002280',
 'sanger_ACH-002281',
 'sanger_ACH-002282',
 'sanger_ACH-002283',
 'sanger_ACH-002284',
 'sanger_ACH-002285',
 'sanger_ACH-002340',
 'sanger_ACH-002389']

In [6]:
# Create (or update) the sample set named by `samplesetname` in Terra so that
# it contains the NB lines loaded from the CSV.
refwm.update_sample_set(samplesetname, nb_lines_to_run)

Sample set "NB_to_run" (33 samples) successfully updated.


In [6]:
# Read the sample set back from the workspace to check its contents.
# Look it up through `samplesetname` rather than a repeated string literal so
# this check always inspects the set that was just updated.
refwm.get_sample_sets()["samples"][samplesetname]

['ACH-002083_1',
 'ACH-001603_1',
 'ACH-000345_2',
 'ACH-001338_1',
 'ACH-002261_1',
 'ACH-002285_1',
 'ACH-002279_1',
 'ACH-001344_2',
 'ACH-001355_1',
 'ACH-001716_1',
 'ACH-000078_1',
 'ACH-000341_1',
 'ACH-002340_1',
 'ACH-000120_1',
 'ACH-001674_2',
 'ACH-002281_1',
 'ACH-000259_1',
 'ACH-000227_1',
 'ACH-002217_1',
 'ACH-000366_2',
 'ACH-002248_1',
 'ACH-000149_1',
 'ACH-002280_1',
 'ACH-002278_1',
 'ACH-002277_1',
 'ACH-002389_1',
 'ACH-002282_1',
 'ACH-002284_1',
 'ACH-000260_1',
 'ACH-000136_1',
 'ACH-002283_1',
 'ACH-000099_2',
 'ACH-000804_2']

In [7]:
# Fetch the configuration of the CGA characterization pipeline from the
# workspace. Running this pipeline will give MAF and ABSOLUTE as output.
cga_config_name = "CGA_WES_CCLE_Characterization_Pipeline_v0.1_Jul2019"
CGAmutations = refwm.get_config(cga_config_name)
CGAmutations

{'deleted': False,
 'inputs': {'CGA_Production_Analysis_Workflow.CrossCheckLaneFingerprints_Task.HaplotypeDBForCrossCheck': 'workspace.HAP_DB_FOR_CC',
  'CGA_Production_Analysis_Workflow.gatk_acnv_only.one_thousand_genomes_common_variants_minor_allele_freq_five': 'workspace.COMMON_VARIANTS_1000_GENOMES',
  'CGA_Production_Analysis_Workflow.VEP_Task.GNOMAD_FILE_IDX': 'workspace.GNOMAD_IDX',
  'CGA_Production_Analysis_Workflow.lego_plotter_task.mut_categs': 'workspace.MUT_CATEGORIES',
  'CGA_Production_Analysis_Workflow.MutectFC_Task.mutectIntervals': 'workspace.MUTECT_FC_INTERVALS',
  'CGA_Production_Analysis_Workflow.hasPicardMetrics_normal': '',
  'CGA_Production_Analysis_Workflow.DB_SNP_VCF_IDX': 'workspace.DB_SNP_VCF_IDX',
  'CGA_Production_Analysis_Workflow.ctrlName': '"RCRF9001_KIDNEY_Normal"',
  'CGA_Production_Analysis_Workflow.targetIntervals': 'workspace.ILLUMINA_TARGET_INTERVALS',
  'CGA_Production_Analysis_Workflow.readGroupBlackList': 'workspace.READ_GROUP_BLACKLIST',
  'CG

In [8]:
# Push the configuration back to the workspace, then submit it on the sample
# set named by `samplesetname`, using the 'this.samples' entity expression.
refwm.update_config(CGAmutations)
submission_id = refwm.create_submission(
    CGAmutations['name'],
    samplesetname,
    'sample_set',
    expression='this.samples',
)

Successfully updated configuration getzlab/CGA_WES_CCLE_Characterization_Pipeline_v0.1_Jul2019
Successfully created submission 8dd3d281-962e-43f4-abaa-e1a34aa3efe9.


In [12]:
# Wait on the submission, then play the sound alert.
# The alert sits in `finally` so that it is also heard when the wait is cut
# short by an exception (e.g. a dropped network connection); the exception
# itself still propagates and is shown as usual.
try:
    terra.waitForSubmission(refname_work, submission_id)
finally:
    allDone()

status is: Failed for 0 jobs in submission 0. 12 mn elapsed.



TransportError: HTTPSConnectionPool(host='oauth2.googleapis.com', port=443): Max retries exceeded with url: /token (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x1220b4990>: Failed to establish a new connection: [Errno 8] nodename nor servname provided, or not known'))