In [None]:
#///////////////////////////////////////////////////////////////////
# script: mutations_by_patient_GOI
# author: Lincoln
# date: 2/27/19
#
# This script takes the output of getMutationCounts_overall_and_GOI.py 
#    (option 4) and parses it into a readable, useable table, where 
#    the number of each possible mutation is given, on a per-patient
#   basis
#
#  adapted from SNP_calling_pipeline/lolliplots/creating_lollipop_cmds.ipynb
#
#///////////////////////////////////////////////////////////////////

In [1]:
import pandas as pd

In [2]:
egfr_df = pd.read_csv('/Users/lincoln.harris/Desktop/egfr_out_AA.csv', header=None, names=['cell', 'mutations'])
egfr_df

Unnamed: 0,cell,mutations
0,P13_1001000339,[]
1,C1_B002572,[]
2,J2_B002573,[]
3,P22_B003116,[]
4,O22_B003191,[]
5,J12_B003529,['T903T']
6,D14_1001000407,"['R521K', 'T903T', 'T629T', 'Q787Q']"
7,F14_B003105,[]
8,P11_1001000340,[]
9,E21_B003930,['T903T']


In [9]:
egfr_df['mutations'] = egfr_df['mutations'].str.replace("'", "") # remove quotes
egfr_df['mutations'] = egfr_df['mutations'].str.replace("[", "") # remove brackets
egfr_df['mutations'] = egfr_df['mutations'].str.replace("]", "") # remove brackets
egfr_df['mutations'] = egfr_df['mutations'].str.replace(" ", "") # remove whitespace?
egfr_df['mutations']

0                                    
1                                    
2                                    
3                                    
4                                    
5                               T903T
6             R521K,T903T,T629T,Q787Q
7                                    
8                                    
9                               T903T
10                                   
11                                   
12                        Q787Q,T629T
13                                   
14                                   
15                              T903T
16                                   
17                                   
18                                   
19                                   
20                                   
21                                   
22                                   
23                                   
24                                   
25                                   
26          

In [11]:
# create a dict for all of the unique mutations 

mutations_dict = {}

for i in range(0,len(egfr_df.index)):
    currRow = egfr_df['mutations'].iloc[i]
    currRowSplit = currRow.split(',')
    for currMut in currRowSplit:
        if currMut in mutations_dict.keys():     # existing entry
            currVal = mutations_dict.get(currMut)
            currVal += 1
            mutations_dict.update({currMut : currVal})
        else:     # new entry
            mutations_dict.update({currMut : 1})

In [12]:
mutations_dict

{'': 8352,
 'A237>?': 1,
 'A237V': 7,
 'D1014N': 24,
 'D994D': 41,
 'E709A': 1,
 'E746_A750delELREA': 53,
 'E746_T751delELREAT': 53,
 'F856L': 3,
 'G221V': 1,
 'G331R': 1,
 'G598V': 1,
 'G652G': 3,
 'G719A': 1,
 'G857V': 2,
 'H1129Y': 1,
 'I1093M': 1,
 'K745_A750>T': 21,
 'K754E': 2,
 'L1034I': 1,
 'L1167V': 2,
 'L387M': 1,
 'L747S': 1,
 'L858R': 148,
 'L861Q': 1,
 'L907M': 1,
 'P1019L': 2,
 'P589L': 3,
 'Q1020H': 3,
 'Q787Q': 840,
 'R1100S': 6,
 'R521K': 541,
 'R776H': 1,
 'R831H': 2,
 'S442I': 1,
 'S768I': 1,
 'S811F': 1,
 'S921R': 5,
 'T629T': 562,
 'T903T': 667,
 'V1142V': 2,
 'V536M': 3,
 'V769L': 2,
 'V774_C775insHV': 1,
 'V843L': 2}

In [13]:
mutations_dict_items = list(mutations_dict.items())
mutations_dict_items

[('', 8352),
 ('T903T', 667),
 ('R521K', 541),
 ('T629T', 562),
 ('Q787Q', 840),
 ('L858R', 148),
 ('E746_T751delELREAT', 53),
 ('E746_A750delELREA', 53),
 ('D1014N', 24),
 ('G857V', 2),
 ('D994D', 41),
 ('K745_A750>T', 21),
 ('S811F', 1),
 ('V1142V', 2),
 ('Q1020H', 3),
 ('A237V', 7),
 ('G221V', 1),
 ('P589L', 3),
 ('L747S', 1),
 ('S442I', 1),
 ('R1100S', 6),
 ('S921R', 5),
 ('L1167V', 2),
 ('L1034I', 1),
 ('F856L', 3),
 ('V536M', 3),
 ('R831H', 2),
 ('L861Q', 1),
 ('V769L', 2),
 ('V843L', 2),
 ('G652G', 3),
 ('I1093M', 1),
 ('P1019L', 2),
 ('R776H', 1),
 ('V774_C775insHV', 1),
 ('A237>?', 1),
 ('L907M', 1),
 ('G331R', 1),
 ('H1129Y', 1),
 ('K754E', 2),
 ('S768I', 1),
 ('G598V', 1),
 ('L387M', 1),
 ('G719A', 1),
 ('E709A', 1)]

In [16]:
mutations_dict_items[10]

('D994D', 41)

In [17]:
#////////////////////////////////////////////////////////////////////////////
#
# want to create a per-patient EGFR mutations dataframe
#
#////////////////////////////////////////////////////////////////////////////

patientMetadata = pd.read_csv('/Users/lincoln.harris/Desktop/152-LAUD_cell_lists_and_various_shit/cDNA_plate_metadata.csv')
patientMetadata

Unnamed: 0,plate,sample_type,patient_id,DOB,gender,race,smokingHx,histolgy,driver_gene,driver_mutation,...,sample_name,processing_status,physical_description,sort_data_exported,cell_density,cDNA_cells,sequenced_cells_passQC,Sequence_Run1,Sequence_Run2,Sequence_Run3
0,,cell_line,Ewing,,,,,,,,...,CL_S1,Sequenced,,,,43.0,,170504_NS500126_0691_AHC22JBGX2,,
1,,cell_line,Fibroblasts,,,,,,,,...,CL_S1,Sequenced,,,,43.0,,170504_NS500126_0691_AHC22JBGX2,,
2,1001000332,cell_line,H1975,,,,,,,,...,CL_S1,Sequenced,,,,38.0,,170504_NS500126_0691_AHC22JBGX2,,
3,1001000330,cell_line,TPH1,,,,,,,,...,CL_S1,Sequenced,,,,37.0,,170504_NS500126_0691_AHC22JBGX2,,
4,1001000302,Lung_tumor,TH158,1959-11-23,Female,Native Hawaiian or Other Pacific Island,Never,Adenocarcinoma,EGFR,del19,...,LT_S01,Sequenced,,,,,44,170215_NS500126_0658_AH7TLYBGX2,,
5,1001000301,Lung_tumor,TH157,2016-11-25,Male,Asian,Never,Adenocarcinoma,ALK,fusion,...,LT_S02,Sequenced,,,,,27,170215_NS500126_0658_AH7TLYBGX2,,
6,1001000292,Lung_tumor,TH156,1973-03-17,Male,White or Caucasian,Former,Adenocarcinoma,ALK,fusion,...,LT_S03,Sequenced,,,,,63,170125_NS500126_0647_AHVHJ2BGXY,170202_NS500126_0653_AHVL5NBGXY,
7,1001000293,Lung_tumor,TH156,1973-03-17,Male,White or Caucasian,Former,Adenocarcinoma,ALK,fusion,...,LT_S03,Sequenced,,,,,64,170125_NS500126_0647_AHVHJ2BGXY,170202_NS500126_0653_AHVL5NBGXY,
8,1001000294,Lung_tumor,TH156,1973-03-17,Male,White or Caucasian,Former,Adenocarcinoma,ALK,fusion,...,LT_S03,Sequenced,,,,,59,170125_NS500126_0647_AHVHJ2BGXY,170202_NS500126_0653_AHVL5NBGXY,
9,1001000295,Lung_tumor,TH156,1973-03-17,Male,B003109,Former,Adenocarcinoma,ALK,fusion,...,LT_S03,Sequenced,,,,,89,170125_NS500126_0647_AHVHJ2BGXY,170202_NS500126_0653_AHVL5NBGXY,


In [18]:
patientMetadata = patientMetadata.drop([0,1])
patientMetadata

Unnamed: 0,plate,sample_type,patient_id,DOB,gender,race,smokingHx,histolgy,driver_gene,driver_mutation,...,sample_name,processing_status,physical_description,sort_data_exported,cell_density,cDNA_cells,sequenced_cells_passQC,Sequence_Run1,Sequence_Run2,Sequence_Run3
2,1001000332,cell_line,H1975,,,,,,,,...,CL_S1,Sequenced,,,,38.0,,170504_NS500126_0691_AHC22JBGX2,,
3,1001000330,cell_line,TPH1,,,,,,,,...,CL_S1,Sequenced,,,,37.0,,170504_NS500126_0691_AHC22JBGX2,,
4,1001000302,Lung_tumor,TH158,1959-11-23,Female,Native Hawaiian or Other Pacific Island,Never,Adenocarcinoma,EGFR,del19,...,LT_S01,Sequenced,,,,,44,170215_NS500126_0658_AH7TLYBGX2,,
5,1001000301,Lung_tumor,TH157,2016-11-25,Male,Asian,Never,Adenocarcinoma,ALK,fusion,...,LT_S02,Sequenced,,,,,27,170215_NS500126_0658_AH7TLYBGX2,,
6,1001000292,Lung_tumor,TH156,1973-03-17,Male,White or Caucasian,Former,Adenocarcinoma,ALK,fusion,...,LT_S03,Sequenced,,,,,63,170125_NS500126_0647_AHVHJ2BGXY,170202_NS500126_0653_AHVL5NBGXY,
7,1001000293,Lung_tumor,TH156,1973-03-17,Male,White or Caucasian,Former,Adenocarcinoma,ALK,fusion,...,LT_S03,Sequenced,,,,,64,170125_NS500126_0647_AHVHJ2BGXY,170202_NS500126_0653_AHVL5NBGXY,
8,1001000294,Lung_tumor,TH156,1973-03-17,Male,White or Caucasian,Former,Adenocarcinoma,ALK,fusion,...,LT_S03,Sequenced,,,,,59,170125_NS500126_0647_AHVHJ2BGXY,170202_NS500126_0653_AHVL5NBGXY,
9,1001000295,Lung_tumor,TH156,1973-03-17,Male,B003109,Former,Adenocarcinoma,ALK,fusion,...,LT_S03,Sequenced,,,,,89,170125_NS500126_0647_AHVHJ2BGXY,170202_NS500126_0653_AHVL5NBGXY,
10,1001000296,Lung_tumor,TH166,1974-12-15,Male,Hispanic or Latino,<5py,Adenocarcinoma,BRAF,V600E,...,LT_S04,Sequenced,,y,,,71,170129_NS500126_0650_AHVM75BGXY,170202_NS500126_0653_AHVL5NBGXY,
11,1001000297,Lung_tumor,TH166,1974-12-15,Male,Hispanic or Latino,<5py,Adenocarcinoma,BRAF,V600E,...,LT_S04,Sequenced,,y,,,79,170129_NS500126_0650_AHVM75BGXY,170202_NS500126_0653_AHVL5NBGXY,


In [19]:
egfr_df

Unnamed: 0,cell,mutations
0,P13_1001000339,
1,C1_B002572,
2,J2_B002573,
3,P22_B003116,
4,O22_B003191,
5,J12_B003529,T903T
6,D14_1001000407,"R521K,T903T,T629T,Q787Q"
7,F14_B003105,
8,P11_1001000340,
9,E21_B003930,T903T


In [20]:
colNames = ['patientID'] + list(mutations_dict.keys())
colNames

['patientID',
 '',
 'T903T',
 'R521K',
 'T629T',
 'Q787Q',
 'L858R',
 'E746_T751delELREAT',
 'E746_A750delELREA',
 'D1014N',
 'G857V',
 'D994D',
 'K745_A750>T',
 'S811F',
 'V1142V',
 'Q1020H',
 'A237V',
 'G221V',
 'P589L',
 'L747S',
 'S442I',
 'R1100S',
 'S921R',
 'L1167V',
 'L1034I',
 'F856L',
 'V536M',
 'R831H',
 'L861Q',
 'V769L',
 'V843L',
 'G652G',
 'I1093M',
 'P1019L',
 'R776H',
 'V774_C775insHV',
 'A237>?',
 'L907M',
 'G331R',
 'H1129Y',
 'K754E',
 'S768I',
 'G598V',
 'L387M',
 'G719A',
 'E709A']

In [21]:
# want to make a dataframe that has every patient and the specific EGFR mutations for that patient
egfr_muts_by_patient = pd.DataFrame(columns=colNames)
egfr_muts_by_patient

Unnamed: 0,patientID,Unnamed: 2,T903T,R521K,T629T,Q787Q,L858R,E746_T751delELREAT,E746_A750delELREA,D1014N,...,A237>?,L907M,G331R,H1129Y,K754E,S768I,G598V,L387M,G719A,E709A


In [22]:
uniquePatientIDs = set(patientMetadata['patient_id'])
uniquePatientIDs

{'H1975',
 'TH041',
 'TH067',
 'TH082',
 'TH100',
 'TH103',
 'TH107',
 'TH116',
 'TH124',
 'TH134_PDX',
 'TH143B',
 'TH144',
 'TH146',
 'TH150',
 'TH153',
 'TH155',
 'TH156',
 'TH157',
 'TH158',
 'TH166',
 'TH169',
 'TH171',
 'TH172',
 'TH174',
 'TH178',
 'TH179',
 'TH179_Normal',
 'TH183',
 'TH185',
 'TH187',
 'TH188',
 'TH199',
 'TH205',
 'TH208',
 'TH210',
 'TH214',
 'TH217',
 'TH218',
 'TH220',
 'TH222',
 'TH223',
 'TH225',
 'TH226',
 'TH227',
 'TH231',
 'TH236',
 'TH238',
 'TH238_Normal',
 'TH248',
 'TH249',
 'TH257',
 'TH266',
 'TPH1'}

In [23]:
list(uniquePatientIDs)

['TH222',
 'TH225',
 'TH100',
 'TH107',
 'TH238',
 'TPH1',
 'TH172',
 'TH208',
 'TH179_Normal',
 'TH174',
 'TH210',
 'TH124',
 'TH169',
 'TH134_PDX',
 'TH082',
 'TH231',
 'TH266',
 'TH143B',
 'H1975',
 'TH067',
 'TH158',
 'TH146',
 'TH150',
 'TH248',
 'TH249',
 'TH157',
 'TH153',
 'TH185',
 'TH187',
 'TH199',
 'TH041',
 'TH183',
 'TH155',
 'TH179',
 'TH218',
 'TH217',
 'TH227',
 'TH171',
 'TH220',
 'TH178',
 'TH214',
 'TH236',
 'TH156',
 'TH257',
 'TH116',
 'TH238_Normal',
 'TH144',
 'TH223',
 'TH188',
 'TH226',
 'TH166',
 'TH103',
 'TH205']

In [24]:
egfr_muts_by_patient['patientID'] = list(uniquePatientIDs)
egfr_muts_by_patient

Unnamed: 0,patientID,Unnamed: 2,T903T,R521K,T629T,Q787Q,L858R,E746_T751delELREAT,E746_A750delELREA,D1014N,...,A237>?,L907M,G331R,H1129Y,K754E,S768I,G598V,L387M,G719A,E709A
0,TH222,,,,,,,,,,...,,,,,,,,,,
1,TH225,,,,,,,,,,...,,,,,,,,,,
2,TH100,,,,,,,,,,...,,,,,,,,,,
3,TH107,,,,,,,,,,...,,,,,,,,,,
4,TH238,,,,,,,,,,...,,,,,,,,,,
5,TPH1,,,,,,,,,,...,,,,,,,,,,
6,TH172,,,,,,,,,,...,,,,,,,,,,
7,TH208,,,,,,,,,,...,,,,,,,,,,
8,TH179_Normal,,,,,,,,,,...,,,,,,,,,,
9,TH174,,,,,,,,,,...,,,,,,,,,,


In [25]:
# set all values to 0
egfr_muts_by_patient[:] = 0
egfr_muts_by_patient

Unnamed: 0,patientID,Unnamed: 2,T903T,R521K,T629T,Q787Q,L858R,E746_T751delELREAT,E746_A750delELREA,D1014N,...,A237>?,L907M,G331R,H1129Y,K754E,S768I,G598V,L387M,G719A,E709A
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
# dataframe looking good
egfr_muts_by_patient['patientID'] = list(uniquePatientIDs)
egfr_muts_by_patient

Unnamed: 0,patientID,Unnamed: 2,T903T,R521K,T629T,Q787Q,L858R,E746_T751delELREAT,E746_A750delELREA,D1014N,...,A237>?,L907M,G331R,H1129Y,K754E,S768I,G598V,L387M,G719A,E709A
0,TH222,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,TH225,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,TH100,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,TH107,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,TH238,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,TPH1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,TH172,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,TH208,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,TH179_Normal,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,TH174,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
# trying to fill it in 

for i in range(0,len(egfr_df.index)):
    currCell = egfr_df['cell'].iloc[i]
    currPlate = currCell.split('_')[1]
    
    index_to_keep = patientMetadata['plate'] == currPlate
    keepRow = patientMetadata[index_to_keep]
    try:
        currPatient = list(keepRow['patient_id'])[0]
    except IndexError:
        print('ERROR: plate not found') # maybe these are the plates that were NOT including in the analysis? 
        
    currRow = egfr_df['mutations'].iloc[i]
    currRowSplit = currRow.split(',')
    
    match_row = egfr_muts_by_patient[egfr_muts_by_patient['patientID'] == currPatient]
    match_row_index = match_row.index[0]
    try:
        for currMut in currRowSplit:
            egfr_muts_by_patient[currMut][match_row_index] += 1
    except:
        pass

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plat

ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plat

In [28]:
egfr_muts_by_patient

Unnamed: 0,patientID,Unnamed: 2,T903T,R521K,T629T,Q787Q,L858R,E746_T751delELREAT,E746_A750delELREA,D1014N,...,A237>?,L907M,G331R,H1129Y,K754E,S768I,G598V,L387M,G719A,E709A
0,TH222,332,35,1,1,17,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0
1,TH225,82,6,3,3,7,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,TH100,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,TH107,38,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,TH238,555,34,30,50,46,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
5,TPH1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,TH172,7,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,TH208,1,0,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,TH179_Normal,227,11,0,0,26,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,TH174,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [33]:
#///////////////////////////////////////////////////////////////////
#///////////////////////////////////////////////////////////////////
#///////////////////////////////////////////////////////////////////

In [42]:
#egfr_df
summaryTable = pd.DataFrame(columns=['cell', 'patient', 'clinical_mutation', 'coverage_bool', 'clin_mut_found_bool', 'mutations_found_EGFR', 'mutations_found_BRAF', 'mutations_found_KRAS', 'fusions_found'])
summaryTable

Unnamed: 0,cell,patient,clinical_mutation,coverage_bool,clin_mut_found_bool,mutations_found_EGFR,mutations_found_BRAF,mutations_found_KRAS,fusions_found


In [43]:
summaryTable['cell'] = egfr_df['cell']
summaryTable

Unnamed: 0,cell,patient,clinical_mutation,coverage_bool,clin_mut_found_bool,mutations_found_EGFR,mutations_found_BRAF,mutations_found_KRAS,fusions_found
0,P13_1001000339,,,,,,,,
1,C1_B002572,,,,,,,,
2,J2_B002573,,,,,,,,
3,P22_B003116,,,,,,,,
4,O22_B003191,,,,,,,,
5,J12_B003529,,,,,,,,
6,D14_1001000407,,,,,,,,
7,F14_B003105,,,,,,,,
8,P11_1001000340,,,,,,,,
9,E21_B003930,,,,,,,,


In [44]:
# fill in patient
for i in range(0,len(summaryTable.index)):
    currCell = summaryTable['cell'].iloc[i]
    currPlate = currCell.split('_')[1]
    
    index_to_keep = patientMetadata['plate'] == currPlate
    keepRow = patientMetadata[index_to_keep]
    try:
        currPatient = list(keepRow['patient_id'])[0]
        summaryTable['patient'][i] = currPatient
    except IndexError:
        print('ERROR: plate not found') # maybe these are the plates that were NOT including in the analysis? 

ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plate not found
ERROR: plat

In [45]:
summaryTable

Unnamed: 0,cell,patient,clinical_mutation,coverage_bool,clin_mut_found_bool,mutations_found_EGFR,mutations_found_BRAF,mutations_found_KRAS,fusions_found
0,P13_1001000339,TH169,,,,,,,
1,C1_B002572,TH266,,,,,,,
2,J2_B002573,TH266,,,,,,,
3,P22_B003116,TH231,,,,,,,
4,O22_B003191,TH226,,,,,,,
5,J12_B003529,TH171,,,,,,,
6,D14_1001000407,TH185,,,,,,,
7,F14_B003105,TH222,,,,,,,
8,P11_1001000340,TH169,,,,,,,
9,E21_B003930,TH222,,,,,,,


In [48]:
summaryTable['mutations_found_EGFR'] = egfr_df['mutations']
summaryTable

Unnamed: 0,cell,patient,clinical_mutation,coverage_bool,clin_mut_found_bool,mutations_found_EGFR,mutations_found_BRAF,mutations_found_KRAS,fusions_found
0,P13_1001000339,TH169,,,,,,,
1,C1_B002572,TH266,,,,,,,
2,J2_B002573,TH266,,,,,,,
3,P22_B003116,TH231,,,,,,,
4,O22_B003191,TH226,,,,,,,
5,J12_B003529,TH171,,,,T903T,,,
6,D14_1001000407,TH185,,,,"R521K,T903T,T629T,Q787Q",,,
7,F14_B003105,TH222,,,,,,,
8,P11_1001000340,TH169,,,,,,,
9,E21_B003930,TH222,,,,T903T,,,


In [51]:
# read-in fusion dataframe
fusionsDF = pd.read_csv('./fusion_dataframe.csv')
fusionsDF

Unnamed: 0,ALK--EML4,ALK_any,EML4_any,NTRK_any,RET_any,ROS1_any
0,C2_B000862,K14_B003132,L18_B003120,C8_B003191,E2_B003920,D10_B003523
1,P1_B001464,G19_B003121,D10_B003523,D8_B003126,B15_B003529,L17_B003116
2,M11_B003522,O13_1001000339,I22_B000276,J11_B003528,M21_B002572,F5_B001556
3,G8_1001000317,I13_B003046,A4_B001607,B5_B002078,J6_B001470,I5_B003070
4,A7_10001000325,M8_B003119,I6_B003642,J18_B002571,J19_B003070,J5_B003125
5,M12_B003522,K2_B003522,I4_B001607,P6_B001545,G13_B000862,G5_1001000327
6,B11_10001000325,N8_B001556,P20_B002571,O17_B002097,D15_B002078,J22_B003120
7,G2_B000862,D18_B002073,E5_B001545,K10_B003528,P19_B002572,O24_1001000377
8,J15_B000862,O17_1001000409,E1_B003117,C17_B003071,D19_B003132,A3_B003067
9,E7_1001000317,C2_B000862,O2_B003067,,G11_B001543,L20_B002572


In [71]:
currCell = 'B11_10001000325'
currCell = 'foo'
colList = list(fusionsDF['ALK--EML4'])

currCell in colList

False

In [101]:
# i think this worked?
for i in range(0, len(summaryTable.index)):
    currCell = summaryTable['cell'][i]
    fusionsListCurr = []
    
    colList0 = list(fusionsDF['ALK--EML4'])
    colList1 = list(fusionsDF['ALK_any'])
    colList2 = list(fusionsDF['EML4_any'])
    colList3 = list(fusionsDF['NTRK_any'])
    colList4 = list(fusionsDF['RET_any'])
    colList5 = list(fusionsDF['ROS1_any'])

    if currCell in colList0:
        fusionsListCurr.append('ALK-EML4')
    elif currCell in colList1:
        fusionsListCurr.append('ALK_any')
    elif currCell in colList2:
        fusionsListCurr.append('EML4_any')
    elif currCell in colList3:
        fusionsListCurr.append('NTRK_any')
    elif currCell in colList4:
        fusionsListCurr.append('RET_any')
    elif currCell in colList0:
        fusionsListCurr.append('ROS1_any')
 
    print(fusionsListCurr)
    summaryTable['fusions_found'][i] = fusionsListCurr

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['RET_any']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[

[]
[]
[]
[]
[]
[]
[]
[]
['NTRK_any']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['ALK_any']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['RET_any']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]


[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['EML4_any']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['ALK_any']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
['ALK-EML4']
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]

In [107]:
list(summaryTable['fusions_found'])

[[],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 ['RET_any'],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 

In [94]:
fusionsListCurr = []

In [95]:
fusionsListCurr.append('oogabgooga')
fusionsListCurr

['oogabgooga']

In [96]:
fusionsListCurr

['oogabgooga']

In [None]:
fusions