In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from itertools import islice
import os
from sklearn.decomposition import PCA

# !cat /proc/sys/vm/overcommit_memory
# !echo 1 > /proc/sys/vm/overcommit_memory

In [2]:
# Read genomic feature data

In [3]:
# Read only the first 10 rows to learn the column names and inferred dtypes.
gen_data = pd.read_csv("./data/genomic_features.tsv", sep="\t", nrows=10)

# Optimize datatypes to save memory: the identifier / label columns listed in
# vlist keep the dtype pandas inferred, every other column is read as int16.
gene_dict = gen_data.dtypes.apply(lambda x: x.name).to_dict()
vlist = ['COSMIC_ID', 'Sample Name', 'TISSUE_FACTOR']
new_gene_dict = {
    col: (inferred if col in vlist else 'int16')
    for col, inferred in gene_dict.items()
}

# Full read of the same file using the compact dtype mapping.
genomic_features = pd.read_csv("./data/genomic_features.tsv", sep="\t", dtype=new_gene_dict)

# The preview is taken from the 10-row sample frame, not from genomic_features.
gen_data.head(2)

Unnamed: 0,COSMIC_ID,Sample Name,TISSUE_FACTOR,MSI_FACTOR,ABCB1_mut,ABL2_mut,ACACA_mut,ACVR1B_mut,ACVR2A_mut,AFF4_mut,...,BAZ2B_mut,BCLAF1_mut,BCOR_mut,BCR-ABL_mut,BLM_mut,BMPR2_mut,BNC2_mut,BPTF_mut,BRAF_mut,TP53_mut
0,1287381,201T,lung_NSCLC,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,924100,22RV1,prostate,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [4]:
# Cell line details

In [5]:
# Load the cell-line details sheet and preview its first two rows.
cell_lines_path = './data/Cell_Lines_Details.csv'
celld = pd.read_csv(cell_lines_path)
celld.head(n=2)

Unnamed: 0,Sample Name,COSMIC identifier,Whole Exome Sequencing (WES),Copy Number Alterations (CNA),Gene Expression,Methylation,Drug\nResponse,GDSC\nTissue descriptor 1,GDSC\nTissue\ndescriptor 2,Cancer Type\n(matching TCGA label),Microsatellite \ninstability Status (MSI),Screen Medium,Growth Properties
0,A253,906794.0,Y,Y,Y,Y,Y,aero_dig_tract,head and neck,,MSS/MSI-L,D/F12,Adherent
1,BB30-HNC,753531.0,Y,Y,Y,Y,Y,aero_dig_tract,head and neck,HNSC,MSS/MSI-L,D/F12,Adherent


In [6]:
# Load the screened-compound (drug) annotation table and preview it.
drug_d = pd.read_csv("./data/Screened_Compounds.csv", sep=',')
drug_d.head(n=5)

Unnamed: 0,DRUG_ID,DRUG_NAME,SYNONYMS,TARGET,TARGET_PATHWAY
0,1,Erlotinib,"Tarceva, RG-1415, CP-358774, OSI-774, Ro-50823...",EGFR,EGFR signaling
1,3,Rapamycin,"AY-22989, Sirolimus, WY-090217, Torisel, Rapamune",MTORC1,PI3K/MTOR signaling
2,5,Sunitinib,"Sutent, Sunitinib Malate, SU-11248","PDGFR, KIT, VEGFR, FLT3, RET, CSF1R",RTK signaling
3,6,PHA-665752,"PHA665752, PHA 665752",MET,RTK signaling
4,9,MG-132,"LLL cpd, MG 132, MG132","Proteasome, CAPN1",Protein stability and degradation


In [7]:
# Load the fitted dose-response table, using IC50_RESULTS_ID as the index.
dose_response_df_all = pd.read_csv(
    './data/v17.3_fitted_dose_response_binary.csv',
    index_col='IC50_RESULTS_ID',
)

# Attach the drug annotations (left join on DRUG_ID), then label rows that
# have no TARGET_PATHWAY after the join as "UNK".
dose_response_df = dose_response_df_all.merge(drug_d, how='left', on='DRUG_ID')
dose_response_df = dose_response_df.assign(
    TARGET_PATHWAY=dose_response_df['TARGET_PATHWAY'].fillna("UNK")
)
dose_response_df.head(n=2)

Unnamed: 0,DATASET_VERSION,COSMIC_ID,CELL_LINE_NAME,DRUG_ID,DRUG_NAME_x,PUTATIVE_TARGET,MAX_CONC_MICROMOLAR,MIN_CONC_MICROMOLAR,LN_IC50,AUC,RMSE,Z_SCORE,BINARY_RESPONSE,DRUG_NAME_y,SYNONYMS,TARGET,TARGET_PATHWAY
0,17.3,683665,MC-CAR,1,Erlotinib,EGFR,2.0,0.007812,2.453524,0.98261,0.021678,-0.015505,R,Erlotinib,"Tarceva, RG-1415, CP-358774, OSI-774, Ro-50823...",EGFR,EGFR signaling
1,17.3,684055,ES3,1,Erlotinib,EGFR,2.0,0.007812,3.376592,0.985169,0.029915,0.779999,R,Erlotinib,"Tarceva, RG-1415, CP-358774, OSI-774, Ro-50823...",EGFR,EGFR signaling


In [8]:
# Read the TCGA label lookup and the cell-line details, keeping only the
# identifier and the tissue / cancer-type descriptor columns of the latter.
tcga_labels = pd.read_csv("./data/TCGA_Labels.csv")
celld_keep = [
    'COSMIC identifier',
    'GDSC\nTissue descriptor 1',
    'GDSC\nTissue\ndescriptor 2',
    'Cancer Type\n(matching TCGA label)',
]
celld = pd.read_csv('./data/Cell_Lines_Details.csv')[celld_keep]

# Outer join against the TCGA labels; indicator=True adds a `_merge` column.
celld_desc = celld.merge(
    tcga_labels,
    how='outer',
    left_on='Cancer Type\n(matching TCGA label)',
    right_on="Sigle",
    indicator=True,
)
# Rows without a matching label get the placeholder program 'UNK'.
celld_desc['Program'] = celld_desc['Program'].fillna('UNK')

# Replace the multi-line spreadsheet headers with short column names.
short_names = {
    'COSMIC identifier': 'COSMIC_ID',
    'GDSC\nTissue descriptor 1': 'GDSC_DESC1',
    'GDSC\nTissue\ndescriptor 2': 'GDSC_DESC2',
    'Cancer Type\n(matching TCGA label)': 'TCGA_Label',
    'Program': 'Cancer_Type',
}
celld_desc = celld_desc.rename(columns=short_names)

# Annotate every dose-response row with its cell-line descriptors.
dose_resp_caname = dose_response_df.merge(celld_desc, how='left', on='COSMIC_ID')

trimmed_columns = ['COSMIC_ID', 'DRUG_ID', 'LN_IC50', 'BINARY_RESPONSE', 'TCGA_Label', 'Cancer_Type', 'TARGET_PATHWAY']
dose_response_trimmed_df = dose_resp_caname[trimmed_columns]
dose_response_trimmed_df.head(n=2)

Unnamed: 0,COSMIC_ID,DRUG_ID,LN_IC50,BINARY_RESPONSE,TCGA_Label,Cancer_Type,TARGET_PATHWAY
0,683665,1,2.453524,R,MM,Multiple Myeloma,EGFR signaling
1,684055,1,3.376592,R,,UNK,EGFR signaling


In [9]:
# WES (Whole Exome Sequencing) data preparation

In [10]:
# Read only the first 10 rows of the WES table to inspect its columns and dtypes.
wes_scored_temp = pd.read_csv(
    "./data/wes_scored_transposed.tsv",
    sep="\t",
    nrows=10,
)
wes_scored_temp.head(n=5)

Unnamed: 0.1,Unnamed: 0,1,2,9,10,12,13,14,15,16,...,101060321,101927546,101927722,101928638,102724473,102724928,105375355,105378803,107403068,109731405
0,907268,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,907269,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,907270,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,907271,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,907272,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Read full WES data with right data types:
# the 'Unnamed: 0' column keeps the dtype inferred from the 10-row sample,
# every other column is read as int16 to save memory.
wes_dict = wes_scored_temp.dtypes.apply(lambda x: x.name).to_dict()
new_wes_dict = {
    col: (inferred if col == 'Unnamed: 0' else 'int16')
    for col, inferred in wes_dict.items()
}

wes_scored = pd.read_csv("./data/wes_scored_transposed.tsv", sep="\t", dtype=new_wes_dict)

In [12]:
# Rename the 'Unnamed: 0' column to COSMIC_ID, the name used as the merge key.
wes_scored = wes_scored.rename({"Unnamed: 0": "COSMIC_ID"}, axis="columns")

In [13]:
# Subset the annotated dose-response rows to one target pathway and one
# GDSC tissue descriptor. Change these two constants to score another subset.
PATHWAY_OF_INTEREST = 'RTK signaling'
TISSUE_OF_INTEREST = 'lung_SCLC'

rtk = dose_resp_caname[
    (dose_resp_caname['TARGET_PATHWAY'] == PATHWAY_OF_INTEREST)
    & (dose_resp_caname['GDSC_DESC1'] == TISSUE_OF_INTEREST)
]
rtk.shape

(1096, 23)

In [14]:
# Attach the WES columns to each selected dose-response row (inner join on COSMIC_ID).
rtk_wes_joined_df = wes_scored.merge(rtk, how='inner', on='COSMIC_ID')

# Print the joined shape, then the shape of rtk, to compare row counts.
for frame in (rtk_wes_joined_df, rtk):
    print(frame.shape)

(1096, 18404)
(1096, 23)


In [15]:
# Build a composite "<COSMIC_ID>_<DRUG_ID>" key for every row.
cosmic_part = rtk_wes_joined_df['COSMIC_ID'].map(str)
drug_part = rtk_wes_joined_df['DRUG_ID'].map(str)
rtk_wes_joined_df['COSMIC_DRUG_ID'] = cosmic_part + "_" + drug_part

# Keep the composite key plus a fixed list of WES columns.
wes_keep = [
    'COSMIC_DRUG_ID',
    '347733', '440560', '4633', '2019', '5706', '192683', '1063', '2220',
    '4829', '5137', '7767', '7982', '9994', '23033', '51585', '55051',
    '57509', '65083', '84695', '119395', '148137', '387266',
]
wes_subset = rtk_wes_joined_df[wes_keep]

In [16]:
# CNA (Copy Number Alterations) data preparation

In [17]:
# Read only the first 10 rows of the CNA table to inspect its columns and dtypes.
cna_scored_temp = pd.read_csv(
    "./data/cna_scored_transposed.tsv",
    sep="\t",
    nrows=10,
)
cna_scored_temp.head(n=5)

Unnamed: 0,COSMIC_ID,5S_rRNA,5_8S_rRNA,7SK,A1BG,A1CF,A2LD1,A2M,A2ML1,A2ML1-AS1,...,snoZ185,snoZ247,snoZ278,snoZ39,snoZ40,snoZ5,snoZ6,snosnR60_Z15,snosnR66,yR211F11.2
0,1240121,-4.32,0.584963,-4.32,0.0,0.0,0.584963,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,-1.0,-1.0,0.0
1,1240122,-1.0,0.0,-4.32,0.0,0.0,0.0,0.584963,0.584963,0.584963,...,1.0,1.0,0.584963,0.584963,0.0,0.0,0.0,0.0,0.584963,0.584963
2,1240123,-4.32,-0.415037,-4.32,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-0.415037,-0.415037,0.0,0.0,0.0,0.0,-0.415037,0.0
3,1240124,-4.32,0.0,-4.32,0.0,0.0,0.584963,0.584963,0.584963,0.584963,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1240125,-2.0,-2.0,-2.0,0.0,-0.415037,0.0,0.0,0.0,0.0,...,0.0,-0.415037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0


In [18]:
# Build the dtype mapping for the full CNA read: COSMIC_ID keeps the dtype
# inferred from the 10-row sample, every other column is stored as float16.
# float16 saves memory but rounds the values (e.g. -4.32 shows as -4.320312
# once the full table is loaded with this mapping).
cna_dict = cna_scored_temp.dtypes.apply(lambda x: x.name).to_dict()
new_cna_dict = {
    col: (inferred if col == 'COSMIC_ID' else 'float16')
    for col, inferred in cna_dict.items()
}

In [19]:
# Read the full CNA table using the compact dtype mapping, then preview it.
cna_scored = pd.read_csv(
    "./data/cna_scored_transposed.tsv",
    sep="\t",
    dtype=new_cna_dict,
)
cna_scored.head(n=5)

Unnamed: 0,COSMIC_ID,5S_rRNA,5_8S_rRNA,7SK,A1BG,A1CF,A2LD1,A2M,A2ML1,A2ML1-AS1,...,snoZ185,snoZ247,snoZ278,snoZ39,snoZ40,snoZ5,snoZ6,snosnR60_Z15,snosnR66,yR211F11.2
0,1240121,-4.320312,0.584961,-4.320312,0.0,0.0,0.584961,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,-1.0,-1.0,0.0
1,1240122,-1.0,0.0,-4.320312,0.0,0.0,0.0,0.584961,0.584961,0.584961,...,1.0,1.0,0.584961,0.584961,0.0,0.0,0.0,0.0,0.584961,0.584961
2,1240123,-4.320312,-0.415039,-4.320312,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-0.415039,-0.415039,0.0,0.0,0.0,0.0,-0.415039,0.0
3,1240124,-4.320312,0.0,-4.320312,0.0,0.0,0.584961,0.584961,0.584961,0.584961,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1240125,-2.0,-2.0,-2.0,0.0,-0.415039,0.0,0.0,0.0,0.0,...,0.0,-0.415039,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0


In [20]:
# rtk still carries the `_merge` indicator column from the earlier outer merge;
# it must go before the merge below can add its own indicator column.
# errors='ignore' keeps this cell re-runnable once the column is already gone.
rtk = rtk.drop(columns=['_merge'], errors='ignore')
rtk_cna_joined_df = pd.merge(cna_scored, rtk, how='inner', on='COSMIC_ID', indicator=True)
rtk_cna_joined_df['COSMIC_DRUG_ID'] = (
    rtk_cna_joined_df['COSMIC_ID'].map(str) + "_" + rtk_cna_joined_df['DRUG_ID'].map(str)
)

#Make a copy with original labels and other fields
rtk_cna_joined_df_copy_with_label = rtk_cna_joined_df.copy()

# Remove every non-CNA column (identifiers, response values, drug and
# cell-line annotations) so only the CNA columns remain, keyed by COSMIC_DRUG_ID.
non_cna_columns = [
    "LN_IC50", "COSMIC_ID", "CELL_LINE_NAME", "DRUG_ID", "_merge", "Sigle",
    "GDSC_DESC1", "GDSC_DESC2", "TCGA_Label", "Cancer_Type",
    'MAX_CONC_MICROMOLAR', 'MIN_CONC_MICROMOLAR', 'AUC', 'RMSE', 'Z_SCORE',
    'DATASET_VERSION', 'PUTATIVE_TARGET', 'DRUG_NAME_x', 'DRUG_NAME_y',
    'SYNONYMS', 'TARGET', 'TARGET_PATHWAY', 'BINARY_RESPONSE',
]
rtk_cna_joined_df = rtk_cna_joined_df.drop(columns=non_cna_columns)
rtk_cna_joined_df = rtk_cna_joined_df.set_index("COSMIC_DRUG_ID")


In [21]:
# Preview the first two rows of the CNA-only frame.
rtk_cna_joined_df.head(n=2)

Unnamed: 0_level_0,5S_rRNA,5_8S_rRNA,7SK,A1BG,A1CF,A2LD1,A2M,A2ML1,A2ML1-AS1,A2ML1-AS2,...,snoZ185,snoZ247,snoZ278,snoZ39,snoZ40,snoZ5,snoZ6,snosnR60_Z15,snosnR66,yR211F11.2
COSMIC_DRUG_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1240182_5,0.415039,0.0,-4.320312,0.0,0.0,1.0,0.415039,0.415039,0.415039,0.415039,...,0.0,0.0,-0.584961,-0.584961,-0.584961,0.0,0.0,-0.584961,0.415039,0.415039
1240182_6,0.415039,0.0,-4.320312,0.0,0.0,1.0,0.415039,0.415039,0.415039,0.415039,...,0.0,0.0,-0.584961,-0.584961,-0.584961,0.0,0.0,-0.584961,0.415039,0.415039


In [22]:
#Apply PCA on CNA test
# sklearn.externals.joblib was removed in scikit-learn 0.23; use the standalone
# joblib package and fall back to the old location only on legacy installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Load from file
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load .pkl files from a trusted source.
joblib_file = "cna_pca.pkl"
joblib_pca = joblib.load(joblib_file)

# Transform test file
devcnapca = joblib_pca.transform(rtk_cna_joined_df)

# Name one column per component actually returned by the transform
# (cna_princ1, cna_princ2, ...) instead of assuming exactly 40.
varlist = ["cna_princ" + str(component) for component in range(1, devcnapca.shape[1] + 1)]

cna_subset = pd.DataFrame(data=devcnapca, columns=varlist, index=rtk_cna_joined_df.index)
# Turn the COSMIC_DRUG_ID index back into a regular column.
cna_subset = cna_subset.reset_index()
cna_subset.head()



Unnamed: 0,COSMIC_DRUG_ID,cna_princ1,cna_princ2,cna_princ3,cna_princ4,cna_princ5,cna_princ6,cna_princ7,cna_princ8,cna_princ9,...,cna_princ31,cna_princ32,cna_princ33,cna_princ34,cna_princ35,cna_princ36,cna_princ37,cna_princ38,cna_princ39,cna_princ40
0,1240182_5,-15.766352,-39.730151,-29.636514,10.296064,17.223757,-13.137069,-4.575651,-2.926855,20.152573,...,9.860055,0.786128,-4.007407,1.328369,5.47242,-4.260553,-2.767509,-1.259483,-0.575172,-1.583487
1,1240182_6,-15.766352,-39.730151,-29.636514,10.296064,17.223757,-13.137069,-4.575651,-2.926855,20.152573,...,9.860055,0.786128,-4.007407,1.328369,5.47242,-4.260553,-2.767509,-1.259483,-0.575172,-1.583487
2,1240182_30,-15.766352,-39.730151,-29.636514,10.296064,17.223757,-13.137069,-4.575651,-2.926855,20.152573,...,9.860055,0.786128,-4.007407,1.328369,5.47242,-4.260553,-2.767509,-1.259483,-0.575172,-1.583487
3,1240182_34,-15.766352,-39.730151,-29.636514,10.296064,17.223757,-13.137069,-4.575651,-2.926855,20.152573,...,9.860055,0.786128,-4.007407,1.328369,5.47242,-4.260553,-2.767509,-1.259483,-0.575172,-1.583487
4,1240182_35,-15.766352,-39.730151,-29.636514,10.296064,17.223757,-13.137069,-4.575651,-2.926855,20.152573,...,9.860055,0.786128,-4.007407,1.328369,5.47242,-4.260553,-2.767509,-1.259483,-0.575172,-1.583487


In [23]:
#Join the WES and CNA, split COSMIC_DRUG_ID to COSMIC_ID and DRUG_ID
cna_wes_joined = pd.merge(cna_subset, wes_subset, how='inner', on='COSMIC_DRUG_ID')

# COSMIC_DRUG_ID has the form "<COSMIC_ID>_<DRUG_ID>". One anchored pattern
# captures both parts (column 0 = COSMIC_ID, column 1 = DRUG_ID); a key that
# does not match yields NaN and makes astype(int) fail loudly.
id_parts = cna_wes_joined['COSMIC_DRUG_ID'].str.extract(r'^(\d+)_(\d+)$')

cna_wes_joined["DRUG_ID"] = id_parts[1].astype(int)
cna_wes_joined["COSMIC_ID"] = id_parts[0].astype(int)

cna_wes_joined.head()

Unnamed: 0,COSMIC_DRUG_ID,cna_princ1,cna_princ2,cna_princ3,cna_princ4,cna_princ5,cna_princ6,cna_princ7,cna_princ8,cna_princ9,...,51585,55051,57509,65083,84695,119395,148137,387266,DRUG_ID,COSMIC_ID
0,1240182_5,-15.766352,-39.730151,-29.636514,10.296064,17.223757,-13.137069,-4.575651,-2.926855,20.152573,...,0,0,0,0,0,0,0,0,5,1240182
1,1240182_6,-15.766352,-39.730151,-29.636514,10.296064,17.223757,-13.137069,-4.575651,-2.926855,20.152573,...,0,0,0,0,0,0,0,0,6,1240182
2,1240182_30,-15.766352,-39.730151,-29.636514,10.296064,17.223757,-13.137069,-4.575651,-2.926855,20.152573,...,0,0,0,0,0,0,0,0,30,1240182
3,1240182_34,-15.766352,-39.730151,-29.636514,10.296064,17.223757,-13.137069,-4.575651,-2.926855,20.152573,...,0,0,0,0,0,0,0,0,34,1240182
4,1240182_35,-15.766352,-39.730151,-29.636514,10.296064,17.223757,-13.137069,-4.575651,-2.926855,20.152573,...,0,0,0,0,0,0,0,0,35,1240182


In [24]:
#Transform drug_id to indicator columns
# The prefix "drug_id_" plus get_dummies' default "_" separator produces names
# like "drug_id__5"; kept as-is (presumably what the saved model expects — confirm).
drug_dummies = pd.get_dummies(cna_wes_joined['DRUG_ID'], prefix="drug_id_")

# Drop indicator columns left over from a previous execution of this cell so
# that re-running it does not append duplicate columns.
cna_wes_joined = pd.concat(
    [cna_wes_joined.drop(columns=drug_dummies.columns, errors='ignore'), drug_dummies],
    axis=1,
)

In [25]:
# Load the per-drug target counts (columns DRUG_ID and Num_Targets) and preview them.
drug_target = pd.read_csv("./data/drugs_num_target.csv")
drug_target.head(n=3)

Unnamed: 0,DRUG_ID,Num_Targets
0,1,1
1,3,1
2,5,6


In [26]:
# Load the drug-by-target indicator table and discard its num_targets column.
drug_with_target = (
    pd.read_csv("./data/drugs_with_target.csv")
    .drop(columns=['num_targets'])
)
drug_with_target.head(n=3)

Unnamed: 0,DRUG_ID,ABL,ABL(T315I),ADCK4,AKT1,AKT2,AKT3,ALK,ALK4,ALK5,...,WEE1,XIAP,c-FGR,dsDNAbreakinduction,gamma-secretase,notdefined,others,p38,p38alpha,p38beta
0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
# Combine everything: CNA+WES rows get the genomic features (by COSMIC_ID),
# then the target counts and the target indicators (both by DRUG_ID).
cna_wes_gen_joined = cna_wes_joined.merge(genomic_features, how='inner', on='COSMIC_ID')
all_joined = cna_wes_gen_joined.merge(drug_target, how='inner', on='DRUG_ID')
all_joined1 = all_joined.merge(drug_with_target, how='inner', on='DRUG_ID')

# Remove the label and raw identifier columns; COSMIC_DRUG_ID is kept.
columns_to_remove = ['Sample Name', 'TISSUE_FACTOR', 'DRUG_ID', 'COSMIC_ID']
all_joined2 = all_joined1.drop(columns=columns_to_remove)
all_joined2.head(n=5)

Unnamed: 0,COSMIC_DRUG_ID,cna_princ1,cna_princ2,cna_princ3,cna_princ4,cna_princ5,cna_princ6,cna_princ7,cna_princ8,cna_princ9,...,WEE1,XIAP,c-FGR,dsDNAbreakinduction,gamma-secretase,notdefined,others,p38,p38alpha,p38beta
0,1240182_5,-15.766352,-39.730151,-29.636514,10.296064,17.223757,-13.137069,-4.575651,-2.926855,20.152573,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1240189_5,-68.156404,12.673219,0.608561,-18.400234,42.853608,-18.695628,-37.22706,-36.987272,3.099806,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1240193_5,17.778145,-33.330183,9.331581,-9.952956,-18.784274,-3.682625,-5.890988,-12.620705,-3.50223,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1297439_5,49.013343,29.084473,-16.956284,-11.465734,-4.367829,4.769551,2.843716,-4.723054,-2.780999,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1303901_5,-26.5956,5.769174,5.868567,11.923177,7.155347,0.006829,-3.868658,27.349533,8.450679,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
# Same frame as all_joined2, with COSMIC_DRUG_ID moved from a column to the index.
all_joined3 = all_joined2.set_index(keys='COSMIC_DRUG_ID')

In [29]:
# Score with model

In [30]:
# Restore the persisted model from disk; relies on `joblib` imported in an earlier cell.
joblib_file = "model_to_score.pkl"
saved_model = joblib.load(filename=joblib_file)

In [31]:
# Probability cut-off at or above which a row is flagged as predicted_resp = 1.
THRESHOLD = 0.5

# NOTE(review): the model is scored on all_joined2, which still contains the
# COSMIC_DRUG_ID string column, while all_joined3 (same data, COSMIC_DRUG_ID as
# index) is used only for the row labels. Confirm which frame matches the
# features the saved model was trained on before changing this call.
# [:, 1] keeps the second column of predict_proba's output — presumably the
# probability of the sensitive ("S") class; verify against saved_model.classes_.
resp_prob_test = pd.DataFrame(
    data=saved_model.predict_proba(all_joined2)[:, 1],
    index=all_joined3.index,
    columns=['S_prob'],
)
resp_prob_test['predicted_resp'] = np.where(resp_prob_test['S_prob'] >= THRESHOLD, 1, 0)

# The former bare `resp_prob_test.reset_index()` discarded its result and had no
# effect, so it was removed; COSMIC_DRUG_ID stays as the index.
resp_prob_test.head()

Unnamed: 0_level_0,S_prob,predicted_resp
COSMIC_DRUG_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1240182_5,0.458498,0
1240189_5,0.545378,1
1240193_5,0.451615,0
1297439_5,0.513999,1
1303901_5,0.518769,1
