In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

2023-07-16 08:20:43.998148: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# read the file that has smiles and ids

In [3]:
pubchem_to_drugs_df = pd.read_csv('../data/GDSC/1.Drug_listMon Jun 24 09_00_55 2019.csv')

In [4]:
pubchem_to_drugs_df = pubchem_to_drugs_df[["drug_id", "PubCHEM"]]

In [5]:
pubchem_to_drugs_df.dtypes

drug_id     int64
PubCHEM    object
dtype: object

In [6]:
# keep PubCHEM entries that are purely digits; anything else becomes NaN
pubchem_to_drugs_df["PubCHEM"] = pubchem_to_drugs_df["PubCHEM"].where(
    pubchem_to_drugs_df["PubCHEM"].map(lambda cid: str(cid).isdigit())
)

In [7]:
pubchem_to_drugs_df = pubchem_to_drugs_df.dropna()

In [8]:
pubchem_to_drugs_df["PubCHEM"] = pubchem_to_drugs_df["PubCHEM"].astype(np.int64)

In [9]:
pubchem_to_smiles = pd.read_csv('../data/223drugs_pubchem_smiles.txt', sep="\t", header=None)

In [10]:
pubchem_to_smiles.columns = ["PubCHEM", "Smiles"]

In [11]:
pubchem_to_smiles["PubCHEM"] = pubchem_to_smiles["PubCHEM"].astype(np.int64)

In [12]:
pubchem_drugs_smiles_df = pubchem_to_drugs_df.merge(pubchem_to_smiles, on = "PubCHEM")

In [13]:
def get_emb_models(dataset, id_col, norm = False):
    """Build a frozen Keras model that looks up a feature vector by string ID.

    The model chains a ``TextVectorization`` layer (whole-string tokens, no
    standardisation, vocabulary = the IDs in row order) with a non-trainable
    ``Embedding`` whose weight matrix is the feature columns of ``dataset``,
    and flattens the result.

    Two all-zero rows are prepended to the weight matrix because, per the
    Keras documentation, ``TextVectorization`` in ``"int"`` mode reserves
    index 0 for padding and index 1 for out-of-vocabulary tokens; the real
    IDs therefore start at index 2.

    Parameters
    ----------
    dataset : pandas.DataFrame
        One row per entity. ``id_col`` holds the identifier; every other
        column is stored as a feature.
    id_col : str
        Name of the identifier column. Values are converted to ``str`` and
        must be unique.
    norm : bool, default False
        If true, standardise each feature column with ``StandardScaler``
        before storing it. The scaler is fitted on the real rows only, so
        the two reserved rows stay at zero and do not bias the statistics.

    Returns
    -------
    tf.keras.Model
        Takes a string tensor of shape ``(batch, 1)`` and returns a tensor
        of shape ``(batch, n_features)``.

    Raises
    ------
    ValueError
        If ``id_col`` contains duplicate identifiers (the vocabulary must
        map one-to-one onto embedding rows).
    """
    # TextVectorization needs string tokens; cast so numeric ID columns work too
    ids = dataset[id_col].astype(str)
    if ids.duplicated().any():
        raise ValueError(f"column {id_col!r} contains duplicate identifiers")

    # +2 for the reserved padding and out-of-vocabulary indices
    vocab_size = len(ids) + 2
    text_vec_layer = tf.keras.layers.TextVectorization(max_tokens = vocab_size,
                                                       standardize = None, split = None,
                                                       output_mode = "int",
                                                       vocabulary = ids.tolist())

    # keyword form: the positional `axis` argument of DataFrame.drop is deprecated
    features = dataset.drop(columns = [id_col]).values
    if norm:
        # scale before padding so the synthetic zero rows are not part of the fit
        features = StandardScaler().fit_transform(features)

    padding_zeros = np.zeros((2, features.shape[1]))
    weights = np.vstack((padding_zeros, features))

    emb_layer = tf.keras.layers.Embedding(vocab_size,
                                          weights.shape[1],
                                          weights = [weights],
                                          trainable = False)
    input_layer = tf.keras.layers.Input(shape = (1,), dtype = tf.string)
    vec_out = text_vec_layer(input_layer)
    emb_out = emb_layer(vec_out)
    flat_out = tf.keras.layers.Flatten()(emb_out)
    emb_model = tf.keras.models.Model(input_layer, flat_out)
    return emb_model

In [14]:
# read cancer cell line copy-number features (CCLE)
cancer_cell_copy_num_df = pd.read_csv('../data/CCLE/genomic_copynumber_561celllines_710genes_demap_features.csv')

In [15]:
cancer_cell_copy_num_df.shape

(561, 711)

In [16]:
cancer_cell_copy_num_df.head()

Unnamed: 0.1,Unnamed: 0,AKT3,ABI1,SH2B3,CDH10,CDH11,AKAP9,CDH17,LHFP,CDK4,...,CD79B,BCLAF1,KEAP1,SETDB1,SRGAP3,MAFB,GOLGA5,THRAP3,MED12,CDH1
0,ACH-000828,1.548332,1.040041,1.020747,1.374471,1.0422,0.848321,1.72935,0.847032,1.025471,...,1.121307,0.830675,0.879632,1.492768,1.053714,1.014371,1.213421,0.780331,1.01326,0.64187
1,ACH-000568,0.798791,1.072012,1.038193,1.061508,0.792855,1.061508,1.271905,0.779829,1.002602,...,1.316797,1.049884,0.821346,1.621861,1.291863,1.046269,0.799812,1.074006,1.00551,0.792855
2,ACH-000560,1.080266,0.864616,0.694564,1.053766,0.628903,1.423261,1.3695,0.632267,0.911728,...,1.100098,1.069393,1.232225,0.860476,1.063332,1.194632,1.050246,0.894051,0.873167,0.620049
3,ACH-000561,1.032649,1.408964,0.949651,1.293622,1.274622,1.503638,1.470401,1.198447,0.893277,...,1.063162,0.660578,0.892464,1.252069,0.650263,1.178741,1.052244,0.902153,0.681885,1.274622
4,ACH-000562,1.406262,1.005521,0.997044,1.015986,0.932915,1.007777,1.051055,0.822244,0.997044,...,1.192299,0.974316,0.997944,1.406262,0.810652,1.008153,0.943241,0.878888,0.608959,0.932915


In [17]:
# impute missing values with the per-column mean; numeric_only=True skips the
# string ID column explicitly instead of relying on deprecated implicit dropping
cancer_cell_copy_num_df = cancer_cell_copy_num_df.fillna(
    cancer_cell_copy_num_df.mean(numeric_only=True)
)

  cancer_cell_copy_num_df = cancer_cell_copy_num_df.fillna(cancer_cell_copy_num_df.mean())


In [18]:
cancer_copy_number_model = get_emb_models(cancer_cell_copy_num_df, "Unnamed: 0")

2023-07-16 08:20:46.718584: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-16 08:20:47.553742: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 78910 MB memory:  -> device: 0, name: NVIDIA A100 80GB PCIe, pci bus id: 0000:98:00.0, compute capability: 8.0
  weights = dataset.drop(id_col, 1).values


In [19]:
cancer_copy_number_model.save("..//Models//cancer_copy_number_model_no_norm")





INFO:tensorflow:Assets written to: ..//Models//cancer_copy_number_model_no_norm/assets


INFO:tensorflow:Assets written to: ..//Models//cancer_copy_number_model_no_norm/assets


In [20]:
# read cancer cell line gene-expression features (CCLE)
cancer_cell_gen_expr_df = pd.read_csv('../data/CCLE/genomic_expression_561celllines_697genes_demap_features.csv')

In [21]:
cancer_cell_gen_expr_df.shape

(561, 698)

In [22]:
cancer_cell_gen_expr_df.head()

Unnamed: 0.1,Unnamed: 0,LASP1,HOXA11,CREBBP,ETV1,GAS7,CD79B,PAX7,BTK,BRCA1,...,NCKIPSD,MTCP1,DDX3X,FANCG,SSX2,ETV5,CEBPA,LSM14A,CUX1,C15orf65
0,ACH-000828,9.393476,0.042644,3.93546,0.871844,0.070389,0.084064,0.0,0.056584,3.339137,...,4.071248,3.119356,6.849374,4.355439,0.0,0.137504,1.769772,6.501598,4.700994,2.295723
1,ACH-000568,7.638074,0.056584,3.427606,0.201634,1.794936,0.739848,0.042644,0.333424,3.193772,...,4.084064,4.634593,5.671576,5.525443,0.056584,2.195348,0.124328,5.811214,3.590961,1.550901
2,ACH-000560,5.728193,6.001352,5.032542,5.018812,0.432959,0.250962,0.0,0.263034,4.678635,...,5.05745,3.468583,6.617798,6.425761,0.0,5.203201,1.922198,7.581351,5.320124,1.438293
3,ACH-000561,6.037163,1.565597,4.262283,0.790772,1.257011,0.028569,0.056584,0.042644,3.44228,...,3.400538,3.407353,6.154211,4.794936,0.0,3.984589,1.028569,6.533719,5.132166,2.144046
4,ACH-000562,7.050502,0.014355,3.360364,0.879706,0.084064,0.137504,0.0,0.042644,4.939227,...,4.125982,4.047015,6.281884,5.853497,0.056584,3.757023,0.056584,5.912171,4.877744,0.815575


In [23]:
# impute missing values with the per-column mean; numeric_only=True skips the
# string ID column explicitly instead of relying on deprecated implicit dropping
cancer_cell_gen_expr_df = cancer_cell_gen_expr_df.fillna(
    cancer_cell_gen_expr_df.mean(numeric_only=True)
)

  cancer_cell_gen_expr_df = cancer_cell_gen_expr_df.fillna(cancer_cell_gen_expr_df.mean())


In [24]:
cancer_cell_gen_expr_model = get_emb_models(cancer_cell_gen_expr_df, "Unnamed: 0")

  weights = dataset.drop(id_col, 1).values


In [25]:
cancer_cell_gen_expr_model.save("..//Models//cancer_cell_gen_expr_model_no_norm")





INFO:tensorflow:Assets written to: ..//Models//cancer_cell_gen_expr_model_no_norm/assets


INFO:tensorflow:Assets written to: ..//Models//cancer_cell_gen_expr_model_no_norm/assets


In [26]:
cancer_cell_gen_expr_model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 1)]               0         
                                                                 
 text_vectorization_1 (TextV  (None, 1)                0         
 ectorization)                                                   
                                                                 
 embedding_1 (Embedding)     (None, 1, 697)            392411    
                                                                 
 flatten_1 (Flatten)         (None, 697)               0         
                                                                 
Total params: 392,411
Trainable params: 0
Non-trainable params: 392,411
_________________________________________________________________


In [27]:
# read cancer cell line methylation features (CCLE)
cancer_cell_gen_methy_df = pd.read_csv('../data/CCLE/genomic_methylation_561celllines_808genes_demap_features.csv')

In [28]:
cancer_cell_gen_methy_df.shape

(561, 809)

In [29]:
# impute missing values with the per-column mean; numeric_only=True skips the
# string ID column explicitly instead of relying on deprecated implicit dropping
cancer_cell_gen_methy_df = cancer_cell_gen_methy_df.fillna(
    cancer_cell_gen_methy_df.mean(numeric_only=True)
)

  cancer_cell_gen_methy_df = cancer_cell_gen_methy_df.fillna(cancer_cell_gen_methy_df.mean())


In [30]:
cancer_cell_gen_methy_model = get_emb_models(cancer_cell_gen_methy_df, "Unnamed: 0")

  weights = dataset.drop(id_col, 1).values


In [31]:
cancer_cell_gen_methy_model.save("..//Models//cancer_cell_gen_methy_model_no_norm")





INFO:tensorflow:Assets written to: ..//Models//cancer_cell_gen_methy_model_no_norm/assets


INFO:tensorflow:Assets written to: ..//Models//cancer_cell_gen_methy_model_no_norm/assets


In [32]:
# read cancer cell line mutation features (CCLE)
cancer_cell_gen_mut_df = pd.read_csv('../data/CCLE/genomic_mutation_34673_demap_features.csv')

In [33]:
cancer_cell_gen_mut_df.shape

(961, 34674)

In [34]:
# impute missing values with the per-column mean; numeric_only=True skips the
# string ID column explicitly instead of relying on deprecated implicit dropping
cancer_cell_gen_mut_df = cancer_cell_gen_mut_df.fillna(
    cancer_cell_gen_mut_df.mean(numeric_only=True)
)

  cancer_cell_gen_mut_df = cancer_cell_gen_mut_df.fillna(cancer_cell_gen_mut_df.mean())


In [35]:
cancer_cell_gen_mut_model = get_emb_models(cancer_cell_gen_mut_df, "Unnamed: 0")

  weights = dataset.drop(id_col, 1).values


In [36]:
cancer_cell_gen_mut_model.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 1)]               0         
                                                                 
 text_vectorization_3 (TextV  (None, 1)                0         
 ectorization)                                                   
                                                                 
 embedding_3 (Embedding)     (None, 1, 34673)          33390099  
                                                                 
 flatten_3 (Flatten)         (None, 34673)             0         
                                                                 
Total params: 33,390,099
Trainable params: 0
Non-trainable params: 33,390,099
_________________________________________________________________


In [37]:
cancer_cell_gen_mut_model.save("..//Models//cancer_cell_gen_mut_model_no_norm")





INFO:tensorflow:Assets written to: ..//Models//cancer_cell_gen_mut_model_no_norm/assets


INFO:tensorflow:Assets written to: ..//Models//cancer_cell_gen_mut_model_no_norm/assets


In [38]:
cancer_cell_lines_drug_ic_50 = pd.read_csv('../data/CCLE/GDSC_IC50.csv')

In [39]:
cancer_cell_lines_drug_ic_50.head()

Unnamed: 0.1,Unnamed: 0,ACH-002137,ACH-000474,ACH-002089,ACH-000956,ACH-000948,ACH-000323,ACH-001002,ACH-000905,ACH-000973,...,ACH-002207,ACH-000827,ACH-000534,ACH-001709,ACH-000332,ACH-000469,ACH-000570,ACH-002208,ACH-002317,ACH-000828
0,GDSC:1,,,,,,,,,,...,,,2.724636,2.576109,,,,,2.842845,
1,GDSC:1001,7.258918,9.131374,10.182594,8.332992,8.597157,7.606939,8.168512,8.021503,7.517325,...,5.794405,7.748403,7.89933,6.896593,9.338678,9.200637,6.464226,,9.426281,8.916867
2,GDSC:1004,-3.802467,-5.702659,-4.499864,-4.366928,-4.56304,-4.633045,-6.050113,-6.618734,-5.374042,...,-5.929886,-4.435285,-5.623723,-5.886495,-2.237481,-5.476621,-4.985852,,-1.382754,-2.206867
3,GDSC:1005,4.146364,4.551663,1.772586,3.263056,4.09198,3.090641,1.67343,1.523128,0.794987,...,2.009858,2.576495,3.462176,2.572676,5.70624,2.498858,1.850879,,4.143535,5.344525
4,GDSC:1006,3.171367,-2.014854,-1.818771,0.252367,-1.688469,0.020116,-0.460778,-0.877788,-0.103689,...,-0.121731,-0.889294,0.681844,-2.04162,1.454613,-1.346782,-1.255548,,2.06733,1.230528


In [40]:
# combine all multi-omic features

In [41]:
# first_two_omics = cancer_cell_copy_num_df.merge(cancer_cell_gen_expr_df, on = ["Unnamed: 0"])

In [42]:
# first_three_omics = first_two_omics.merge(cancer_cell_gen_methy_df, on = ["Unnamed: 0"])

In [43]:
# first_four_omics = first_three_omics.merge(cancer_cell_gen_mut_df, on = ["Unnamed: 0"])

In [44]:
# first_four_omics.shape

In [45]:
# numeric part of each "GDSC:<id>" row label, as a list of plain Python ints
drug_ids_gdsc = (
    cancer_cell_lines_drug_ic_50["Unnamed: 0"]
    .map(lambda label: int(label.split(":")[-1]))
    .tolist()
)

In [46]:
# fraction of the drugs that have SMILES which also appear in the IC50 table
smiles_drug_ids = set(pubchem_drugs_smiles_df["drug_id"].values)
len(smiles_drug_ids.intersection(drug_ids_gdsc)) / len(smiles_drug_ids)

1.0

In [47]:
# get the responses for the cancer cell lines and drugs

In [48]:
import deepchem as dc
from rdkit import Chem



In [49]:
sample_smile = pubchem_drugs_smiles_df["Smiles"].values[0]

In [50]:
from rdkit import Chem
import rdkit
from rdkit.Chem import Descriptors

In [51]:
from joblib import Parallel, delayed

In [52]:
## function from link: https://greglandrum.github.io/rdkit-blog/posts/2022-12-23-descriptor-tutorial.html
def getMolDescriptors(smiles, missingVal=None):
    """Calculate every descriptor in ``Descriptors._descList`` for one molecule.

    Parameters
    ----------
    smiles : str
        SMILES string, parsed with ``Chem.MolFromSmiles``.
    missingVal : object, optional
        Value stored for any descriptor that cannot be calculated.

    Returns
    -------
    dict
        Maps each descriptor name to its computed value, or to
        ``missingVal`` where the calculation failed.
    """
    # imported once here (not per failure); the notebook has no top-level import for it
    import traceback

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # The SMILES could not be parsed, so no descriptor can be computed.
        # Return the all-missing row directly instead of printing one
        # traceback per descriptor.
        return {nm: missingVal for nm, _ in Descriptors._descList}

    res = {}
    for nm, fn in Descriptors._descList:
        # some of the descriptor functions can throw errors if they fail, catch those here:
        try:
            val = fn(mol)
        except Exception:
            # `Exception` rather than a bare except, so KeyboardInterrupt /
            # SystemExit still propagate. Print the error message and fall
            # back to whatever missingVal is.
            traceback.print_exc()
            val = missingVal
        res[nm] = val
    return res

In [53]:
catch = Parallel(n_jobs=15, verbose = 1)(delayed(getMolDescriptors)(i) for i in pubchem_drugs_smiles_df["Smiles"].values)

[Parallel(n_jobs=15)]: Using backend LokyBackend with 15 concurrent workers.
[Parallel(n_jobs=15)]: Done  20 tasks      | elapsed:    0.6s
[Parallel(n_jobs=15)]: Done 238 out of 238 | elapsed:    0.8s finished


In [54]:
rdkit_features = pd.DataFrame(catch)

In [55]:
rdkit_features.isnull().sum().sum()

8

In [56]:
rdkit_features = rdkit_features.fillna(rdkit_features.mean())

In [57]:
rdkit_features.shape

(238, 209)

In [58]:
# rdkit_features.head()

In [59]:
# rdkit_features.var()

In [60]:
# plt.hist(rdkit_features.var())
# plt.show()

In [61]:
from sklearn.feature_selection import VarianceThreshold

In [62]:
# keep only descriptors whose variance exceeds 1; the variance is computed on
# the raw (unscaled) descriptor values, so the cut-off is scale-dependent
vt = VarianceThreshold(threshold=1)

In [63]:
vt.fit(rdkit_features)

In [64]:
selected_cols = vt.get_feature_names_out(rdkit_features.columns)

In [65]:
rdkit_features = pd.DataFrame(vt.transform(rdkit_features))

In [66]:
rdkit_features.columns = selected_cols

In [67]:
pubchem_drugs_smiles_df.shape

(238, 3)

In [68]:
# column-wise join of the drug table and its descriptors; `axis` is passed by
# keyword because the positional form is deprecated in pandas
pubchem_drugs_smiles_rdkit_df = pd.concat([pubchem_drugs_smiles_df, rdkit_features], axis=1)

  pubchem_drugs_smiles_rdkit_df = pd.concat([pubchem_drugs_smiles_df, rdkit_features], 1)


In [69]:
pubchem_drugs_smiles_rdkit_df.shape

(238, 109)

In [70]:
# the PubCHEM id was only needed for the merge; `columns=` replaces the
# deprecated positional axis argument
pubchem_drugs_smiles_rdkit_df = pubchem_drugs_smiles_rdkit_df.drop(columns="PubCHEM")

  pubchem_drugs_smiles_rdkit_df = pubchem_drugs_smiles_rdkit_df.drop("PubCHEM", 1)


In [71]:
pubchem_drugs_smiles_rdkit_df.head()

Unnamed: 0,drug_id,Smiles,MaxAbsEStateIndex,MaxEStateIndex,MinEStateIndex,MolWt,HeavyAtomMolWt,ExactMolWt,NumValenceElectrons,BCUT2D_MWHI,...,fr_C_O,fr_C_O_noCOO,fr_NH0,fr_NH1,fr_alkyl_halide,fr_allylic_oxid,fr_aniline,fr_bicyclic,fr_ether,fr_halogen
0,1242,COc1cc(O)c2c(c1)C=CCC(O)C(O)C(=O)C=CCC(C)OC2=O,12.449593,12.449593,-1.546408,362.378,340.202,362.136553,140.0,16.543459,...,2.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0
1,179,O=c1[nH]cc(F)c(=O)[nH]1,11.984815,11.984815,-1.001157,130.078,127.054,130.017856,48.0,19.14222,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0
2,86,Cc1[nH]nc2ccc(-c3cncc(OCC(N)Cc4c[nH]c5ccccc45)...,6.350272,6.350272,-0.113155,397.482,374.298,397.19026,150.0,16.478404,...,0.0,0.0,2.0,2.0,0.0,0.0,0.0,2.0,1.0,0.0
3,55,COc1cc(-c2nn(C3CCC(N4CCN(C(C)=O)CC4)CC3)c3ncnc...,13.336041,13.336041,-0.228281,621.746,582.434,621.317586,238.0,16.466511,...,2.0,2.0,7.0,1.0,0.0,0.0,2.0,2.0,1.0,0.0
4,1001,NC(=O)c1ncn(C2OC(COP(=O)(O)O)C(O)C2O)c1N,11.080113,11.080113,-4.752405,338.213,323.093,338.06275,124.0,31.204462,...,1.0,1.0,2.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0


In [72]:
pubchem_drugs_smiles_rdkit_df.isnull().sum().sum()

0

In [73]:
pubchem_drugs_smiles_df = pubchem_drugs_smiles_rdkit_df.copy()[["drug_id", "Smiles"]]

In [74]:
pubchem_drugs_smiles_df.head()

Unnamed: 0,drug_id,Smiles
0,1242,COc1cc(O)c2c(c1)C=CCC(O)C(O)C(=O)C=CCC(C)OC2=O
1,179,O=c1[nH]cc(F)c(=O)[nH]1
2,86,Cc1[nH]nc2ccc(-c3cncc(OCC(N)Cc4c[nH]c5ccccc45)...
3,55,COc1cc(-c2nn(C3CCC(N4CCN(C(C)=O)CC4)CC3)c3ncnc...
4,1001,NC(=O)c1ncn(C2OC(COP(=O)(O)O)C(O)C2O)c1N


In [75]:
pubchem_drugs_smiles_df.to_csv('..//data//drugs_smile_strings.csv', index = False)

In [76]:
# descriptor-only table (drug_id + RDKit features); drop() already returns a
# new frame, so no explicit copy is needed, and `columns=` replaces the
# deprecated positional axis argument
pubchem_drugs_rdkit_df = pubchem_drugs_smiles_rdkit_df.drop(columns=["Smiles"])

  pubchem_drugs_rdkit_df = pubchem_drugs_smiles_rdkit_df.copy().drop(["Smiles"], 1)


In [77]:
pubchem_drugs_rdkit_df.head()

Unnamed: 0,drug_id,MaxAbsEStateIndex,MaxEStateIndex,MinEStateIndex,MolWt,HeavyAtomMolWt,ExactMolWt,NumValenceElectrons,BCUT2D_MWHI,BCUT2D_MRHI,...,fr_C_O,fr_C_O_noCOO,fr_NH0,fr_NH1,fr_alkyl_halide,fr_allylic_oxid,fr_aniline,fr_bicyclic,fr_ether,fr_halogen
0,1242,12.449593,12.449593,-1.546408,362.378,340.202,362.136553,140.0,16.543459,5.969311,...,2.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0
1,179,11.984815,11.984815,-1.001157,130.078,127.054,130.017856,48.0,19.14222,4.826193,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0
2,86,6.350272,6.350272,-0.113155,397.482,374.298,397.19026,150.0,16.478404,5.861654,...,0.0,0.0,2.0,2.0,0.0,0.0,0.0,2.0,1.0,0.0
3,55,13.336041,13.336041,-0.228281,621.746,582.434,621.317586,238.0,16.466511,6.071345,...,2.0,2.0,7.0,1.0,0.0,0.0,2.0,2.0,1.0,0.0
4,1001,11.080113,11.080113,-4.752405,338.213,323.093,338.06275,124.0,31.204462,7.4583,...,1.0,1.0,2.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0


In [78]:
pubchem_drugs_rdkit_df["drug_id"] = pubchem_drugs_rdkit_df["drug_id"].astype(str)

In [79]:
pubchem_drugs_rdkit_model = get_emb_models(pubchem_drugs_rdkit_df, "drug_id", False)

  weights = dataset.drop(id_col, 1).values


In [80]:
pubchem_drugs_rdkit_model.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 1)]               0         
                                                                 
 text_vectorization_4 (TextV  (None, 1)                0         
 ectorization)                                                   
                                                                 
 embedding_4 (Embedding)     (None, 1, 106)            25440     
                                                                 
 flatten_4 (Flatten)         (None, 106)               0         
                                                                 
Total params: 25,440
Trainable params: 0
Non-trainable params: 25,440
_________________________________________________________________


In [81]:
pubchem_drugs_rdkit_model.save("..//Models//pubchem_drugs_rdkit_model_no_norm")





INFO:tensorflow:Assets written to: ..//Models//pubchem_drugs_rdkit_model_no_norm/assets


INFO:tensorflow:Assets written to: ..//Models//pubchem_drugs_rdkit_model_no_norm/assets


In [82]:
cancer_cell_line_drug_ic50 = cancer_cell_lines_drug_ic_50.melt(id_vars=['Unnamed: 0'], var_name="Cancer_Cell_Line", value_name = "IC50")

In [83]:
cancer_cell_line_drug_ic50.columns = ["drug_id", "Cancer_Cell_Line", "IC50"]

In [84]:
# strip the "GDSC:" prefix, keeping only the text after the last colon
cancer_cell_line_drug_ic50["drug_id"] = [
    label.split(":")[-1] for label in cancer_cell_line_drug_ic50["drug_id"]
]

In [85]:
cancer_cell_line_drug_ic50 = cancer_cell_line_drug_ic50.dropna()

In [86]:
cancer_cell_line_drug_ic50.shape

(208734, 3)

In [87]:
cancer_cell_line_drug_ic50.head()

Unnamed: 0,drug_id,Cancer_Cell_Line,IC50
1,1001,ACH-002137,7.258918
2,1004,ACH-002137,-3.802467
3,1005,ACH-002137,4.146364
4,1006,ACH-002137,3.171367
5,1007,ACH-002137,-4.959442


In [88]:
cancer_cell_line_drug_ic50["drug_id"] = cancer_cell_line_drug_ic50["drug_id"].astype(np.int64)

In [89]:
cancer_cell_line_drug_ic50.shape

(208734, 3)

In [90]:
cancer_cell_line_drug_ic50.isnull().sum().sum()

0

In [91]:
cancer_cell_line_drug_ic50.to_csv('..//data//drugs_cell_lines_ic50.csv', index = False)