In [1]:
import numpy as np
import pandas as pd
import pickle 
import gzip

In [2]:
#contains the data in both feature ordering ways (drug A - drug B - cell line and drug B - drug A - cell line)
#in the first half of the data the features are ordered (drug A - drug B - cell line)
#in the second half of the data the features are ordered (drug B - drug A - cell line)
file = gzip.open('X.p.gz', 'rb')
X = pickle.load(file)
file.close()

In [3]:
#contains synergy values and fold split (numbers 0-4)
labels = pd.read_csv('labels.csv', index_col=0)
#labels are duplicated for the two different ways of ordering in the data
labels = pd.concat([labels, labels])

In [4]:
labels.shape

(46104, 5)

In [5]:
X.shape

(46104, 12758)

In [6]:
len_features = 1309+802+2276
len_cell_line = 3984

print(len_features)
print(len_cell_line)

4387
3984


In [7]:
drugsFeatures = X[:,:len(X[0])-len_cell_line]
dict_drug2features = {}

for i, drug_a in enumerate(labels['drug_a_name']):
    if drug_a in dict_drug2features.keys():
        continue
    else:
        dict_drug2features[drug_a] = drugsFeatures[i,:len_features]

for i, drug_b in enumerate(labels['drug_b_name']):
    if drug_b in dict_drug2features.keys():
        continue
    else:
        dict_drug2features[drug_b] = drugsFeatures[i,:len_features]

In [8]:
print(len(dict_drug2features.keys()))
dict_drug2features.keys()

38


dict_keys(['5-FU', 'ABT-888', 'AZD1775', 'BEZ-235', 'BORTEZOMIB', 'CARBOPLATIN', 'CYCLOPHOSPHAMIDE', 'DASATINIB', 'DEXAMETHASONE', 'DINACICLIB', 'DOXORUBICIN', 'ERLOTINIB', 'ETOPOSIDE', 'GELDANAMYCIN', 'GEMCITABINE', 'L778123', 'LAPATINIB', 'METFORMIN', 'METHOTREXATE', 'MITOMYCINE', 'MK-2206', 'MK-4541', 'MK-4827', 'MK-5108', 'MK-8669', 'MRK-003', 'OXALIPLATIN', 'PACLITAXEL', 'PD325901', 'SN-38', 'SORAFENIB', 'SUNITINIB', 'TEMOZOLOMIDE', 'TOPOTECAN', 'VINBLASTINE', 'VINORELBINE', 'ZOLINZA', 'MK-8776'])

In [9]:
print(len(dict_drug2features['5-FU']))
dict_drug2features['5-FU']

4387


array([ 0.02631579,  0.05263158,  0.02631579, ..., 16.345     ,
        1.993     ,  0.        ])

In [10]:
pd_drug2features = pd.DataFrame(dict_drug2features)
pd_drug2features.head()

Unnamed: 0,5-FU,ABT-888,AZD1775,BEZ-235,BORTEZOMIB,CARBOPLATIN,CYCLOPHOSPHAMIDE,DASATINIB,DEXAMETHASONE,DINACICLIB,...,PD325901,SN-38,SORAFENIB,SUNITINIB,TEMOZOLOMIDE,TOPOTECAN,VINBLASTINE,VINORELBINE,ZOLINZA,MK-8776
0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316
1,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
2,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316
3,1.0,1.0,1.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,...,1.0,1.0,1.0,2.0,1.0,1.0,0.0,0.0,2.0,1.0
4,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


In [11]:
import pickle

with open("dict_drugName2myEncode", "rb") as fp:   # Unpickling
    dict_drug2number = pickle.load(fp)

In [12]:
dict_drug2number

{'GELDANAMYCIN': 0,
 'ABT-888': 1,
 'SN-38': 2,
 'DINACICLIB': 3,
 'MK-8776': 4,
 'TOPOTECAN': 5,
 'AZD1775': 6,
 'LAPATINIB': 7,
 'PD325901': 8,
 'SORAFENIB': 9,
 'METHOTREXATE': 10,
 'ERLOTINIB': 11,
 '5-FU': 12,
 'METFORMIN': 13,
 'BEZ-235': 14,
 'VINBLASTINE': 15,
 'MK-4827': 16,
 'MK-2206': 17,
 'ETOPOSIDE': 18,
 'SUNITINIB': 19,
 'CYCLOPHOSPHAMIDE': 20,
 'MK-5108': 21,
 'DEXAMETHASONE': 22,
 'DASATINIB': 23,
 'L778123': 24,
 'ZOLINZA': 25,
 'GEMCITABINE': 26,
 'PACLITAXEL': 27,
 'MK-8669': 28,
 'VINORELBINE': 29,
 'DOXORUBICIN': 30,
 'BORTEZOMIB': 31}

In [13]:
len(dict_drug2number.keys())

32

In [14]:
pd_drug2features.head()

Unnamed: 0,5-FU,ABT-888,AZD1775,BEZ-235,BORTEZOMIB,CARBOPLATIN,CYCLOPHOSPHAMIDE,DASATINIB,DEXAMETHASONE,DINACICLIB,...,PD325901,SN-38,SORAFENIB,SUNITINIB,TEMOZOLOMIDE,TOPOTECAN,VINBLASTINE,VINORELBINE,ZOLINZA,MK-8776
0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316
1,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
2,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316
3,1.0,1.0,1.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,...,1.0,1.0,1.0,2.0,1.0,1.0,0.0,0.0,2.0,1.0
4,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


In [15]:
pd_drug2my_features = pd_drug2features[list(dict_drug2number.keys())]
print(pd_drug2my_features.shape)
pd_drug2my_features.head()

(4387, 32)


Unnamed: 0,GELDANAMYCIN,ABT-888,SN-38,DINACICLIB,MK-8776,TOPOTECAN,AZD1775,LAPATINIB,PD325901,SORAFENIB,...,DEXAMETHASONE,DASATINIB,L778123,ZOLINZA,GEMCITABINE,PACLITAXEL,MK-8669,VINORELBINE,DOXORUBICIN,BORTEZOMIB
0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.052632,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,...,0.0,1.0,1.0,2.0,0.0,1.0,1.0,0.0,0.0,2.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [16]:
import os
from os.path import dirname
import torch

path2save = dirname(os.getcwd())+os.sep+'synergy-prediction-gcn-2'+os.sep+'applyModelONeil'+os.sep+'data'+os.sep

matrix_drug2my_features = torch.tensor(pd_drug2my_features.values)
#torch.save(matrix_drug2my_features,path2save+'drug_feature_matrix_deepSynergy')

  from .autonotebook import tqdm as notebook_tqdm


In [17]:
print(len(matrix_drug2my_features[:1309,0]))
print(matrix_drug2my_features[:1309,0].min())
print(matrix_drug2my_features[:1309,0].max())
matrix_drug2my_features[:1309,0]

1309
tensor(0., dtype=torch.float64)
tensor(29., dtype=torch.float64)


tensor([0., 0., 0.,  ..., 0., 0., 0.], dtype=torch.float64)

In [32]:
float(torch.tensor(0., dtype=torch.float64))

0.0

In [39]:
print( sorted(set([float(x) for x in matrix_drug2my_features[:1309,0]])))

[0.0, 0.02631578947368421, 0.05263157894736842, 0.07894736842105263, 0.13157894736842105, 0.42105263157894735, 0.7631578947368421, 1.0, 2.0, 3.0, 4.0, 9.0, 29.0]


In [40]:
print(matrix_drug2my_features[:1309,:].max())

tensor(53., dtype=torch.float64)


In [29]:
print(len(matrix_drug2my_features[1309:802+1309,0]))
print(matrix_drug2my_features[1309:802+1309,0].min())
print(matrix_drug2my_features[1309:802+1309,0].max())
matrix_drug2my_features[1309:802+1309,0]

802
tensor(0., dtype=torch.float64)
tensor(1., dtype=torch.float64)


tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0789, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 

In [28]:
print(len(matrix_drug2my_features[802+1309:,0]))
print(matrix_drug2my_features[802+1309:,0].min())
print(matrix_drug2my_features[802+1309:,0].max())
matrix_drug2my_features[802+1309:,0]

2276
tensor(-2214.8870, dtype=torch.float64)
tensor(22421.9590, dtype=torch.float64)


tensor([  0.0000,   0.0000,   0.0000,  ..., 102.8270,   5.4850,   0.0000],
       dtype=torch.float64)

In [28]:
pd_drug2my_features.values

array([[  0.        ,   0.        ,   0.        , ...,   0.        ,
          0.        ,   0.        ],
       [  0.        ,   0.        ,   0.        , ...,   0.        ,
          0.        ,   0.        ],
       [  0.        ,   0.        ,   0.        , ...,   0.        ,
          0.        ,   0.        ],
       ...,
       [102.827     ,  36.277     , 186.8852973 , ..., 178.766     ,
        107.801     ,  64.523     ],
       [  5.485     ,   3.136     ,   3.95327027, ...,   4.671     ,
          4.393     ,   3.886     ],
       [  0.        ,   0.        ,   0.        , ...,   0.        ,
          0.        ,   0.        ]])

In [40]:
drug_feature_matrix_deepSynergy = torch.load(path2save+'drug_feature_matrix_deepSynergy')

In [41]:
drug_feature_matrix_deepSynergy

tensor([[  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
        ...,
        [102.8270,  36.2770, 186.8853,  ..., 178.7660, 107.8010,  64.5230],
        [  5.4850,   3.1360,   3.9533,  ...,   4.6710,   4.3930,   3.8860],
        [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000]],
       dtype=torch.float64)

In [42]:
type(drug_feature_matrix_deepSynergy[0][0])

torch.Tensor