# Merged data
Loads the BDM data computed on the different servers and saves it in a single file to be loaded by the deep learning model.

In [1]:
import numpy as np
import scipy.sparse as sp
import pickle
import shelve
from pybdm import BDM
from algorithms import PerturbationExperiment, NodePerturbationExperiment
import datetime

## Loading data
The final version can receive the filename of the data shelf as a parameter.

### Database structures

In [89]:
# Pickled dictionary holding the database structures.
# Only unpickle files that come from a trusted source.
filename = './data_structures/DS_se3_2020_05_26'
with open(filename, 'rb') as f:
    DS = pickle.load(f)
# Bind every entry of the dictionary to a notebook-level variable of the
# same name, reporting each one as it is imported.
for key in DS:
    globals()[key] = DS[key]
    print(key, "Imported successfully")

gene2idx Imported successfully
drug2idx Imported successfully
se_mono_name2idx Imported successfully
se_combo_name2idx Imported successfully
ddi_adj_list Imported successfully
ddi_degrees_list Imported successfully
dti_adj Imported successfully
ppi_adj Imported successfully
ppi_degrees Imported successfully
drug_feat Imported successfully
prot_feat Imported successfully
norm_prot_feat Imported successfully


In [90]:
# Sanity check: print the dimensions of the drug feature matrix that was just imported.
print(drug_feat.shape)

(513, 9479)


### BDM DDI

In [91]:
# Pickled dictionary with the BDM results computed for the DDI networks.
# Only unpickle files that come from a trusted source.
filename = './data_structures/ddi_bdm_se3_drugs513_juan8'
with open(filename, 'rb') as f:
    ddi = pickle.load(f)
# Bind every entry of the dictionary to a notebook-level variable of the
# same name, reporting each one as it is imported.
for key in ddi:
    globals()[key] = ddi[key]
    print(key, "Imported successfully")

nodebdm_ddi_list Imported successfully
edgebdm_ddi_list Imported successfully
vms_ddi Imported successfully
rss_ddi Imported successfully
time_ddi Imported successfully
jobs_ddi Imported successfully


In [92]:
# Build the DDI BDM feature arrays: every element of each list is flattened
# into a single column, and the columns are placed side by side.
nodebdm_ddi_array = np.concatenate(
    [vec.reshape(-1, 1) for vec in nodebdm_ddi_list], axis=1)
edgebdm_ddi_array = np.concatenate(
    [vec.reshape(-1, 1) for vec in edgebdm_ddi_list], axis=1)

In [93]:
# Show the dimensions of both BDM arrays, one per line.
print(nodebdm_ddi_array.shape, edgebdm_ddi_array.shape, sep='\n')
# Keep the row and column counts of the node array; the export cell uses
# them to label the output file name (`drugs…` and `se…` respectively).
nd, se = nodebdm_ddi_array.shape

(513, 3)
(513, 3)


### BDM DTI

In [None]:
# TODO: set this to the pickled DTI BDM results once they are available.
filename = ''
if filename:
    # NOTE: pickle can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as f:
        dti = pickle.load(f)
    # Bind every entry of the loaded dictionary to a notebook-level variable.
    for key in dti:
        globals()[key] = dti[key]
        print(key,"Imported successfully")
else:
    # An empty path makes open() raise FileNotFoundError, which would abort
    # a "Restart & Run All"; skip the import explicitly instead.
    print("No DTI BDM file configured; skipping import")

### BDM PPI

In [None]:
# TODO: set this to the pickled PPI BDM results once they are available.
filename = ''
if filename:
    # NOTE: pickle can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as f:
        ppi = pickle.load(f)
    # Bind every entry of the loaded dictionary to a notebook-level variable.
    for key in ppi:
        globals()[key] = ppi[key]
        print(key,"Imported successfully")
else:
    # An empty path makes open() raise FileNotFoundError, which would abort
    # a "Restart & Run All"; skip the import explicitly instead.
    print("No PPI BDM file configured; skipping import")

## Concatenation of features

## Exporting

In [None]:
# Current date and time; its year, month and day (as zero-padded strings)
# are used to tag the exported file name.
now = datetime.datetime.now()
year, month, day = (now.strftime(code) for code in ("%Y", "%m", "%d"))

In [None]:
# Export the data structures as a single pickled dictionary.

# `ng` labels the number of genes in the file name but no earlier cell defines it.
# NOTE(review): assumes prot_feat has one row per gene — confirm.
ng = prot_feat.shape[0]
# `se` and `nd` are integers (unpacked from an array shape), so they are
# formatted explicitly; concatenating them to strings with `+` raises TypeError.
filename = './data_structures/decagon_se{}_genes{}_drugs{}_{}_{}_{}'.format(
    se, ng, nd, year, month, day)
new_feat = {}
# Feature matrices, stored in CSR sparse format
new_feat['drug_feat'] = sp.csr_matrix(drug_feat)
new_feat['prot_feat'] = sp.csr_matrix(prot_feat)
# Dictionaries
new_feat['gene2idx'] = gene2idx
new_feat['drug2idx'] = drug2idx
new_feat['se_mono_name2idx'] = se_mono_name2idx
new_feat['se_combo_name2idx'] = se_combo_name2idx
# DDI
new_feat['ddi_adj_list'] = ddi_adj_list
new_feat['ddi_degrees_list'] = ddi_degrees_list
# DTI
new_feat['dti_adj'] = dti_adj
# PPI
new_feat['ppi_adj'] = ppi_adj
new_feat['ppi_degrees'] = ppi_degrees
with open(filename, 'wb') as f:
    # Protocol 2 is pinned on purpose (it is not the highest available one);
    # presumably so older Python versions can read the file — confirm before changing.
    pickle.dump(new_feat, f, protocol=2)

# CMM data

In [74]:
# Import every entry of the two CMM shelves as notebook-level variables.
# The shelves are opened as context managers so they are closed even if an
# entry fails to load part-way through.
with shelve.open("./data_structures/decagon_CMM") as data_decagon:
    print("Data Structures:")
    for key in data_decagon:
        globals()[key]=data_decagon[key]
        print(key,"Imported successfully")
print('\n')
print("BDM feature vectors:")
with shelve.open("./data_structures/bdm_CMM") as data_bdm:
    for key in data_bdm:
        globals()[key]=data_bdm[key]
        print(key,"Imported successfully")

Data Structures:
drug2idx Imported successfully
dti_adj Imported successfully
dti_genes Imported successfully
ppi_degrees Imported successfully
ddi_adj_list Imported successfully
prot_feat Imported successfully
ddi_degrees_list Imported successfully
se_mono_name2idx Imported successfully
drug_feat Imported successfully
gene2idx Imported successfully
dti_drugs Imported successfully
ppi_adj Imported successfully


BDM feature vectors:
rss Imported successfully
vms Imported successfully
bdm_drugs_dti Imported successfully
bdm_ddi_list Imported successfully
bdm_ppi Imported successfully
total_time Imported successfully
bdm_genes_dti Imported successfully


In [75]:
# One column per entry of `bdm_ddi_list`: the first item of each entry is
# converted to an array and flattened into a column before joining them.
arr_ddi = np.concatenate(
    [np.array(entry[0]).reshape(-1, 1) for entry in bdm_ddi_list],
    axis=1,
)
arr_ddi.shape

(476, 3)

In [77]:
# Column vector of DTI BDM values that is appended to the drug features.
# NOTE(review): the drug-side vector is built from `bdm_genes_dti`; the recorded
# output has 476 rows, the same as `drug_feat`, so the shelf keys seem to be
# named from the opposite perspective — confirm.
bdm_drugsdti = np.reshape(np.array(bdm_genes_dti), (-1, 1))
bdm_drugsdti.shape

(476, 1)

In [78]:
# Dimensions of the drug feature matrix before the BDM columns are appended.
drug_feat.shape

(476, 9411)

In [80]:
# Append the BDM columns (DDI first, then DTI) to the right of the drug features.
# The guard keeps the cell re-runnable: after the first run `drug_feat` is dense,
# and a dense array/matrix has no `.todense()`, so a second pass would fail.
if sp.issparse(drug_feat):
    drug_feat = np.concatenate((drug_feat.todense(),arr_ddi,bdm_drugsdti),axis=1)
else:
    print("drug_feat is already dense; assuming the BDM columns were appended by a previous run")
print(drug_feat.shape)

(476, 9415)


In [81]:
# Column vector of DTI BDM values that is appended to the protein features.
# NOTE(review): the gene-side vector is built from `bdm_drugs_dti`; the recorded
# output has 16235 rows, the same as `prot_feat`, so the shelf keys seem to be
# named from the opposite perspective — confirm.
bdm_genesdti = np.reshape(np.array(bdm_drugs_dti), (-1, 1))
bdm_genesdti.shape

(16235, 1)

In [85]:
# Turn the PPI BDM values into a single-column 2-D array (rebinding the same name).
bdm_ppi = np.reshape(np.array(bdm_ppi), (-1, 1))
bdm_ppi.shape

(16235, 1)

In [86]:
# Dimensions of the protein feature matrix before the BDM columns are appended.
prot_feat.shape

(16235, 6)

In [87]:
# Append the BDM columns (PPI first, then DTI) to the right of the protein features.
# The guard keeps the cell re-runnable: after the first run `prot_feat` is dense,
# and a dense array/matrix has no `.todense()`, so a second pass would fail.
if sp.issparse(prot_feat):
    prot_feat = np.concatenate((prot_feat.todense(),bdm_ppi,bdm_genesdti),axis=1)
else:
    print("prot_feat is already dense; assuming the BDM columns were appended by a previous run")
prot_feat.shape

(16235, 8)

Re-exporting the Decagon variables to avoid inconsistencies among Python versions.

In [None]:
# Re-export the CMM data structures (with the extended feature matrices) as a
# single pickled dictionary.

# The original assignment ended in a dangling `+`, which is a syntax error.
# NOTE(review): the rest of the name follows the pattern of the earlier export
# cell (se / genes / drugs / date) — confirm the intended file name.
n_drugs, n_se = arr_ddi.shape
# NOTE(review): assumes prot_feat has one row per gene — confirm.
n_genes = prot_feat.shape[0]
stamp = datetime.datetime.now().strftime('%Y_%m_%d')
filename = './data_structures/decagon_se{}_genes{}_drugs{}_{}'.format(
    n_se, n_genes, n_drugs, stamp)
new_feat = {}
# Feature matrices, stored in CSR sparse format
new_feat['drug_feat'] = sp.csr_matrix(drug_feat)
new_feat['prot_feat'] = sp.csr_matrix(prot_feat)
# Dictionaries
new_feat['gene2idx'] = gene2idx
new_feat['drug2idx'] = drug2idx
new_feat['se_mono_name2idx'] = se_mono_name2idx
# DDI
new_feat['ddi_adj_list'] = ddi_adj_list
new_feat['ddi_degrees_list'] = ddi_degrees_list
# DTI
new_feat['dti_adj'] = dti_adj
# PPI
new_feat['ppi_adj'] = ppi_adj
new_feat['ppi_degrees'] = ppi_degrees
with open(filename, 'wb') as f:
    # Protocol 2 is pinned on purpose (it is not the highest available one);
    # presumably so older Python versions can read the file — confirm before changing.
    pickle.dump(new_feat, f, protocol=2)