# Merged data
Loads the BDM data computed on the different servers and saves it in a single file to be loaded by the deep learning model.

In [1]:
import numpy as np
import scipy.sparse as sp
import pickle
import shelve
from pybdm import BDM
from algorithms import PerturbationExperiment, NodePerturbationExperiment
import datetime

## Loading data
The final version can receive the filename of the data shelf as a parameter.

### Database structures

In [2]:
# Pickled dictionary holding the database structures.
filename = './data_structures/DS_se964_2020_05_27'
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# open files produced by this project.
with open(filename, 'rb') as f:
    DS = pickle.load(f)
# Expose every entry of the loaded dictionary as a notebook-level variable
# and report each name as it is bound.
for key in DS:
    globals()[key] = DS[key]
    print(key, "Imported successfully")

gene2idx Imported successfully
drug2idx Imported successfully
se_mono_name2idx Imported successfully
se_combo_name2idx Imported successfully
ddi_adj_list Imported successfully
ddi_degrees_list Imported successfully
dti_adj Imported successfully
ppi_adj Imported successfully
ppi_degrees Imported successfully
drug_feat Imported successfully
prot_feat Imported successfully
norm_prot_feat Imported successfully


### BDM DDI

In [91]:
# Pickled dictionary holding the DDI BDM results.
filename = './data_structures/ddi_bdm_se3_drugs513_juan8'
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# open files produced by this project.
with open(filename, 'rb') as f:
    ddi = pickle.load(f)
# Expose every entry of the loaded dictionary as a notebook-level variable
# and report each name as it is bound.
for key in ddi:
    globals()[key] = ddi[key]
    print(key, "Imported successfully")

nodebdm_ddi_list Imported successfully
edgebdm_ddi_list Imported successfully
vms_ddi Imported successfully
rss_ddi Imported successfully
time_ddi Imported successfully
jobs_ddi Imported successfully


In [92]:
# Concatenate the DDI BDM feature vectors: each list entry is reshaped into a
# single column and the columns are joined side by side (one column per entry).
nodebdm_ddi_array = np.concatenate(
    [vec.reshape(-1, 1) for vec in nodebdm_ddi_list], axis=1
)
edgebdm_ddi_array = np.concatenate(
    [vec.reshape(-1, 1) for vec in edgebdm_ddi_list], axis=1
)

In [93]:
# Show both array shapes, one per line.
print(nodebdm_ddi_array.shape, edgebdm_ddi_array.shape, sep="\n")
# Keep the two dimensions of the node array (rows, columns) for later use.
nd, se = nodebdm_ddi_array.shape

(513, 3)
(513, 3)


### BDM DTI

In [None]:
# TODO: set the path of the pickled DTI BDM results. The empty string is a
# placeholder, and opening it fails until a real path is filled in.
filename = ''
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# open files produced by this project.
with open(filename, 'rb') as f:
    dti = pickle.load(f)
# Expose every entry of the loaded dictionary as a notebook-level variable
# and report each name as it is bound.
for key in dti:
    globals()[key] = dti[key]
    print(key, "Imported successfully")

### BDM PPI

In [None]:
# TODO: set the path of the pickled PPI BDM results. The empty string is a
# placeholder, and opening it fails until a real path is filled in.
filename = ''
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# open files produced by this project.
with open(filename, 'rb') as f:
    ppi = pickle.load(f)
# Expose every entry of the loaded dictionary as a notebook-level variable
# and report each name as it is bound.
for key in ppi:
    globals()[key] = ppi[key]
    print(key, "Imported successfully")

## Importing of toy data

In [47]:
# Pickled dictionary holding the toy data structures.
filename = './data_structures/toyDS_genes16271_drugs639_se964_2020_05_28'
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# open files produced by this project.
with open(filename, 'rb') as f:
    DS = pickle.load(f)
# Expose every entry as a notebook-level variable. This rebinds DS and any
# name already defined with the same key (e.g. ddi_adj_list, dti_adj,
# ppi_adj), replacing values loaded by earlier cells.
for key in DS:
    globals()[key] = DS[key]
    print(key, "Imported successfully")

ddi_adj_list Imported successfully
ddi_degrees_list Imported successfully
dti_adj Imported successfully
ppi_adj Imported successfully
ppi_degrees Imported successfully
drug_feat Imported successfully
prot_feat Imported successfully


## Concatenation of features

In [48]:
def sparse_to_tuple(sparse_mx):
    """Decompose a SciPy sparse matrix into ``(coords, values, shape)``.

    Args:
        sparse_mx: Sparse matrix; it is converted to COO format when it is
            not already a COO matrix.

    Returns:
        A 3-tuple ``(coords, values, shape)``: ``coords`` is a two-column
        array with one ``[row, col]`` pair per stored entry, ``values`` is
        the array of stored entries in the same order, and ``shape`` is the
        shape of the matrix.
    """
    coo = sparse_mx if sp.isspmatrix_coo(sparse_mx) else sparse_mx.tocoo()
    # Stack the row and column index vectors, then flip so that every row of
    # the result is one (row, col) pair.
    coords = np.vstack((coo.row, coo.col)).T
    return coords, coo.data, coo.shape

In [49]:
# Feature sizes per node type. Both the "nonzero" count and the feature count
# are set to the number of columns of the corresponding feature matrix.
gene_num_feat = prot_feat.shape[1]
gene_nonzero_feat = gene_num_feat
drug_num_feat = drug_feat.shape[1]
drug_nonzero_feat = drug_num_feat
# Replace the sparse feature matrices by (coords, values, shape) tuples.
# NOTE: drug_feat is rebound from a sparse matrix to a tuple here, so this
# cell cannot be executed twice without reloading drug_feat first (the tuple
# has no .shape attribute).
drug_feat = sparse_to_tuple(drug_feat.tocoo())
gene_feat = sparse_to_tuple(prot_feat.tocoo())

In [51]:
# Sanity check: show the gene and drug feature sizes computed above.
print(gene_nonzero_feat,gene_num_feat,drug_nonzero_feat,drug_num_feat)

16271 16271 639 639


# Creation of Decagon dictionaries

In [52]:
# Adjacency matrices grouped by a (source node type, target node type) key.
# Node type 0 holds the PPI matrices and node type 1 the DDI matrices; the
# mixed keys hold the DTI matrix and its transpose. For the same-type keys
# every matrix is followed by a transposed copy, so each list is twice as
# long as its input.
adj_mats_orig = {
    (0, 0): [ppi_adj, ppi_adj.transpose(copy=True)],
    (0, 1): [dti_adj],
    (1, 0): [dti_adj.transpose(copy=True)],
    (1, 1): ddi_adj_list + [x.transpose(copy=True) for x in ddi_adj_list],
}

In [53]:
# Degree data per node type (0 and 1). Each entry is listed twice, which
# matches the length of the (0, 0) and (1, 1) lists in adj_mats_orig, where
# every matrix is accompanied by its transpose.
degrees = {
    0: [ppi_degrees, ppi_degrees],
    1: ddi_degrees_list + ddi_degrees_list, 
}

In [54]:
# Shapes of all adjacency matrices, stored under the same keys as
# adj_mats_orig and in the same order.
edge_type2dim = {
    node_pair: [matrix.shape for matrix in matrices]
    for node_pair, matrices in adj_mats_orig.items()
}

In [55]:
# Decoder name assigned to each (source node type, target node type) key:
# 'dedicom' for the (1, 1) key and 'bilinear' for all the others.
edge_type2decoder = {
    (0, 0): 'bilinear',
    (0, 1): 'bilinear',
    (1, 0): 'bilinear',
    (1, 1): 'dedicom',
}

In [56]:
# Number of adjacency matrices stored under each key of adj_mats_orig.
edge_types = {
    node_pair: len(matrices) for node_pair, matrices in adj_mats_orig.items()
}

In [57]:
# Total number of adjacency matrices over all keys.
num_edge_types = sum(edge_types.values())
print("Edge types:", "{:d}".format(num_edge_types))

Edge types: 1932


In [58]:
# Feature count per node type: 0 -> genes, 1 -> drugs.
num_feat = {
    0: gene_num_feat,
    1: drug_num_feat,
}

In [59]:
# "Nonzero" feature count per node type: 0 -> genes, 1 -> drugs.
nonzero_feat = {
    0: gene_nonzero_feat,
    1: drug_nonzero_feat,
}

In [60]:
# Feature data per node type (0 -> genes, 1 -> drugs), each stored as the
# (coords, values, shape) tuple returned by sparse_to_tuple.
feat = {
    0: gene_feat,
    1: drug_feat,
}

## Exporting

In [61]:
# Date components of the current local time, formatted as strings.
now = datetime.datetime.now() # current date and time
year = now.strftime("%Y")
month = now.strftime("%m")
day = now.strftime("%d")
# Output path of the export. NOTE: the date components above are not part of
# this path.
filename = './data_structures/DECAGON_toy_no_feat'

In [62]:
# Bundle every structure to be exported into a single dictionary.
data_structures = {
    # Graph data structures
    'adj_mats_orig': adj_mats_orig,
    'degrees': degrees,
    'edge_type2dim': edge_type2dim,
    'edge_type2decoder': edge_type2decoder,
    'edge_types': edge_types,
    'num_edge_types': num_edge_types,
    # Feature data structures
    'num_feat': num_feat,
    'nonzero_feat': nonzero_feat,
    'feat': feat,
}
# The name-to-index dictionaries (gene2idx, drug2idx, se_mono_name2idx,
# se_combo_name2idx) are deliberately left out of this export.

In [63]:
with open(filename, 'wb') as f:
    # Pickle the 'data_structures' dictionary with protocol 2 (a fixed
    # protocol, not the highest one available).
    # NOTE(review): presumably chosen so that older Python versions can read
    # the file -- confirm.
    pickle.dump(data_structures, f, protocol=2)

In [64]:
# Display the path the export was written to.
filename

'./data_structures/DECAGON_toy_no_feat'