# Python 3

In [1]:
import datetime as dt
import os
import pickle
import time
from itertools import combinations, chain

import networkx as nx
import numpy as np
import scipy.sparse as sp
from joblib import Parallel, delayed

In [13]:
# Parameters
# Gene side: the PPI graph generator is called with (g1, g2), and the
# generating cell asserts that g1 * g2 equals n_genes.
g1 = 561
g2 = 29
n_genes = 16269
# Drug side: number of drugs and number of drug-drug adjacency matrices built.
n_drugs = 630
n_se_combo = 6
# Feature widths: columns of the random drug and protein feature matrices.
n_se_mono = 9688
n_pf = 5

In [16]:
# Build a random gene-gene (PPI) network and derive its adjacency structures
start = time.time()
# The generator is given g1 and g2, so their product must match n_genes
assert n_genes == g1 * g2
gene_net = nx.planted_partition_graph(g1, g2, 0.15, 0.05, seed=42)
ppi_adj = nx.adjacency_matrix(gene_net)
ppi_elapsed = dt.timedelta(seconds=time.time() - start)
print('PPI adjacency matrix created in ', ppi_elapsed)
# Degree of every gene = column sums of the adjacency matrix, as a 1-D array
ppi_degrees = np.array(ppi_adj.sum(axis=0)).squeeze()
# Share of non-zero entries among all n_genes x n_genes cells, in percent
ppi_fill_pct = round(ppi_adj.sum() / n_genes ** 2 * 100, 2)
print('The PPI adj matrix is filled in a', ppi_fill_pct, '%')

PPI adjacency matrix created in  0:00:40.433140
The PPI adj matrix is filled in a 5.02 %


In [17]:
# Creates random adjacency matrix for genes and drugs
start = time.time()
# Draw from a dedicated, seeded generator. The PPI graph is generated with a
# fixed seed, but an unseeded draw here made every run produce a different
# toy dataset that is later saved under the same file name.
dti_rng = np.random.RandomState(42)
# 10 * N(0, 1) > 15 keeps an entry when the standard-normal draw exceeds 1.5,
# giving a sparse binary (n_genes x n_drugs) matrix.
dti_adj = sp.csr_matrix((10 * dti_rng.randn(n_genes, n_drugs) > 15).astype(int))
print('DTI adjacency matrix created in ',dt.timedelta(seconds=time.time()-start))

DTI adjacency matrix created in  0:00:00.451586


In [18]:
# DDI adjacency matrices
start = time.time()
# tmp[d1, d2] is the number of gene rows in which both drug columns d1 and d2
# of dti_adj are 1. The product is taken with the sparse matrix's own .dot():
# np.dot is not sparse-aware and only produced a matrix product here through
# its object-scalar fallback onto the spmatrix `*` operator.
tmp = dti_adj.T.dot(dti_adj)
def se_adj_matrix(i, shared_targets=None):
    """Build the symmetric 0/1 drug-drug adjacency matrix for index ``i``.

    Two distinct drugs ``d1 < d2`` are connected when their entry in the
    shared-target count matrix equals ``i + 5``. The diagonal is always zero.

    Parameters
    ----------
    i : int
        Index of the adjacency matrix to build; selects the count ``i + 5``.
    shared_targets : square dense array or scipy sparse matrix, optional
        Pairwise count matrix. Defaults to the module-level ``tmp`` so that
        existing ``se_adj_matrix(i)`` calls keep working.

    Returns
    -------
    scipy.sparse.csr_matrix
        Integer matrix with the same shape as ``shared_targets``.
    """
    if shared_targets is None:
        shared_targets = tmp
    if sp.issparse(shared_targets):
        # One dense conversion replaces a scalar sparse lookup per drug pair
        shared_targets = shared_targets.toarray()
    counts = np.asarray(shared_targets)
    # Only entries strictly above the diagonal are tested (the d1 < d2 pairs),
    # then mirrored, which is exactly what the pairwise loop did.
    upper = np.triu(counts == i + 5, k=1)
    mat = (upper | upper.T).astype(int)
    return sp.csr_matrix(mat)
# One delayed se_adj_matrix task per index in range(n_se_combo), run on 8 workers
ddi_adj_list = Parallel(n_jobs=8)(
    delayed(se_adj_matrix)(se_idx) for se_idx in range(n_se_combo)
)
# Per-drug degree of each matrix: column sums flattened to a 1-D array
ddi_degrees_list = [
    np.array(adj.sum(axis=0)).squeeze()
    for adj in ddi_adj_list
]
print('DDI adjacency matrices created in ',dt.timedelta(seconds=time.time()-start))

DDI adjacency matrices created in  0:00:06.483239


# Saving

In [19]:
# Output path: encodes every size parameter of the toy dataset
filename = './data_structures/DS_toy_DSE_{}_PF_{}_genes_{}_drugs_{}_se_{}'.format(
    n_se_mono, n_pf, n_genes, n_drugs, n_se_combo)
# Seeded generator for the random feature matrices below. Drawing them from
# the unseeded global state made the pickled dataset differ on every run even
# though the file name stays the same.
feat_rng = np.random.RandomState(43)
data = {}
# Dictionaries & numbers
data['n_genes'] = n_genes
data['n_drugs'] = n_drugs
data['n_se_combo'] = n_se_combo
data['n_se_mono'] = n_se_mono
# DDI
data['ddi_adj_list'] = ddi_adj_list
data['ddi_degrees_list'] = ddi_degrees_list
# DTI
data['dti_adj'] = dti_adj
# PPI
data['ppi_adj'] = ppi_adj
data['ppi_degrees'] = ppi_degrees
# DSE: sparse binary (n_drugs x n_se_mono) matrix; an entry is 1 when the
# standard-normal draw exceeds 1.5 (10 * z > 15)
data['drug_feat'] = sp.csr_matrix((10 * feat_rng.randn(n_drugs, n_se_mono) > 15).astype(int))
# PF: sparse binary (n_genes x n_pf) matrix built with the same threshold
data['prot_feat'] = sp.csr_matrix((10 * feat_rng.randn(n_genes, n_pf) > 15).astype(int))

In [20]:
# Show the path that the pickle is written to in the following cell
print(filename)

./data_structures/DS_toy_DSE_9688_PF_5_genes_16269_drugs_630_se_6


In [21]:
# Create the target directory first: open() does not create missing parent
# directories, and failing here would throw away all the work done above.
out_dir = os.path.dirname(filename)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
with open(filename, 'wb') as f:
    # NOTE(review): protocol 2 is presumably kept so that Python 2 consumers
    # can load the file - confirm before raising it.
    pickle.dump(data, f, protocol=2)