# DTI BDM
This notebook calculates the algorithmic complexity of the gene-drug network of the DECAGON dataset. The dataset is given as a bipartite adjacency matrix of size $N_{genes}\times N_{drugs}$. The code uses the package `pybdm` to calculate the complexity contribution of each node and its corresponding edges along both axes of the matrix separately. The calculated feature vectors are exported, along with relevant data, to a file in `pickle` format.

## Python 3

In [1]:
import numpy as np
import time
import os
import psutil
import pickle
import datetime as dt
from pybdm import BDM
from pybdm.partitions import PartitionRecursive
from algorithms import PerturbationExperiment, NodePerturbationExperiment
from getpass import getuser

In [2]:
# Run settings and loading of the DTI adjacency matrix
input_file = './data_structures/DS/DS_toy_genes500_drugs400_se3'

# Wall-clock reference and process handle; both are read again when the
# results are exported
start = time.time()
pid = os.getpid()
ps = psutil.Process(pid)

# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
with open(input_file, 'rb') as handle:
    dti_adj = pickle.load(handle)['dti_adj']
print('Input data loaded')

# Shared objects: BDM instance for the perturbation cells, plus the user name
# and job count that end up in the output file name
bdm = BDM(ndim=2, partition=PartitionRecursive)
usrnm = getuser()
jobs = 8

Input data loaded


In [3]:
# Node perturbation experiment on the DTI adjacency matrix
node_start = time.time()
dti_nodeper = NodePerturbationExperiment(
    bdm,
    metric='bdm',
    bipartite_network=True,
    parallel=True,
    jobs=jobs,
)
# The experiment receives a dense ndarray copy of the adjacency matrix
dti_nodeper.set_data(np.array(dti_adj.todense()))
print("Initial BDM calculated for nodes")
nodebdm_genes_dti, nodebdm_drugs_dti = dti_nodeper.run()
print('BDM for DTI calculated')
# Report how long this cell took
node_elapsed = time.time() - node_start
print(dt.timedelta(seconds=node_elapsed))

Initial BDM calculated for nodes
BDM for DTI calculated
0:03:30.641618


In [4]:
# Edge perturbation experiment: edge-addition run first, then edge-removal run
edge_start = time.time()
dti_edgeper = PerturbationExperiment(bdm, bipartite_network=True)
# The experiment receives a dense ndarray copy of the adjacency matrix
dti_edgeper.set_data(np.array(dti_adj.todense()))
print("Initial BDM calculated for edges")
(add_edgebdm_genes_dti,
 add_edgebdm_drugs_dti) = dti_edgeper.run_adding_edges()
(rem_edgebdm_genes_dti,
 rem_edgebdm_drugs_dti) = dti_edgeper.run_removing_edges()
print('Edge BDM for DTI calculated')
# Report how long this cell took
edge_elapsed = time.time() - edge_start
print(dt.timedelta(seconds=edge_elapsed))

Initial BDM calculated for edges
Edge BDM for DTI calculated
0:00:15.163457


In [5]:
# EXPORTING: collect the results and run statistics in a single dictionary
genes, drugs = dti_adj.shape
# Memory is sampled before the total elapsed time is taken
memUse = ps.memory_info()
total_time = time.time() - start
output_data = {
    # Node perturbation results
    'nodebdm_drugs_dti': nodebdm_drugs_dti,
    'nodebdm_genes_dti': nodebdm_genes_dti,
    # Edge perturbation results (adding edges)
    'add_edgebdm_drugs_dti': add_edgebdm_drugs_dti,
    'add_edgebdm_genes_dti': add_edgebdm_genes_dti,
    # Edge perturbation results (removing edges)
    'rem_edgebdm_drugs_dti': rem_edgebdm_drugs_dti,
    'rem_edgebdm_genes_dti': rem_edgebdm_genes_dti,
    # Process memory (virtual and resident), total run time and job count
    'vms_dti': memUse.vms,
    'rss_dti': memUse.rss,
    'time_dti': total_time,
    'jobs_dti': jobs,
}

In [6]:
# Split the input file name on underscores and remember the working
# directory; both are needed to build the output file name
words = input_file.split(sep='_')
path = os.getcwd()
print(words)

['./data', 'structures/DS/DS', 'toy', 'genes500', 'drugs400', 'se3']


In [7]:
# Destination folder for the exported BDM features
output_dir = os.path.join(path, 'data_structures', 'BDM')
# Create the folder when it is missing, so the results computed in the
# previous cells are not lost to a FileNotFoundError at the very last step
os.makedirs(output_dir, exist_ok=True)

# File name: dataset tag taken from the input name, matrix dimensions,
# user name and number of jobs.
# NOTE(review): there is no '_' between words[2] and 'genes', nor between the
# user name and the job count; kept as-is so existing file names do not
# change -- confirm whether this is intended.
output_name = ('DTI_BDM_' + words[2] + 'genes' + str(genes) +
               '_drugs' + str(drugs) + '_' + usrnm + str(jobs))
output_file = os.path.join(output_dir, output_name)

with open(output_file, 'wb') as f:
    # Protocol 3 is pinned explicitly for the exported file
    pickle.dump(output_data, f, protocol=3)
print('Output data exported')

Output data exported
