# PPI BDM
This notebook calculates the algorithmic complexity of the gene network of the DECAGON dataset. The dataset is given as an adjacency matrix of size $N_{genes} \times N_{genes}$. The code uses the package pybdm to calculate the complexity contribution of each node and its corresponding edges. The calculated feature vector, along with relevant data, is exported to a file in `pickle` format.

## Python 3

In [1]:
import numpy as np
import time
import os
import psutil
import pickle
import datetime as dt
from pybdm import BDM
from pybdm.partitions import PartitionRecursive
from algorithms import PerturbationExperiment, NodePerturbationExperiment
from getpass import getuser

In [2]:
# Settings and loading of the list of adj matrices
# Tunable run parameters: the input pickle and the number of jobs passed to
# the node perturbation experiment.
input_file = './data_structures/DS/DS_toy_genes500_drugs400_se3'
jobs = 8

# Start the global timer and keep a psutil handle on this process; both are
# read again in the export cell (total run time and memory usage).
start = time.time()
pid = os.getpid()
ps = psutil.Process(pid)

# Username of the person running the notebook; it becomes part of the output
# file name.
usrnm = getuser()

# Only the 'ppi_adj' entry of the pickled dictionary is kept.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
with open(input_file, mode='rb') as handle:
    ppi_adj = pickle.load(handle)['ppi_adj']
print('Input data loaded')

# Two-dimensional BDM object with recursive partitioning, shared by the node
# and edge perturbation experiments.
bdm = BDM(ndim=2, partition=PartitionRecursive)

Input data loaded


In [3]:
# Node perturbation
# Per-cell timer (the same name is reassigned by the edge perturbation cell).
start1 = time.time()
ppi_nodeper = NodePerturbationExperiment(
    bdm,
    metric='bdm',
    bipartite_network=False,
    parallel=True,
    jobs=jobs,
)
# The experiment is fed a plain 2-D ndarray built from the dense form of the
# adjacency matrix (presumably a scipy sparse matrix -- TODO confirm).
ppi_nodeper.set_data(np.array(ppi_adj.todense()))
print("Initial BDM calculated for nodes")
nodebdm_ppi = ppi_nodeper.run()
print('Node BDM for PPI calculated')
# Report how long this cell took, formatted as H:MM:SS.
elapsed_node = time.time() - start1
print(dt.timedelta(seconds=elapsed_node))

Initial BDM calculated for nodes
Node BDM for PPI calculated
0:02:13.105046


In [4]:
# Edge perturbation
# Restart the per-cell timer.
start1 = time.time()
ppi_edgeper = PerturbationExperiment(
    bdm,
    bipartite_network=False,
)
ppi_edgeper.set_data(np.array(ppi_adj.todense()))
# NOTE(review): this message says "nodes" although this cell runs the edge
# experiment -- looks like a copy-paste from the node cell; confirm and reword.
print("Initial BDM calculated for nodes")
# Two separate runs: one for adding edges, one for removing edges.
add_edgebdm_ppi = ppi_edgeper.run_adding_edges()
rem_edgebdm_ppi = ppi_edgeper.run_removing_edges()
print('Edge BDM for PPI calculated')
# Report how long this cell took, formatted as H:MM:SS.
elapsed_edge = time.time() - start1
print(dt.timedelta(seconds=elapsed_edge))

Initial BDM calculated for nodes
Edge BDM for PPI calculated
0:00:27.929216


In [5]:
# EXPORTING
# Number of entries in the node result; reused in the output file name.
genes = len(nodebdm_ppi)
# Snapshot of this process' memory counters and the time elapsed since the
# settings cell started the global timer.
memUse = ps.memory_info()
total_time = time.time() - start
# Everything that gets pickled: the three BDM results plus run bookkeeping
# (virtual/resident memory, total seconds, number of jobs).
output_data = {
    'nodebdm_ppi': nodebdm_ppi,
    'add_edgebdm_ppi': add_edgebdm_ppi,
    'rem_edgebdm_ppi': rem_edgebdm_ppi,
    'vms_ppi': memUse.vms,
    'rss_ppi': memUse.rss,
    'time_ppi': total_time,
    'jobs_ppi': jobs,
}

In [6]:
# Split the input path on underscores to recover the dataset tokens.
# NOTE(review): the split runs over the whole path, so the underscore in the
# directory name 'data_structures' also produces tokens; index 2 is the
# dataset tag only for this particular layout.
words = input_file.split(sep='_')
# Current working directory, used as the root of the output path.
path = os.getcwd()
print(words)

['./data', 'structures/DS/DS', 'toy', 'genes500', 'drugs400', 'se3']


In [8]:
# Destination directory for the BDM results, rooted at the working directory.
out_dir = os.path.join(path, 'data_structures', 'BDM')
# Create the directory up front: without it, open() below raises
# FileNotFoundError and the results of the perturbation runs are lost.
os.makedirs(out_dir, exist_ok=True)
# File name layout: PPI_BDM_<words[2]>genes<genes>_<usrnm><jobs>
# NOTE(review): no separator is inserted between words[2] and 'genes' --
# kept as-is so existing consumers of these files still find them.
output_file = os.path.join(
    out_dir,
    'PPI_BDM_' + words[2] + 'genes' + str(genes) + '_' + usrnm + str(jobs),
)
# Protocol 3 is pinned explicitly, as in the original export.
with open(output_file, 'wb') as f:
    pickle.dump(output_data, f, protocol=3)
print('Output data exported')

Output data exported
