In [5]:
import os
import time

import numpy as np
import pandas as pd
import tables
from neo4j.v1 import GraphDatabase
from scipy import sparse

import hetio.neo4j
import hetio.pathtools
import hetio.readwrite
from hetmech.matrix import get_node_to_position, metaedge_to_adjacency_matrix

In [3]:
# Hetionet v1.0 JSON export; the URL embeds a commit hash so the same file
# is fetched on every run.
url = (
    'https://github.com/dhimmel/hetionet/raw/'
    '76550e6c93fbe92124edc71725e8c7dd4ca8b1f5'
    '/hetnet/json/hetionet-v1.0.json.bz2'
)
# Read the graph from the URL and keep a handle on its metagraph.
graph = hetio.readwrite.read_graph(url)
metagraph = graph.metagraph

# NPY Saving and loading

In [38]:
# Build the GiG and CtD adjacency matrices twice: once with the default
# settings and once with sparse_threshold=1 (presumably this forces a sparse
# return type -- confirm in hetmech.matrix).
# metaedge_to_adjacency_matrix returns three values; only the third (the
# matrix) is kept for the first three calls. r and c hold the first two
# return values of the final call.
gig_adj = metaedge_to_adjacency_matrix(graph, 'GiG')[2]
ctd_adj = metaedge_to_adjacency_matrix(graph, 'CtD')[2]
sparse_gig_adj = metaedge_to_adjacency_matrix(graph, 'GiG', sparse_threshold=1)[2]
r, c, sparse_ctd_adj = metaedge_to_adjacency_matrix(graph, 'CtD', sparse_threshold=1)

## Save matrices

In [17]:
# Dense matrices into .npy
# Create the output directory up front (outside the timed region) so the
# cell also works on a fresh checkout where data/ does not exist yet.
os.makedirs('data', exist_ok=True)

# perf_counter() is the high-resolution clock meant for timing short
# intervals; time.time() follows the wall clock and can jump.
t1 = time.perf_counter()
np.save('data/gig_adj', gig_adj)  # np.save appends the .npy extension
np.save('data/ctd_adj', ctd_adj)
t2 = time.perf_counter()
print(f'{(t2 - t1)*1000} ms')

930.0286769866943 ms


In [43]:
# Sparse matrices into .npy
# NOTE(review): .npy has no native sparse layout. Assuming these are
# scipy.sparse matrices, np.save wraps each one in a 0-d object array and
# pickles it, so reading it back requires np.load(..., allow_pickle=True)
# followed by .item(). scipy.sparse.save_npz would avoid pickle, but it
# changes the on-disk format and file name, so it is not used here.
os.makedirs('data', exist_ok=True)  # outside the timed region

# perf_counter() is the high-resolution clock meant for timing short
# intervals; time.time() follows the wall clock and can jump.
t1 = time.perf_counter()
np.save('data/sparse_gig_adj', sparse_gig_adj, allow_pickle=True)
np.save('data/sparse_ctd_adj', sparse_ctd_adj, allow_pickle=True)
t2 = time.perf_counter()
print(f'{(t2 - t1)*1000} ms')

19.55270767211914 ms


## Load matrices

In [21]:
# Dense matrices out of .npy
# perf_counter() is the high-resolution clock meant for timing short
# intervals; time.time() follows the wall clock and can jump.
t1 = time.perf_counter()
gig_adj_load = np.load('data/gig_adj.npy')
ctd_adj_load = np.load('data/ctd_adj.npy')
t2 = time.perf_counter()
print(f'{(t2 - t1)*1000} ms')

163.57731819152832 ms


In [68]:
# Sparse matrices out of .npy
# The sparse matrices were written with np.save, which stores a non-array
# object as a pickled 0-d object array. Two consequences:
#   * allow_pickle=True is required; NumPy >= 1.16.3 defaults it to False
#     and raises ValueError on object arrays otherwise.
#   * .item() unwraps the 0-d array so the variable holds the matrix itself
#     rather than an ndarray wrapper.
# SECURITY: unpickling executes arbitrary code -- only load .npy files that
# this notebook produced itself.
# perf_counter() is the high-resolution clock meant for timing short intervals.
t1 = time.perf_counter()
sparse_gig_adj_load = np.load('data/sparse_gig_adj.npy', allow_pickle=True).item()
sparse_ctd_adj_load = np.load('data/sparse_ctd_adj.npy', allow_pickle=True).item()
t2 = time.perf_counter()
print(f'{(t2 - t1)*1000} ms')

11.081695556640625 ms


# HDF5 Saving and loading

In [25]:
# Metaedge abbreviations whose adjacency matrices are written to the HDF5 file.
abbr = (
    'AdG AeG AuG CbG CcSE CdG CpD CrC CtD CuG DaG DdG '
    'DlA DpS DrD DuG GcG GiG GpBP GpCC GpMF GpPW Gr>G PCiC'
).split()

In [28]:
# Write the sparse adjacency matrix of every metaedge in `abbr` to one HDF5 file.
# Layout: /<first char><last char>/<metaedge>/<metaedge>_{data,indices,indptr,shape}
os.makedirs('data', exist_ok=True)  # so the cell works on a fresh checkout

# The context manager closes the file even if building or writing a matrix
# raises part-way through the loop.
with tables.open_file('data/sparse_matrices.h5', mode='w', title='Adjacency Matrices') as h5file:
    for edge in abbr:
        # Parent group is named by the first and last character of the abbreviation.
        group_path = f'{edge[0]}{edge[-1]}'
        r, c, sparse_matrix = metaedge_to_adjacency_matrix(graph, edge, sparse_threshold=1)
        # The three arrays written below are later fed to sparse.csc_matrix,
        # so make sure they really describe a CSC matrix.
        sparse_matrix = sparse.csc_matrix(sparse_matrix)
        # createparents=True creates the parent group the first time it is
        # needed and reuses it afterwards, so no error has to be swallowed
        # when several metaedges share a parent.
        group = h5file.create_group(f'/{group_path}', edge, createparents=True)
        h5file.create_array(group, edge+'_data', sparse_matrix.data)
        h5file.create_array(group, edge+'_indices', sparse_matrix.indices)
        h5file.create_array(group, edge+'_indptr', sparse_matrix.indptr)
        h5file.create_array(group, edge+'_shape', sparse_matrix.shape)



In [46]:
def read_sparse_matrix(h5file, metaedge):
    """Rebuild one CSC adjacency matrix from an open PyTables file.

    Parameters
    ----------
    h5file : open PyTables file handle
        Expected layout:
        /<first char><last char>/<metaedge>/<metaedge>_{data,indices,indptr,shape}
    metaedge : str
        Metaedge abbreviation, e.g. 'CtD'.

    Returns
    -------
    scipy.sparse.csc_matrix
        Matrix built from the stored data / indices / indptr / shape arrays.
    """
    group = f'/{metaedge[0]}{metaedge[-1]}/{metaedge}'
    data, indices, indptr, mat_shape = (
        h5file.get_node(f'{group}/{metaedge}_{part}').read()
        for part in ('data', 'indices', 'indptr', 'shape')
    )
    return sparse.csc_matrix((data, indices, indptr), shape=mat_shape)


# perf_counter() is the high-resolution clock meant for timing short intervals.
t1 = time.perf_counter()
# Open read-only: append mode would silently create an empty file if the path
# were wrong. The context manager closes the handle even if a node is missing.
with tables.open_file('data/sparse_matrices.h5', mode='r') as h5file:
    sparse_ctd_load = read_sparse_matrix(h5file, 'CtD')
    sparse_gig_load = read_sparse_matrix(h5file, 'GiG')
t2 = time.perf_counter()
print(f'{(t2 - t1)*1000} ms')

4.135847091674805 ms
