# Proof of concept DWWC matrix computation

In [1]:
import pandas
from neo4j.v1 import GraphDatabase
import hetio.readwrite
import hetio.neo4j

from hetmech.diffusion import dwwc, get_node_to_position

In [2]:
# Load Hetionet v1.0 from a URL pinned to a specific commit of dhimmel/hetionet,
# so the same input file is read on every run (requires network access).
url = 'https://github.com/dhimmel/hetionet/raw/76550e6c93fbe92124edc71725e8c7dd4ca8b1f5/hetnet/json/hetionet-v1.0.json.bz2'
graph = hetio.readwrite.read_graph(url)
# The metagraph is used below to resolve the metapath abbreviation.
metagraph = graph.metagraph

In [3]:
# Exponent w of the degree damping: the Cypher query built later in this notebook
# weights each path by the product of degree ^ -w over the degrees along the path.
damping_exponent = 0.4
# CbGeAlD visits four distinct node types (Compound, Gene, Anatomy, Disease), so no
# node can repeat along a walk and DWWC is equivalent to DWPC.
metapath = metagraph.metapath_from_abbrev('CbGeAlD')
# Display the expanded, human-readable form of the abbreviation.
metapath.get_unicode_str()

'Compound–binds–Gene–expresses–Anatomy–localizes–Disease'

In [4]:
%%time
# Path count (PC) matrix: with damping=0 every degree term is degree ^ 0 = 1, so each
# entry is presumably a plain walk count; the result is cast to int.
CbGeAlD_pc = dwwc(graph, metapath, damping=0).astype(int)
# Degree-weighted matrix for the same graph and metapath, using the configured exponent.
CbGeAlD_dwwc = dwwc(graph, metapath, damping=damping_exponent)

CPU times: user 6.09 s, sys: 648 ms, total: 6.74 s
Wall time: 5.81 s


In [5]:
# Matrix dimensions; entries are addressed later as [disease position, compound position].
CbGeAlD_dwwc.shape

(137, 1552)

In [6]:
# Density: fraction of matrix entries that are nonzero
(CbGeAlD_dwwc != 0).mean()

0.80004609075174959

In [7]:
# Path count matrix (integer entries, computed with damping=0)
CbGeAlD_pc

array([[ 0,  8,  0, ...,  0,  0,  5],
       [ 4, 16,  4, ...,  0,  0, 24],
       [11, 29, 11, ...,  0,  4, 46],
       ..., 
       [ 0,  3,  0, ...,  0,  0,  5],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 6, 25,  6, ...,  0,  2, 19]])

In [8]:
# DWWC matrix: computed like the path count matrix but with damping=damping_exponent,
# so entries are degree-weighted floats rather than integer counts
CbGeAlD_dwwc

array([[ 0.        ,  0.02988823,  0.        , ...,  0.        ,
         0.        ,  0.01392936],
       [ 0.012456  ,  0.0217836 ,  0.012456  , ...,  0.        ,
         0.        ,  0.04430506],
       [ 0.02499535,  0.01942093,  0.02499535, ...,  0.        ,
         0.03016969,  0.05443763],
       ..., 
       [ 0.        ,  0.00398213,  0.        , ...,  0.        ,
         0.        ,  0.02561832],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.05142607,  0.02868431,  0.05142607, ...,  0.        ,
         0.01563081,  0.0410031 ]])

### Neo4j DWPC query

In [9]:
# Example compound-disease pair used to cross-check the matrices against Neo4j.
compound = 'DB00050'
disease = 'DOID:0050425'
# Build the Cypher DWPC query for the metapath; source and target nodes are matched
# on their `identifier` property and supplied later as query parameters.
query = hetio.neo4j.construct_dwpc_query(metapath, property='identifier')
print(query)

MATCH path = (n0:Compound)-[:BINDS_CbG]-(n1)-[:EXPRESSES_AeG]-(n2)-[:LOCALIZES_DlA]-(n3:Disease)
USING JOIN ON n1
WHERE n0.identifier = { source }
AND n3.identifier = { target }
WITH
[
size((n0)-[:BINDS_CbG]-()),
size(()-[:BINDS_CbG]-(n1)),
size((n1)-[:EXPRESSES_AeG]-()),
size(()-[:EXPRESSES_AeG]-(n2)),
size((n2)-[:LOCALIZES_DlA]-()),
size(()-[:LOCALIZES_DlA]-(n3))
] AS degrees, path
RETURN
count(path) AS PC,
sum(reduce(pdp = 1.0, d in degrees| pdp * d ^ -{ w })) AS DWPC


In [10]:
# Run the DWPC query on the Hetionet Neo4j instance (requires network access;
# no credentials are passed to the driver).
params = {
    'source': compound,
    'target': disease,
    'w': damping_exponent,
}
driver = GraphDatabase.driver("bolt://neo4j.het.io")
try:
    with driver.session() as session:
        # Fetch the record while the session is still open; `result` holds the
        # single returned record (fields PC and DWPC), not the result cursor.
        result = session.run(query, params).single()
finally:
    # Release the driver's connections even if the query fails.
    driver.close()
result

<Record PC=4 DWPC=0.0014783782118662789>

In [11]:
# Map node identifiers to their positions along the matrix axes.
compound_to_position = {
    node.identifier: position
    for node, position in get_node_to_position(graph, 'Compound').items()
}
disease_to_position = {
    node.identifier: position
    for node, position in get_node_to_position(graph, 'Disease').items()
}
# The matrices are indexed as [i, j] = [disease position, compound position].
j = compound_to_position[compound]
i = disease_to_position[disease]

In [12]:
# Path count from Neo4j next to the matrix entry for the same (disease, compound) pair.
result['PC'], CbGeAlD_pc[i, j]

(4, 4)

In [14]:
# DWPC from Neo4j next to the DWWC matrix entry for the same (disease, compound) pair.
# NOTE(review): the recorded output shows these disagree (about 0.00148 vs 0.01246) even
# though the path counts for this pair agree — TODO investigate the degree weighting
# before relying on the DWWC values.
result['DWPC'], CbGeAlD_dwwc[i, j]

(0.0014783782118662789, 0.012455997962816732)