# Proof of concept DWWC matrix computation

In [1]:
import pandas
from neo4j.v1 import GraphDatabase
import hetio.readwrite
import hetio.neo4j
import hetio.pathtools

from hetmech.degree_weight import dwwc
from hetmech.matrix import get_node_to_position

In [2]:
# Read Hetionet v1.0 from a URL pinned to a specific commit of dhimmel/hetionet,
# then keep a handle on its metagraph for building metapaths.
url = 'https://github.com/dhimmel/hetionet/raw/76550e6c93fbe92124edc71725e8c7dd4ca8b1f5/hetnet/json/hetionet-v1.0.json.bz2'
graph = hetio.readwrite.read_graph(url)
metagraph = graph.metagraph

In [3]:
# Example source/target pair used for the single-pair comparisons
compound = 'DB01156'  # Bupropion
disease = 'DOID:0050742'  # nicotine dependence

# Exponent applied to degrees when down-weighting walks/paths
damping_exponent = 0.4

# CbGpPWpGaD contains duplicate metanodes (Gene appears twice), so DWWC is not equivalent to DWPC
metapath = metagraph.metapath_from_abbrev('CbGpPWpGaD')
metapath.get_unicode_str()

'Compound–binds–Gene–participates–Pathway–participates–Gene–associates–Disease'

In [4]:
%%time
# damping=0 gives every walk the same weight, so the result is a count and is cast to int.
# NOTE(review): despite the `_pc` suffix this is presumably a walk count (walks may repeat a node),
# not a path count — confirm against the DWPC results.
CbGpPWpGaD_pc = dwwc(graph, metapath, damping=0).astype(int)
# Same computation with degree down-weighting by damping_exponent
CbGpPWpGaD_dwwc = dwwc(graph, metapath, damping=damping_exponent)

CPU times: user 18.5 s, sys: 764 ms, total: 19.3 s
Wall time: 8.35 s


In [5]:
# Matrix dimensions; rows are indexed by compound position and columns by disease position
CbGpPWpGaD_dwwc.shape

(1552, 137)

In [6]:
# Density: fraction of matrix entries with a nonzero count
CbGpPWpGaD_pc.astype(bool).mean()

0.8040061328918654

In [7]:
# Walk count matrix (DWWC with damping=0). This is not a path count:
# the metapath repeats the Gene metanode, so walks that revisit a node are included.
CbGpPWpGaD_pc

array([[  11,   13,  192, ...,   40,    0,  758],
       [  23,   30,  400, ...,   77,    0, 1929],
       [  11,   13,  192, ...,   40,    0,  758],
       ..., 
       [   0,    0,    0, ...,    0,    0,    0],
       [   5,    7,   97, ...,   22,    0,  352],
       [   0,    3,  114, ...,    0,    0,  156]])

In [8]:
# DWWC matrix, down-weighted using damping_exponent
CbGpPWpGaD_dwwc

array([[ 0.00074141,  0.00057174,  0.01138872, ...,  0.00350678,
         0.        ,  0.02075716],
       [ 0.00057175,  0.00088381,  0.01745733, ...,  0.0050468 ,
         0.        ,  0.03172209],
       [ 0.00074141,  0.00057174,  0.01138872, ...,  0.00350678,
         0.        ,  0.02075716],
       ..., 
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.0003131 ,  0.00071464,  0.01298882, ...,  0.00495461,
         0.        ,  0.01838852],
       [ 0.        ,  0.00025659,  0.00779677, ...,  0.        ,
         0.        ,  0.00381554]])

In [9]:
# Map node identifiers to their index along each axis of the DWWC matrices
compound_to_position = {
    node.identifier: position
    for node, position in get_node_to_position(graph, 'Compound').items()
}
disease_to_position = {
    node.identifier: position
    for node, position in get_node_to_position(graph, 'Disease').items()
}
# Row index of the example compound and column index of the example disease
i = compound_to_position[compound]
j = disease_to_position[disease]

In [10]:
# Walk count for the compound–disease pair (walks that repeat a node are included,
# so this can exceed the path count reported by the DWPC implementations)
CbGpPWpGaD_pc[i, j]

152

In [11]:
# Degree-weighted walk count (DWWC) for the same compound–disease pair
CbGpPWpGaD_dwwc[i, j]

0.038040121429465001

### Cypher DWPC implementation

In [12]:
# Build the Cypher DWPC query for this metapath. Source and target are matched on their
# `identifier` property; unique_nodes=True yields the node-inequality constraint
# (n1 <> n3 in the printed query) that excludes paths repeating a node.
query = hetio.neo4j.construct_dwpc_query(metapath, property='identifier', unique_nodes=True)
print(query)

MATCH path = (n0:Compound)-[:BINDS_CbG]-(n1)-[:PARTICIPATES_GpPW]-(n2)-[:PARTICIPATES_GpPW]-(n3)-[:ASSOCIATES_DaG]-(n4:Disease)
USING JOIN ON n2
WHERE n0.identifier = { source }
AND n4.identifier = { target }
AND n1 <> n3
WITH
[
size((n0)-[:BINDS_CbG]-()),
size(()-[:BINDS_CbG]-(n1)),
size((n1)-[:PARTICIPATES_GpPW]-()),
size(()-[:PARTICIPATES_GpPW]-(n2)),
size((n2)-[:PARTICIPATES_GpPW]-()),
size(()-[:PARTICIPATES_GpPW]-(n3)),
size((n3)-[:ASSOCIATES_DaG]-()),
size(()-[:ASSOCIATES_DaG]-(n4))
] AS degrees, path
RETURN
count(path) AS PC,
sum(reduce(pdp = 1.0, d in degrees| pdp * d ^ -{ w })) AS DWPC


In [13]:
# Run the DWPC query for the example pair against the Neo4j instance at neo4j.het.io.
driver = GraphDatabase.driver("bolt://neo4j.het.io")
params = {
    'source': compound,
    'target': disease,
    'w': damping_exponent,
}
try:
    with driver.session() as session:
        # single() consumes the one expected record while the session is still open
        result = session.run(query, params).single()
finally:
    # Release the driver's pooled connections even if the query fails
    driver.close()
result

<Record PC=142 DWPC=0.03287590886921623>

### hetio DWPC implementation

In [14]:
# graph.node_dict is keyed by (metanode, identifier) tuples
compound_id = 'Compound', compound
disease_id = 'Disease', disease
# Enumerate the paths between the two nodes that follow the metapath.
# duplicates=False presumably drops paths that repeat a node, mirroring
# unique_nodes=True in the Cypher query — TODO confirm
paths = hetio.pathtools.paths_between(
    graph, 
    source=graph.node_dict[compound_id],
    target=graph.node_dict[disease_id],
    metapath=metapath,
    duplicates=False,
)

In [15]:
# Path count from the enumerated paths; expected to equal PC from the Cypher query
len(paths)

142

In [16]:
# DWPC from the enumerated paths; expected to equal the Cypher DWPC up to floating-point rounding
hetio.pathtools.DWPC(paths, damping_exponent=damping_exponent)

0.03287590886921622