# Querying WikiPathways and DrugBank Jointly as CX RDF

Representing CX as RDF overcomes one of its limitations: CX networks cannot easily be combined and queried jointly. In this example, WikiPathways and DrugBank are combined to identify chemicals that interact with the Notch Signaling Pathway.

In [1]:
import os
import sys
import time

import bio2bel_wikipathways
import bio2bel_drugbank
import cx_rdf
import pybel_cx
import pandas as pd
import pybel_tools
from pybel_tools.visualization import to_jupyter
import rdflib
import pybel

INFO:rdflib:RDFLib Version: 4.2.1


In [2]:
# Record the Python interpreter version this notebook was run with.
print(sys.version)

3.6.5 (default, Jun 17 2018, 12:13:06) 
[GCC 4.2.1 Compatible Apple LLVM 9.1.0 (clang-902.0.39.2)]


In [3]:
# Record the local date and time of this run.
print(time.asctime())

Fri Jul 27 11:43:57 2018


In [4]:
# Report the version of every project package used below, one
# "<name> Version: <version>" line per package, in a fixed order.
print('\n'.join(
    f'{package_label} Version: {package.get_version()}'
    for package_label, package in (
        ('PyBEL', pybel),
        ('PyBEL-Tools', pybel_tools),
        ('PyBEL-CX', pybel_cx),
        ('Bio2BEL WikiPathways', bio2bel_wikipathways),
        ('Bio2BEL Drugbank', bio2bel_drugbank),
        ('CX-RDF', cx_rdf),
    )
))

PyBEL Version: 0.11.11-dev
PyBEL-Tools Version: 0.6.1-dev
PyBEL-CX Version: 0.1.2-dev
Bio2BEL WikiPathways Version: 0.1.0-dev
Bio2BEL Drugbank Version: 0.1.0-dev
CX-RDF Version: 0.0.1-dev


## Get Data

### WikiPathways Data

In [5]:
# Create the Bio2BEL WikiPathways manager used to fetch pathway graphs below.
wikipathways_manager = bio2bel_wikipathways.Manager()
# Bare expression so the notebook displays the manager's repr.
wikipathways_manager

<WikipathwaysManager url=mysql+mysqldb://root@localhost/pybel12?charset=utf8>

What chemicals target the Notch Signaling Pathway in WikiPathways?

For more information about the representation of the Notch Signaling Pathway, see: 
https://compath.scai.fraunhofer.de/pathway/wikipathways/WP61

In [6]:
# WikiPathways identifier of the pathway to fetch.
notch_wikipathways_id = 'WP61'

# Fetch the pathway as a graph from the WikiPathways manager.
notch_bel_graph = wikipathways_manager.get_pathway_graph_by_id(notch_wikipathways_id)

# Report the graph's size as a quick sanity check.
print(f'The Notch Signaling Pathway has {notch_bel_graph.number_of_nodes()} nodes '
      f'and {notch_bel_graph.number_of_edges()} edges.')

The Notch Signaling Pathway has 63 nodes and 62 edges.


In [7]:
# Render the pathway graph inline in the notebook (PyBEL-Tools visualization).
to_jupyter(notch_bel_graph)

<IPython.core.display.Javascript object>

In [8]:
# Convert the pathway graph to CX with PyBEL-CX; the CX object feeds the RDF conversion.
notch_cx = pybel_cx.to_cx(notch_bel_graph)

In [9]:
# Turn the CX document into an RDF graph with CX-RDF.
notch_rdf = cx_rdf.cx_to_rdf_graph(notch_cx)

# len() of the graph is reported as its number of triples.
print(f'The Notch Signaling Pathway as CX RDF has {len(notch_rdf)} triples.')

The Notch Signaling Pathway as CX RDF has 2241 triples.


### DrugBank Data

In [10]:
# Location of the pickled DrugBank BEL graph, relative to the working directory.
drugbank_pickle_path = 'drugbank.bel.gpickle'

if not os.path.exists(drugbank_pickle_path):
    # No cache yet: export DrugBank to BEL through its Bio2BEL manager, then
    # pickle the graph so that later runs can skip the export.
    drugbank_manager = bio2bel_drugbank.Manager()
    drugbank_bel = drugbank_manager.to_bel()

    pybel.to_pickle(drugbank_bel, drugbank_pickle_path)
else:
    # Cache hit: reload the graph pickled by an earlier run.
    # NOTE(review): unpickling executes arbitrary code; only load a cache file
    # that this notebook wrote itself.
    drugbank_bel = pybel.from_pickle(drugbank_pickle_path)

# Report the graph's size as a quick sanity check.
print(f'DrugBank has {drugbank_bel.number_of_nodes()} nodes '
      f'and {drugbank_bel.number_of_edges()} edges.')

DrugBank has 7698 nodes and 17492 edges.


In [11]:
# Convert the DrugBank graph to CX with PyBEL-CX; the CX object feeds the RDF conversion.
drugbank_cx = pybel_cx.to_cx(drugbank_bel)

In [12]:
# Turn the DrugBank CX document into an RDF graph with CX-RDF.
drugbank_rdf = cx_rdf.cx_to_rdf_graph(drugbank_cx)

# len() of the graph is reported as its number of triples.
print(f'DrugBank as CX RDF has {len(drugbank_rdf)} triples.')

DrugBank as CX RDF has 519300 triples.


### Combine RDF

`RDFLib` has a Pythonic interface for concatenating several triple stores.

In [13]:
# Combine the two triple stores with the `+` operator; every query below runs on `rdf`.
rdf = notch_rdf + drugbank_rdf

# Report the size of the combined store.
print(f'The Notch Signaling Pathway and DrugBank combine as CX RDF has {len(rdf)} triples.')

The Notch Signaling Pathway and DrugBank combine as CX RDF has 521541 triples.


## Query

In [14]:
# Prefixes made available to every SPARQL query below: the CX vocabulary plus
# one identifiers.org namespace per external resource used for cross references.
init_ns = {'cx': cx_rdf.CX}
init_ns.update(
    (prefix, rdflib.Namespace(f'http://identifiers.org/{prefix}/'))
    for prefix in ('drugbank', 'hgnc', 'wikipathways')
)

### Show identifiers.org cross references

In [15]:
# Find every triple whose object is the identifiers.org URI hgnc:10899, i.e.
# every (subject, predicate) pair that cross-references that gene.
query = """
SELECT ?s ?p
WHERE {
    ?s ?p hgnc:10899 .
}
"""

result = rdf.query(query, initNs=init_ns)
# ?s and ?p are the subject and predicate of each matching triple, so the
# columns are titled accordingly (same titles as the other `SELECT ?s ?p` query).
pd.DataFrame(list(result), columns=['Subject', 'Predicate'])

Unnamed: 0,Identifier,Label
0,N906d8a9d81ec49a690afaeb508abcdc6,http://ndexbio.org/rdfs#node_has_alias
1,Ndd41f240cc39477d9dbde5658e4814f6,http://ndexbio.org/rdfs#node_has_alias


### Get node information by alias

The SKP1 gene (hgnc:10899) appears in both networks, as different nodes.

In [16]:
# For each node aliased to hgnc:10899, return the CX document that owns the
# node, the node's rdfs:label, and every attribute name/value pair attached to
# the node. LIMIT 13 caps the number of rows returned.
query = """
SELECT ?document ?node ?label ?attribute_name ?attribute_value
WHERE {
    ?node cx:node_has_alias hgnc:10899 .
    ?node rdfs:label ?label .
    ?document cx:has_node ?node .
    ?node cx:node_has_attribute ?name_attribute .
    ?name_attribute cx:attribute_has_name ?attribute_name.
    ?name_attribute cx:attribute_has_value ?attribute_value .
}
LIMIT 13
"""

result = rdf.query(query, initNs=init_ns)
# Column titles follow the order of the SELECT variables.
pd.DataFrame(list(result), columns=['Document', 'Node', 'Label', 'Attribute', 'Value'])

Unnamed: 0,Document,Node,Label,Attribute,Value
0,N50f83229ef274fe6ac46ad5e0cf863c7,N906d8a9d81ec49a690afaeb508abcdc6,S-phase kinase-associated protein 1,label,S-phase kinase-associated protein 1
1,N50f83229ef274fe6ac46ad5e0cf863c7,N906d8a9d81ec49a690afaeb508abcdc6,S-phase kinase-associated protein 1,identifier,10899
2,N50f83229ef274fe6ac46ad5e0cf863c7,N906d8a9d81ec49a690afaeb508abcdc6,S-phase kinase-associated protein 1,function,Protein
3,N50f83229ef274fe6ac46ad5e0cf863c7,N906d8a9d81ec49a690afaeb508abcdc6,S-phase kinase-associated protein 1,namespace,hgnc
4,N50f83229ef274fe6ac46ad5e0cf863c7,N906d8a9d81ec49a690afaeb508abcdc6,S-phase kinase-associated protein 1,alias,hgnc:10899
5,Nf9961ec6ea7e40e998829cfbb3bdd615,Ndd41f240cc39477d9dbde5658e4814f6,SKP1,label,SKP1
6,Nf9961ec6ea7e40e998829cfbb3bdd615,Ndd41f240cc39477d9dbde5658e4814f6,SKP1,cname,SKP1
7,Nf9961ec6ea7e40e998829cfbb3bdd615,Ndd41f240cc39477d9dbde5658e4814f6,SKP1,identifier,10899
8,Nf9961ec6ea7e40e998829cfbb3bdd615,Ndd41f240cc39477d9dbde5658e4814f6,SKP1,namespace,hgnc
9,Nf9961ec6ea7e40e998829cfbb3bdd615,Ndd41f240cc39477d9dbde5658e4814f6,SKP1,function,Protein


### Get the Notch Signaling Pathway

Find all predicate/object pairs for the Notch Signaling Pathway (wikipathways:WP61).

In [17]:
# Resolve the pathway node through its wikipathways:WP61 alias, then list
# predicate/object pairs of triples that have that node as their subject.
# LIMIT 5 keeps only the first five rows.
query = """
SELECT ?p ?o
WHERE {
    ?notch cx:node_has_alias wikipathways:WP61 .
    ?notch ?p ?o
}
LIMIT 5
"""

result = rdf.query(query, initNs=init_ns)
# Column titles follow the order of the SELECT variables.
pd.DataFrame(list(result), columns=['Predicate', 'Object'])

Unnamed: 0,Predicate,Object
0,http://ndexbio.org/rdfs#node_has_attribute,Ndfdc4696a10d4aecb35204583282523b
1,http://ndexbio.org/rdfs#node_has_attribute,N27eb3cd7748348e382caf173ba16d52e
2,http://ndexbio.org/rdfs#node_has_attribute,N3f52e0af116149a1ae41b7b489c91a7c
3,http://ndexbio.org/rdfs#node_has_attribute,N1b005ed20761447888daa53de1536818
4,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://ndexbio.org/rdfs#node


Find subject/predicate pairs of triples in which the Notch Signaling Pathway is the object (only the first five are shown).

In [18]:
# Resolve the pathway node through its wikipathways:WP61 alias, then list
# subject/predicate pairs of triples that have that node as their object.
# LIMIT 5 keeps only the first five rows.
query = """
SELECT ?s ?p
WHERE {
    ?notch cx:node_has_alias wikipathways:WP61 .
    ?s ?p ?notch
}
LIMIT 5
"""

result = rdf.query(query, initNs=init_ns)
# Column titles follow the order of the SELECT variables.
pd.DataFrame(list(result), columns=['Subject', 'Predicate'])

Unnamed: 0,Subject,Predicate
0,N5af6e918757243d29f60956066381429,Ne212220b4b884e2c8d6d6bd5db7c4297
1,N0cd8fd8d30924a958b8da72f198ab3f4,N81a711261d4e4240b0c0d1d13700df0f
2,Nae6a71ee497e478e88488ba56b7dae51,N855ed79829a94c988f2cc1ddbeea84c3
3,Neca334cbede24666a9ee00c27e704eae,N7a066ef3eeb84c2583b877d8747dbf44
4,Nc9b355a97b8649be8f8e26c29cb539e6,Nec24bb1917a84c7493c634109e24f685


### Which proteins are in the Notch Pathway?

Peek at the first few proteins that are part of the Notch Signaling Pathway.

In [19]:
# ?p is the predicate that links a node (?protein) to the pathway node. That
# predicate is itself a resource carrying cx:edge_has_interaction, which gives
# the interaction name returned as ?predicate. The linked node's label and
# alias are returned together with the pathway's label.
# NOTE(review): nothing in the pattern restricts ?protein to protein nodes.
query = """
SELECT ?protein_label ?protein_alias ?predicate ?pathway_label
WHERE {
    ?notch cx:node_has_alias wikipathways:WP61 .
    ?notch rdfs:label ?pathway_label .
    ?protein ?p ?notch .
    ?p cx:edge_has_interaction ?predicate .
    ?protein rdfs:label ?protein_label .
    ?protein cx:node_has_alias ?protein_alias
}
LIMIT 5
"""

result = rdf.query(query, initNs=init_ns)
# Column titles follow the SELECT order; 'URI' holds ?protein_alias.
pd.DataFrame(list(result), columns=['Protein', 'URI', 'Predicate', 'Pathway'])

Unnamed: 0,Protein,URI,Predicate,Pathway
0,RBPJ,http://identifiers.org/hgnc/5724,partOf,Notch Signaling Pathway
1,NFKB1,http://identifiers.org/hgnc/7794,partOf,Notch Signaling Pathway
2,NUMBL,http://identifiers.org/hgnc/8061,partOf,Notch Signaling Pathway
3,NOTCH3,http://identifiers.org/hgnc/7883,partOf,Notch Signaling Pathway
4,DLL4,http://identifiers.org/hgnc/2910,partOf,Notch Signaling Pathway


### What chemicals interact with proteins in the Notch Signaling Pathway?

Peek at drugs associated with proteins in the Notch Signaling Pathway.

Note: this query is a bit slow using RDFLib, but can be significantly improved with a dedicated triple store.

In [20]:
# The query joins the two networks through shared aliases:
#   1. Find the nodes linked to the WP61 pathway node by an edge that has an
#      interaction, and collect their aliases.
#   2. Find a *different* node carrying the same alias (the FILTER excludes the
#      pathway's own copy) -- presumably the DrugBank copy; TODO confirm.
#   3. Return the label of every subject of a triple pointing at that node.
# NOTE(review): ?predicate is bound but never selected, and ?p is unconstrained,
# so any labelled subject pointing at the second node is reported as a chemical.
query = """
SELECT ?pathway_label ?pathway_protein_label  ?chemical_label 
WHERE {
    ?pathway cx:node_has_alias wikipathways:WP61 .
    ?pathway rdfs:label ?pathway_label .
    ?pathway_protein ?partOf ?pathway .
    ?partOf cx:edge_has_interaction ?predicate .
    ?pathway_protein rdfs:label ?pathway_protein_label .
    ?pathway_protein cx:node_has_alias ?pathway_protein_alias .
    
    ?protein cx:node_has_alias ?pathway_protein_alias .
    FILTER (?protein != ?pathway_protein)
        
    ?chemical ?p ?protein .
    ?chemical rdfs:label ?chemical_label.
}
LIMIT 10
"""

result = rdf.query(query, initNs=init_ns)
# Column titles follow the order of the SELECT variables.
pd.DataFrame(list(result), columns=['Pathway', 'Protein', 'Chemical'])

Unnamed: 0,Pathway,Protein,Chemical
0,Notch Signaling Pathway,NFKB1,Triflusal
1,Notch Signaling Pathway,NFKB1,Pranlukast
2,Notch Signaling Pathway,NFKB1,Thalidomide
3,Notch Signaling Pathway,AKT1,Arsenic trioxide
4,Notch Signaling Pathway,AKT1,Genistein
5,Notch Signaling Pathway,AKT1,"N-[2-(5-methyl-4H-1,2,4-triazol-3-yl)phenyl]-7..."
6,Notch Signaling Pathway,AKT1,Perifosine
7,Notch Signaling Pathway,AKT1,ATP
8,Notch Signaling Pathway,AKT1,Resveratrol
9,Notch Signaling Pathway,AKT1,"5-(5-chloro-7H-pyrrolo[2,3-d]pyrimidin-4-yl)-4..."
