# WikiPathways Demo

In [26]:
import biobricks as bb
from rdflib import Graph, Namespace
from rdflib.plugins.stores import sparqlstore
from rdflib_hdt import HDTStore
import pandas as pd

# Look up the assets of the 'wikipathways' brick through biobricks.
# NOTE(review): presumably the brick must already be installed locally — confirm.
wikipathways = bb.assets('wikipathways')
# Location of the HDT serialization; this value is handed to HDTStore when the
# graph is opened in the query cell.
hdt_path = wikipathways.wikipathways_hdt

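We can run SPARQL queries against the WikiPathways RDF data to retrieve information about pathways, genes, proteins, and metabolites. The queries should return the same results as the public endpoint at https://sparql.wikipathways.org/, provided the local HDT snapshot and the endpoint hold the same data release.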

In [27]:
# SPARQL query with explicit namespace definitions.
#
# It lists (pathway title, gene-product label) pairs for every wp:GeneProduct
# that is dcterms:isPartOf a wp:Pathway.
#
# The pattern variable is named ?gene so that the projected alias ?geneProduct
# is not already in scope: SPARQL 1.1 forbids `(expr AS ?v)` when ?v is bound
# in the WHERE clause, and strict engines reject such a query.
#
# There is no ORDER BY, so which 10 rows LIMIT keeps is up to the engine.
query = """
PREFIX wp: <http://vocabularies.wikipathways.org/wp#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX purl: <http://purl.org/dc/elements/1.1/>

SELECT DISTINCT ?pathway_label (STR(?gene_label) AS ?geneProduct) WHERE {
    ?gene a wp:GeneProduct .
    ?gene rdfs:label ?gene_label .
    ?gene dcterms:isPartOf ?pathway .
    ?pathway a wp:Pathway .
    # ?pathway dcterms:identifier "WP1560" .
    ?pathway purl:title ?pathway_label .
} LIMIT 10
"""

# Create a Graph with HDTStore
store = HDTStore(hdt_path)
try:
    g = Graph(store=store)

    # Execute the query and build the DataFrame while the store is still open.
    results = g.query(query)
    df = pd.DataFrame(results, columns=['pathway', 'geneProduct'])
finally:
    # Always release the store, even if the query or the conversion fails;
    # otherwise a failed run leaves the store open in the kernel.
    store.close()

df

Unnamed: 0,pathway,geneProduct
0,Linoleic acid metabolism affected by SARS-CoV-2,Linoleoyl-CoA desaturase
1,Lycopene biosynthesis,EC:1.3.5.5 (PDS)
2,Lycopene biosynthesis,EC:1.3.5.6 (ZDS)
3,Neuroinflammation,Nitrate reductase
4,Cysteine and methionine catabolism,SO
5,Cysteine and methionine catabolism,MT
6,Lycopene biosynthesis,EC:2.5.1.32 (PSY)
7,Linoleic acid metabolism affected by SARS-CoV-2,Cytosolic phospholipase A2 (cPLA2)
8,Lycopene biosynthesis,EC:5.2.1.12 (Z-ISO)
9,Lycopene biosynthesis,EC:5.2.1.13 (CRITSO)
