# Using ShEx with PyShEx
## 1. Local file, all subjects

In [1]:
import rdflib
from pyshex.shex_evaluator import ShExEvaluator
from pyshex.user_agent import SlurpyGraphWithAgent
from pyshex.utils.sparql_query import SPARQLQuery

### Open local file

In [2]:
# Read the whole Turtle file into a string; this text is later handed to
# ShExEvaluator as the RDF data to validate.
# The context manager closes the file handle deterministically, and the
# explicit encoding avoids depending on the platform's default locale
# (Turtle documents are UTF-8).
with open('aop.ttl', 'r', encoding='utf-8') as ttl_file:
    var = ttl_file.read()

### Find all subjects

In [3]:
# Parse the local file into an in-memory graph and collect every distinct
# subject; these become the focus nodes that are validated below.
g = rdflib.Graph()
g.parse('aop.ttl', format="ttl")  # adapt format if necessary

# Graph.subjects() yields the subject of each triple; the set removes duplicates.
# Using it instead of an explicit loop avoids leaving `subject`, `predicate`
# and, in particular, `object` (which would shadow the builtin) bound in the
# notebook's global namespace.
nodes = set(g.subjects())
print(nodes)

{rdflib.term.URIRef('http://identifiers.org/aop.events/593'), rdflib.term.URIRef('http://identifiers.org/aop.relationships/597'), rdflib.term.URIRef('http://identifiers.org/aop.events/585')}


### Define Shape Expression to test on the subjects (nodes)

In [4]:
# ShEx schema used for the local-file validation.
# The start shape <ker> requires one aopo:has_upstream_key_event and one
# aopo:has_downstream_key_event value, each of which must conform to <ke>.
# <ke> requires an rdf:type (`a`) value of aopo:KeyEvent.
shex = """
PREFIX aop.relationships: <http://identifiers.org/aop.relationships/> 
PREFIX aop.events: <http://identifiers.org/aop.events/> 
PREFIX aopo: <http://aopkb.org/aop_ontology#> 
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 

start = @<ker>
<ker> {
  aopo:has_upstream_key_event     @<ke> ;
  aopo:has_downstream_key_event   @<ke>
}

<ke> {
   a [ aopo:KeyEvent ]
}"""

### Execute

In [5]:
# Validate every collected subject against the schema's start shape,
# using the Turtle text read earlier as the RDF data.
result = ShExEvaluator(var, shex, nodes).evaluate()

# One report line per focus node: "<focus>: PASS" or "<focus>: FAIL: <reason>".
for r in result:
    status = "PASS" if r.result else f"FAIL: {r.reason}"
    print(f"{r.focus}: {status}")

http://identifiers.org/aop.events/593: FAIL:   Testing aop.events:593 against shape ker
       No matching triples found for predicate aopo:has_upstream_key_event
http://identifiers.org/aop.relationships/597: PASS
http://identifiers.org/aop.events/585: FAIL:   Testing aop.events:585 against shape ker
       No matching triples found for predicate aopo:has_upstream_key_event


## 2. SPARQL endpoint, selected subjects

In [6]:
from pyshex.shex_evaluator import ShExEvaluator
from pyshex.user_agent import SlurpyGraphWithAgent
from pyshex.utils.sparql_query import SPARQLQuery

### Define the SPARQL endpoint URL

In [7]:
# URL of the SPARQL endpoint; used both to select the focus nodes and as the
# source of the RDF data that is validated in this section.
endpoint = "http://aopwiki-rdf.prod.openrisknet.org/sparql/"

### Define the SPARQL query that selects the subjects to validate against the ShEx

In [8]:
# Query selecting the subjects to validate: every resource typed as
# aopo:AdverseOutcomePathway. The ?item bindings are used as focus nodes.
# The rdf: prefix is declared explicitly because the query uses rdf:type;
# without the declaration the query is only accepted by endpoints that
# happen to predefine that namespace.
sparql = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX aop.relationships: <http://identifiers.org/aop.relationships/> 
PREFIX aop.events: <http://identifiers.org/aop.events/> 
PREFIX aopo: <http://aopkb.org/aop_ontology#> 
SELECT DISTINCT ?item WHERE {
  ?item rdf:type aopo:AdverseOutcomePathway
}
"""

### Define Shape Expression to test on the subjects (nodes)

In [9]:
# ShEx schema used for the SPARQL-endpoint validation (rebinds `shex`).
# The start shape <aop> requires one or more (`+`) values for each of
# aopo:has_key_event, aopo:has_molecular_initiating_event and
# aopo:has_adverse_outcome conforming to <ke>, and one or more
# aopo:has_key_event_relationship values conforming to <ker>.
# <ke> requires an rdf:type (`a`) value of aopo:KeyEvent; <ker> requires an
# upstream and a downstream key event, each conforming to <ke>.
shex = """
PREFIX aop.relationships: <http://identifiers.org/aop.relationships/> 
PREFIX aop.events: <http://identifiers.org/aop.events/> 
PREFIX aopo: <http://aopkb.org/aop_ontology#> 
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 

start = @<aop>
<aop> {
  aopo:has_key_event     @<ke>+ ;
  aopo:has_key_event_relationship   @<ker>+ ;
  aopo:has_molecular_initiating_event @<ke>+ ;
  aopo:has_adverse_outcome @<ke>+
}

<ke> {
   a [ aopo:KeyEvent ]
}

<ker> {
  aopo:has_upstream_key_event     @<ke> ;
  aopo:has_downstream_key_event   @<ke>
}"""

### Execute

In [10]:
# RDF source built from the endpoint URL (NOTE(review): presumably fetches
# triples from the endpoint on demand - confirm against the PyShEx docs).
endpoint_graph = SlurpyGraphWithAgent(endpoint)
# Focus nodes come from running the `sparql` query against the same endpoint;
# swap in a different query variable here to test other subjects.
focus_nodes = SPARQLQuery(endpoint, sparql).focus_nodes()
# Swap in a different schema variable here to test another shape expression.
result = ShExEvaluator(endpoint_graph, shex, focus_nodes).evaluate()

# One report line per focus node: "<focus>: PASS" or "<focus>: FAIL: <reason>".
for r in result:
    status = "PASS" if r.result else f"FAIL: {r.reason}"
    print(f"{r.focus}: {status}")

http://identifiers.org/aop/1: PASS
http://identifiers.org/aop/10: PASS
http://identifiers.org/aop/100: PASS
http://identifiers.org/aop/101: PASS
http://identifiers.org/aop/102: PASS
http://identifiers.org/aop/103: PASS
http://identifiers.org/aop/104: PASS
http://identifiers.org/aop/105: PASS
http://identifiers.org/aop/106: PASS
http://identifiers.org/aop/107: PASS
http://identifiers.org/aop/108: PASS
http://identifiers.org/aop/109: PASS
http://identifiers.org/aop/11: FAIL:   Testing <http://identifiers.org/aop/11> against shape aop
       No matching triples found for predicate aopo:has_key_event_relationship
http://identifiers.org/aop/110: PASS
http://identifiers.org/aop/111: PASS
http://identifiers.org/aop/112: PASS
http://identifiers.org/aop/113: PASS
http://identifiers.org/aop/114: PASS
http://identifiers.org/aop/115: PASS
http://identifiers.org/aop/116: PASS
http://identifiers.org/aop/117: PASS
http://identifiers.org/aop/118: PASS
http://identifiers.org/aop/119: PASS
http://identi

In [11]:
%load_ext watermark

# Python, IPython, package versions, and machine characteristics
%watermark -v -m -p rdflib,pyshex

# date
print(" ")
%watermark -u -n -t -z

CPython 3.7.3
IPython 7.6.1

rdflib 5.0.0
pyshex unknown

compiler   : GCC 7.3.0
system     : Linux
release    : 5.4.0-52-generic
machine    : x86_64
processor  : x86_64
CPU cores  : 8
interpreter: 64bit
 
last updated: Wed Nov 18 2020 18:34:55 CET
