# PySHACL: SHACL in Python

See https://github.com/RDFLib/pySHACL for the pySHACL source code and documentation.


## Preparations

Install and import rdflib and pyshacl, and define the helper functions used throughout this notebook.

In [3]:
# Install required packages in the current Jupyter kernel
!pip install -q rdflib 
!pip3 install -q pyshacl

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/528.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━[0m [32m317.4/528.1 kB[0m [31m9.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m528.1/528.1 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.5/54.5 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ipython-sql 0.4.1 requires prettytable<1, but you have prettytable 2.5.0 which i

In [4]:
# Imports
from rdflib import Graph, Literal, RDF, URIRef, BNode, Namespace, Dataset
from rdflib.namespace import FOAF , XSD , RDFS 
from rdflib.plugins.sparql.processor import SPARQLResult
from rdflib.namespace import NamespaceManager

from pyshacl import validate

import pandas as pd

def sparql_select(graph,query,use_prefixes=True):
  """Run a SPARQL SELECT query on `graph` and return the result as a pandas DataFrame.

  Parameters
  ----------
  graph : object offering `query(...)` and `namespace_manager` (an rdflib Graph in this notebook)
  query : str
      The SPARQL SELECT query to execute.
  use_prefixes : bool, default True
      When true, URIRef values are abbreviated via `graph.namespace_manager`;
      all other values (and all values when false) are passed through unchanged.

  Returns
  -------
  pandas.DataFrame
      One row per solution, one column per variable of the result set.
  """
  result_set = graph.query(query)       # execute the query against the graph

  def render(term):
    # abbreviate URIs with the graph's prefixes; leave every other term untouched
    if isinstance(term,URIRef) and use_prefixes:
      return term.n3(graph.namespace_manager)
    return term

  # list of dictionaries (one per solution) as intermediate format for the DataFrame
  records = []
  for solution in result_set:
    records.append({ name : render(solution[name]) for name in result_set.vars })

  # the variables of the result set become the columns
  return pd.DataFrame(records,columns=result_set.vars)

def validation_report_as_dataframe(validation_report):
  """Tabulate the validation results contained in a SHACL validation report graph.

  Parameters
  ----------
  validation_report : graph accepted by `sparql_select`
      The results graph of a validation run (in this notebook: the second
      element of the tuple returned by `shacl_validate`).

  Returns
  -------
  pandas.DataFrame
      One row per sh:ValidationResult that has a focus node, source constraint
      component, source shape and result message; `value` and `resultPath`
      are optional and left empty when absent.
  """
  # Query the graph that was passed in (not a notebook-level global), and declare
  # the sh: prefix explicitly so the query does not rely on the graph's bindings.
  return sparql_select(validation_report,"""
		PREFIX sh: <http://www.w3.org/ns/shacl#>
		SELECT  ?focusNode ?resultPath ?value ?sourceConstraintComponent ?sourceShape ?resultMessage
		WHERE
  		{ ?vr	a sh:ValidationResult ;
						sh:focusNode ?focusNode ;
						sh:sourceConstraintComponent ?sourceConstraintComponent ;
						sh:sourceShape ?sourceShape ;
						sh:resultMessage ?resultMessage .					 
				OPTIONAL { ?vr sh:value ?value . }
				OPTIONAL { ?vr sh:resultPath ?resultPath . }
  		}
  """,use_prefixes=True)

def shacl_validate(dg,sg):
  """Validate data graph `dg` against shapes graph `sg` using pyshacl's `validate`.

  RDFS inference is requested; the advanced features are switched off.
  Returns whatever `validate` returns (unpacked in this notebook as
  `conforms, results_graph, results_text`).
  """
  options = dict(
      inference='rdfs',
      abort_on_first=False,
      allow_infos=False,
      allow_warnings=False,
      meta_shacl=False,
      advanced=False,
      js=False,
      debug=False,
  )
  return validate(dg,shacl_graph=sg,**options)
  

def shacl_validate_with_rules(dg,sg):
  """Validate `dg` against `sg` with pyshacl's advanced features (SHACL rules) enabled.

  Same settings as a plain validation with RDFS inference, except that
  `advanced`, `iterate_rules` and `inplace` are switched on. Because of
  `inplace=True` the validation works directly on `dg`.
  Returns whatever `validate` returns.
  """
  options = dict(
      inference='rdfs',
      abort_on_first=False,
      allow_infos=False,
      allow_warnings=False,
      meta_shacl=False,
      advanced=True,
      iterate_rules=True,
      inplace=True,
      js=False,
      debug=False,
  )
  return validate(dg,shacl_graph=sg,**options)


## SHACL - First Example 

In [5]:
# Data graph: three <Person> instances - Alice (a single ssn), Bob (two ssn
# values) and Calvin (a birthDate and a worksFor link to a resource without a type).
dg = Graph() # the Data Graph
dg.parse(format="turtle", data="""
PREFIX  rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX  rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX  sh:    <http://www.w3.org/ns/shacl#>
PREFIX  xsd:   <http://www.w3.org/2001/XMLSchema#>
PREFIX  :      <http://example.org/>
BASE   	       <http://example.org/>

<Alice>	a <Person> ;
  :ssn "987-65-432A" .  
<Bob>	a <Person> ;
  :ssn "123-45-6789" ;
  :ssn "124-35-6789" .  
<Calvin> a <Person> ;
  :birthDate  "1971-07-07"^^xsd:date ;
  :worksFor  <UntypedCompany> .     
""")

# Shapes graph: a closed PersonShape targeting class <Person> (rdf:type is ignored)
# that references two property shapes:
#   - SsnShape: at most one xsd:string value matching the pattern ddd-dd-dddd
#   - WorksForShape: values must be IRIs and instances of <Company>
sg = Graph() # the Shapes Graph
sg.parse(format="turtle", data="""
PREFIX  rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX  rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX  sh:    <http://www.w3.org/ns/shacl#>
PREFIX  xsd:   <http://www.w3.org/2001/XMLSchema#>
PREFIX  :      <http://example.org/>
BASE   	       <http://example.org/>

	<PersonShape>
		a sh:NodeShape ;
		sh:targetClass <Person> ; 
		sh:property <SsnShape>, <WorksForShape>; 
		sh:closed true ;
		sh:ignoredProperties ( rdf:type ) .

	<SsnShape> 
	  a sh:PropertyShape ;
	  sh:path <ssn> ;           
		sh:maxCount 1 ;
		sh:datatype xsd:string  ;
		sh:pattern "^\\\\d{3}-\\\\d{2}-\\\\d{4}$" . # in Python we need to double-escape backslashes

	<WorksForShape>
	  a sh:PropertyShape ;
	  sh:path <worksFor> ;
		sh:class <Company> ;
		sh:nodeKind sh:IRI .
""")

# Validate the data graph against the shapes graph (helper defined in the setup cell).
conforms, results_graph, results_text = shacl_validate(dg,sg)  

# Show the raw validation report as Turtle unless the data conforms.
if conforms:
	print("everything good")
else:
	print(results_graph.serialize(format='turtle'))

# Last expression of the cell: the validation results as a table.
validation_report_as_dataframe(results_graph)

@prefix : <http://example.org/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] a sh:ValidationReport ;
    sh:conforms false ;
    sh:result [ a sh:ValidationResult ;
            sh:focusNode :Calvin ;
            sh:resultMessage "Value does not have class :Company" ;
            sh:resultPath :worksFor ;
            sh:resultSeverity sh:Violation ;
            sh:sourceConstraintComponent sh:ClassConstraintComponent ;
            sh:sourceShape :WorksForShape ;
            sh:value :UntypedCompany ],
        [ a sh:ValidationResult ;
            sh:focusNode :Calvin ;
            sh:resultMessage "Node :Calvin is closed. It cannot have value: Literal(\"1971-07-07\", datatype=xsd:date)" ;
            sh:resultPath :birthDate ;
            sh:resultSeverity sh:Violation ;
            sh:sourceConstraintComponent sh:ClosedConstraintComponent ;
            sh:sourceShape :PersonShape ;
            sh:value "1971-07-07"^^xsd:date ],
  

Unnamed: 0,focusNode,resultPath,value,sourceConstraintComponent,sourceShape,resultMessage
0,:Bob,:ssn,,sh:MaxCountConstraintComponent,:SsnShape,More than 1 values on :Bob->:ssn
1,:Alice,:ssn,987-65-432A,sh:PatternConstraintComponent,:SsnShape,Value does not match pattern '^\d{3}-\d{2}-\d{...
2,:Calvin,:worksFor,:UntypedCompany,sh:ClassConstraintComponent,:WorksForShape,Value does not have class :Company
3,:Calvin,:birthDate,1971-07-07,sh:ClosedConstraintComponent,:PersonShape,Node :Calvin is closed. It cannot have value: ...


## Example 2: Target Declarations

In [6]:
# Data graph: Mary (a :Woman), John and Ben (both :Man) connected by :knows;
# :Bello only occurs as an object of :knows. :Man and :Woman are subclasses of :Person.
# None of the nodes has a :name.
dg = Graph() # the Data Graph
dg.parse(format="turtle", data="""
PREFIX  rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX  rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX  sh:    <http://www.w3.org/ns/shacl#>
PREFIX  xsd:   <http://www.w3.org/2001/XMLSchema#>
PREFIX  :      <http://example.org/>

:Mary	a :Woman ;
	:knows :John, :Ben, :Bello.  
:John	a :Man ;  
	:knows :Ben .  
:Ben 	a :Man . 
:Man rdfs:subClassOf :Person .
:Woman rdfs:subClassOf :Person .  
""")

# Shapes graph: five property shapes with the same constraint (at least one :name)
# that differ only in their target declaration: sh:targetClass (:Man, :Person),
# sh:targetNode (:Ben), sh:targetSubjectsOf and sh:targetObjectsOf (:knows).
sg = Graph() # the Shapes Graph
sg.parse(format="turtle", data="""
PREFIX  rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX  rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX  sh:    <http://www.w3.org/ns/shacl#>
PREFIX  xsd:   <http://www.w3.org/2001/XMLSchema#>
PREFIX  :      <http://example.org/>

:ManShape  a sh:PropertyShape ;
    sh:targetClass :Man ; 
    sh:path :name; sh:minCount 1.  
:BenShape a sh:PropertyShape ;
    sh:targetNode :Ben ; 
    sh:path :name; sh:minCount 1.
:PersonShape a sh:PropertyShape ;
    sh:targetClass :Person ; 
    sh:path :name; sh:minCount 1.
:KnowsSubjectShape  a sh:PropertyShape ;
    sh:targetSubjectsOf :knows ; 
    sh:path :name; sh:minCount 1.
:KnowsObjectsShape  a sh:PropertyShape ;
    sh:targetObjectsOf :knows ; 
    sh:path :name; sh:minCount 1.
""")

# Validate and display the results as a table; the sourceShape column shows
# which target declaration selected each focus node.
conforms, results_graph, results_text = shacl_validate(dg,sg)  
validation_report_as_dataframe(results_graph)

Unnamed: 0,focusNode,resultPath,value,sourceConstraintComponent,sourceShape,resultMessage
0,:Ben,:name,,sh:MinCountConstraintComponent,:BenShape,Less than 1 values on :Ben->:name
1,:John,:name,,sh:MinCountConstraintComponent,:ManShape,Less than 1 values on :John->:name
2,:Ben,:name,,sh:MinCountConstraintComponent,:ManShape,Less than 1 values on :Ben->:name
3,:John,:name,,sh:MinCountConstraintComponent,:KnowsSubjectShape,Less than 1 values on :John->:name
4,:Mary,:name,,sh:MinCountConstraintComponent,:KnowsSubjectShape,Less than 1 values on :Mary->:name
5,:John,:name,,sh:MinCountConstraintComponent,:KnowsObjectsShape,Less than 1 values on :John->:name
6,:Ben,:name,,sh:MinCountConstraintComponent,:KnowsObjectsShape,Less than 1 values on :Ben->:name
7,:Bello,:name,,sh:MinCountConstraintComponent,:KnowsObjectsShape,Less than 1 values on :Bello->:name
8,:John,:name,,sh:MinCountConstraintComponent,:PersonShape,Less than 1 values on :John->:name
9,:Ben,:name,,sh:MinCountConstraintComponent,:PersonShape,Less than 1 values on :Ben->:name


## Example 3: References to Node Shapes

In [7]:
# Data graph: two planets whose :apoapsis / :radius values are structured nodes.
# Earth's apoapsis has rdf:value and :uom; Venus' apoapsis (:v1) has only rdf:value,
# and Venus' radius node additionally carries an rdf:type statement.
dg = Graph() # the Data Graph
dg.parse(format="turtle", data="""
PREFIX  rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX  rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX  sh:    <http://www.w3.org/ns/shacl#>
PREFIX  xsd:   <http://www.w3.org/2001/XMLSchema#>
PREFIX  :      <http://example.org/>

:Earth a :Planet ; 
    :apoapsis [rdf:value 1.017 ; :uom :AU ];
    :neighbour :Venus .    
:Venus a :Planet;
    :apoapsis :v1;
    :radius [rdf:value 3389.5 ; :uom :KM; a :Radius] .
:v1 rdf:value 0.728 . 
""")

# Shapes graph: PlanetShape requires (via sh:node) that every :radius and :apoapsis
# value conforms to ComplexValueShape, a closed node shape demanding exactly one
# rdf:value and exactly one :uom.
sg = Graph() # the Shapes Graph
sg.parse(format="turtle", data="""
PREFIX  rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX  rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX  sh:    <http://www.w3.org/ns/shacl#>
PREFIX  xsd:   <http://www.w3.org/2001/XMLSchema#>
PREFIX  :      <http://example.org/>

:PlanetShape  a sh:NodeShape ;
   sh:targetClass :Planet ; 
   sh:property [
     sh:path :radius;
     sh:node :ComplexValueShape ];
   sh:property [
     sh:path :apoapsis;
     sh:node :ComplexValueShape ].

:ComplexValueShape a sh:NodeShape ;
   sh:closed true;
   sh:property [ 
     sh:path rdf:value; 
     sh:minCount 1; sh:maxCount 1 ];
   sh:property [ 
     sh:path :uom; 
     sh:minCount 1; sh:maxCount 1 ]. 

""")

# Validate and display the results as a table.
conforms, results_graph, results_text = shacl_validate(dg,sg)  
validation_report_as_dataframe(results_graph)

Unnamed: 0,focusNode,resultPath,value,sourceConstraintComponent,sourceShape,resultMessage
0,:Venus,:radius,na54dddd695c14713812244b9d489d0d9b2,sh:NodeConstraintComponent,n5044856666e147f4bf5b98c4af331fd8b1,Value does not conform to Shape :ComplexValueS...
1,:Venus,:apoapsis,:v1,sh:NodeConstraintComponent,n5044856666e147f4bf5b98c4af331fd8b2,Value does not conform to Shape :ComplexValueS...


## Example 4: Logical Constraint Component

In [8]:
# Data graph: four planets whose :radius node is either a measured value
# (rdf:value plus :uom), a nil value (:nilReason), incomplete (Mars: rdf:value only),
# or a mix of both kinds (Mercury).
dg = Graph() # the Data Graph
dg.parse(format="turtle", data="""
PREFIX  rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX  rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX  sh:    <http://www.w3.org/ns/shacl#>
PREFIX  xsd:   <http://www.w3.org/2001/XMLSchema#>
PREFIX  :      <http://example.org/>

:Earth a :Planet; 
  :radius [rdf:value 3389.5; :uom :KM].
:Venus a :Planet; 
  :radius [:nilReason "unknown"].
:Mars a :Planet; 
  :radius [rdf:value 234.4].
:Mercury a :Planet; 
  :radius [rdf:value 3389.5; :uom :KM; :nilReason "other"].
""")

# Shapes graph: PlanetShape uses sh:xone, i.e. every :radius value must conform to
# exactly one of ComplexValueShape (closed; one rdf:value and one :uom) and
# NilShape (closed; one :nilReason out of "unknown", "inapplicable", "other").
sg = Graph() # the Shapes Graph
sg.parse(format="turtle", data="""
PREFIX  rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX  rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX  sh:    <http://www.w3.org/ns/shacl#>
PREFIX  xsd:   <http://www.w3.org/2001/XMLSchema#>
PREFIX  :      <http://example.org/>

:PlanetShape  a sh:NodeShape ;
  sh:targetClass :Planet ; 
  sh:property [ 
    sh:path :radius;
    sh:xone (:ComplexValueShape :NilShape)
  ].
    
:ComplexValueShape a sh:NodeShape ;
  sh:closed true;
  sh:property [ 
    sh:path rdf:value; 
    sh:minCount 1; sh:maxCount 1 ];
  sh:property [ 
    sh:path :uom; 
    sh:minCount 1; sh:maxCount 1 ]. 

:NilShape a sh:NodeShape ;
  sh:closed true;
  sh:property [ 
    sh:path :nilReason; 
    sh:in ( "unknown" "inapplicable" "other" );
    sh:minCount 1; sh:maxCount 1 ]. 

""")

# Validate and display the results as a table.
conforms, results_graph, results_text = shacl_validate(dg,sg)  
validation_report_as_dataframe(results_graph)

Unnamed: 0,focusNode,resultPath,value,sourceConstraintComponent,sourceShape,resultMessage
0,:Mars,:radius,n2679620c69b54b3988b18a13ce897b05b3,sh:XoneConstraintComponent,n1e5e778b7af94f23aff0424318483154b1,Node [ rdf:type rdfs:Resource ; rdf:value Lite...
1,:Mercury,:radius,n2679620c69b54b3988b18a13ce897b05b4,sh:XoneConstraintComponent,n1e5e778b7af94f23aff0424318483154b1,"Node [ :nilReason Literal(""other"") ; :uom :KM ..."


# SHACL Rules

In [9]:
# Data graph: a chain of employees linked by :hasParent (Peter -> Mary -> Susan -> John);
# Mary additionally has an :employer. <Employee> is a subclass of <Person>.
dg = Graph() # the Data Graph
dg.parse(format="turtle", data="""
PREFIX  rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX  rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX  sh:    <http://www.w3.org/ns/shacl#>
PREFIX  xsd:   <http://www.w3.org/2001/XMLSchema#>
PREFIX  :      <http://example.org/>
BASE   	       <http://example.org/>

	<Employee> rdfs:subClassOf <Person> .
	<Peter> a <Employee> ;
	   :hasParent <Mary> . 
    <Mary> a <Employee> ;
      :hasParent <Susan> ;
      :employer <Acme> .
  <Susan> a <Employee> ;
      :hasParent <John> .
""")

# Shapes graph: <Prefixes> declares the namespaces available to the SPARQL rules.
# <Person> (both a class and a node shape) carries four rules:
#   - SPARQLRule: ?employer :employee $this   (derived from :employer)
#   - SPARQLRule: $this :hasAncestor ?x       (derived from :hasParent+)
#   - TripleRule: parent :hasChild this       (subject reached via :hasParent)
#   - TripleRule: ancestor :hasDescendant this (subject reached via one or more :hasParent)
sg = Graph() # the Shapes Graph
sg.parse(format="turtle", data="""
PREFIX  rdf:   <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX  rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX  sh:    <http://www.w3.org/ns/shacl#>
PREFIX  xsd:   <http://www.w3.org/2001/XMLSchema#>
PREFIX  :      <http://example.org/>
BASE   <http://example.org/>

	<Prefixes> sh:declare 
  [ sh:prefix "rdf";
    sh:namespace "http://www.w3.org/1999/02/22-rdf-syntax-ns#"^^xsd:anyURI ] ,
  [ sh:prefix "rdfs";
    sh:namespace "http://www.w3.org/2000/01/rdf-schema#"^^xsd:anyURI ] ,
  [ sh:prefix "sh";
    sh:namespace "http://www.w3.org/ns/shacl#"^^xsd:anyURI ] ,
  [ sh:prefix "xsd";
    sh:namespace "http://www.w3.org/2001/XMLSchema#"^^xsd:anyURI ] ,
  [ sh:prefix "";
    sh:namespace "http://example.org/"^^xsd:anyURI ] .

	<Person> a rdfs:Class, sh:NodeShape ;
		sh:rule [
			a sh:SPARQLRule ;
			sh:prefixes <Prefixes> ;
			sh:construct \"\"\"
				CONSTRUCT {	?employer :employee $this .	}
				WHERE {	$this :employer ?employer . }
				\"\"\" ;
		] ;
		sh:rule [
			a sh:SPARQLRule ;
			sh:prefixes <Prefixes> ;
			sh:construct \"\"\"
				CONSTRUCT {	$this :hasAncestor $x .	}
				WHERE {	$this :hasParent+ ?x . }
				\"\"\" ;
		] ;
		sh:rule [
			a sh:TripleRule ;
			sh:subject [sh:path :hasParent ] ;
			sh:predicate :hasChild ;
			sh:object sh:this 
    ] ;
		sh:rule [
			a sh:TripleRule ;
			sh:subject [sh:path [ sh:oneOrMorePath :hasParent ] ] ;
			sh:predicate :hasDescendant ;
			sh:object sh:this 
    ] .
""")


# Run validation with rules enabled; the helper passes inplace=True, so the data
# graph itself is serialized afterwards to inspect the triples produced by the rules.
conforms, results_graph, results_text = shacl_validate_with_rules(dg,sg)
print(dg.serialize(format='turtle'))

@prefix : <http://example.org/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

:Acme a rdfs:Resource ;
    :employee :Mary .

:employer a rdf:Property ;
    rdfs:subPropertyOf :employer .

:hasParent a rdf:Property ;
    rdfs:subPropertyOf :hasParent .

rdf:type a rdf:Property ;
    rdfs:subPropertyOf rdf:type .

rdfs:subClassOf a rdf:Property ;
    rdfs:subPropertyOf rdfs:subClassOf .

rdfs:subPropertyOf a rdf:Property ;
    rdfs:subPropertyOf rdfs:subPropertyOf .

:Employee a rdfs:Resource ;
    rdfs:subClassOf :Person .

:John a rdfs:Resource ;
    :hasChild :Susan ;
    :hasDescendant :Mary,
        :Peter,
        :Susan .

:Person a rdfs:Resource .

:Peter a :Employee,
        :Person,
        rdfs:Resource ;
    :hasAncestor :John,
        :Mary,
        :Susan ;
    :hasParent :Mary .

:Susan a :Employee,
        :Person,
        rdfs:Resource ;
    :hasAncestor :John ;
    :hasChild :Mary ;
    :hasDescen