# Validate data graph with SHACL graph

Use rdflib to load and parse the shape and data sources, then use pySHACL to apply the SHACL shapes graph to the data graph.

In [44]:
import rdflib
import pyshacl

# Load the SHACL graph
# The shape below targets every instance of SO:Dataset (sh:targetClass) and
# requires at least one SO:name value (sh:minCount 1); sh:message is the text
# reported when that constraint is violated.
shacl_source = """
@prefix SO: <http://schema.org/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix SOSO: <http://science-on-schema.org/examples/shacl/> .

SOSO:incomplete-eg-01-Dataset
    a sh:NodeShape ;
    sh:targetClass SO:Dataset ;

    sh:property [
        sh:path SO:name ;
        sh:minCount 1 ;
        sh:message "A dataset must have a name."@en ;
    ] .
"""
# `sg` (the shapes graph) is reused by the validation cells further down.
sg = rdflib.ConjunctiveGraph()
# Left as the cell's last expression so the notebook displays the parsed graph.
sg.parse(data=shacl_source, format="turtle", publicID="https://science-on-schema.org/examples/shacl/")

<Graph identifier=https://science-on-schema.org/examples/shacl/ (<class 'rdflib.graph.Graph'>)>

In [45]:
# Load the data graph
# A single SO:Dataset node that carries an SO:name, so it is expected to
# satisfy the shape defined in the shapes graph.
# The inline context entry maps the "SO" prefix to http://schema.org/, the same
# namespace the SHACL shapes use.
dg_source = """
{
    "@context":[
      "https://schema.org/docs/jsonldcontext.jsonld",
      {
          "SO":"http://schema.org/"
      }
    ],
    "@id":"incomplete-eg-01",
    "@type":"SO:Dataset",    
    "SO:name":"Incomplete example 01"
}
"""
dg = rdflib.ConjunctiveGraph()
# publicID is the base IRI; the relative "@id" above is resolved against it.
# Left as the cell's last expression so the notebook displays the parsed graph.
dg.parse(data=dg_source, format="json-ld", publicID="https://science-on-schema.org/examples/data/")

<Graph identifier=https://science-on-schema.org/examples/data/ (<class 'rdflib.graph.Graph'>)>

In [46]:
def printValidationResults(conforms, report_graph, report_text):
    """Print the three values returned by ``pyshacl.validate``.

    Each value is printed under its own heading, underlined with dashes and
    followed by a blank line.

    Args:
        conforms: Overall validation outcome; printed as-is.
        report_graph: Graph-like object exposing ``serialize(format=..., indent=...)``;
            it is rendered as Turtle.
        report_text: Human-readable validation report; printed as-is.
    """
    def printMsg(msg, v):
        # Heading, an underline of matching length, the value, then a blank line.
        print(f"{msg}\n{'-'*len(msg)}\n{v}\n")

    printMsg("Conforms", conforms)
    printMsg("Report Text", report_text)

    # rdflib < 6 returns bytes from serialize(); rdflib >= 6 returns str when no
    # destination is given. Decode only when needed so both versions work
    # instead of failing with AttributeError on str.decode.
    turtle = report_graph.serialize(format="turtle", indent=2)
    if isinstance(turtle, bytes):
        turtle = turtle.decode("utf8")
    printMsg("Report Graph", turtle)

# Validate the data graph `dg` against the shapes graph `sg` and print the results.
# pyshacl.validate returns a 3-tuple: the conformance flag, the report as a
# graph, and the report as text.
conforms, report_graph, report_text = pyshacl.validate( dg, shacl_graph=sg)

printValidationResults(conforms, report_graph, report_text)

Conforms
--------
True

Report Text
-----------
Validation Report
Conforms: True


Report Graph
------------
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] a sh:ValidationReport ;
    sh:conforms true ;
    sh:failureCount 0 ;
    sh:shapeCount 2 ;
    sh:shapesApplied 2 .





Make the data graph invalid by misspelling the `name` property.

In [47]:
# Data graph that is intentionally invalid: the dataset has no `name`, only a
# misspelled `nameX` property.
# This context relies on "@vocab" and does not declare an "SO" prefix, so terms
# are written unprefixed ("Dataset", "nameX") and expand to
# http://schema.org/Dataset and http://schema.org/nameX. A key written as
# "SO:nameX" here would not be expanded against schema.org; it would be taken
# as an unrelated absolute IRI with the scheme "SO".
dg_source = """
{
    "@context":[
      "https://schema.org/docs/jsonldcontext.jsonld",
      {
          "@vocab":"http://schema.org/"
      }
    ],
    "@id":"incomplete-eg-02",
    "@type":"Dataset",    
    "nameX":"Incomplete example 02, no name"
}
"""

# Rebuild the data graph from scratch so triples from the previous example do
# not leak into this validation run.
dg = rdflib.ConjunctiveGraph()
dg.parse(data=dg_source, format="json-ld", publicID="https://science-on-schema.org/examples/data/")

# Reuses the shapes graph `sg` built earlier; the missing SO:name is expected
# to produce a MinCount violation.
conforms, report_graph, report_text = pyshacl.validate( dg, shacl_graph=sg)

printValidationResults(conforms, report_graph, report_text)

Conforms
--------
False

Report Text
-----------
Validation Report
Conforms: False
Results (1):
Constraint Violation in MinCountConstraintComponent (http://www.w3.org/ns/shacl#MinCountConstraintComponent):
	Severity: sh:Violation
	Source Shape: [ sh:message Literal("A dataset must have a name.", lang=en) ; sh:minCount Literal("1", datatype=xsd:integer) ; sh:path SO:name ]
	Focus Node: <https://science-on-schema.org/examples/data/incomplete-eg-02>
	Result Path: SO:name
	Message: A dataset must have a name.


Report Graph
------------
@prefix SO: <http://schema.org/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] a sh:ValidationReport ;
    sh:conforms false ;
    sh:failureCount 1 ;
    sh:result [ a sh:ValidationResult ;
            sh:focusNode <https://science-on-schema.org/examples/data/incomplete-eg-02> ;
            sh:resultMessage "A dataset must have a name."@en ;
            sh:resultPath SO:name ;
            sh:resultSeve

## Including ontology

The schema.org class hierarchy is expressed in the document: 

  `https://schema.org/version/latest/schema.jsonld`
  
Assert that instances of `SO:CreativeWork` or its subclasses must have a `name` property:

In [22]:
# Shape targeting SO:CreativeWork rather than SO:Dataset: every targeted node
# must have at least one SO:name.
shacl_source = """
@prefix SO: <http://schema.org/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix SOSO: <http://science-on-schema.org/examples/shacl/> .

SOSO:incomplete-eg-02-CreativeWork
    a sh:NodeShape ;
    sh:targetClass SO:CreativeWork ;

    sh:property [
        sh:path SO:name ;
        sh:minCount 1 ;
        sh:message "A CreativeWork must have a name."@en ;
    ] .
"""

# The data declares the node only as an SO:Dataset; nothing in the data graph
# itself states that it is an SO:CreativeWork.
dg_source = """
{
    "@context":[
      "https://schema.org/docs/jsonldcontext.jsonld",
      {
          "SO":"http://schema.org/"
      }
    ],
    "@id":"incomplete-eg-01",
    "@type":"SO:Dataset",    
    "SO:name":"Incomplete example 01"
}
"""
# load the shapes graph
sg = rdflib.ConjunctiveGraph()
sg.parse(data=shacl_source, format="turtle", publicID="https://science-on-schema.org/examples/shacl/")

# Load the data graph
dg = rdflib.ConjunctiveGraph()
dg.parse(data=dg_source, format="json-ld", publicID="https://science-on-schema.org/examples/data/")

# Download and load the schema.org Ontology
# NOTE: this fetches the document over the network each time the cell runs.
ont_source = "https://schema.org/version/latest/schema.jsonld"
og = rdflib.ConjunctiveGraph()
og.parse(ont_source, format="json-ld")

# The ontology graph is passed as `ont_graph` and inference is enabled so that,
# presumably, the Dataset node is recognised as an instance of a CreativeWork
# subclass and the shape is applied to it -- confirm the exact semantics of
# `ont_graph` and `inference="both"` against the pySHACL documentation.
conforms, report_graph, report_text = pyshacl.validate( dg, shacl_graph=sg, ont_graph=og, inference="both")

printValidationResults(conforms, report_graph, report_text)

Conforms
--------
True

Report Text
-----------
Validation Report
Conforms: True


Report Graph
------------
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] a sh:ValidationReport ;
    sh:conforms true ;
    sh:failureCount 0 ;
    sh:shapeCount 2 ;
    sh:shapesApplied 2 .



