# Evaluating identifier

The `identifier` property of a Dataset may be a string, a URI, or a PropertyValue.

A Dataset `identifier` that is an instance of PropertyValue must have the following properties:

* propertyID
* value
* url

In [65]:
import rdflib
import pyshacl

def validate(data_graph, shacl_graph, ont_graph=None, advanced=True, inference="rdfs"):
    """Validate ``data_graph`` against ``shacl_graph`` using pyshacl.

    Args:
        data_graph: Graph holding the data to be validated.
        shacl_graph: Graph holding the SHACL shapes to apply.
        ont_graph: Optional extra ontology graph handed to pyshacl.
        advanced: Forwarded to pyshacl's ``advanced`` option.
        inference: Forwarded to pyshacl's ``inference`` option.

    Returns:
        The value returned by ``pyshacl.validate``, unchanged. The caller in
        this notebook unpacks it as ``(conforms, report_graph, report_text)``.
    """
    # Collect the keyword options once so the call below stays short.
    options = {
        "shacl_graph": shacl_graph,
        "ont_graph": ont_graph,
        "advanced": advanced,
        "inference": inference,
    }
    return pyshacl.validate(data_graph, **options)

def validateAndPrint(data_graph, shacl_graph, ont_graph=None, advanced=True, inference="rdfs"):
    """Validate ``data_graph`` against ``shacl_graph`` and print the outcome.

    Prints three titled sections: the conformance flag, the textual report,
    and the report graph serialized as Turtle.

    Args:
        data_graph: Graph holding the data to be validated.
        shacl_graph: Graph holding the SHACL shapes to apply.
        ont_graph: Optional extra ontology graph passed through to ``validate``.
        advanced: Passed through to ``validate``.
        inference: Passed through to ``validate``.

    Returns:
        The ``(conforms, report_graph, report_text)`` triple obtained from
        ``validate``.
    """
    def printMsg(msg, v):
        # Heading, an underline of dashes matching its length, then the value.
        print(f"{msg}\n{'-'*len(msg)}\n{v}\n")

    # Apply the SHACL rules
    conforms, report_graph, report_text = validate(data_graph, shacl_graph=shacl_graph, ont_graph=ont_graph, advanced=advanced, inference=inference)
    printMsg("Conforms", conforms)
    printMsg("Report Text", report_text)

    # Older rdflib releases return bytes from serialize(); rdflib >= 6 returns
    # str when no destination/encoding is given. Calling .decode() on a str
    # raises AttributeError, so only decode when bytes actually come back.
    report_turtle = report_graph.serialize(format="turtle", indent=2)
    if isinstance(report_turtle, bytes):
        report_turtle = report_turtle.decode()
    printMsg("Report Graph", report_turtle)
    return conforms, report_graph, report_text

# Test data graph (JSON-LD): a single schema.org Dataset whose "identifier" is a
# PropertyValue node with an IRI "@id".
# NOTE(review): the PropertyValue carries "valuex" rather than "value". The
# recorded validation report for this notebook flags the missing SO:value, so
# this looks like a deliberate negative test case -- confirm before "fixing" it.
dg01_src = """
{
  "@context": {
    "@vocab": "http://schema.org/"
  },
  "@graph": [
  {"@id": "test01",
  "@type": "Dataset",
  "name": "Test 01",
  "description": "Testing identifier structure",
  "url": "https://www.sample-data-repository.org/dataset/472032",
  "sameAs": "https://search.dataone.org/#view/https://www.sample-data-repository.org/dataset/472032",
  "version": "2013-11-21",
  "keywords": ["ocean acidification", "Dissolved Organic Carbon", "bacterioplankton respiration", "pCO2", "carbon dioxide", "oceans"],
  "identifier":
      {
        "@id": "https://doi.org/10.5066/F7VX0DMQ",
        "@type": "PropertyValue",
        "propertyID": "https://registry.identifiers.org/registry/doi",
        "valuex": "doi:10.5066/F7VX0DMQ",
        "url": "https://doi.org/10.5066/F7VX0DMQ"
      }
   }
   ]
}
"""

# Parse the JSON-LD into an rdflib graph. No base IRI is supplied, and the
# relative "@id" ("test01") shows up as a file:// IRI in the recorded report,
# so the Dataset's IRI depends on where the notebook is run.
dg = rdflib.ConjunctiveGraph()
dg.parse(data=dg01_src, format="json-ld")

<Graph identifier=Nebdba439c7a94402a835a30403ee4457 (<class 'rdflib.graph.Graph'>)>

In [68]:
# SHACL shapes graph (Turtle) used to validate Dataset identifiers:
#   - soso:IDShape targets SO:Dataset and requires each Dataset node to be an IRI.
#   - SO:Dataset (implicit class target) attaches the SO:Dataset-identifier
#     property shape: at least one SO:identifier, each value being a literal,
#     an instance of SO:URL, or a node conforming to soso:PVI.
#   - soso:PVI requires an IRI of class SO:PropertyValue that has SO:value,
#     SO:propertyID and SO:url.
#   - soso:PropertyValue-identifier uses a SPARQL-based target (sh:SPARQLTarget,
#     a SHACL Advanced Features construct) to select PropertyValue nodes that
#     are the identifier of a Dataset, and repeats the same value/propertyID/url
#     requirements as soso:PVI (without the sh:class constraint).
#   - soso:rdfPrefix / soso:schemaPrefix declare the prefixes the SPARQL query uses.
sg01_src = '''
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix SO: <http://schema.org/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix soso: <http://science-on-schema.org/1.2.1/validation/shacl#> .
@prefix datacite: <http://purl.org/spar/datacite/> .


soso:IDShape
    a sh:NodeShape ;
    sh:targetClass SO:Dataset ;
    sh:message "Dataset must have an ID"@en ;
    sh:description "https://github.com/ESIPFed/science-on-schema.org/blob/1.1.0/guides/Dataset.md#common-properties" ;
    sh:nodeKind sh:IRI ;
    .
    
SO:Dataset
    a rdfs:Class ;
    a sh:NodeShape ;
    #sh:targetClass SO:Dataset ; #Implied, https://www.w3.org/TR/shacl/#implicit-targetClass
    sh:message "Dataset validation"@en ;
    sh:description "https://github.com/ESIPFed/science-on-schema.org/blob/1.1.0/guides/Dataset.md#common-properties" ;
    sh:property 
        SO:Dataset-identifier ;
.

SO:Dataset-identifier
    a sh:PropertyShape ;
    sh:path SO:identifier;
    sh:minCount 1 ;
    sh:or (
        [
            sh:nodeKind sh:Literal ;
        ]
        [
            sh:class SO:URL ;
        ]
        [
            sh:node soso:PVI ;
        ]
    );
    sh:message "Dataset identifiers must be a URL, Text or PropertyValue"@en ;
    sh:description "https://github.com/ESIPFed/science-on-schema.org/blob/1.1.0/guides/Dataset.md#identifier" ;    
.

soso:PVI
    a sh:NodeShape ;
    sh:property [
        sh:path SO:value ;
        sh:minCount 1 ;
        sh:message "PropertyValue as Dataset identifier instance must have a value" ;
    ] ;
    sh:property [
        sh:path SO:propertyID ;
        sh:minCount 1 ;
    ] ;
    sh:property [
        sh:path SO:url ;
        sh:minCount 1 ;
    ] ;
    sh:nodeKind sh:IRI ;    
    sh:class SO:PropertyValue ;
.


soso:rdfPrefix
  sh:declare [
    sh:namespace "http://www.w3.org/1999/02/22-rdf-syntax-ns#"^^xsd:anyURI ;
    sh:prefix "rdf" ;
  ] .

soso:schemaPrefix
  sh:declare [
    sh:namespace "http://schema.org/"^^xsd:anyURI ;
    sh:prefix "SO" ;
  ] .


soso:PropertyValue-identifier
    a sh:NodeShape ;
    sh:target [
        a sh:SPARQLTarget ;
        sh:prefixes soso:rdfPrefix, soso:schemaPrefix ;
        sh:select """
            SELECT ?this
            WHERE {
                ?DS rdf:type SO:Dataset .
                ?DS SO:identifier ?this .
                ?this rdf:type SO:PropertyValue .
            }
        """
    ] ;
    sh:property [
        sh:path SO:value ;
        sh:minCount 1 ;
        sh:message "PropertyValue as Dataset identifier instance must have a value" ;
    ] ;
    sh:property [
        sh:path SO:propertyID ;
        sh:minCount 1 ;
    ] ;
    sh:property [
        sh:path SO:url ;
        sh:minCount 1 ;
    ] ;
    sh:nodeKind sh:IRI ;    
.

'''
# Parse the Turtle shapes into their own graph, separate from the data graph.
sg = rdflib.ConjunctiveGraph()
sg.parse(data=sg01_src, format="turtle")

# Validate the data graph against the shapes and print the report; `res` keeps
# the (conforms, report_graph, report_text) triple returned by validateAndPrint.
res = validateAndPrint(dg, sg)


Conforms
--------
False

Report Text
-----------
Validation Report
Conforms: False
Results (2):
Constraint Violation in OrConstraintComponent (http://www.w3.org/ns/shacl#OrConstraintComponent):
	Severity: sh:Violation
	Source Shape: SO:Dataset-identifier
	Focus Node: <file:///Users/vieglais/Documents/Projects/2020-SOSOV/git/shacl-pad/test01>
	Value Node: <https://doi.org/10.5066/F7VX0DMQ>
	Result Path: SO:identifier
	Message: Dataset identifiers must be a URL, Text or PropertyValue
Constraint Violation in MinCountConstraintComponent (http://www.w3.org/ns/shacl#MinCountConstraintComponent):
	Severity: sh:Violation
	Source Shape: [ sh:message Literal("PropertyValue as Dataset identifier instance must have a value") ; sh:minCount Literal("1", datatype=xsd:integer) ; sh:path SO:value ]
	Focus Node: <https://doi.org/10.5066/F7VX0DMQ>
	Result Path: SO:value
	Message: PropertyValue as Dataset identifier instance must have a value


Report Graph
------------
@prefix SO: <http://schema.org/> .
