In [14]:
from pyshacl import validate

f = open('shacl.ttl')
shapes_file = f.read()
shapes_file_format = 'turtle'

data_file = '''{"@type":"bio:DataCatalog"}'''
data_file_format = 'json-ld'



In [15]:
conforms, v_graph, v_text = validate(data_file, shacl_graph=shapes_file,
                                     data_graph_format=data_file_format,
                                     shacl_graph_format=shapes_file_format,
                                     inference='rdfs', debug=True,
                                     serialize_report_graph=True)
print(conforms)
print(v_graph)
print(v_text)

ShapeLoadError: A Shape-Expecting & List-Expecting predicate should get a well-formed RDF list with 1 or more members.
https://www.w3.org/TR/shacl/#shapes-recursion

In [11]:
import rdflib


g = rdflib.Graph()
g.parse("bioschemas_shacl.json", format="json-ld",publicID = "http://bioschemas.org/specifications/")

<Graph identifier=N27087c401c234625a10c63c277e1f5e5 (<class 'rdflib.graph.Graph'>)>

In [12]:
import pprint
for stmt in g:
    pprint.pprint(stmt)

(rdflib.term.BNode('Nf61d2116d5684d83bd08b988276a1af1'),
 rdflib.term.URIRef('http://www.w3.org/ns/shacl#severity'),
(rdflib.term.URIRef('http://bioschemas.org/specifications/EventShape'),
 rdflib.term.URIRef('http://www.w3.org/ns/shacl#property'),
 rdflib.term.BNode('N0f24aa8de4ef46e3b3273cff3ce9e3a4'))
(rdflib.term.BNode('N6376130065df4d89b96419b93dd7252e'),
 rdflib.term.URIRef('http://www.w3.org/ns/shacl#path'),
 rdflib.term.Literal('schema:maximumAttendeeCapacity'))
(rdflib.term.BNode('Nc4e0a11224614779a864f32da67ae58b'),
 rdflib.term.URIRef('http://www.w3.org/ns/shacl#path'),
 rdflib.term.Literal('schema:eventType'))
(rdflib.term.URIRef('http://bioschemas.org/specifications/DatasetShape'),
 rdflib.term.URIRef('http://www.w3.org/ns/shacl#property'),
 rdflib.term.BNode('N70bb0552afbc4d1eac363cda8d7dfebb'))
(rdflib.term.BNode('Nd0525804ada743afbaa49226f65cbaa8'),
 rdflib.term.URIRef('http://www.w3.org/ns/shacl#or'),
 rdflib.term.BNode('N18cde36aeeb44cddbe41c119ea957184'))
(rdflib.ter

In [13]:
g.serialize(destination='shacl.ttl', format='turtle')

In [2]:
import yaml
import requests
import json
import rdflib

def read_in_spec(type):

    if not isinstance(type,str):
        return("Type must be a string.")

    url = 'https://raw.githubusercontent.com/BioSchemas/specifications/master/'+type+'/specification.html'

    req = requests.get(url)
    print(type)
    document = req.content.decode().split('/n')[-1]

    document = document[:document.rfind('\n')]

    specs = yaml.load(document)

    return(specs)


specs = []
types = ['DataCatalog','Dataset','Event','Sample',\
         'Taxon','TrainingMaterial']

for type in types:

    spec = read_in_spec(type)
    specs.append(spec)

shacl_rules = {
    "@context": {
        "rdf":  "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
        "sh":   "http://www.w3.org/ns/shacl#",
        "xsd":  "http://www.w3.org/2001/XMLSchema#",
        "schema": "http://schema.org/",
        "bio":"http://bioschemas.org/specifications/"
    },
    "sh:shapeGraph":[]
  }

for i in range(len(specs)):

    spec = specs[i]
    type = types[i]

    specGraph = {
        '@id':type + "Shape",
        "@type":"sh:NodeShape",
        "sh:targetClass":"bio:"+type,
        "sh:property":[]
    }

    if not spec.get('properties'):
        continue

    for property in spec.get('properties'):

        prop_dict = {
            "sh:path":"schema:"+property.get('name')
        }

        severity = property.get('marginality')

        if severity == 'Minimum':
            prop_dict['sh:severity'] = 'sh:Violation'
            prop_dict['sh:minCount'] = 1

        elif severity == 'Recommended':
            prop_dict['sh:severity'] = 'sh:Warning'

        cardinality = property.get('cardinality')

        if cardinality == 'ONE':
            prop_dict['sh:maxCount'] = 1

        expected_types = property.get('expected_type')

        if len(expected_types) > 1:

            prop_dict['sh:or'] = []

            for allowed in expected_types:

                prop_dict['sh:or'].append({'sh:class':'schema:' + allowed})

                if allowed == 'Text':
                    prop_dict['sh:or'].append({"sh:datatype": "xsd:string"})

                elif allowed == 'URL':
                    prop_dict['sh:or'].append({"sh:datatype": "xsd:anyURL"})
        else:
            prop_dict['sh:class'] = expected_types[0]

        specGraph['sh:property'].append(prop_dict)

    shacl_rules['sh:shapeGraph'].append(specGraph)


g = rdflib.Graph()
g.parse(json.dumps(shacl_rules), format="json-ld",publicID = "http://bioschemas.org/specifications/")


DataCatalog
Dataset
Event


file:///Users/justinniestroy-admin/Documents/Work/bioschemas-shacl-generator/{"@context": {"rdf": "http:/www.w3.org/1999/02/22-rdf-syntax-ns does not look like a valid URI, trying to serialize this will break.


Sample
Taxon
TrainingMaterial


FileNotFoundError: [Errno 2] No such file or directory: '/Users/justinniestroy-admin/Documents/Work/bioschemas-shacl-generator/{"@context": {"rdf": "http:/www.w3.org/1999/02/22-rdf-syntax-ns'