In [1]:
from rdflib.collection import Collection

from common import *

## Helper Functions

In [2]:
def add_class(graph, nodes):
    l = nodes if isinstance(nodes, list) else [nodes]
    for node in l:
        graph.add((node, RDF.type, OWL.Class))

In [3]:
def add_union(graph, nodes):
    sequence = Collection(graph, BNode(), nodes)
    union = BNode()
    graph.add((union, OWL.unionOf, sequence.uri))
    return union

In [4]:
def add_object_property(graph, property, domain, range):
    graph.add((property, RDF.type, OWL.ObjectProperty))
    if domain:
        graph.add((property, RDFS.domain, add_union(graph, domain) if isinstance(domain, list) else domain))
    if range:
        graph.add((property, RDFS.range, add_union(graph, range) if isinstance(range, list) else range))


def add_datatype_property(graph, property, domain, range):
    if isinstance(range, list):
        for r in range:
            assert r in XSD
    else:
        assert range in XSD
    graph.add((property, RDF.type, OWL.DatatypeProperty))
    if domain:
        graph.add((property, RDFS.domain, add_union(graph, domain) if isinstance(domain, list) else domain))
    if range:
        graph.add((property, RDFS.range, add_union(graph, range) if isinstance(range, list) else range))


def add_property(graph, property, domain, range):
    graph.add((property, RDF.type, RDF.Property))
    if domain:
        graph.add((property, RDFS.domain, add_union(graph, domain) if isinstance(domain, list) else domain))
    if range:
        graph.add((property, RDFS.range, add_union(graph, range) if isinstance(range, list) else range))

# Ontology Building

In [5]:
ontology = get_graph_xp()

## Classes

In [6]:
classes = [
    dtbox.Data,
    dtbox.Intent,
    dtbox.Problem,
    dtbox.Algorithm,
    dtbox.Workflow,
    dtbox.DataTag,
    dtbox.Step,
    dtbox.Component,
    dtbox.LearnerComponent,
    dtbox.ApplierComponent,
    dtbox.Implementation,
    dtbox.LearnerImplementation,
    dtbox.ApplierImplementation,
    dtbox.Parameter,
    dtbox.ParameterValue,
    dtbox.Transformation,
    dtbox.CopyTransformation,
    dtbox.LoaderTransformation,
    dtbox.IOSpec,
    dtbox.IO,
]
add_class(ontology, classes)

In [7]:
ontology.add((dtbox.CopyTransformation, RDFS.subClassOf, dtbox.Transformation))
ontology.add((dtbox.LoaderTransformation, RDFS.subClassOf, dtbox.Transformation))

ontology.add((dtbox.LearnerImplementation, RDFS.subClassOf, dtbox.Implementation))
ontology.add((dtbox.ApplierImplementation, RDFS.subClassOf, dtbox.Implementation))
ontology.add((dtbox.LearnerImplementation, OWL.disjointWith, dtbox.ApplierImplementation))

ontology.add((dtbox.LearnerComponent, RDFS.subClassOf, dtbox.Component))
ontology.add((dtbox.ApplierComponent, RDFS.subClassOf, dtbox.Component))
ontology.add((dtbox.LearnerComponent, OWL.disjointWith, dtbox.ApplierComponent))

<Graph identifier=Nbb76eb4ea02f4f859bb3d4910e4b418c (<class 'rdflib.graph.Graph'>)>

## Object Properties

In [8]:
properties = [
    # Intent
    (dtbox.overData, dtbox.Intent, dtbox.Data),
    (dtbox.tackles, dtbox.Intent, [dtbox.Problem, dtbox.Algorithm]),
    (dtbox.usingParameter, dtbox.Intent, [dtbox.Parameter, dtbox.ParameterValue]),
    (dtbox.createdFor, dtbox.Workflow, dtbox.Intent),
    # Problem
    (dtbox.subProblemOf, dtbox.Problem, dtbox.Problem),
    (dtbox.solves, [dtbox.Algorithm, dtbox.Workflow], dtbox.Problem),
    # Workflow
    (dtbox.applies, dtbox.Workflow, dtbox.Algorithm),
    (dtbox.hasStep, dtbox.Workflow, dtbox.Step),
    # Workflow / Implementation
    (dtbox.hasParameter, [dtbox.Workflow, dtbox.Implementation], dtbox.Parameter),
    (dtbox.specifiesInput, [dtbox.Workflow, dtbox.Implementation], dtbox.IOSpec),
    (dtbox.specifiesOutput, [dtbox.Workflow, dtbox.Implementation], dtbox.IOSpec),
    # Implementation
    (dtbox.hasParameter, dtbox.Implementation, dtbox.Parameter),
    (dtbox.hasLearner, dtbox.ApplierImplementation, dtbox.LearnerImplementation),
    (dtbox.hasApplier, dtbox.LearnerImplementation, dtbox.ApplierImplementation),
    # Component
    (dtbox.hasTransformation, dtbox.Component, RDF.List),
    (dtbox.hasImplementation, dtbox.Component, dtbox.Implementation),
    (dtbox.overridesParameter, dtbox.Component, dtbox.ParameterValue),
    (dtbox.exposesParameter, dtbox.Component, dtbox.Parameter),
    (dtbox.hasLearner, dtbox.ApplierComponent, dtbox.LearnerComponent),
    (dtbox.hasApplier, dtbox.LearnerComponent, dtbox.ApplierComponent),
    # Step
    (dtbox.followedBy, dtbox.Step, dtbox.Step),
    (dtbox.runs, dtbox.Step, [dtbox.Workflow, dtbox.Implementation]),
    (dtbox.hasParameterValue, dtbox.Step, dtbox.ParameterValue),
    (dtbox.hasInput, dtbox.Step, dtbox.IO),
    (dtbox.hasOutput, dtbox.Step, dtbox.IO),
    # Parameter
    (dtbox.forParameter, dtbox.ParameterValue, dtbox.Parameter),
    (dtbox.hasDatatype, dtbox.Parameter, None),
    (dtbox.hasDefaultValue, dtbox.Parameter, None),
    # Data
    # (dtbox.conformsTo, dtbox.Data, dtbox.DataTag),
    # IOSpec
    (dtbox.hasTag, dtbox.IOSpec, dtbox.DataTag),
    # IO
    (dtbox.hasData, dtbox.IOSpec, dtbox.Data),
]
for s, p, o in properties:
    add_object_property(ontology, s, p, o)

In [9]:
ontology.add((dtbox.subProblemOf, RDF.type, OWL.TransitiveProperty))

<Graph identifier=Nbb76eb4ea02f4f859bb3d4910e4b418c (<class 'rdflib.graph.Graph'>)>

## Datatype Properties

In [10]:
dproperties = [
    # Transformation
    (dtbox.copy_input, dtbox.CopyTransformation, XSD.integer),
    (dtbox.copy_output, dtbox.CopyTransformation, XSD.integer),
    (dtbox.transformation_language, dtbox.Transformation, XSD.string),
    (dtbox.transformation_query, dtbox.Transformation, XSD.string),
    # IO
    (dtbox.has_position, [dtbox.IO, dtbox.IOSpec, dtbox.Step, dtbox.Parameter], XSD.integer),
]

for s, p, o in dproperties:
    add_datatype_property(ontology, s, p, o)

## Open Properties (no Range or Domain)

In [11]:
oproperties = [
    (dtbox.has_value, dtbox.ParameterValue, None),
]

for s, p, o in oproperties:
    add_property(ontology, s, p, o)

## Data Properties
These properties are used temporarily to store information that belongs to the ontology used for data representation

In [12]:
properties = [
    # Column
    (dmop.hasColumnName, dmop.ColumnInfoProperty),
    (dmop.hasDataPrimitiveTypeColumn, dmop.ColumnInfoProperty),
    (dmop.hasPosition, dmop.ColumnInfoProperty),
    (dmop.isCategorical, dmop.ColumnInfoProperty),
    (dmop.isFeature, dmop.ColumnInfoProperty),
    (dmop.isLabel, dmop.ColumnInfoProperty),
    (dmop.isUnique, dmop.ColumnInfoProperty),
    (dmop.containsNulls, dmop.ColumnValueInfoProperty),
    (dmop.hasMeanValue, dmop.ColumnValueInfoProperty),
    (dmop.hasStandardDeviation, dmop.ColumnValueInfoProperty),
    (dmop.hasMaxValue, dmop.ColumnValueInfoProperty),
    (dmop.hasMinValue, dmop.ColumnValueInfoProperty),

    # Dataset
    (dmop.delimiter, dmop.DatasetPhysicalProperty),
    (dmop.doubleQuote, dmop.DatasetPhysicalProperty),
    (dmop.encoding, dmop.DatasetPhysicalProperty),
    (dmop.fileFormat, dmop.DatasetPhysicalProperty),
    (dmop.hasHeader, dmop.DatasetPhysicalProperty),
    (dmop.isNormalized, dmop.DatasetValueInfoProperty),
    (dmop.lineDelimiter, dmop.DatasetPhysicalProperty),
    (dmop.numberOfColumns, dmop.DatasetInfoProperty),
    (dmop.numberOfRows, dmop.DatasetInfoProperty),
    (dmop.path, dmop.DatasetPhysicalProperty),
    (dmop.quoteChar, dmop.DatasetPhysicalProperty),
    (dmop.skipInitialSpace, dmop.DatasetPhysicalProperty),
]

for s, o in properties:
    ontology.add((s, RDFS.subPropertyOf, o))

## Store

In [13]:
ontology.serialize('../ontologies/tbox.ttl', format='turtle')

<Graph identifier=Nbb76eb4ea02f4f859bb3d4910e4b418c (<class 'rdflib.graph.Graph'>)>

## Shape

In [14]:
# Define the SHACL shape
g = get_graph_xp()

shape_node = tb.IntentShape
g.add((shape_node, RDF.type, SH.NodeShape))
g.add((shape_node, SH.targetClass, tb.Intent))

# Add constraints for linking to tb:Data with tb:overData property
g.add((shape_node, SH.property, tb.overDataConstraint))
g.add((tb.overDataConstraint, SH.path, tb.overData))
g.add((tb.overDataConstraint, SH['class'], tb.Data))
g.add((tb.overDataConstraint, SH.minCount, Literal(1)))
g.add((tb.overDataConstraint, SH.maxCount, Literal(1)))

# Add constraints for linking to tb:Problem or tb:Algorithm
g.add((shape_node, SH.property, tb.problemOrAlgorithmConstraint))
g.add((tb.problemOrAlgorithmConstraint, SH.path, tb.tackles))
g.add((tb.problemOrAlgorithmConstraint, SH['class'], add_union(g, [tb.Problem, tb.Algorithm])))
g.add((tb.problemOrAlgorithmConstraint, SH.minCount, Literal(1)))
g.add((tb.problemOrAlgorithmConstraint, SH.maxCount, Literal(1)))

print(g.serialize(format="turtle"))

@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix tb: <https://extremexp.eu/ontology/tbox#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

tb:IntentShape a sh:NodeShape ;
    sh:property tb:overDataConstraint,
        tb:problemOrAlgorithmConstraint ;
    sh:targetClass tb:Intent .

tb:overDataConstraint sh:class tb:Data ;
    sh:maxCount 1 ;
    sh:minCount 1 ;
    sh:path tb:overData .

tb:problemOrAlgorithmConstraint sh:class [ owl:unionOf ( tb:Problem tb:Algorithm ) ] ;
    sh:maxCount 1 ;
    sh:minCount 1 ;
    sh:path tb:tackles .

