In [1]:
from typing import List, Tuple, Union

from pyshacl import validate
from rdflib import *
from rdflib.collection import Collection

In [2]:
# Load the full ontology from its Turtle file (path is relative to the notebook's working directory).
ontology = Graph()
ontology.parse(r'../ontologies/ontology_full.ttl')

# External vocabularies: DMOP and the BIGOWL data / algorithm / problem / workflow modules.
dmop = Namespace('http://www.e-lico.eu/ontologies/dmo/DMOP/DMOP.owl#')
bigowl_data = Namespace('https://w3id.org/BIGOWLData/')
bigowl_algorithms = Namespace('https://w3id.org/BIGOWLAlgorithms/')
bigowl_problems = Namespace('https://w3id.org/BIGOWLProblems/')
bigowl_workflows = Namespace('https://w3id.org/BIGOWLWorkflows/')
# Project namespaces under diviloper.dev: ontology terms (do), components (dc),
# restrictions, i.e. the SHACL shapes (dr), rules (dru) and ABox individuals (da).
do = Namespace('https://diviloper.dev/ontology#')
dc = Namespace('https://diviloper.dev/ontology/components#')
dr = Namespace('https://diviloper.dev/ontology/restrictions#')
dru = Namespace('https://diviloper.dev/ontology/rules#')
da = Namespace('https://diviloper.dev/ABox#')


def get_graph():
    """Return a new, empty graph with all of the notebook's namespace prefixes bound.

    Binding the prefixes up front keeps serialized Turtle readable. The ``dru``
    (rules) namespace is bound as well so graphs holding rule IRIs do not fall
    back to an auto-generated prefix.

    Returns:
        Graph: a fresh graph containing no triples.
    """
    g = Graph()
    prefixes = {
        'dmop': dmop,
        'bod': bigowl_data,
        'boa': bigowl_algorithms,
        'bop': bigowl_problems,
        'bow': bigowl_workflows,
        'do': do,
        'dc': dc,
        'dr': dr,
        'dru': dru,
        'da': da,
    }
    for prefix, namespace in prefixes.items():
        g.bind(prefix, namespace)
    return g

In [3]:
def get_subgraph(graph, node):
    """Extract everything reachable from ``node`` by following outgoing triples.

    Every named node shape met on the way (any node typed ``sh:NodeShape``
    other than ``node`` itself) is replaced by one blank node in the result.
    The replacement is applied both where the shape is a subject and where it
    is referenced as an object, so references keep pointing at the renamed
    shape instead of at an IRI that has no triples in the subgraph.

    Args:
        graph: source graph to walk.
        node: root of the traversal; it keeps its own identity in the result.

    Returns:
        Graph: a new graph holding the reachable triples.
    """
    subgraph = Graph()
    visited_nodes = set()
    to_visit = [node]
    mappings = {}

    def _renamed(term):
        # The root is never renamed; every other node shape gets one stable blank node.
        if term != node and (term, RDF.type, SH.NodeShape) in graph:
            if term not in mappings:
                mappings[term] = BNode()
            return mappings[term]
        return term

    while to_visit:
        current_node = to_visit.pop()
        # A node can be queued several times before its first expansion; expand it once.
        if current_node in visited_nodes:
            continue
        visited_nodes.add(current_node)

        # The subject is the same for every triple of this node, so resolve it once.
        subject = _renamed(current_node)
        for _, p, o in graph.triples((current_node, None, None)):
            subgraph.add((subject, p, _renamed(o)))
            # Traversal always uses the original identifiers of the source graph.
            if o not in visited_nodes:
                to_visit.append(o)

    return subgraph

In [3]:
def add_component(component_graph, component, ctype, algorithm, inputs, outputs, parameters):
    """Describe a workflow component and its parameters in ``component_graph``.

    Args:
        component_graph: graph that receives the triples.
        component: node identifying the component.
        ctype: class the component is typed with (``rdf:type``).
        algorithm: node linked through ``hasAlgorithm``.
        inputs: sized collection of input specifications; one
            ``specifiesInputClass`` triple is added per entry and its length
            is stored as ``numberOfInputs``.
        outputs: sized collection of output specifications, handled like
            ``inputs`` (``specifiesOutputClass`` / ``numberOfOutputs``).
        parameters: iterable of ``(param, pname, ptype, default)`` tuples;
            ``pname`` and ``default`` are wrapped in ``Literal``.

    Returns:
        None. The graph is modified in place.
    """
    component_graph.add((component, RDF.type, ctype))
    component_graph.add((component, bigowl_workflows.hasAlgorithm, algorithm))
    component_graph.add((component, bigowl_workflows.numberOfInputs, Literal(len(inputs))))
    component_graph.add((component, bigowl_workflows.numberOfOutputs, Literal(len(outputs))))
    for i in inputs:
        component_graph.add((component, bigowl_workflows.specifiesInputClass, i))
    for o in outputs:
        component_graph.add((component, bigowl_workflows.specifiesOutputClass, o))

    for param, pname, ptype, default in parameters:
        component_graph.add((param, RDF.type, dmop.Parameter))
        # Link the parameter to its component once; the triple used to be added twice.
        component_graph.add((component, bigowl_workflows.hasParameter, param))
        component_graph.add((param, bigowl_workflows.hasName, Literal(pname)))
        component_graph.add((param, bigowl_workflows.hasDataType, ptype))
        component_graph.add((param, bigowl_workflows.hasDefaultValue, Literal(default)))

In [4]:
def add_bnode(graph, triples: List[Tuple[URIRef, Union[URIRef, Literal]]]):
    """Attach a set of (predicate, object) pairs to a newly created blank node.

    Args:
        graph: graph that receives one triple per pair.
        triples: the (predicate, object) pairs to hang off the new node.

    Returns:
        The blank node used as the subject of every added triple.
    """
    anonymous_node = BNode()
    for predicate, value in triples:
        statement = (anonymous_node, predicate, value)
        graph.add(statement)
    return anonymous_node

In [10]:
# Graph that collects every SHACL restriction defined in the cells below.
restrictions = get_graph()

# dr:DataTag is declared as an OWL class.
data_tag_declaration = (dr.DataTag, RDF.type, OWL.Class)
restrictions.add(data_tag_declaration)

restrictions.serialize('restrictions.ttl', format='ttl')

<Graph identifier=Ncc80f013c495430abf1c39eb86d29fe4 (<class 'rdflib.graph.Graph'>)>

In [11]:
# NonNullNumericFeatureColumnShape
# Node shape combining the numeric and the non-null property shapes defined below.
column_shape = dr.NonNullNumericFeatureColumnShape
# column_shape = BNode()
restrictions.add((column_shape, RDF.type, SH.NodeShape))

# Property shape: bod:hasDataPrimitiveTypeColumn must be one of bod:Integer / bod:Float.
numeric_column_property = dr.NumericColumnProperty
# numeric_column_property = BNode()
restrictions.add((numeric_column_property, SH.path, bigowl_data.hasDataPrimitiveTypeColumn))
# Collection(...) writes the RDF list into `restrictions`; `.uri` is the list head used as the sh:in value.
restrictions.add((numeric_column_property, SH['in'],
                  Collection(restrictions, BNode(), seq=[bigowl_data.Integer, bigowl_data.Float]).uri))

# Property shape: do:containsNulls must be the boolean value false.
non_null_column_property = dr.NonNullColumnProperty
# non_null_column_property = BNode()
restrictions.add((non_null_column_property, SH.path, do.containsNulls))
restrictions.add((non_null_column_property, SH.datatype, XSD.boolean))
restrictions.add((non_null_column_property, SH.hasValue, Literal(False)))

# Property shape: do:isFeature must be the boolean value true.
feature_column_property = dr.FeatureColumnProperty
# feature_column_property = BNode()
restrictions.add((feature_column_property, SH.path, do.isFeature))
restrictions.add((feature_column_property, SH.datatype, XSD.boolean))
restrictions.add((feature_column_property, SH.hasValue, Literal(True)))

# Node shape targeting bod:Column that requires the feature property shape.
feature_column = dr.FeatureColumnShape
# feature_column = BNode()
restrictions.add((feature_column, RDF.type, SH.NodeShape))
restrictions.add((feature_column, SH.targetClass, bigowl_data.Column))
restrictions.add((feature_column, SH.property, feature_column_property))

restrictions.add((column_shape, SH.property, numeric_column_property))
restrictions.add((column_shape, SH.property, non_null_column_property))
# NOTE(review): sh:targetClass is given a node shape (dr:FeatureColumnShape) here, not a class.
# Presumably the intent is to restrict this shape to feature columns - confirm.
restrictions.add((column_shape, SH.targetClass, feature_column))

# NonNullNumericFeatureTabularDatasetShape
# Node shape targeting do:TabularDataset: each bod:hasColumn value must conform to `column_shape`.
non_null_numeric_tabular_dataset_shape = dr.NonNullNumericFeatureTabularDatasetShape
restrictions.add((non_null_numeric_tabular_dataset_shape, RDF.type, SH.NodeShape))
restrictions.add((non_null_numeric_tabular_dataset_shape, SH.targetClass, do.TabularDataset))

# Anonymous property shape linking the dataset to the column shape.
bnode = BNode()
restrictions.add((bnode, SH.path, bigowl_data.hasColumn))
restrictions.add((bnode, SH.node, column_shape))

restrictions.add((non_null_numeric_tabular_dataset_shape, SH.property, bnode))

# LabeledTabularDatasetShape

# Property shape: do:isLabel must be the boolean value true.
label_column_property = dr.LabelColumnProperty
restrictions.add((label_column_property, SH.path, do.isLabel))
restrictions.add((label_column_property, SH.datatype, XSD.boolean))
restrictions.add((label_column_property, SH.hasValue, Literal(True)))

# Node shape targeting bod:Column that requires the label property shape.
label_column_shape = dr.LabelColumnShape
restrictions.add((label_column_shape, RDF.type, SH.NodeShape))
restrictions.add((label_column_shape, SH.targetClass, bigowl_data.Column))
restrictions.add((label_column_shape, SH.property, label_column_property))

# Node shape targeting do:TabularDataset; its column constraint is attached at the end of the cell.
labeled_dataset_shape = dr.LabeledTabularDatasetShape
restrictions.add((labeled_dataset_shape, RDF.type, SH.NodeShape))
restrictions.add((labeled_dataset_shape, SH.targetClass, do.TabularDataset))

# Qualified value shape: a value whose do:isLabel is true.
bnode_qualified = BNode()
restrictions.add((bnode_qualified, SH.path, do.isLabel))
restrictions.add((bnode_qualified, SH.hasValue, Literal(True)))

# Property shape on bod:hasColumn: at least one column overall (sh:minCount 1) and
# at least one column matching the qualified shape (sh:qualifiedMinCount 1).
bnode_column = BNode()
restrictions.add((bnode_column, SH.path, bigowl_data.hasColumn))
restrictions.add((bnode_column, SH.qualifiedValueShape, bnode_qualified))
restrictions.add((bnode_column, SH.qualifiedMinCount, Literal(1)))
restrictions.add((bnode_column, SH.minCount, Literal(1)))

restrictions.add((labeled_dataset_shape, SH.property, bnode_column))

# Write the shapes collected so far to restrictions.ttl.
restrictions.serialize('restrictions.ttl', format='ttl')

<Graph identifier=Ncc80f013c495430abf1c39eb86d29fe4 (<class 'rdflib.graph.Graph'>)>

In [12]:
# Column-level shape: each bod:Column must satisfy the non-null property shape.
non_null_column_shape = dr.NonNullColumnShape
for predicate, value in (
    (RDF.type, SH.NodeShape),
    (SH.targetClass, bigowl_data.Column),
    (SH.property, non_null_column_property),
):
    restrictions.add((non_null_column_shape, predicate, value))

# Dataset-level shape: every bod:hasColumn value must conform to the column shape above.
bnode = add_bnode(restrictions, [
    (SH.path, bigowl_data.hasColumn),
    (SH.node, non_null_column_shape),
])
non_null_tabular_dataset_shape = dr.NonNullTabularDatasetShape
for predicate, value in (
    (RDF.type, SH.NodeShape),
    (SH.targetClass, do.TabularDataset),
    (SH.property, bnode),
):
    restrictions.add((non_null_tabular_dataset_shape, predicate, value))

# Plain tabular dataset shape: a target class and nothing else.
tabular_dataset_shape = dr.TabularDatasetShape
for predicate, value in (
    (RDF.type, SH.NodeShape),
    (SH.targetClass, do.TabularDataset),
):
    restrictions.add((tabular_dataset_shape, predicate, value))

# Column-level shape: each bod:Column must satisfy the numeric property shape.
numeric_column_shape = dr.NumericColumnShape
for predicate, value in (
    (RDF.type, SH.NodeShape),
    (SH.targetClass, bigowl_data.Column),
    (SH.property, numeric_column_property),
):
    restrictions.add((numeric_column_shape, predicate, value))

# Dataset-level shape: every bod:hasColumn value must conform to the numeric column shape.
bnode = add_bnode(restrictions, [
    (SH.path, bigowl_data.hasColumn),
    (SH.node, numeric_column_shape),
])
numeric_tabular_dataset_shape = dr.NumericTabularDatasetShape
for predicate, value in (
    (RDF.type, SH.NodeShape),
    (SH.targetClass, do.TabularDataset),
    (SH.property, bnode),
):
    restrictions.add((numeric_tabular_dataset_shape, predicate, value))

restrictions.serialize('restrictions.ttl', format='ttl')

<Graph identifier=Ncc80f013c495430abf1c39eb86d29fe4 (<class 'rdflib.graph.Graph'>)>

In [14]:
# Shape for normalized datasets: typed both as a node shape and as a dr:DataTag.
normalized_tabular_dataset_shape = dr.NormalizedTabularDatasetShape
for predicate, value in (
    (RDF.type, SH.NodeShape),
    (RDF.type, dr.DataTag),
    (SH.targetClass, do.TabularDataset),
):
    restrictions.add((normalized_tabular_dataset_shape, predicate, value))

restrictions.serialize('restrictions.ttl', format='ttl')

<Graph identifier=Ncc80f013c495430abf1c39eb86d29fe4 (<class 'rdflib.graph.Graph'>)>

In [16]:
# KMeans input: a tabular dataset that is both null-free and numeric (sh:and of the two shapes).
kmeans_input_shape = dr.KMeansInputShape
for predicate, value in (
    (RDF.type, SH.NodeShape),
    (SH.targetClass, do.TabularDataset),
):
    restrictions.add((kmeans_input_shape, predicate, value))

# The RDF list is written into `restrictions`; its head node is the sh:and value.
kmeans_required_shapes = Collection(
    restrictions,
    BNode(),
    seq=[non_null_tabular_dataset_shape, numeric_tabular_dataset_shape],
)
restrictions.add((kmeans_input_shape, SH['and'], kmeans_required_shapes.uri))

restrictions.serialize('restrictions.ttl', format='ttl')

<Graph identifier=Ncc80f013c495430abf1c39eb86d29fe4 (<class 'rdflib.graph.Graph'>)>

In [15]:
# LOF input: a tabular dataset that is null-free, numeric and normalized (sh:and of the three shapes).
local_outlier_factor_input_shape = dr.LocalOutlierFactorInputShape
for predicate, value in (
    (RDF.type, SH.NodeShape),
    (SH.targetClass, do.TabularDataset),
):
    restrictions.add((local_outlier_factor_input_shape, predicate, value))

# The RDF list is written into `restrictions`; its head node is the sh:and value.
lof_required_shapes = Collection(
    restrictions,
    BNode(),
    seq=[
        non_null_tabular_dataset_shape,
        numeric_tabular_dataset_shape,
        normalized_tabular_dataset_shape,
    ],
)
restrictions.add((local_outlier_factor_input_shape, SH['and'], lof_required_shapes.uri))

restrictions.serialize('restrictions.ttl', format='ttl')

<Graph identifier=Ncc80f013c495430abf1c39eb86d29fe4 (<class 'rdflib.graph.Graph'>)>

In [65]:
# Read the annotated penguins dataset that will be validated.
annotated_dataset_path = '../dataset_annotator/penguins_annotated.ttl'
data_graph = Graph()
data_graph.parse(annotated_dataset_path, format='ttl')

# Check the dataset node against the numeric tabular dataset shape only.
# NOTE(review): upstream pyshacl documents `use_shapes` / `focus_nodes`; the
# `validate_shapes` / `focus` keywords presumably come from a patched build - confirm.
validation_outcome = validate(
    data_graph,
    shacl_graph=restrictions,
    validate_shapes=[numeric_tabular_dataset_shape],
    focus=da.term('penguins.csv'),
)
conforms, result_graph, results = validation_outcome

print(results)

PreValidator
Validation Report
Conforms: False
Results (3):
Constraint Violation in NodeConstraintComponent (http://www.w3.org/ns/shacl#NodeConstraintComponent):
	Severity: sh:Violation
	Source Shape: [ sh:node dr:NumericColumnShape ; sh:path bod:hasColumn ]
	Focus Node: dab:penguins.csv
	Value Node: <https://diviloper.dev/ABox#penguins.csv/sex>
	Result Path: bod:hasColumn
	Message: Value does not conform to Shape dr:NumericColumnShape
Constraint Violation in NodeConstraintComponent (http://www.w3.org/ns/shacl#NodeConstraintComponent):
	Severity: sh:Violation
	Source Shape: [ sh:node dr:NumericColumnShape ; sh:path bod:hasColumn ]
	Focus Node: dab:penguins.csv
	Value Node: <https://diviloper.dev/ABox#penguins.csv/island>
	Result Path: bod:hasColumn
	Message: Value does not conform to Shape dr:NumericColumnShape
Constraint Violation in NodeConstraintComponent (http://www.w3.org/ns/shacl#NodeConstraintComponent):
	Severity: sh:Violation
	Source Shape: [ sh:node dr:NumericColumnShape ; sh

In [18]:
components = get_graph()

# One row per component, in the argument order add_component expects after the graph:
# (component, type, algorithm, input specifications, output specifications, parameters).
component_specs = [
    (do.KMeansComponent, bigowl_workflows.DataAnalysing, do.KMeans,
     [kmeans_input_shape],
     [do.ClusteredDataset, do.Centroids],
     [(do.NumberOfClusters, Literal('Number of Clusters'), bigowl_data.Integer, Literal(3))]),
    (do.DropNullsComponent, bigowl_workflows.DataProcessing, do.DropNulls,
     [tabular_dataset_shape],
     [non_null_tabular_dataset_shape],
     []),
    (do.StandardizeComponent, bigowl_workflows.DataProcessing, do.Standardize,
     [numeric_tabular_dataset_shape],
     [normalized_tabular_dataset_shape],
     []),
    (do.MinMaxScalerComponent, bigowl_workflows.DataProcessing, do.MinMaxScaler,
     [numeric_tabular_dataset_shape],
     [normalized_tabular_dataset_shape],
     []),
    (do.RobustScalerComponent, bigowl_workflows.DataProcessing, do.RobustScaler,
     [numeric_tabular_dataset_shape],
     [normalized_tabular_dataset_shape],
     []),
    (do.ConvertToNumericComponent, bigowl_workflows.DataProcessing, do.ConvertToNumeric,
     [tabular_dataset_shape],
     [numeric_tabular_dataset_shape],
     []),
    (do.LocalOutlierFactorComponent, bigowl_workflows.DataAnalysing, do.LocalOutlierFactor,
     [local_outlier_factor_input_shape],
     [do.LabeledDataset],
     [(do.NumberOfNeighbors, Literal('Number of Neighbors'), bigowl_data.Integer, Literal(20)),
      (do.Contamination, Literal('Contamination'), bigowl_data.Float, Literal(0.1))]),
]

for component_spec in component_specs:
    add_component(components, *component_spec)

components.serialize('components.ttl', format='ttl')

<Graph identifier=Nebb9a62205934be382d4b470874d80b9 (<class 'rdflib.graph.Graph'>)>

In [None]:
# Derive the description of the null-free output dataset from the annotated input dataset.
input_graph = get_graph()
# A freshly created graph holds no triples, so the loop below would never run.
# Load the annotated penguins dataset that describes da:penguins.csv and its columns.
input_graph.parse('../dataset_annotator/penguins_annotated.ttl', format='ttl')
output_graph = get_graph()

input_dataset = da.term('penguins.csv')
output_dataset = da.term('penguins_non_null.csv')

# Copy each column of the input dataset onto the output dataset, forcing
# do:containsNulls to false and keeping every other column statement unchanged.
for s, p, o in input_graph.triples((input_dataset, bigowl_data.hasColumn, None)):
    output_graph.add((output_dataset, bigowl_data.hasColumn, o))
    for s2, p2, o2 in input_graph.triples((o, None, None)):
        if p2 == do.containsNulls:
            output_graph.add((o, p2, Literal(False)))
        else:
            output_graph.add((o, p2, o2))

## Workflow examples

In [90]:
# Example workflow: load CSV -> drop nulls -> convert to numeric -> KMeans -> store CSV.
workflow_graph = get_graph()
workflow = da.term('workflow_test')
workflow_graph.add((workflow, RDF.type, bigowl_workflows.Workflow))

# Task 1: import the CSV file; its output is the da:penguins.csv dataset.
load_task = da.term('load_task')
workflow_graph.add((load_task, RDF.type, bigowl_workflows.Task))
workflow_graph.add((load_task, bigowl_workflows.hasComponent, do.Import_CSV))
workflow_graph.add((load_task, bigowl_workflows.order, Literal(1)))
workflow_graph.add((load_task, bigowl_workflows.hasOutput, da.term('penguins.csv')))
workflow_graph.add((load_task, do.hasParameterValue,
                    add_bnode(workflow_graph, [
                        (do.forParameter, do.URL_Param),
                        (do.hasValue, Literal(r'/workflows/test/penguins.csv'))])))

# Task 2: drop nulls; the intermediate dataset is anonymous and declared to
# conform to the non-null shape.
drop_nulls_task = da.term('drop_nulls_task')
mid_dataset = add_bnode(workflow_graph, [(RDF.type, do.TabularDataset), (do.conformsTo, non_null_tabular_dataset_shape)])
workflow_graph.add((drop_nulls_task, RDF.type, bigowl_workflows.Task))
workflow_graph.add((drop_nulls_task, bigowl_workflows.hasComponent, do.DropNullsComponent))
workflow_graph.add((drop_nulls_task, bigowl_workflows.order, Literal(2)))
workflow_graph.add((drop_nulls_task, bigowl_workflows.hasInput, da.term('penguins.csv')))
workflow_graph.add((drop_nulls_task, bigowl_workflows.hasOutput, mid_dataset))
workflow_graph.add((drop_nulls_task, do.hasParameterValue,
                    add_bnode(workflow_graph, [
                        (do.forParameter, do.dropNullsMethod),
                        (do.hasValue, Literal('drop_rows'))])))

# Task 3: convert to numeric; the output conforms to both the numeric and the non-null shape.
convert_to_numeric_task = da.term('convert_to_numeric_task')
mid_dataset2 = add_bnode(workflow_graph, [(RDF.type, do.TabularDataset), (do.conformsTo, numeric_tabular_dataset_shape), (do.conformsTo, non_null_tabular_dataset_shape)])
workflow_graph.add((convert_to_numeric_task, RDF.type, bigowl_workflows.Task))
workflow_graph.add((convert_to_numeric_task, bigowl_workflows.hasComponent, do.ConvertToNumericComponent))
workflow_graph.add((convert_to_numeric_task, bigowl_workflows.order, Literal(3)))
workflow_graph.add((convert_to_numeric_task, bigowl_workflows.hasInput, mid_dataset))
workflow_graph.add((convert_to_numeric_task, bigowl_workflows.hasOutput, mid_dataset2))

# Task 4: KMeans with two anonymous outputs, both declared to conform to the labeled dataset shape.
kmeans_task = da.term('kmeans_task')
final_dataset = add_bnode(workflow_graph, [(RDF.type, do.TabularDataset), (do.conformsTo, labeled_dataset_shape)])
clusters = add_bnode(workflow_graph, [(RDF.type, do.TabularDataset), (do.conformsTo, labeled_dataset_shape)])

workflow_graph.add((kmeans_task, RDF.type, bigowl_workflows.Task))
workflow_graph.add((kmeans_task, bigowl_workflows.hasComponent, do.KMeansComponent))
workflow_graph.add((kmeans_task, bigowl_workflows.order, Literal(4)))
workflow_graph.add((kmeans_task, bigowl_workflows.hasInput, mid_dataset2))
workflow_graph.add((kmeans_task, bigowl_workflows.hasOutput, final_dataset))
workflow_graph.add((kmeans_task, bigowl_workflows.hasOutput, clusters))
workflow_graph.add((kmeans_task, do.hasParameterValue,
                    add_bnode(workflow_graph, [
                        (do.forParameter, do.NumberOfClusters),
                        (do.hasValue, Literal(3))])))

# Task 5: export the clustered dataset as CSV.
store_as_csv_task = da.term('store_as_csv_task')
workflow_graph.add((store_as_csv_task, RDF.type, bigowl_workflows.Task))
workflow_graph.add((store_as_csv_task, bigowl_workflows.hasComponent, do.Export_CSV))
workflow_graph.add((store_as_csv_task, bigowl_workflows.order, Literal(5)))
workflow_graph.add((store_as_csv_task, bigowl_workflows.hasInput, final_dataset))
workflow_graph.add((store_as_csv_task, do.hasParameterValue,
                    add_bnode(workflow_graph, [
                        (do.forParameter, do.URL_Param),
                        (do.hasValue, Literal(r'/workflows/test/clustered_penguins.csv'))])))


# Register every task with the workflow. The export task is included as well:
# it is fully described above and would otherwise be left out of the workflow.
workflow_graph.add((workflow, bigowl_workflows.hasTask, load_task))
workflow_graph.add((workflow, bigowl_workflows.hasTask, drop_nulls_task))
workflow_graph.add((workflow, bigowl_workflows.hasTask, convert_to_numeric_task))
workflow_graph.add((workflow, bigowl_workflows.hasTask, kmeans_task))
workflow_graph.add((workflow, bigowl_workflows.hasTask, store_as_csv_task))

# Chain the tasks in execution order, ending with the export task that reads the KMeans output.
workflow_graph.add((load_task, do.connectedTo, drop_nulls_task))
workflow_graph.add((drop_nulls_task, do.connectedTo, convert_to_numeric_task))
workflow_graph.add((convert_to_numeric_task, do.connectedTo, kmeans_task))
workflow_graph.add((kmeans_task, do.connectedTo, store_as_csv_task))

workflow_graph.serialize('workflow_test.ttl', format='ttl')

<Graph identifier=N1ad235b07f45405a9a8882690f4ca151 (<class 'rdflib.graph.Graph'>)>

In [22]:
# Workflow variant 1: drop null rows, one-hot encode, robust-scale, then Local Outlier Factor.
workflow_graph = get_graph()
workflow = da.term('workflow_test')
workflow_graph.add((workflow, RDF.type, bigowl_workflows.Workflow))

load_task = da.term('load_task')
drop_nulls_task = da.term('drop_nulls_task')
convert_to_numeric_task = da.term('convert_to_numeric_task')
robust_scaler_task = da.term('robust_scaler_task')
local_outlier_factor_task = da.term('local_outlier_factor_task')
store_as_csv_task = da.term('store_as_csv_task')

# (task, component) pairs listed in execution order.
pipeline = [
    (load_task, do.Import_CSV),
    (drop_nulls_task, do.DropNullRowsComponent),
    (convert_to_numeric_task, do.OneHotEncoderComponent),
    (robust_scaler_task, do.RobustScalerComponent),
    (local_outlier_factor_task, do.LocalOutlierFactorComponent),
    (store_as_csv_task, do.Export_CSV),
]

# Attach each task to its component, then register it with the workflow.
for task, component in pipeline:
    workflow_graph.add((task, bigowl_workflows.hasComponent, component))
for task, _ in pipeline:
    workflow_graph.add((workflow, bigowl_workflows.hasTask, task))

# Connect each task to the one that follows it.
for (upstream, _), (downstream, _) in zip(pipeline, pipeline[1:]):
    workflow_graph.add((upstream, do.connectedTo, downstream))

workflow_graph.serialize('workflow_test1.ttl', format='ttl')

<Graph identifier=Nfa894e17782548a3b36ad57c0ced26de (<class 'rdflib.graph.Graph'>)>

In [21]:
# Workflow variant 2: mean imputation, label encoding, robust-scale, then Local Outlier Factor.
workflow_graph = get_graph()
workflow = da.term('workflow_test')
workflow_graph.add((workflow, RDF.type, bigowl_workflows.Workflow))

load_task = da.term('load_task')
drop_nulls_task = da.term('drop_nulls_task')
convert_to_numeric_task = da.term('convert_to_numeric_task')
robust_scaler_task = da.term('robust_scaler_task')
local_outlier_factor_task = da.term('local_outlier_factor_task')
store_as_csv_task = da.term('store_as_csv_task')

# (task, component) pairs listed in execution order.
pipeline = [
    (load_task, do.Import_CSV),
    (drop_nulls_task, do.MeanImputationComponent),
    (convert_to_numeric_task, do.LabelEncoderComponent),
    (robust_scaler_task, do.RobustScalerComponent),
    (local_outlier_factor_task, do.LocalOutlierFactorComponent),
    (store_as_csv_task, do.Export_CSV),
]

# Attach each task to its component, then register it with the workflow.
for task, component in pipeline:
    workflow_graph.add((task, bigowl_workflows.hasComponent, component))
for task, _ in pipeline:
    workflow_graph.add((workflow, bigowl_workflows.hasTask, task))

# Connect each task to the one that follows it.
for (upstream, _), (downstream, _) in zip(pipeline, pipeline[1:]):
    workflow_graph.add((upstream, do.connectedTo, downstream))

workflow_graph.serialize('workflow_test2.ttl', format='ttl')

<Graph identifier=N2142e13a19a74f7c93156ff057f2eb7b (<class 'rdflib.graph.Graph'>)>

# Test with SHACL Rules

In [None]:
# Graph holding the experimental SHACL rules.
rules = get_graph()

# Rule targeting bow:Task nodes; its condition is an anonymous node built with add_bnode.
drop_nulls_rule = dru.term('drop_nulls_rule')
rules.add((drop_nulls_rule, RDF.type, SH.Rule))
rules.add((drop_nulls_rule, SH.targetClass, bigowl_workflows.Task))
# NOTE(review): the condition points sh:property straight at the bow:hasComponent predicate and
# uses sh:value; SHACL property shapes normally use sh:path and sh:hasValue - confirm the intent.
# NOTE(review): the message speaks of "exactly one input" while the constraint is on
# bow:hasComponent - confirm which one is meant.
rules.add((drop_nulls_rule, SH.condition, add_bnode(rules, [
    (SH.property, bigowl_workflows.hasComponent),
    (SH.value, do.DropNullsComponent),
    (SH.minCount, Literal(1)),
    (SH.maxCount, Literal(1)),
    (SH.message, Literal('DropNullsComponent must have exactly one input'))
])))