In [63]:
from owlrl import DeductiveClosure, OWLRL_Semantics
from rdflib import *
from rdflib.extras.infixowl import Restriction

In [111]:
# Namespaces used throughout the notebook: the four BIGOWL vocabularies,
# the project's own ontology / ABox, and DMOP.
bigowl_data = Namespace('https://w3id.org/BIGOWLData/')
bigowl_algorithms = Namespace('https://w3id.org/BIGOWLAlgorithms/')
bigowl_problems = Namespace('https://w3id.org/BIGOWLProblems/')
bigowl_workflows = Namespace('https://w3id.org/BIGOWLWorkflows/')
do = Namespace('https://diviloper.dev/ontology#')
abox = Namespace('https://diviloper.dev/ABox#')
dmop = Namespace('http://www.e-lico.eu/ontologies/dmo/DMOP/DMOP.owl#')

# Working graph, seeded from the Turtle ontology file.
g = Graph()
g.parse(r'../ontologies/ontology_turtle.ttl')

# Additional sources that are currently disabled:
# g.parse(r'../ontologies/BIGOWLData.owl')
# g.parse(r'../ontologies/BIGOWLProblems.owl')
# g.parse(r'../ontologies/BIGOWLAlgorithms.owl')
# g.parse(r'../ontologies/BIGOWLWorkflows.owl')
# g.parse(r'../dataset_annotator/penguins_annotated.ttl')
# g.parse(r'../dataset_annotator/titanic_annotated.ttl')

In [112]:
# DataSet Format
# do.fileFormat is a string-valued datatype property whose domain is
# bigowl_data.DataSet; do.Import_CSV is annotated with the 'csv' format.
for triple in (
    (do.fileFormat, RDF.type, OWL.DatatypeProperty),
    (do.fileFormat, RDFS.domain, bigowl_data.DataSet),
    (do.fileFormat, RDFS.range, XSD.string),
    (do.Import_CSV, do.fileFormat, Literal('csv')),
):
    g.add(triple)
g  # last expression: the cell still echoes the graph, as g.add(...) did

<Graph identifier=Nabfc29fb97cd4c479cfb633900d500da (<class 'rdflib.graph.Graph'>)>

In [113]:
# Parameter Value
# A do.ParameterValue node pairs a concrete value (do.hasValue) with the
# dmop.Parameter it belongs to (do.forParameter).
g.add((do.ParameterValue, RDF.type, OWL.Class))

# forParameter: ParameterValue -> Parameter
g.add((do.forParameter, RDF.type, OWL.ObjectProperty))
g.add((do.forParameter, RDFS.domain, do.ParameterValue))
g.add((do.forParameter, RDFS.range, dmop.Parameter))

# hasValue: ParameterValue -> literal (no range is declared)
g.add((do.hasValue, RDF.type, OWL.DatatypeProperty))
g.add((do.hasValue, RDFS.domain, do.ParameterValue))

# hasParameterValue: <subject> -> ParameterValue (no domain is declared).
# The range was dmop.Parameter, duplicating forParameter's range; with RDFS/OWL
# range inference that would classify every value node as a dmop.Parameter.
# The property links to the value node, so its range is do.ParameterValue.
g.add((do.hasParameterValue, RDF.type, OWL.ObjectProperty))
g.add((do.hasParameterValue, RDFS.range, do.ParameterValue))

<Graph identifier=Nabfc29fb97cd4c479cfb633900d500da (<class 'rdflib.graph.Graph'>)>

In [114]:
# Tabular Dataset with Unique Columns
# Column-level restriction: do.isUnique has the boolean value "true".
unique_flag = Literal('true', datatype=XSD.boolean)
Restriction(
    onProperty=do.isUnique,
    graph=g,
    value=unique_flag,
    identifier=do.UniqueColumnRestriction,
)
# Dataset-level restriction: some bigowl_data.hasColumn value satisfies the
# column restriction above.
Restriction(
    onProperty=bigowl_data.hasColumn,
    graph=g,
    someValuesFrom=do.UniqueColumnRestriction,
    identifier=do.UniqueTabularDatasetRestriction,
)
# UniqueTabularDataset is a TabularDataset and is equivalent to the
# dataset-level restriction.
g.add((do.UniqueTabularDataset, RDFS.subClassOf, do.TabularDataset))
g.add((do.UniqueTabularDataset, OWL.equivalentClass, do.UniqueTabularDatasetRestriction))

<Graph identifier=Nabfc29fb97cd4c479cfb633900d500da (<class 'rdflib.graph.Graph'>)>

In [115]:
# Tabular Dataset with Categorical Columns
# Column-level restriction: do.isCategorical has the boolean value "true".
categorical_flag = Literal('true', datatype=XSD.boolean)
Restriction(
    onProperty=do.isCategorical,
    graph=g,
    value=categorical_flag,
    identifier=do.CategoricalColumnRestriction,
)
# Dataset-level restriction: some bigowl_data.hasColumn value satisfies the
# column restriction above.
Restriction(
    onProperty=bigowl_data.hasColumn,
    graph=g,
    someValuesFrom=do.CategoricalColumnRestriction,
    identifier=do.CategoricalTabularDatasetRestriction,
)
# CategoricalTabularDataset is a TabularDataset and is equivalent to the
# dataset-level restriction.
g.add((do.CategoricalTabularDataset, RDFS.subClassOf, do.TabularDataset))
g.add((do.CategoricalTabularDataset, OWL.equivalentClass, do.CategoricalTabularDatasetRestriction))

<Graph identifier=Nabfc29fb97cd4c479cfb633900d500da (<class 'rdflib.graph.Graph'>)>

In [116]:
# Input/Output specification of TabularAggregator:
# takes a TabularDataset, produces a UniqueTabularDataset.
for io_predicate, dataset_class in (
    (bigowl_workflows.specifiesInputClass, do.TabularDataset),
    (bigowl_workflows.specifiesOutputClass, do.UniqueTabularDataset),
):
    g.add((do.TabularAggregator, io_predicate, dataset_class))
g  # last expression: the cell still echoes the graph, as g.add(...) did

<Graph identifier=Nabfc29fb97cd4c479cfb633900d500da (<class 'rdflib.graph.Graph'>)>

In [117]:
# Algorithms
# Each entry: (algorithm class, problem its members solve, member individuals).
algorithm_families = (
    (
        do.ClusteringAlgorithm,
        do.Clustering,
        (do.KMeans, do.DBSCAN, do.MixtureGaussian),
    ),
    (
        do.ClassificationAlgorithms,
        do.Classification,
        (do.KNN, do.DecisionTree, do.RandomForest, do.LogisticRegression),
    ),
)

# Classification is registered as a sub-problem of Describe.
g.add((do.Classification, do.subProblemOf, do.Describe))

for family, problem, members in algorithm_families:
    # Every family specialises BIGOWL's DataAnalysingAlgorithm.
    g.add((family, RDFS.subClassOf, bigowl_algorithms.DataAnalysingAlgorithm))
    for algorithm in members:
        g.add((algorithm, RDF.type, family))
        g.add((algorithm, bigowl_algorithms.solves, problem))
g  # last expression: the cell still echoes the graph, as g.add(...) did

<Graph identifier=Nabfc29fb97cd4c479cfb633900d500da (<class 'rdflib.graph.Graph'>)>

In [87]:
# SubComponent Property
# Both properties are object properties relating one
# bigowl_workflows.Component to another.
for component_property in (do.subComponentOf, do.subComponentConnectedTo):
    g.add((component_property, RDF.type, OWL.ObjectProperty))
    g.add((component_property, RDFS.domain, bigowl_workflows.Component))
    g.add((component_property, RDFS.range, bigowl_workflows.Component))
g  # last expression: the cell still echoes the graph, as g.add(...) did

<Graph identifier=N0a5e04f630f44559ae5812e038beda4f (<class 'rdflib.graph.Graph'>)>

In [None]:
# KNN Component
# Each entry: (component, #inputs, #outputs, input classes, output classes).
knn_component_specs = (
    (
        do.KNNComponent, 2, 3,
        (do.TrainingDataset, do.TestDataset),
        (do.PredictedDataset, do.KNNModel, do.ConfusionMatrix),
    ),
    (
        do.KNNTrainComponent, 1, 1,
        (do.TrainingDataset,),
        (do.KNNModel,),
    ),
)
for component, input_count, output_count, input_classes, output_classes in knn_component_specs:
    g.add((component, RDF.type, bigowl_workflows.MachineLearning))
    g.add((component, bigowl_workflows.numberOfInputs, Literal(input_count)))
    g.add((component, bigowl_workflows.numberOfOutputs, Literal(output_count)))
    for input_class in input_classes:
        g.add((component, bigowl_workflows.specifiesInputClass, input_class))
    for output_class in output_classes:
        g.add((component, bigowl_workflows.specifiesOutputClass, output_class))

# The training step is a sub-component of the full KNN component.
g.add((do.KNNTrainComponent, do.subComponentOf, do.KNNComponent))

# NOTE(review): the predict component is only typed here; it has no I/O
# specification or subComponentOf link yet -- looks unfinished, confirm.
g.add((do.KNNPredictComponent, RDF.type, bigowl_workflows.MachineLearning))

In [118]:
# KMeans Component
# Component description: one input (TabularDataset), two outputs
# (ClusteredDataset and Centroids), one parameter (NumberOfClusters).
for predicate, value in (
    (RDF.type, bigowl_workflows.DataAnalysing),
    (bigowl_workflows.hasAlgorithm, do.KMeans),
    (bigowl_workflows.numberOfInputs, Literal(1)),
    (bigowl_workflows.numberOfOutputs, Literal(2)),
    (bigowl_workflows.specifiesInputClass, do.TabularDataset),
    (bigowl_workflows.specifiesOutputClass, do.ClusteredDataset),
    (bigowl_workflows.specifiesOutputClass, do.Centroids),
    (bigowl_workflows.hasParameter, do.NumberOfClusters),
):
    g.add((do.KMeansComponent, predicate, value))

# Parameter description.
# NOTE(review): Literal(None) presumably ends up as the plain string "None";
# confirm this is the intended way to express "no default".
for predicate, value in (
    (RDF.type, dmop.Parameter),
    (bigowl_workflows.hasName, Literal('Number of Clusters')),
    (bigowl_workflows.hasDataType, bigowl_data.Integer),
    (bigowl_workflows.hasDefaultValue, Literal(None)),
):
    g.add((do.NumberOfClusters, predicate, value))
g  # last expression: the cell still echoes the graph, as g.add(...) did

<Graph identifier=Nabfc29fb97cd4c479cfb633900d500da (<class 'rdflib.graph.Graph'>)>

In [119]:
# DBSCAN Component
# Component description: one input (TabularDataset), one output
# (ClusteredDataset).
for predicate, value in (
    (RDF.type, bigowl_workflows.DataAnalysing),
    (bigowl_workflows.hasAlgorithm, do.DBSCAN),
    (bigowl_workflows.numberOfInputs, Literal(1)),
    (bigowl_workflows.numberOfOutputs, Literal(1)),
    (bigowl_workflows.specifiesInputClass, do.TabularDataset),
    (bigowl_workflows.specifiesOutputClass, do.ClusteredDataset),
):
    g.add((do.DBSCANComponent, predicate, value))

# Parameters: (individual, display name, data type, default value).
for parameter, display_name, data_type, default in (
    (do.Epsilon, 'Epsilon', bigowl_data.Float, 0.5),
    (do.minPoints, 'Minimum Points', bigowl_data.Integer, 5),
):
    g.add((parameter, RDF.type, dmop.Parameter))
    g.add((parameter, bigowl_workflows.hasName, Literal(display_name)))
    g.add((parameter, bigowl_workflows.hasDataType, data_type))
    g.add((parameter, bigowl_workflows.hasDefaultValue, Literal(default)))
    # Attach the parameter to the component.
    g.add((do.DBSCANComponent, bigowl_workflows.hasParameter, parameter))
g  # last expression: the cell still echoes the graph, as g.add(...) did

<Graph identifier=Nabfc29fb97cd4c479cfb633900d500da (<class 'rdflib.graph.Graph'>)>

In [120]:
# Mixture of Gaussians Component
# Component description: one input (TabularDataset), two outputs
# (ClusteredDataset and Gaussians), one parameter (NumberOfClusters).
for predicate, value in (
    (RDF.type, bigowl_workflows.DataAnalysing),
    (bigowl_workflows.hasAlgorithm, do.MixtureGaussian),
    (bigowl_workflows.numberOfInputs, Literal(1)),
    (bigowl_workflows.numberOfOutputs, Literal(2)),
    (bigowl_workflows.specifiesInputClass, do.TabularDataset),
    (bigowl_workflows.specifiesOutputClass, do.ClusteredDataset),
    (bigowl_workflows.specifiesOutputClass, do.Gaussians),
    (bigowl_workflows.hasParameter, do.NumberOfClusters),
):
    g.add((do.MixtureGaussianComponent, predicate, value))

# The NumberOfClusters parameter is (re)described here so this cell does not
# depend on the KMeans cell having been run.
# NOTE(review): Literal(None) presumably ends up as the plain string "None";
# confirm this is the intended way to express "no default".
for predicate, value in (
    (RDF.type, dmop.Parameter),
    (bigowl_workflows.hasName, Literal('Number of Clusters')),
    (bigowl_workflows.hasDataType, bigowl_data.Integer),
    (bigowl_workflows.hasDefaultValue, Literal(None)),
):
    g.add((do.NumberOfClusters, predicate, value))
g  # last expression: the cell still echoes the graph, as g.add(...) did

<Graph identifier=Nabfc29fb97cd4c479cfb633900d500da (<class 'rdflib.graph.Graph'>)>

In [121]:
# User Intent
# do.overData is an object property from do.UserIntent to do.Data.
for predicate, value in (
    (RDF.type, OWL.ObjectProperty),
    (RDFS.domain, do.UserIntent),
    (RDFS.range, do.Data),
):
    g.add((do.overData, predicate, value))
g  # last expression: the cell still echoes the graph, as g.add(...) did

<Graph identifier=Nabfc29fb97cd4c479cfb633900d500da (<class 'rdflib.graph.Graph'>)>

In [92]:
# Run owlrl's deductive closure with the OWLRL_Semantics rule set over g.
owl_rl_closure = DeductiveClosure(OWLRL_Semantics)
owl_rl_closure.expand(g)

In [78]:
# Print every triple whose predicate is bigowl_algorithms.solves
# (subject, predicate and object separated by spaces).
solves_pattern = (None, bigowl_algorithms.solves, None)
for triple in g.triples(solves_pattern):
    print(*triple)

https://diviloper.dev/ontology#ClusteringAlgorithm https://w3id.org/BIGOWLAlgorithms/solves https://diviloper.dev/ontology#Clustering


In [122]:
# Write the assembled graph to disk as Turtle. The format is passed explicitly
# so the contents of the .ttl file do not depend on the installed rdflib
# version's default serializer (RDF/XML before rdflib 6, Turtle afterwards).
g.serialize(destination=r'../ontologies/ontology_full.ttl', format='turtle')

<Graph identifier=Nabfc29fb97cd4c479cfb633900d500da (<class 'rdflib.graph.Graph'>)>