In [1]:
from common import *

In [2]:
# Graph that the cells below fill with ABox triples and finally serialize.
# get_graph is not defined in this notebook; presumably it comes from the
# star-import of `common` -- confirm there what the returned graph already holds.
abox = get_graph()

## Problems

In [3]:
# All individuals that get typed as dtbox.Problem.
problems = [
    # Problems that have no parent in `subproblems`, plus the two parents
    da.Description,
    da.Explanation,
    da.Prediction,
    da.DataCleaning,
    da.DataManagement,

    # Children of da.Description
    da.Classification,
    da.Clustering,
    da.AnomalyDetection,

    # Children of da.DataCleaning
    da.MissingValueManagement,
    da.DuplicationRemoval,
    da.Normalization,
]

# (parent problem, [child problems]) pairs used to emit dtbox.subProblemOf links.
subproblems = [
    (
        da.Description,
        [da.Classification, da.Clustering, da.AnomalyDetection],
    ),
    (
        da.DataCleaning,
        [da.MissingValueManagement, da.DuplicationRemoval, da.Normalization],
    ),
]

# Type every listed problem.
for problem in problems:
    abox.add((problem, RDF.type, dtbox.Problem))

# Link each child to its parent (the triple points from child to parent).
for parent, children in subproblems:
    for child in children:
        abox.add((child, dtbox.subProblemOf, parent))

## Algorithms

In [4]:
# (problem, [algorithms that solve it]) pairs; the group order and the order
# inside each group determine the order of the flat `algorithms` list below.
algorithms_by_problem = [
    (da.Clustering, [
        da.KMeans,
        da.DBSCAN,
        da.HierarchicalClustering,
    ]),
    (da.Classification, [
        da.DecisionTree,
        da.RandomForest,
        da.NaiveBayes,
        da.SVM,
        da.KNN,
    ]),
    (da.AnomalyDetection, [
        da.OneClassSVM,
        da.IsolationForest,
        da.LocalOutlierFactor,
    ]),
    (da.MissingValueManagement, [
        da.MeanImputation,
        da.MedianImputation,
        da.ModeImputation,
        da.KNNImputation,
        da.MissingValueRemoval,
    ]),
    (da.DuplicationRemoval, [
        da.DuplicateRemoval,
    ]),
    (da.Normalization, [
        da.MinMaxScaling,
        da.ZScoreScaling,
        da.RobustNormalization,
    ]),
    (da.DataManagement, [
        da.TrainTestSplit,
        da.LabelExtraction,
    ]),
]

# Flat list of (algorithm, problem) pairs.
algorithms = [
    (solver, target)
    for target, solvers in algorithms_by_problem
    for solver in solvers
]

# Each algorithm is typed as dtbox.Algorithm and linked to its problem via dtbox.solves.
for algorithm, problem in algorithms:
    abox.add((algorithm, RDF.type, dtbox.Algorithm))
    abox.add((algorithm, dtbox.solves, problem))

## Implementations and Components

In [5]:
from implementations.knime import implementations, components

def _link_counterparts(entries, graph):
    """Call ``add_counterpart_relationship(graph)`` on every entry, in order."""
    for entry in entries:
        entry.add_counterpart_relationship(graph)


# Each collection is processed in two passes: first every entry is added to the
# graph, then the counterpart relationships are added (presumably so that all
# entries exist before they are linked -- confirm in implementations.knime).
for implementation in implementations:
    print(f'Adding implementation {implementation.name}')
    implementation.add_to_graph(abox)
_link_counterparts(implementations, abox)

for component in components:
    print(f'Adding component {component.name}')
    component.add_to_graph(abox)
_link_counterparts(components, abox)

Adding implementation Partitioning
Adding implementation Decision Tree Learner
Adding implementation Decision Tree Predictor
Adding implementation Normalizer (PMML)
Adding implementation Normalizer Apply (PMML)
Adding implementation SVM Learner
Adding implementation SVM Predictor
Adding implementation Missing Value
Adding implementation Missing Value (Applier)
Adding component Random Relative Train-Test Split
Adding component Random Absolute Train-Test Split
Adding component Top K Relative Train-Test Split
Adding component Top K Absolute Train-Test Split
Adding component Decision Tree Learner
Adding component Decision Tree Predictor
Adding component Min-Max Scaling
Adding component Z-Score Scaling
Adding component Decimal Scaling
Adding component Normalizer Applier
Adding component Polynomial SVM Learner
Adding component HyperTangent SVM Learner
Adding component RBF SVM Learner
Adding component SVM Predictor
Adding component Drop Rows with Missing Values
Adding component Mean Imputation

## Models

In [6]:
# Classes declared below as subclasses of dd.Model.
models = [
    dd.SVMModel,
    dd.DecisionTreeModel,
    dd.NormalizerModel,
    dd.MissingValueModel,
]

# dd.Model itself is declared a subclass of dtbox.Data first, followed by one
# subclass axiom per entry of `models`.
model_axioms = [(dd.Model, RDFS.subClassOf, dtbox.Data)]
model_axioms.extend((model_class, RDFS.subClassOf, dd.Model) for model_class in models)

for axiom in model_axioms:
    abox.add(axiom)

## Store

In [7]:
# Write the populated graph as Turtle; the path is relative to the working
# directory the notebook is run from.
abox.serialize(destination='../ontologies/abox.ttl', format='turtle')

<Graph identifier=N403c8bbf5e8f4cd6b1cf549b36ad025e (<class 'rdflib.graph.Graph'>)>