In [1]:
from common import *

In [2]:
# Obtain the graph that every later cell adds triples to and that the final
# cell serializes. `get_graph` arrives through the star import from `common`.
# NOTE(review): whether the graph starts empty or pre-populated (e.g. with
# namespace bindings) is not visible from this notebook — confirm in `common`.
abox = get_graph()

## Problems

In [3]:
# Every problem individual declared in the ABox. The first row has no parent in
# `subproblems`; the second and third rows are the children of da.Description
# and da.DataCleaning respectively.
problems = [
    da.Description, da.Explanation, da.Prediction, da.DataCleaning, da.DataManagement,
    da.Classification, da.Clustering, da.AnomalyDetection,
    da.MissingValueManagement, da.DuplicationRemoval, da.Normalization,
]

# (parent problem, [direct sub-problems]) pairs.
subproblems = [
    (
        da.Description,
        [da.Classification, da.Clustering, da.AnomalyDetection],
    ),
    (
        da.DataCleaning,
        [da.MissingValueManagement, da.DuplicationRemoval, da.Normalization],
    ),
]

# Type each listed individual as a dtbox.Problem.
for problem_node in problems:
    abox.add((problem_node, RDF.type, dtbox.Problem))

# The subProblemOf triple points from the child to its parent.
for parent, children in subproblems:
    for child in children:
        abox.add((child, dtbox.subProblemOf, parent))

## Algorithms

In [4]:
# Algorithms grouped under the problem they solve: (problem, [algorithms]).
solvers_by_problem = [
    (da.Clustering, [
        da.KMeans,
        da.DBSCAN,
        da.HierarchicalClustering,
    ]),
    (da.Classification, [
        da.DecisionTree,
        da.RandomForest,
        da.NaiveBayes,
        da.SVM,
        da.KNN,
    ]),
    (da.AnomalyDetection, [
        da.OneClassSVM,
        da.IsolationForest,
        da.LocalOutlierFactor,
    ]),
    (da.MissingValueManagement, [
        da.MeanImputation,
        da.MedianImputation,
        da.ModeImputation,
        da.KNNImputation,
        da.MissingValueRemoval,
    ]),
    # Note the naming: the algorithm is DuplicateRemoval, the problem is
    # DuplicationRemoval.
    (da.DuplicationRemoval, [
        da.DuplicateRemoval,
    ]),
    (da.Normalization, [
        da.MinMaxScaling,
        da.ZScoreScaling,
        da.RobustNormalization,
    ]),
    (da.DataManagement, [
        da.TrainTestSplit,
        da.LabelExtraction,
    ]),
]

# Flat list of (algorithm, problem) pairs, in the same order as the groups above.
algorithms = [
    (algo, solved)
    for solved, algos in solvers_by_problem
    for algo in algos
]

# Each algorithm is typed as a dtbox.Algorithm and linked to the problem it solves.
for algo, solved in algorithms:
    abox.add((algo, RDF.type, dtbox.Algorithm))
    abox.add((algo, dtbox.solves, solved))

## Implementation

In [5]:
from implementations import implementations

# Pass 1: let every implementation write itself into the graph, logging its
# name as it goes.
for impl in implementations:
    print(f'Adding implementation {impl.name}')
    impl.add_to_graph(abox)

# Pass 2: add the counterpart relationships only after the first pass has
# finished for all implementations.
# NOTE(review): presumably the counterpart link needs both sides to be in the
# graph already — confirm against `add_counterpart_relationship`.
for impl in implementations:
    impl.add_counterpart_relationship(abox)

Adding implementation Train-Test Split
Adding implementation Label Extraction
Adding implementation Min-Max Scaling
Adding implementation Min-Max Scaling Applier
Adding implementation Z-Score Scaling
Adding implementation Z-Score Scaling Applier
Adding implementation Decision Tree Learner
Adding implementation Decision Tree Predictor
Adding implementation SVM Learner
Adding implementation SVM Predictor


## Models

In [6]:
# Concrete model classes; each is declared a subclass of dd.Model below.
models = [dd.SVMModel, dd.DecisionTreeModel, dd.ZScoreScalerModel, dd.MinMaxScalerModel]

# dd.Model is itself declared a subclass of dtbox.Data, so every concrete model
# sits below dtbox.Data in the rdfs:subClassOf chain.
base_model = dd.Model
abox.add((base_model, RDFS.subClassOf, dtbox.Data))
for model_class in models:
    abox.add((model_class, RDFS.subClassOf, base_model))

## Store

In [7]:
# Write the assembled ABox as Turtle. The relative path resolves against the
# kernel's working directory. The call is the cell's last expression, so its
# return value (the graph) is echoed as the cell output.
abox.serialize(destination='../ontologies/abox.ttl', format='turtle')

<Graph identifier=N4f5ca2a4f3044ec1bf5edd6bc412dfe0 (<class 'rdflib.graph.Graph'>)>