In [1]:
from pathlib import Path
# Ontology file that is parsed as the starting content of the graph.
ontologySourcePath = Path("mlPhysOnto.ttl")

# YAML input that is read to obtain the values inserted into the graph.
r30Path = Path("../master-database-files/master-thesis/r30-evaluation/referenced_papers/parameters.yaml")

# Destination of the serialized graph. Its folder is created up front
# (including missing parents) so the final write does not fail on a missing directory.
ontologyTargetPath = Path("../master-database-files/master-experimental/populate_knowledge_graph_with_r30/ontology.ttl")
ontologyTargetPath.parent.mkdir(exist_ok=True, parents=True)

In [2]:
# Load the r30 yaml file.
import yaml
# Decode explicitly as UTF-8: without an encoding argument read_text() falls
# back to the platform default, which can mis-decode non-ASCII characters.
# safe_load is used, so the file can only produce plain Python data types.
r30Values = yaml.safe_load(r30Path.read_text(encoding="utf-8"))

In [3]:
from rdflib import Graph, Namespace, Literal, URIRef

In [35]:
# Namespaces used when building triples: r30ns for the subjects/objects created
# by the helper functions, mlpns for the classes and relations they refer to.
mlpns = Namespace("http://quantsimulant.de/owl/mlPhysOnto#")
r30ns = Namespace("http://quantsimulant.de/rdf/r30#")

# Start from the existing ontology file; everything else is added on top of it.
graph = Graph()
graph.parse(source=str(ontologySourcePath), format="turtle")

# Register the "r30" prefix for the r30 namespace on the graph.
graph.bind(prefix="r30", namespace=r30ns)

In [36]:
def getPaperURI(paperString):
    """Return the second space-separated field of ``paperString``.

    Callers treat that field as the URI of the paper (presumably the entries
    look like "<label> <uri>" -- confirm against the YAML file).

    Consecutive spaces produce empty fields, so "a  b" yields "".

    Raises:
        IndexError: if ``paperString`` contains no space at all.
    """
    # Only the field at index 1 is needed, so the split may stop after it.
    fields = paperString.split(" ", 2)
    return fields[1]
def getPaperIdentifier(paperURI):
    """Derive a node-name fragment from a paper URI.

    The fragment is the last "/"-separated segment of ``paperURI`` with every
    "." replaced by "-". Trailing slashes are ignored, so a URI written as
    ".../1902.09914/" yields the same identifier as ".../1902.09914" instead
    of an empty string (which would merge all such papers into one node).

    Args:
        paperURI: URI string of the paper.

    Returns:
        The identifier string; empty only if the URI has no non-slash content.
    """
    lastSegment = paperURI.rstrip("/").rpartition("/")[2]
    return lastSegment.replace(".", "-")
def addNode(name, type):
    """Add the individual ``r30ns[name]`` to the global ``graph``.

    Two rdf:type triples are inserted: one typing the individual as
    ``mlpns[type]`` and one typing it as owl:NamedIndividual.

    Args:
        name: local name of the individual inside the r30 namespace.
        type: local name of its class inside the mlPhysOnto namespace.
    """
    individual = r30ns[name]
    rdfType = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
    typeObjects = (
        mlpns[type],
        URIRef("http://www.w3.org/2002/07/owl#NamedIndividual"),
    )
    for typeObject in typeObjects:
        graph.add((individual, rdfType, typeObject))
def addObjectProperty(node1, relation, node2):
    """Link two r30 individuals in the global ``graph``.

    Inserts the triple (r30ns[node1], mlpns[relation], r30ns[node2]).

    Args:
        node1: local name of the subject individual.
        relation: local name of the relation in the mlPhysOnto namespace.
        node2: local name of the object individual.
    """
    subject = r30ns[node1]
    predicate = mlpns[relation]
    target = r30ns[node2]
    graph.add((subject, predicate, target))
def addDataProperty(node, relation, value):
    """Attach a literal value to an r30 individual in the global ``graph``.

    Inserts the triple (r30ns[node], mlpns[relation], Literal(value)).

    Args:
        node: local name of the subject individual.
        relation: local name of the relation in the mlPhysOnto namespace.
        value: Python value that is wrapped in an rdflib ``Literal``.
    """
    triple = (r30ns[node], mlpns[relation], Literal(value))
    graph.add(triple)

In [37]:
# Individual that every SignalValueClassificationApproximation created in the
# per-model loop points to via "targetSignalValueClassificationFunction".
addNode("PhysicsBasedTopQuarkClassification", "PhysicsBasedSignalValueClassificationFunction")

# Collect the URI of every paper cited anywhere in the YAML data
# (model -> property -> value -> list of paper strings). A comprehension keeps
# the loop variables out of the notebook namespace; the previous loop bound a
# global named `property`, shadowing the builtin for the rest of the session.
# `or {}` / `or ()` tolerate YAML keys that are present but empty (parsed as None).
paperSet = {
    getPaperURI(paperEntry)
    for modelContent in r30Values.values()
    for reportedValues in (modelContent or {}).values()
    for citingPapers in (reportedValues or {}).values()
    for paperEntry in citingPapers or ()
}

# One Paper individual per distinct URI, carrying the URI as a data property.
for paper in paperSet:
    paperNode = "paper-" + getPaperIdentifier(paper)
    addNode(paperNode, "Paper")
    addDataProperty(paperNode, "hasPaperURI", paper)


def _addReportedValues(reportedValues, nodePrefix, nodeType, ownerNode, ownerRelation, modelKey, nameNode):
    """Add one value individual per reported value and link it to its papers.

    Args:
        reportedValues: mapping of value -> list of paper strings.
        nodePrefix: prefix of the generated node names ("<prefix>-<model>-<value>").
        nodeType: ontology class of the generated value individuals.
        ownerNode: individual that owns the values.
        ownerRelation: relation from ``ownerNode`` to each value individual.
        modelKey: model name as used inside node names.
        nameNode: Name individual of the model; it is also linked to every paper.
    """
    for value, papers in reportedValues.items():
        # Dots are replaced so that e.g. a float value does not put "." into the node name.
        valueNode = nodePrefix + "-" + modelKey + "-" + str(value).replace(".", "-")
        addNode(valueNode, nodeType)
        addObjectProperty(ownerNode, ownerRelation, valueNode)
        addDataProperty(valueNode, "hasValue", value)
        for paper in papers or ():
            paperNode = "paper-" + getPaperIdentifier(getPaperURI(paper))
            addObjectProperty(valueNode, "mentionedInPaper", paperNode)
            addObjectProperty(nameNode, "mentionedInPaper", paperNode)


for modelName, content in r30Values.items():
    # Node names are built from the model name with "." replaced by "-".
    modelKey = modelName.replace(".", "-")

    parameterFormatNode = "TrainedMLMParameterFormat-" + modelKey
    parametersNode = "TrainedMLMParameters-" + modelKey
    trainedModelNode = "TrainedSignalValueClassificationMLM-" + modelKey
    approximationNode = "SignalValueClassificationApproximation-" + modelKey
    evaluationNode = "R30Evaluation-" + modelKey
    dataNode = "Data-" + modelKey
    nameNode = "Name-" + modelKey

    # Individuals describing this model and its evaluation.
    addNode(parameterFormatNode, "TrainedMLMParameterFormat")
    addNode(parametersNode, "TrainedMLMParameters")
    addNode(trainedModelNode, "TrainedSignalValueClassificationMLM")
    addNode(approximationNode, "SignalValueClassificationApproximation")
    addNode(evaluationNode, "R30Evaluation")
    addNode(dataNode, "Data")
    addNode(nameNode, "Name")

    # NOTE(review): the stored name is the dot-replaced key, not the original
    # YAML key -- kept as is; confirm whether the unmodified name is wanted here.
    addDataProperty(nameNode, "hasValue", modelKey)

    # Relations between the individuals of this model.
    addObjectProperty(parametersNode, "hasTrainedMLMParametersFormat", parameterFormatNode)
    addObjectProperty(trainedModelNode, "hasTrainedMLMParameters", parametersNode)
    addObjectProperty(approximationNode, "functionUsedForSignalValueClassification", trainedModelNode)
    addObjectProperty(evaluationNode, "r30EvaluationOf", approximationNode)
    addObjectProperty(evaluationNode, "datasetUsedForR30Evaluation", dataNode)
    addObjectProperty(approximationNode, "targetSignalValueClassificationFunction", "PhysicsBasedTopQuarkClassification")
    addObjectProperty(trainedModelNode, "hasName", nameNode)

    # `or {}` also covers keys that exist in the YAML but are empty (None),
    # which a plain .get(key, {}) default does not.
    modelContent = content or {}
    _addReportedValues(
        modelContent.get("r30") or {},
        "R30Value", "R30Value",
        evaluationNode, "hasR30Value",
        modelKey, nameNode,
    )
    _addReportedValues(
        modelContent.get("param") or {},
        "Parameter", "ParameterCount",
        parameterFormatNode, "hasParameterCount",
        modelKey, nameNode,
    )

In [38]:
# Write the graph to the target file in Turtle syntax.
graph.serialize(destination=str(ontologyTargetPath), format="turtle")

<Graph identifier=N1f0e69fed0be473daa40f678f7ee7d91 (<class 'rdflib.graph.Graph'>)>