In [1]:
from rdflib import Graph, Namespace, URIRef, Literal, BNode
from rdflib.namespace import RDF, RDFS, OWL
from owlrl import DeductiveClosure, OWLRL_Semantics
from intension import Intension
from tqdm import tqdm
import json, os, random

In [2]:
# Number of sampled inferred triples intended for the training split.
# Together with TEST_TRIPLES_SAMPLE_SIZE it determines the total number of
# inferred triples drawn from the ontology's deductive closure.
TRAIN_TRIPLES_SAMPLE_SIZE = 100
# Number of sampled inferred triples intended for the test split.
TEST_TRIPLES_SAMPLE_SIZE = 100

In [3]:
def pp_node(graph, node):
    """Return a printable string for an RDF node.

    Args:
        graph: Graph whose ``namespace_manager`` is used to render IRIs
            (e.g. as ``vrd:Faucet`` when a bound prefix applies).
        node: The node to render.

    Returns:
        ``graph.namespace_manager.normalizeUri(node)`` for a ``URIRef``,
        ``node.n3()`` for a ``Literal``, and ``str(node)`` for anything else.
    """
    # IRIs are rendered through the graph's namespace manager.
    if isinstance(node, URIRef):
        return graph.namespace_manager.normalizeUri(node)
    # Literals are rendered in their N3 form.
    if isinstance(node, Literal):
        return node.n3()
    # Any other node kind falls back to its plain string form.
    return str(node)

In [4]:
def is_testable_triple(triple):
    """Tell whether a triple is eligible for the sampled query set.

    Args:
        triple: A ``(subject, predicate, object)`` 3-tuple.

    Returns:
        ``True`` when the subject is a ``URIRef`` and the object is not a
        ``BNode``; ``False`` otherwise. The predicate is not inspected.
    """
    subject, _predicate, obj = triple
    if not isinstance(subject, URIRef):
        return False
    return not isinstance(obj, BNode)

In [5]:
# Define vrd: namespace
VRD = Namespace("http://www.semanticweb.org/nesy4vrd/ontologies/vrd_world#")

# Location of the Turtle serialization of vrd_world_v1.owl. The same file is
# read as raw text (embedded in every query) and parsed into an rdflib graph.
VRD_WORLD_OWL_PATH = "data/NeSy4VRD/nesy4vrd_ontology/vrd_world_v1.owl"

# Seed for the private RNG used to draw the triple sample, so that re-running
# the notebook regenerates the same train/test data.
SAMPLE_SEED = 42


def make_prefixed_graph():
    """Return a new empty Graph with the vrd:, rdf:, rdfs: and owl: prefixes bound."""
    new_graph = Graph()
    new_graph.bind("vrd", VRD)
    new_graph.bind("rdf", RDF)
    new_graph.bind("rdfs", RDFS)
    new_graph.bind("owl", OWL)
    return new_graph


# Get Turtle serialization of vrd_world_v1.owl (file handle is closed on exit)
with open(VRD_WORLD_OWL_PATH, "r", encoding="utf-8") as ontology_file:
    VRD_WORLD_OWL = ontology_file.read()

# Create a new graph holding the asserted ontology triples
graph = make_prefixed_graph()
graph.parse(VRD_WORLD_OWL_PATH, format='turtle')

# Create another graph to store the deductive closure: start from a copy of the
# asserted triples and let owlrl add the OWL 2 RL entailments to it.
closure = make_prefixed_graph()
closure += graph
DeductiveClosure(OWLRL_Semantics).expand(closure)

# Create a graph to store set difference between closure and graph (i.e. the inferred triples)
inferred = make_prefixed_graph()
inferred += (closure - graph)

# Candidate triples: inferred triples whose subject is an IRI and whose object
# is not a blank node. They are sorted by their N3 form so that the seeded draw
# below does not depend on the iteration order of `inferred`.
inferred_triples = sorted(
    filter(is_testable_triple, inferred),
    key=lambda triple: tuple(term.n3() for term in triple),
)

# Draw at most TRAIN + TEST triples without replacement. When fewer candidates
# exist than requested, all of them are kept (in shuffled order).
sample_size = TRAIN_TRIPLES_SAMPLE_SIZE + TEST_TRIPLES_SAMPLE_SIZE
inferred_triples = random.Random(SAMPLE_SEED).sample(
    inferred_triples, min(sample_size, len(inferred_triples))
)

# Create a graph to store the sample
test = make_prefixed_graph()
for triple in inferred_triples:
    test.add(triple)

# Print triple sample
for i, (s, p, o) in enumerate(test.triples((None, None, None))):
    print(f'{i:2d}: {pp_node(test, s)} {pp_node(test, p)} {pp_node(test, o)} .')


 0: vrd:Faucet rdfs:subClassOf vrd:Device .
 1: vrd:Trees rdfs:subClassOf vrd:Trees .
 2: vrd:Perissodactyla rdfs:subClassOf vrd:NaturalEnvironmentThing .
 3: owl:Nothing rdfs:subClassOf vrd:BoatMotor .
 4: owl:Nothing rdfs:subClassOf vrd:Ramp .
 5: vrd:drive rdfs:domain vrd:Mammal .
 6: vrd:TimeKeepingDevice owl:equivalentClass vrd:TimeKeepingDevice .
 7: rdf:langString rdf:type rdfs:Datatype .
 8: vrd:CarryCapableMammal rdfs:subClassOf vrd:CarryCapableThing .
 9: vrd:Proboscidea owl:sameAs vrd:Proboscidea .
10: xsd:nonPositiveInteger rdf:type rdfs:Datatype .
11: rdf:PlainLiteral owl:sameAs rdf:PlainLiteral .
12: vrd:MixedSportingGood owl:equivalentClass vrd:MixedSportingGood .
13: vrd:Snowboard owl:sameAs vrd:Snowboard .
14: vrd:skateOn rdfs:domain vrd:Homo .
15: vrd:Animal rdfs:subClassOf vrd:NaturalEnvironmentEarthBoundThing .
16: vrd:Snowboard owl:equivalentClass vrd:Snowboard .
17: vrd:playWith owl:equivalentProperty vrd:playWith .
18: <http://www.semanticweb.org/nesy4vrd/ontolog

In [6]:
# Build one query record per sampled triple. Each record carries the printable
# subject, predicate and object plus the full ontology text under "graph".
queries = []
for subj, pred, obj in test.triples((None, None, None)):
    record = {
        "s": pp_node(test, subj),
        "p": pp_node(test, pred),
        "o": pp_node(test, obj),
        "graph": VRD_WORLD_OWL,
    }
    queries.append(record)

In [7]:
# Split the queries: the first TRAIN_TRIPLES_SAMPLE_SIZE records form the
# training set and the remainder form the test set. (Previously the two file
# names were swapped relative to the slices.)
train_queries = queries[:TRAIN_TRIPLES_SAMPLE_SIZE]
test_queries = queries[TRAIN_TRIPLES_SAMPLE_SIZE:]

# Write each split inside a `with` block so the file is flushed and closed.
with open('data/experiment_nesy4vrd_train_set.json', 'w') as train_file:
    json.dump(train_queries, train_file)
with open('data/experiment_nesy4vrd_test_set.json', 'w') as test_file:
    json.dump(test_queries, test_file)