In [1]:
from rdflib import Graph, Namespace, URIRef, Literal, BNode
from rdflib.namespace import RDF, RDFS, OWL
from owlrl import DeductiveClosure, OWLRL_Semantics
from intension import Intension
from tqdm import tqdm
import json, os, random

In [2]:
# Models to evaluate. Each entry pairs a model identifier with the number of
# queries submitted per batch call. Commented-out pairs are candidates that are
# excluded from the current run.
MODELS = [
    {"model_name": name, "batch_size": size}
    for name, size in (
        # ("gpt-3.5-turbo", 50),
        ("gpt-4o-2024-05-13", 50),
        ("gpt-4o-mini-2024-07-18", 50),
        # ("gpt-4-0125-preview", 50),
        ("mistralai/Mistral-7B-Instruct-v0.3", 50),
        # ("claude-3-5-sonnet-20240620", 1),
        # ("mistralai/Mixtral-8x7B-Instruct-v0.1", 50),
        # ("claude-3-opus-20240229", 1),
        # ("meta-llama/Meta-Llama-3-70B-Instruct", 50),
        ("claude-3-haiku-20240307", 1),
    )
]

In [3]:
# Upper bound on the number of inferred triples drawn into the test set; when
# fewer testable inferred triples exist, all of them are used without sampling.
TEST_TRIPLES_SAMPLE_SIZE = 100

In [4]:
def pp_node(graph, node):
    """Pretty-print a single RDF term for display.

    Args:
        graph: Graph whose namespace manager is used to normalize URIs.
        node: The term to render.

    Returns:
        The result of the namespace manager's ``normalizeUri`` for a
        ``URIRef``, the N3 form for a ``Literal``, and ``str(node)`` for any
        other kind of term.
    """
    if isinstance(node, URIRef):
        return graph.namespace_manager.normalizeUri(node)
    if isinstance(node, Literal):
        return node.n3()
    return str(node)

In [5]:
def is_testable_triple(triple):
    """Decide whether a triple may be used as a test query.

    A triple qualifies when its subject is a ``URIRef`` and its object is not
    a ``BNode``; the predicate is not inspected.

    Args:
        triple: A ``(subject, predicate, object)`` 3-tuple.

    Returns:
        ``True`` if the triple qualifies, ``False`` otherwise.
    """
    subject, _predicate, obj = triple
    if not isinstance(subject, URIRef):
        return False
    return not isinstance(obj, BNode)

In [6]:
# Define vrd: namespace
VRD = Namespace("http://www.semanticweb.org/nesy4vrd/ontologies/vrd_world#")

# Single source of truth for the ontology location (used for both the raw text
# and the parsed graph). The file is parsed as Turtle below.
VRD_WORLD_OWL_PATH = "data/NeSy4VRD/nesy4vrd_ontology/vrd_world_v1.owl"

# Seed for drawing the test sample, so that a kernel restart reproduces the
# same test set (results of the experiment cell are cached on disk by filename).
TEST_TRIPLES_SAMPLE_SEED = 0


def _new_graph():
    """Return an empty Graph with the vrd:, rdf:, rdfs: and owl: prefixes bound."""
    g = Graph()
    g.bind("vrd", VRD)
    g.bind("rdf", RDF)
    g.bind("rdfs", RDFS)
    g.bind("owl", OWL)
    return g


# Get Turtle serialization of vrd_world_v1.owl (sent verbatim with every query)
with open(VRD_WORLD_OWL_PATH, "r", encoding="utf-8") as ontology_file:
    VRD_WORLD_OWL = ontology_file.read()

# Create a new graph holding the asserted ontology
graph = _new_graph()
graph.parse(VRD_WORLD_OWL_PATH, format='turtle')

# Create another graph to store deductive closure
closure = _new_graph()
closure += graph
DeductiveClosure(OWLRL_Semantics).expand(closure)

# Create a graph to store set difference between closure and graph (i.e. the inferred triples)
inferred = _new_graph()
inferred += (closure - graph)

# Sample from the set of inferred triples, filtering out triples with blank nodes.
# Candidates are sorted by their N3 text first so that the seeded draw does not
# depend on the iteration order of the graph store.
inferred_triples = sorted(
    filter(is_testable_triple, inferred),
    key=lambda triple: tuple(term.n3() for term in triple),
)
if TEST_TRIPLES_SAMPLE_SIZE < len(inferred_triples):
    inferred_triples = random.Random(TEST_TRIPLES_SAMPLE_SEED).sample(
        inferred_triples, TEST_TRIPLES_SAMPLE_SIZE
    )

# Create a graph to store the sample
test = _new_graph()
for triple in inferred_triples:
    test.add(triple)

# Print test triples
for i, (s, p, o) in enumerate(test.triples((None, None, None))):
    print(f'{i:2d}: {pp_node(test, s)} {pp_node(test, p)} {pp_node(test, o)} .')


 0: vrd:onTheRightOf rdfs:subPropertyOf vrd:onTheRightOf .
 1: vrd:Pants rdfs:subClassOf vrd:EngineeredEnvironmentThing .
 2: vrd:TorsoArmsClothing rdfs:subClassOf vrd:Clothing .
 3: vrd:fly rdfs:range vrd:PlayWithCapableThing .
 4: vrd:ComputingDevice owl:equivalentClass vrd:ComputingDevice .
 5: vrd:ProtectiveDevice rdfs:subClassOf vrd:ProtectiveDevice .
 6: vrd:CookingPot owl:sameAs vrd:CookingPot .
 7: vrd:WasteBin rdfs:subClassOf owl:Thing .
 8: vrd:Cabinet rdfs:subClassOf owl:Thing .
 9: vrd:Image rdfs:subClassOf owl:Thing .
10: vrd:Person rdfs:subClassOf owl:Thing .
11: vrd:Engine owl:sameAs vrd:Engine .
12: vrd:Kite rdfs:subClassOf owl:Thing .
13: vrd:Cart owl:sameAs vrd:Cart .
14: vrd:feed rdfs:range owl:Thing .
15: vrd:eat rdfs:domain vrd:Animal .
16: vrd:NaturalLandscapeFeature owl:equivalentClass vrd:NaturalLandscapeFeature .
17: vrd:HelmetCase owl:sameAs vrd:HelmetCase .
18: vrd:use rdfs:range vrd:EngineeredEnvironmentThing .
19: vrd:RidableThing rdfs:subClassOf vrd:Ridabl

In [7]:
# One query per test triple: the pretty-printed subject, predicate and object,
# plus the full ontology text under "graph".
queries = [
    dict(
        s=pp_node(test, subj),
        p=pp_node(test, pred),
        o=pp_node(test, obj),
        graph=VRD_WORLD_OWL,
    )
    for subj, pred, obj in test.triples((None, None, None))
]

In [8]:
# Run every configured model over the queries and cache its results as JSON.
# A model whose results file already exists is skipped, so re-running the cell
# only pays for models that have not been evaluated yet.
for model in MODELS:
    model_name = model["model_name"]
    batch_size = model["batch_size"]
    # Only the part after the last "/" is used, so organisation-prefixed model
    # names do not introduce extra directories.
    filename = f'experiments/nesy4vrd/{model_name.split("/")[-1]}-owl-inf.json'
    if os.path.isfile(filename):
        print(f'{model_name:36}: EXISTS')
        continue

    # Make sure the output directory exists before spending time on API calls,
    # so a finished run cannot be lost at write time.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    results = []
    batches = [queries[i:i + batch_size] for i in range(0, len(queries), batch_size)]
    intension = Intension(model=model_name)
    for batch in tqdm(batches, desc=f'{model_name:36}', total=len(batches)):
        response = intension.chain.batch(batch)
        for result in response:
            # Each result is expected to carry a "text" dict with "rationale"
            # and "answer"; flatten those to top-level keys and record the model.
            result["model"] = model_name
            result["rationale"] = result["text"]["rationale"]
            result["answer"] = result["text"]["answer"]
            result.pop("text")
            # Drop the ontology text so it is not repeated in every saved record.
            result.pop("graph")
        results.extend(response)

    # Context manager guarantees the file is flushed and closed.
    with open(filename, "w") as results_file:
        json.dump(results, results_file)

  warn_deprecated(
gpt-4o-2024-05-13                   : 100%|██████████| 2/2 [01:21<00:00, 40.65s/it]
gpt-4o-mini-2024-07-18              : 100%|██████████| 2/2 [01:05<00:00, 32.91s/it]
claude-3-haiku-20240307             : 100%|██████████| 100/100 [05:11<00:00,  3.11s/it]
