In [1]:
from rdflib import Graph, Namespace, BNode
from rdflib.namespace import RDF, RDFS, OWL
from owlrl import DeductiveClosure, OWLRL_Semantics
from intension import Intension
from tqdm import tqdm
import pandas as pd
import json, os, random

In [2]:
# Models to evaluate. Each entry gives the model identifier and the number of
# queries grouped into one batch for that model.
# NOTE(review): the Claude entries use a batch size of 1; the reason is not
# visible in this notebook — confirm before changing.
MODELS = [
    # dict(model_name="gpt-3.5-turbo", batch_size=50),
    dict(model_name="gpt-4o-2024-05-13", batch_size=50),
    dict(model_name="gpt-4-0125-preview", batch_size=50),
    dict(model_name="mistralai/Mistral-7B-Instruct-v0.3", batch_size=50),
    dict(model_name="claude-3-5-sonnet-20240620", batch_size=1),
    dict(model_name="mistralai/Mixtral-8x7B-Instruct-v0.1", batch_size=50),
    dict(model_name="claude-3-opus-20240229", batch_size=1),
    # dict(model_name="meta-llama/Meta-Llama-3-70B-Instruct", batch_size=50),
    dict(model_name="claude-3-haiku-20240307", batch_size=1),
]

In [3]:
# Upper bound on the number of inferred triples used as test queries: when more
# blank-node-free inferred triples than this are available, a random sample of
# this size is drawn; otherwise all of them are used.
TEST_TRIPLES_SAMPLE_SIZE = 10

In [4]:
def is_not_blank_node_triple(triple):
    """Return True when neither the subject nor the object of `triple` is a BNode.

    `triple` is unpacked as (subject, predicate, object); the predicate is not
    inspected.
    """
    subject, _predicate, obj = triple
    return not (isinstance(subject, BNode) or isinstance(obj, BNode))

In [5]:
# Define vrd: namespace
VRD = Namespace("http://www.semanticweb.org/nesy4vrd/ontologies/vrd_world#")

# Location of the ontology file; it is both read as raw text and parsed as Turtle.
VRD_WORLD_OWL_PATH = "data/NeSy4VRD/nesy4vrd_ontology/vrd_world_v1.owl"

# Seed for the test-triple sample. Results are cached per model on disk, so the
# sample must be identical across kernel restarts for models to be comparable.
TEST_TRIPLES_SAMPLE_SEED = 42


def _new_bound_graph():
    """Return an empty Graph with the vrd, rdf, rdfs and owl prefixes bound."""
    bound = Graph()
    bound.bind("vrd", VRD)
    bound.bind("rdf", RDF)
    bound.bind("rdfs", RDFS)
    bound.bind("owl", OWL)
    return bound


# Get Turtle serialization of vrd_world_v1.owl (the context manager closes the file)
with open(VRD_WORLD_OWL_PATH, "r", encoding="utf-8") as owl_file:
    VRD_WORLD_OWL = owl_file.read()

# Create a new graph holding the asserted ontology triples
graph = _new_bound_graph()
graph.parse(VRD_WORLD_OWL_PATH, format='turtle')

# Create another graph to store deductive closure
closure = _new_bound_graph()
closure += graph
DeductiveClosure(OWLRL_Semantics).expand(closure)

# Create a graph to store set difference between closure and graph (i.e. the inferred triples)
inferred = _new_bound_graph()
inferred += (closure - graph)

# Sample from the set of inferred triples, filtering out triples with blank nodes.
# The candidates are sorted first so the seeded sample does not depend on the
# graph's iteration order.
inferred_triples = sorted(
    filter(is_not_blank_node_triple, inferred),
    key=lambda triple: tuple(str(term) for term in triple),
)
if TEST_TRIPLES_SAMPLE_SIZE < len(inferred_triples):
    inferred_triples = random.Random(TEST_TRIPLES_SAMPLE_SEED).sample(
        inferred_triples, TEST_TRIPLES_SAMPLE_SIZE
    )

# Create a graph to store the sample (blank-node triples were already filtered out above)
test = _new_bound_graph()
for triple in inferred_triples:
    test.add(triple)

# Print test triples
for i, (s, p, o) in enumerate(test.triples((None, None, None))):
    print(f'{i:2d}: <{s}>, <{p}>, <{o}>')


 0: <http://www.semanticweb.org/nesy4vrd/ontologies/vrd_world#Truck>, <http://www.w3.org/2000/01/rdf-schema#subClassOf>, <http://www.semanticweb.org/nesy4vrd/ontologies/vrd_world#MotionCapableThing>
 1: <http://www.semanticweb.org/nesy4vrd/ontologies/vrd_world#ProtectiveHeadClothing>, <http://www.w3.org/2000/01/rdf-schema#subClassOf>, <http://www.semanticweb.org/nesy4vrd/ontologies/vrd_world#ProtectiveHeadClothing>
 2: <http://www.semanticweb.org/nesy4vrd/ontologies/vrd_world#feed>, <http://www.w3.org/2000/01/rdf-schema#domain>, <http://www.w3.org/2002/07/owl#Thing>
 3: <http://www.semanticweb.org/nesy4vrd/ontologies/vrd_world#HomoPart>, <http://www.w3.org/2002/07/owl#equivalentClass>, <http://www.semanticweb.org/nesy4vrd/ontologies/vrd_world#HomoPart>
 4: <http://www.semanticweb.org/nesy4vrd/ontologies/vrd_world#use>, <http://www.w3.org/2000/01/rdf-schema#domain>, <http://www.semanticweb.org/nesy4vrd/ontologies/vrd_world#NaturalEnvironmentThing>
 5: <http://www.semanticweb.org/nesy4vr

In [6]:
# One query per test triple: the triple's terms as strings, plus the full
# ontology text under "graph".
queries = [
    {
        "s": str(subject),
        "p": str(predicate),
        "o": str(obj),
        "graph": VRD_WORLD_OWL,
    }
    for subject, predicate, obj in test.triples((None, None, None))
]

In [7]:
# Run every configured model over the queries and cache its results as JSON.
# A model whose results file already exists is skipped.
for model in MODELS:
    model_name = model["model_name"]
    batch_size = model["batch_size"]
    # Only the last path segment of the model name is used in the file name
    # (e.g. "mistralai/Mistral-7B-Instruct-v0.3" -> "Mistral-7B-Instruct-v0.3").
    filename = f'experiments/nesy4vrd/{model_name.split("/")[-1]}-owl-inf.json'
    if os.path.isfile(filename):
        print(f'{model_name:36}: EXISTS')
        continue

    results = []
    batches = [ queries[i:i+batch_size] for i in range(0, len(queries), batch_size) ]
    intension = Intension(model=model_name)
    for batch in tqdm(batches, desc=f'{model_name:36}', total=len(batches)):
        response = intension.chain.batch(batch)
        for result in response:
            # Flatten the nested "text" payload into top-level keys.
            text = result.pop("text")
            result["model"] = model_name
            result["rationale"] = text["rationale"]
            result["answer"] = text["answer"]
        results.extend(response)

    # Serialize before opening the file so a serialization error cannot leave a
    # truncated results file that a later run would treat as EXISTS.
    serialized = json.dumps(results)
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, "w") as results_file:
        results_file.write(serialized)

  warn_deprecated(
gpt-4o-2024-05-13                   : 100%|██████████| 1/1 [00:13<00:00, 13.18s/it]
gpt-4-0125-preview                  : 100%|██████████| 1/1 [00:27<00:00, 27.35s/it]


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /Users/bradleyallen/.cache/huggingface/token
Login successful


mistralai/Mistral-7B-Instruct-v0.3  : 100%|██████████| 1/1 [01:09<00:00, 69.55s/it]
claude-3-5-sonnet-20240620          : 100%|██████████| 10/10 [01:24<00:00,  8.40s/it]


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /Users/bradleyallen/.cache/huggingface/token
Login successful


mistralai/Mixtral-8x7B-Instruct-v0.1: 100%|██████████| 1/1 [00:50<00:00, 50.96s/it]
claude-3-opus-20240229              : 100%|██████████| 10/10 [02:58<00:00, 17.85s/it]
claude-3-haiku-20240307             : 100%|██████████| 10/10 [00:30<00:00,  3.09s/it]
