In [9]:
import os
import json
import random

from rdflib import Graph
from openai import OpenAI
from dotenv import load_dotenv

from constants import SPARQL_GENERATION_PROMPT, TEST_DATA_PATH, TRAINED_MODEL_ID_PATH


In [10]:
# Load variables from a .env file into the process environment (python-dotenv).
# Kept as a bare expression so the notebook displays the call's return value.
# NOTE(review): presumably this supplies the API credentials that OpenAI()
# picks up later in the notebook — confirm against the .env contents.
load_dotenv()

True

In [8]:
# Location of the Turtle file: "iMKG.ttl" inside the current working directory.
ttl_file = os.getcwd() + "/iMKG.ttl"

# Start from an empty RDF graph and populate it from the Turtle file.
g = Graph()
# Left as the bare last expression so the notebook displays the value
# returned by parse().
g.parse(ttl_file, format="turtle")

<Graph identifier=Na29a73d7a56d4f70912336a0723f2c34 (<class 'rdflib.graph.Graph'>)>

In [11]:
# Create the OpenAI API client with its default configuration.
client = OpenAI()

# Read the trained model ID stored at TRAINED_MODEL_ID_PATH.
# - The `with` block closes the file on exit, so no explicit close() is needed.
# - strip() drops surrounding whitespace (e.g. a trailing newline written by an
#   editor or `echo`), which would otherwise be sent to the API as part of the
#   model name.
with open(TRAINED_MODEL_ID_PATH, "r", encoding="utf-8") as f:
    model_name = f.read().strip()
print(f"Model name: {model_name}")

Model name: ft:gpt-4o-2024-08-06:personal::AJwYBDYo


In [15]:
# Parse the JSON test set stored at TEST_DATA_PATH.
with open(TEST_DATA_PATH, "r", encoding="utf-8") as test_file:
    test_dataset = json.loads(test_file.read())

# Draw exactly one record at random. No seed is set in the visible cells, so
# the chosen record can differ between runs.
(test_data_point,) = random.sample(test_dataset, 1)

# Pretty-print the chosen record for inspection.
pretty_record = json.dumps(test_data_point, indent=4)
print(f"Test data point:\n{pretty_record}")

Test data point:
{
    "qid": "mcqa-928887a6-e67e-11ee-ad3a-58961d663d9c",
    "question": "what film has the same screenwriter as [The Glass Slipper] and [Susan Hayward] was in it?",
    "question_type": "movie_to_writer_to_movie_constraint_actor",
    "topic_entity": [
        "The Glass Slipper",
        "Susan Hayward"
    ],
    "topic_entity_id": [
        "https://www.wikidata.org/entity/Q200482",
        "https://www.wikidata.org/entity/Q4501868",
        "https://www.wikidata.org/entity/Q248837"
    ],
    "answer": [
        "Valley of the Dolls",
        "I'll Cry Tomorrow"
    ],
    "answer_id": [
        "https://www.wikidata.org/entity/Q1170515",
        "https://www.wikidata.org/entity/Q1332776"
    ],
    "sparql": "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT DISTINCT ?label ?uri WHERE { ?rel1 rdfs:label \"written_by\" . ?e1 ?rel1 ?e2 ; rdfs:label \"The Glass Slipper\" . ?uri ?rel1 ?e2 ; ?rel4 ?e4 ; rdfs:label ?label . FILTER (?label != \"The Glass Slip

In [18]:
# Conversation sent to the model: the SPARQL generation prompt as the system
# message, followed by the sampled record's question as the user message.
system_message = {"role": "system", "content": SPARQL_GENERATION_PROMPT}
user_message = {"role": "user", "content": test_data_point["question"]}

# Request a single chat completion from the trained model.
# NOTE(review): temperature=0 and top_p=0 are presumably chosen to make the
# output as repeatable as possible — confirm intent.
response = client.chat.completions.create(
    model=model_name,
    messages=[system_message, user_message],
    temperature=0,
    top_p=0,
)

# Only the first returned choice is used as the generated query text.
first_choice = response.choices[0]
generated_sparql_query = first_choice.message.content
print(f"Generated SPARQL query:\n{generated_sparql_query}")

Generated SPARQL query:
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT DISTINCT ?label ?uri WHERE { ?rel1 rdfs:label "written_by" . ?e1 ?rel1 ?e2 ; rdfs:label "The Glass Slipper" . ?uri ?rel1 ?e2 ; ?rel4 ?e4 ; rdfs:label ?label . FILTER (?label != "The Glass Slipper") ?rel4 rdfs:label "starred_actors" . ?e4 rdfs:label "Susan Hayward" . }


In [20]:
# Execute the generated query and the record's reference query against the
# loaded graph. Collecting rows into sets makes the comparison independent of
# row order and duplicate rows.
generated_results = set(g.query(generated_sparql_query))
sample_results = set(g.query(test_data_point["sparql"]))

# Report whether both queries returned exactly the same set of rows.
outcome_messages = {
    True: "The queries produce identical results.",
    False: "The queries produce different results.",
}
print(outcome_messages[generated_results == sample_results])

The queries produce identical results.
