In [None]:
pip install rdflib_neo4j

# Import the ontology into Neo4j using rdflib

In [None]:
from google.colab import userdata
from rdflib import Graph, Namespace
from rdflib_neo4j import (
    HANDLE_MULTIVAL_STRATEGY,
    HANDLE_VOCAB_URI_STRATEGY,
    Neo4jStore,
    Neo4jStoreConfig,
)

# Connection settings for the target Neo4j database; the URL, user and password
# are read from Colab secrets instead of being written into the notebook.
auth_data = {
    'uri': userdata.get('NEO4J_URL'),
    'database': "neo4j",
    'user': userdata.get('NEO4J_USR'),
    'pwd': userdata.get('NEO4J_PWD'),
}

# Custom namespace prefixes handed to the store configuration.
prefixes = {'mv': Namespace('neo4j://graph.schema#')}

# (prefix, local name) pairs passed as `multival_props_names` below.
multival_props = [("rdfs", "label")]

# Store configuration: shortened vocabulary URIs, batched writes, and the ARRAY
# multi-value strategy applied to the properties listed in `multival_props`.
config = Neo4jStoreConfig(
    auth_data=auth_data,
    custom_prefixes=prefixes,
    handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.SHORTEN,
    batching=True,
    handle_multival_strategy=HANDLE_MULTIVAL_STRATEGY.ARRAY,
    multival_props_names=multival_props,
)

# An rdflib Graph backed by the Neo4j store: parsing into it writes to the database.
graph_store = Graph(store=Neo4jStore(config=config))

# Another basic example ontology that can be loaded instead:
# https://raw.githubusercontent.com/jbarrasa/goingmeta/refs/heads/main/session30/ontos/contract.ttl
graph_store.parse(
    "https://raw.githubusercontent.com/jbarrasa/goingmeta/refs/heads/main/session15/onto/complete-movies-goingmeta.ttl",
    format="ttl",
)
# The argument is rdflib's `commit_pending_transaction` flag.
graph_store.close(True)

# Check the Ontology's Requirement Fitness using Competency Questions

In [None]:
import os, json, textwrap, sys
from typing import List
from openai import OpenAI  # pip install openai

def format_cq_block(questions: List[str]) -> str:
    """Render the questions as a numbered list (starting at 1), one per line.

    Returns an empty string when `questions` is empty.
    """
    return "\n".join(
        f"{number}. {question}"
        for number, question in enumerate(questions, start=1)
    )

def evaluate_ontology_against_cq(
    ontology_text: str,
    questions: List[str],
    model: str = None,
) -> dict:
    """Ask an OpenAI model how well an ontology supports a list of competency questions.

    The ontology text and the numbered questions are sent in one request that
    asks for JSON output constrained by the ``CQEval`` schema defined below.

    Args:
        ontology_text: The ontology serialised as text (e.g. Turtle).
        questions: Competency questions to evaluate; they are numbered from 1
            in the prompt.
        model: Model name. When falsy (including the default ``None``), the
            ``OPENAI_MODEL`` environment variable is used, falling back to
            ``"gpt-4o-mini-2024-07-18"``.

    Returns:
        The decoded JSON response. The requested schema has the keys
        ``overall_score``, ``per_cq`` and ``global_suggestions``.

    Raises:
        json.JSONDecodeError: If the response text is not valid JSON.

    Note:
        The client is created with no explicit credentials, so it is expected
        to pick up the API key from the environment.
    """
    client = OpenAI()
    # An explicit argument wins; otherwise use the environment override or the default.
    model = model or os.getenv("OPENAI_MODEL", "gpt-4o-mini-2024-07-18")

    # JSON schema for the structured output requested from the model.
    schema = {
        "name": "CQEval",
        "strict": True,
        "schema": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "overall_score": {"type": "number", "minimum": 0, "maximum": 1},
                "per_cq": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "additionalProperties": False,
                        "properties": {
                            "question": {"type": "string"},
                            "score": {"type": "number", "minimum": 0, "maximum": 1},
                            "reasoning": {"type": "string"},
                            "suggestions": {
                                "type": "array",
                                "items": {"type": "string"}
                            },
                            "cypher_patterns": {
                                "type": "array",
                                "items": {"type": "string"}
                            }
                        },
                        "required": ["question", "score", "reasoning", "suggestions", "cypher_patterns"]
                    }
                },
                "global_suggestions": {
                    "type": "array",
                    "items": {"type": "string"}
                }
            },
            "required": ["overall_score", "per_cq", "global_suggestions"]
        }
    }

    # "1-3" is plain ASCII: the previous text contained a mis-encoded en dash
    # that was sent to the model verbatim.
    system_instructions = textwrap.dedent("""\
        You are an ontology QA assistant.
        Task: Given an OWL/RDFS ontology (as text) and a list of competency questions (CQs),
        judge how well the ontology *could* support answering each CQ.
        -- Scoring (per CQ) --
        1.0 = ontology clearly models required classes/properties/relationships to answer the CQ via query
        0.5 = partially modeled; answerable only with extra conventions or minor refactoring
        0.0 = not supported / critical modeling gaps (missing classes/relations/properties)
        Return concise reasoning and 1-3 suggestions per CQ, plus optional Cypher patterns to test.
        Keep output terse and actionable.
    """)

    # Assembled line by line instead of dedenting an f-string: the interpolated
    # ontology and question text contain unindented lines, which makes
    # textwrap.dedent a no-op and leaves the template lines indented.
    user_payload = (
        "ONTOLOGY (OWL/RDFS text, possibly truncated):\n"
        "---\n"
        f"{ontology_text}\n"
        "---\n"
        "\n"
        "COMPETENCY QUESTIONS:\n"
        f"{format_cq_block(questions)}\n"
    )

    resp = client.responses.create(
        model=model,
        input=[
            {"role": "system", "content": system_instructions},
            {"role": "user", "content": user_payload},
        ],
        text={
            "format": {
                "type": "json_schema",
                **schema
            }
        },
    )

    # Prefer an already-parsed payload if the response object exposes one.
    parsed = getattr(resp, "parsed", None)
    if parsed is not None:
        return parsed

    # Otherwise decode the text: the aggregated `output_text` when present and
    # non-empty, else the text of the first content part of the first output item.
    out_text = getattr(resp, "output_text", None) or resp.output[0].content[0].text
    return json.loads(out_text)


# Small inline Turtle ontology used as the evaluation input. It declares the
# classes ex:Person and ex:Mother (a subclass of ex:Person) and the object
# properties ex:hasParent and ex:hasChild (declared inverse of each other),
# both with domain and range ex:Person.
ontology_text = """

@prefix ex: <http://example.org/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix owl:  <http://www.w3.org/2002/07/owl#> .
@prefix xsd:  <http://www.w3.org/2001/XMLSchema#> .

ex:Person a owl:Class ;
  rdfs:label "Person" .

ex:Mother a owl:Class ;
  rdfs:subClassOf ex:Person ;
  rdfs:label "Mother" .

ex:hasParent a owl:ObjectProperty ;
  rdfs:domain ex:Person ;
  rdfs:range  ex:Person ;
  rdfs:label "has parent" ;
  rdfs:comment "Relates a child to a parent" .

ex:hasChild a owl:ObjectProperty ;
  rdfs:domain ex:Person ;
  rdfs:range  ex:Person ;
  owl:inverseOf ex:hasParent ;
  rdfs:label "has child" .

"""


# Example competency questions (CQs) scored against `ontology_text`.
# The first three concern the parent/child and Mother/Person modeling; the last
# asks about diseases and pathogens, for which `ontology_text` declares no
# classes or properties.
questions = [
    "Can we retrieve all parents of a given person?",
    "Can we infer that every Mother is a Person?",
    "Can we list all people who have at least two children?",
    "Can we find all diseases and their causative pathogens?"
]

# Imported in this cell too, so the evaluation does not depend on the Neo4j
# import cell having been run first.
from google.colab import userdata

# evaluate_ontology_against_cq builds its OpenAI client without an explicit key,
# so the key stored in Colab secrets is exported through the environment.
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

# Score the sample ontology against the example questions and pretty-print the JSON result.
result = evaluate_ontology_against_cq(ontology_text, questions)
print(json.dumps(result, indent=2))

