In [95]:
import csv, urllib.parse, types
from owlready2 import get_ontology, World
from owlready2.prop import ObjectPropertyClass 
import re

In [96]:
PURL_IMPORTS = [
    'https://raw.githubusercontent.com/UTHealth-Ontology/HPV-PO/refs/heads/main/imports/iao-extract.owl',
    'http://purl.obolibrary.org/obo/bfo/2019-08-26/bfo.owl',
    'https://raw.githubusercontent.com/UTHealth-Ontology/HPV-PO/refs/heads/NCITmap/imports/OPMI-extract.owl',
    'https://raw.githubusercontent.com/UTHealth-Ontology/HPV-PO/refs/heads/NCITmap/imports/omrse-extract.owl',
    'https://raw.githubusercontent.com/UTHealth-Ontology/HPV-PO/refs/heads/NCITmap/imports/NCIT_OBO-extract.owl',
    'https://raw.githubusercontent.com/UTHealth-Ontology/HPV-PO/refs/heads/NCITmap/imports/gsso-extract.owl'
]

In [97]:
# CSV_FILE     = "new_hpvpo_terms.csv"
CSV_FILE     = "new_hpvpo_terms_names.csv"
CSV_FILE_1     = "new_object_properties.csv"
TARGET_IRI   = "http://purl.org/uth/ontology/hpv-po.owl#"
OUTPUT_FILE  = "hpv-po.owl"
ID_PREFIX    = "HPVPO_"
DIGITS       = 7

In [98]:
world = World()
hpvpo = world.get_ontology(TARGET_IRI)
for purl in PURL_IMPORTS:
    hpvpo.imported_ontologies.append(world.get_ontology(purl).load())

In [99]:
id_regex = re.compile(rf"{ID_PREFIX}(\d{{{DIGITS}}})$")

def next_hpvp_id():
    nums = []
    entities = (
        list(hpvpo.classes()) +
        list(hpvpo.object_properties()) +
        list(hpvpo.data_properties()) +
        list(hpvpo.annotation_properties()) +
        list(hpvpo.individuals())
    )
    for ent in entities:
        m = id_regex.search(ent.name)
        if m:
            nums.append(int(m.group(1)))
    nxt = max(nums) + 1 if nums else 1
    return f"{ID_PREFIX}{nxt:0{DIGITS}}"

def find_class_by_label(label_text):
    return world.search_one(label = label_text)

# def local_name(iri):
#     frag = urllib.parse.urlparse(iri).fragment
#     return frag if frag else iri.rsplit("/", 1)[-1]

In [100]:
# New: label based class creation
with hpvpo:
    with open(CSV_FILE, newline="", encoding="utf-8-sig") as fh:
        reader = csv.DictReader(fh)
        for row in reader:
            parent_label = row["parent_label"].strip()
            new_label    = row["entity_label"].strip()

            parent_cls = find_class_by_label(parent_label)
            if parent_cls is None:
                print(f"⚠ Parent with label '{parent_label}' not found — skipping '{new_label}'")
                continue

            new_name = next_hpvp_id()
            new_cls  = types.new_class(new_name, (parent_cls,))
            new_cls.label = [new_label]

            print(f"✔  {new_label}  ({new_cls.name})  subclassOf {parent_cls.label[0]}")

✔  persona  (HPVPO_0000001)  subclassOf information content entity
✔  knowledge  (HPVPO_0000002)  subclassOf information content entity
✔  belief  (HPVPO_0000003)  subclassOf information content entity
✔  healthcare related behavior  (HPVPO_0000004)  subclassOf behavior
✔  communication channel  (HPVPO_0000005)  subclassOf realizable entity
✔  health information technology utilization behavior  (HPVPO_0000006)  subclassOf healthcare related behavior
✔  healthcare funding behavior  (HPVPO_0000007)  subclassOf healthcare related behavior
✔  healthcare information seeking behavior  (HPVPO_0000008)  subclassOf healthcare related behavior
✔  healthcare policy compliance behavior  (HPVPO_0000009)  subclassOf healthcare related behavior
✔  healthcare policy implementation behavior  (HPVPO_0000010)  subclassOf healthcare related behavior
✔  healthcare provider discussion behavior  (HPVPO_0000011)  subclassOf healthcare related behavior
✔  healthcare recommendation behavior  (HPVPO_0000012)  su

In [None]:
# OLD: IRI based class creation
with hpvpo:
    with open(CSV_FILE, newline="", encoding="utf-8-sig") as fh:
        reader = csv.DictReader(fh)
        for row in reader:
            iri         = row["CIRI"].strip()
            parent_iri  = row["PIRI"].strip()
            label       = row["concept_name"].strip()

            parent_cls = world[parent_iri]
            # print("parent_cls:", parent_cls)
            if parent_cls is None:
                print(f"⚠  Parent not found, skipping → {iri}: PARENT {parent_iri}")
                continue


            local_name = urllib.parse.urlparse(iri).fragment or iri.rsplit("/", 1)[-1]


            if world[iri]:
                print(f"•  {iri} already present, skipping")
                continue

            new_cls = types.new_class(local_name, (parent_cls,))
            if label:
                new_cls.label = [label]   

                 
            print(f"✔  Added {label}  (subclass of {parent_cls.label})")

✔  Added persona  (subclass of ['information content entity', 'information content entity', 'information content entity', 'information content entity'])
✔  Added knowledge  (subclass of ['information content entity', 'information content entity', 'information content entity', 'information content entity'])
✔  Added belief  (subclass of ['information content entity', 'information content entity', 'information content entity', 'information content entity'])
✔  Added healthcare related behavior  (subclass of ['behavior'])
✔  Added communication channel  (subclass of ['realizable entity', 'realizable entity', 'realizable entity'])
✔  Added health information technology utilization behavior  (subclass of ['healthcare related behavior'])
✔  Added healthcare funding behavior  (subclass of ['healthcare related behavior'])
✔  Added healthcare information seeking behavior  (subclass of ['healthcare related behavior'])
✔  Added healthcare policy compliance behavior  (subclass of ['healthcare rela

In [None]:
# OLD: IRI based property creation
with hpvpo:
    with CSV_FILE.open(newline="", encoding="utf-8") as fh:
        reader = csv.DictReader(fh)
        for row in reader:
            iri        = row["iri"].strip()
            parent_iri = row["parent_iri"].strip()
            domain_iri = row["domain_iri"].strip()
            range_iri  = row["range_iri"].strip()
            label      = row.get("label", "").strip()

            # look up parent property
            parent_prop = world[parent_iri]
            if parent_prop is None or not isinstance(parent_prop, ObjectPropertyClass):
                print(f"⚠ parent property not found or not object property → {parent_iri}")
                continue

            # look up domain / range classes
            domain_cls = world[domain_iri]
            range_cls  = world[range_iri]
            if domain_cls is None or range_cls is None:
                print(f"⚠ domain/range class not found for {iri}")
                continue

            # skip if already exists
            if world[iri]:
                print(f"• {iri} already present, skipping")
                continue

            # create the new sub-property
            prop_name = local_name(iri)
            new_prop = types.new_class(prop_name,(parent_prop,),
                kwds={"iri": iri}
            )

            # attach domain & range axioms (lists accepted by Owlready2)
            new_prop.domain = [domain_cls]
            new_prop.range  = [range_cls]

            if label:
                new_prop.label = [label]

            print(f"✔ added property {label or prop_name}  subPropertyOf {parent_prop.name}")


In [101]:
# New: label based property creation
with hpvpo:
    with open(CSV_FILE_1, newline="", encoding="utf-8-sig") as fh:
        reader = csv.DictReader(fh)
        for row in reader:
            property_label = row["property_label"].strip()
            superproperty_label = row["superproperty_label"].strip()
            domain = row["domain"].strip()
            range = row["range"].strip()

            # print(f"Processing property: {property_label}  subPropertyOf {superproperty_label}  domain {domain}  range {range}")

            # look up parent property
            parent_prop = find_class_by_label(superproperty_label)
            if parent_prop is None or not isinstance(parent_prop, ObjectPropertyClass):
                print(f"⚠ parent property not found or not object property → {superproperty_label}")
                continue

            # look up domain / range classes
            domain_cls = find_class_by_label(domain)
            range_cls  = find_class_by_label(range)
            if domain_cls is None or range_cls is None:
                print(f"⚠ domain/range class not found for {property_label}")
                continue

            # create the new sub-property
            prop_name = next_hpvp_id()
            # print(f"Creating property: {prop_name}  subPropertyOf {parent_prop.name}")
            new_prop = types.new_class(prop_name,(parent_prop,))

            # attach domain & range axioms (lists accepted by Owlready2)
            new_prop.domain = [domain_cls]
            new_prop.range  = [range_cls]
            new_prop.label = [property_label]

            print(f"✔ added property {property_label}  subPropertyOf {parent_prop.name}")

✔ added property has status  subPropertyOf IAO_0000136
✔ added property has socioeconomic status  subPropertyOf HPVPO_0000063
✔ added property has marital status  subPropertyOf HPVPO_0000063
✔ added property has administrative region  subPropertyOf IAO_0000136
✔ added property has socioeconomic attribute  subPropertyOf IAO_0000136
✔ added property has ethnic identity  subPropertyOf IAO_0000136
✔ added property has racial identity  subPropertyOf IAO_0000136
✔ added property has gender identity  subPropertyOf IAO_0000136
✔ added property has personal information  subPropertyOf IAO_0000136
✔ added property has personal attribute  subPropertyOf IAO_0000136
✔ added property has medical history  subPropertyOf IAO_0000136
✔ added property has living arrangement  subPropertyOf IAO_0000136
✔ added property has belief  subPropertyOf IAO_0000136
✔ added property has knowledge  subPropertyOf IAO_0000136
✔ added property has disposition  subPropertyOf IAO_0000136
✔ added property has motivation  su

In [102]:
hpvpo.save(file=OUTPUT_FILE, format="rdfxml")
print(f"\nOntology saved to {OUTPUT_FILE}")


Ontology saved to hpv-po.owl
