In [7]:
import re
from rdflib import Namespace, Literal, URIRef, Dataset
from rdflib.namespace import RDF, RDFS, SKOS, OWL, DC

# Convert the [Term] stanzas of an UBERON OBO file into SKOS triples stored in
# a named graph, then write the whole dataset out as N-Quads.
obo_file = '/Users/simonsnopko/Desktop/Škola/bakalarska praca/uberon-simple.obo'

# Define necessary namespaces
UBERON = Namespace("http://purl.obolibrary.org/obo/")
BIOREGISTRY = Namespace("https://bioregistry.io/")

# Create a Dataset object and set the graph name
ds = Dataset()
graph_name = URIRef("https://anatomy_ontology.org")
new_g = ds.graph(identifier=graph_name)
new_g.bind("uberon", UBERON)
new_g.bind("bioregistry", BIOREGISTRY)
new_g.bind("skos", SKOS)
# NOTE(review): CURIE is neither defined nor imported in this cell; it has to
# be provided by an earlier cell, otherwise this line raises NameError.
new_g.bind("curie", CURIE)  # Bind CURIE namespace


def _parse_term_stanza(stanza):
    """Parse the body of one ``[Term]`` stanza into ``{tag: [values, ...]}``.

    Tags that occur several times in a stanza (e.g. ``subset``) keep every
    value, in file order. Parsing stops at the first line that opens another
    stanza (a line starting with ``[``, such as ``[Typedef]``) so that tags of
    a following non-term stanza are not attributed to this term.
    """
    tags = {}
    for line in stanza.split('\n'):
        if line.startswith('['):
            break
        key_value = re.split(r':\s+', line, maxsplit=1)
        if len(key_value) == 2:
            key, value = key_value
            tags.setdefault(key, []).append(value)
    return tags


# Read the OBO file (explicit encoding so the result does not depend on the
# platform's default locale)
with open(obo_file, 'r', encoding='utf-8') as f:
    content = f.read()

# Split the content into terms; the first chunk is the file header, which has
# no "id" tag and is therefore skipped below
terms = content.split("[Term]")

# Process each term
for term in terms:
    term_dict = _parse_term_stanza(term.strip())

    # Extract id, name, def (single-valued: the last occurrence wins) and
    # every non-empty subset
    id_ = term_dict.get('id', [None])[-1]
    name = term_dict.get('name', [None])[-1]
    definition = term_dict.get('def', [None])[-1]
    subsets = [subset for subset in term_dict.get('subset', []) if subset]

    # Only terms that carry all four pieces of information are exported
    if not (id_ and name and definition and subsets):
        continue

    # Create URI for the term
    bioregistry_uri = URIRef(BIOREGISTRY + id_.replace('_', ':'))

    # Add triples to the named graph
    new_g.add((bioregistry_uri, SKOS.prefLabel, Literal(name)))
    # NOTE(review): the raw "def" value is stored as-is, i.e. including the
    # surrounding double quotes and any trailing [xref] list from the OBO line.
    new_g.add((bioregistry_uri, SKOS.definition, Literal(definition)))
    # NOTE(review): skos:narrower is meant to link two concepts; using it with
    # a literal subset name looks unintended -- confirm the desired predicate.
    for subset in subsets:
        new_g.add((bioregistry_uri, SKOS.narrower, Literal(subset)))
    new_g.add((bioregistry_uri, SKOS.notation, Literal(id_)))  # Add triple with CURIE number

# Serialize the dataset in N-Quads format
ds.serialize(destination="anatomy created ontology.nq", format="nquads")


<Graph identifier=Nfa8e61bd0c554ef6965f037172b189ff (<class 'rdflib.graph.Dataset'>)>