# In [8]:
from rdflib import Graph, Namespace
import csv
import re

# Matches every character that is not an ASCII letter or digit.
_NON_ALNUM = re.compile(r"[^a-zA-Z0-9]")


def clean_uri(uri):
    """Reduce an RDF term to an alphanumeric token for use as a Neo4j label.

    Keeps only the text after the last "/" and then drops every character
    outside ``[a-zA-Z0-9]``.

    Args:
        uri: A term yielded by iterating the graph. It must provide a
            ``split`` method (assumed to be a ``str`` subclass -- TODO
            confirm for every term type present in the data).

    Returns:
        The cleaned token as a plain ``str``. The result is empty when the
        last path segment contains no ASCII letters or digits (for example
        when the term ends with "/").
    """
    return _NON_ALNUM.sub("", str(uri.split("/")[-1]))


# Load RDF graph
graph = Graph()
graph.parse("extracted_triples.ttl", format="turtle")

# Define namespaces
# NOTE(review): `obo` is not referenced anywhere in the visible code.
obo = Namespace("http://purl.obolibrary.org/obo/")

# Extract triples
# Every subject, predicate and object is reduced with clean_uri().
# NOTE(review): distinct terms can collapse to the same token (e.g. terms
# that differ only in punctuation or in the path before the last "/"), and
# for URIs with a "#" fragment the text before the "#" in the last path
# segment is kept as part of the token -- confirm this is acceptable.
triples = [(clean_uri(s), clean_uri(p), clean_uri(o)) for s, p, o in graph]

# Write nodes.csv (all unique entities)
# Every subject and object name becomes a node; predicates do not.
nodes = set()
for s, _, o in triples:
    nodes.add(s)
    nodes.add(o)

# Assign each node name its integer ID exactly once. Sorting makes the IDs
# reproducible between runs (set iteration order is not), and the dict gives
# constant-time lookups when the relationships are written below.
node_ids = {name: node_id for node_id, name in enumerate(sorted(nodes))}

with open("nodes.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["id:ID", "name"])
    # Dicts preserve insertion order, so rows come out in ascending ID order.
    writer.writerows([node_id, name] for name, node_id in node_ids.items())

# Write relationships.csv
# NOTE(review): the middle column is headed "type"; if this file is meant
# for `neo4j-admin import`, the relationship type column is normally headed
# ":TYPE" -- confirm against how the import is invoked before changing it.
with open("relationships.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow([":START_ID", "type", ":END_ID"])

    # One row per triple: subject ID, predicate name, object ID. The IDs are
    # the same ones written to nodes.csv above.
    writer.writerows([node_ids[s], p, node_ids[o]] for s, p, o in triples)