Let's start by loading the Gene Ontology from the `.obo` file and making sure it forms a valid DAG (directed acyclic graph).

In [None]:
from obonet import read_obo

from networkx import is_directed_acyclic_graph

# Location of the ontology file in OBO format, relative to the working directory.
obo_path = "./dataset/go-basic.obo"

# Parse the OBO file into a networkx graph.
graph = read_obo(obo_path)

# Stop immediately if the parsed graph is not a directed acyclic graph.
if not is_directed_acyclic_graph(graph):
    raise ValueError("Invalid gene ontology network.")

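Next, we'll export the graph to a Neo4j database. Note that the cell below first deletes every node and relationship already stored in the target database, and that creating the edges relies on the APOC plugin (`apoc.create.relationship`) being available on the server.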

In [None]:
import os

from neo4j import GraphDatabase
from tqdm import tqdm


# Connection settings. Each value can be overridden through the matching
# NEO4J_* environment variable so that real credentials never have to be
# written into the notebook; the fallbacks are the original literal values.
uri = os.environ.get("NEO4J_URI", "neo4j://192.168.0.13:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "your_password")
database = os.environ.get("NEO4J_DATABASE", "neo4j")

# Number of nodes (or edges) sent to the server in a single write transaction.
batch_size = 500

# One driver instance is shared by every session opened further down.
driver = GraphDatabase.driver(uri, auth=(username, password))

def execute_query(tx, query, params=None):
    """Run *query* on the transaction *tx* and return the value of ``tx.run``.

    ``params`` is forwarded as the second positional argument only when it is
    truthy; ``None`` or an empty mapping runs the query without parameters.
    """
    if not params:
        return tx.run(query)
    return tx.run(query, params)

# Prepare the target database: first remove every existing node together with
# its relationships, then make sure GOTerm.id is backed by a uniqueness
# constraint. Each statement runs in its own write transaction, in this order.
with driver.session(database=database) as session:
    for statement in (
        "MATCH (n) DETACH DELETE n",
        "CREATE CONSTRAINT go_term_id IF NOT EXISTS FOR (t:GOTerm) REQUIRE t.id IS UNIQUE",
    ):
        session.execute_write(execute_query, statement)

# Materialise the node view once so it can be sliced into fixed-size batches.
nodes_list = list(graph.nodes(data=True))
total_nodes = len(nodes_list)

# Use a single session bound to the configured database -- the same one that
# was cleared and given its constraint earlier in this cell. Without the
# explicit `database=` the writes would go to the server's default database.
with driver.session(database=database) as session:
    for i in tqdm(range(0, total_nodes, batch_size)):
        batch = nodes_list[i:i + batch_size]
        node_batch = []

        for node_id, attributes in batch:
            # The graph's node key is stored as the `id` property.
            props = {"id": node_id}
            for key, value in attributes.items():
                # Flatten list/tuple values into one ';'-separated string.
                if isinstance(value, (list, tuple)):
                    value = ';'.join(map(str, value))
                props[key] = value

            node_batch.append(props)

        # One write transaction per batch: MERGE on the id, then copy every
        # property of the batch entry onto the node.
        session.execute_write(
            execute_query,
            """
            UNWIND $batch AS node
            MERGE (t:GOTerm {id: node.id})
            SET t += node
            """,
            {"batch": node_batch}
        )

edges_list = list(graph.edges(data=True))
total_edges = len(edges_list)

# Per obonet's documentation the ontology is a MultiDiGraph whose edge *key*
# holds the relationship type ("is_a", "part_of", ...), while the edge data
# dict is normally empty. Reading the type only from the data dict would label
# every edge IS_A, so iterate with the keys included.
if graph.is_multigraph():
    keyed_edges = list(graph.edges(keys=True, data=True))
else:
    keyed_edges = [(source, target, None, attributes)
                   for source, target, attributes in edges_list]

# Use a single session bound to the configured database -- the same one the
# nodes are expected to live in.
with driver.session(database=database) as session:
    for i in tqdm(range(0, total_edges, batch_size)):
        batch = keyed_edges[i:i + batch_size]
        edge_batch = []

        for source, target, key, attributes in batch:
            # Priority: explicit 'relationship' attribute, then a string edge
            # key, then the IS_A default. Non-string keys (e.g. networkx's
            # auto-generated integers) carry no type information.
            fallback = key if isinstance(key, str) else 'IS_A'
            rel_type = attributes.get('relationship', fallback)
            if isinstance(rel_type, (list, tuple)):
                rel_type = rel_type[0]

            # Normalise to an upper-case, underscore-separated type name.
            rel_type = str(rel_type).upper().replace(' ', '_')

            edge_data = {
                "source": source,
                "target": target,
                "rel_type": rel_type
            }
            edge_batch.append(edge_data)

        # The relationship type is only known at run time, so the APOC
        # procedure is used to create it; both endpoints are looked up by id.
        session.execute_write(
            execute_query,
            """
            UNWIND $batch AS edge
            MATCH (a:GOTerm {id: edge.source})
            MATCH (b:GOTerm {id: edge.target}) 
            CALL apoc.create.relationship(a, edge.rel_type, {}, b) YIELD rel
            RETURN count(*)
            """,
            {"batch": edge_batch}
        )

# Reached only if every batch above was written without raising.
print("Export complete!")

# Close the driver now that the export is finished.
driver.close()