In [42]:
import json
import pandas as pd
import kuzu
import numpy as np
import hashlib

In [43]:
# Name of the on-disk Kuzu database that holds the ontology graph; the
# database is opened from ./<db_name> relative to the working directory.
db_name = "onto_schub"

In [44]:
# Open the Kuzu database located at ./<db_name> and create the single
# connection that every query cell below passes to kuzu_execute().
# buffer_pool_size is 1024**3 -- presumably bytes, i.e. 1 GiB; confirm
# against the Kuzu documentation for the installed version.
db = kuzu.Database(f'./{db_name}', buffer_pool_size=1024**3)
conn = kuzu.Connection(db)

In [67]:
def kuzu_execute(query, conn):
    """Run ``query`` on ``conn`` and return the distinct result rows.

    Args:
        query: Query text handed unchanged to ``conn.execute``.
        conn: Connection object exposing ``execute(query)`` whose result
            provides ``get_as_df()``.

    Returns:
        The result DataFrame with fully duplicated rows removed. The index of
        the surviving rows is left as-is (it is not renumbered).
    """
    result_frame = conn.execute(query).get_as_df()
    unique_rows = result_frame.drop_duplicates()
    return unique_rows

In [46]:
def get_node_props(props = ["id", "name", "definition", "version", "source"], alias = "o"):
    """Build the projection list for a RETURN clause.

    Each property becomes ``<alias>.<prop> as <prop>`` so the resulting
    columns carry the bare property name instead of the aliased one.

    Args:
        props: Property names to project, in output order.
        alias: Query variable the properties are read from.

    Returns:
        The projections joined by ``", "`` (empty string when ``props`` is empty).
    """
    return ", ".join(f"{alias}.{column} as {column}" for column in props)

## Basic access queries

### Get terms

In [47]:
# Example term used by the query cells below. The by-id lookup further down
# returns this id under several sources/versions, so the source and version
# here are what pin the traversal queries to a single node.
onto_id = "CL:0002052"
osource = "Mondo Disease Ontology"
oversion = "v2022-03-01"

In [54]:
def to_string_params(props):
    """Render a dict as the inside of a Cypher property map.

    Example: ``{"id": "CL:1", "source": "X"}`` -> ``id: 'CL:1', source: 'X'``.

    Every value is converted with ``str()`` and emitted as a single-quoted
    string literal. Backslashes and single quotes inside a value are
    backslash-escaped so that a value such as ``Crohn's disease`` can no
    longer terminate the literal early and break (or alter) the query it is
    interpolated into.

    Args:
        props: Mapping of property name -> value to match on. Keys are
            inserted verbatim and must be valid property names.

    Returns:
        The ``key: 'value'`` pairs joined by ``", "``; an empty string for an
        empty mapping.
    """
    def _quote(value):
        # Escape the backslash first so the backslashes added for quotes
        # are not themselves doubled afterwards.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    return ", ".join(f"{key}: {_quote(value)}" for key, value in props.items())

### Get terms by id

In [68]:
# Match every OntoTerm node carrying this id, with no constraint on source or
# version, and return the default property columns for each match.
params = {"id": onto_id}
query = f"MATCH (s:OntoTerm {{ {to_string_params(params)} }}) RETURN {get_node_props(alias='s')}"
kuzu_execute(query, conn)

Unnamed: 0,id,name,definition,version,source
0,CL:0002052,Fraction D precursor B cell,"A pre-B cell that is pre-BCR-negative, and the...",v2022-02-21,UBERON
1,CL:0002052,Fraction D precursor B cell,"A pre-B cell that is pre-BCR-negative, and the...",v2022-03-01,Mondo Disease Ontology
2,CL:0002052,Fraction D precursor B cell,"A pre-B cell that is pre-BCR-negative, and the...",v3.39.1,Experimental Factor Ontology
3,CL:0002052,fraction D precursor B cell,"A pre-B cell that is pre-BCR-negative, and the...",v2023-04-20,Cell Ontology


### Get terms by id and source

In [69]:
# Same lookup, additionally filtered by source; the version is left
# unconstrained.
params = {"id": onto_id, "source": osource}
query = f"MATCH (s:OntoTerm {{ {to_string_params(params)} }}) RETURN {get_node_props(alias='s')}"
kuzu_execute(query, conn)

Unnamed: 0,id,name,definition,version,source
0,CL:0002052,Fraction D precursor B cell,"A pre-B cell that is pre-BCR-negative, and the...",v2022-03-01,Mondo Disease Ontology


### Get descendants

In [71]:
# Start from the term pinned by id + source + version and follow between 1
# and 20 outgoing `child` relationships, returning the terms that are reached.
# NOTE(review): the rows displayed for this cell include very general terms
# (e.g. "anatomical entity", "localization"), which look like ancestors rather
# than descendants -- confirm the direction/meaning of `child` in the schema.
params = {"id": onto_id, "source": osource, "version":oversion}
query = f"MATCH (o:OntoTerm {{ {to_string_params(params)} }}) - [:child*1..20] -> (r:OntoTerm) RETURN {get_node_props(alias='r')}"
kuzu_execute(query, conn)

Unnamed: 0,id,name,definition,version,source
0,GO:0002009,morphogenesis of an epithelium,The process in which the anatomical structures...,v2022-03-01,Mondo Disease Ontology
108,GO:0051179,localization,"Any process in which a cell, a substance, or a...",v2022-03-01,Mondo Disease Ontology
124,CARO:0000000,anatomical entity,,v2022-03-01,Mondo Disease Ontology
592,CARO:0000003,connected anatomical structure,,v2022-03-01,Mondo Disease Ontology
1278,CARO:0000006,material anatomical entity,,v2022-03-01,Mondo Disease Ontology
...,...,...,...,...,...
218257,GO:0007498,mesoderm development,The process whose specific outcome is the prog...,v2022-03-01,Mondo Disease Ontology
218323,NCBITaxon:8287,Sarcopterygii,,v2022-03-01,Mondo Disease Ontology
218558,GO:0007399,nervous system development,The process whose specific outcome is the prog...,v2022-03-01,Mondo Disease Ontology
218678,GO:0007398,ectoderm development,The process whose specific outcome is the prog...,v2022-03-01,Mondo Disease Ontology


### Get parents

In [83]:
# Start from the term pinned by id + source + version and follow between 1
# and 20 `parent` relationships that point into it, returning the terms those
# paths originate from.
# NOTE(review): the first displayed rows match those of the `child` traversal
# in the previous section -- confirm the two relationship types are meant to
# yield different sets. The depth is capped at 20 hops; anything further away
# is not returned.
params = {"id": onto_id, "source": osource, "version":oversion}
query = f"MATCH (o:OntoTerm {{ {to_string_params(params)} }}) <- [:parent*1..20] - (r:OntoTerm) RETURN {get_node_props(alias='r')}"
kuzu_execute(query, conn)

Unnamed: 0,id,name,definition,version,source
0,GO:0002009,morphogenesis of an epithelium,The process in which the anatomical structures...,v2022-03-01,Mondo Disease Ontology
108,GO:0051179,localization,"Any process in which a cell, a substance, or a...",v2022-03-01,Mondo Disease Ontology
124,CARO:0000000,anatomical entity,,v2022-03-01,Mondo Disease Ontology
592,CARO:0000003,connected anatomical structure,,v2022-03-01,Mondo Disease Ontology
1278,CARO:0000006,material anatomical entity,,v2022-03-01,Mondo Disease Ontology
...,...,...,...,...,...
218492,GO:0010467,gene expression,The process in which a gene's sequence is conv...,v2022-03-01,Mondo Disease Ontology
218503,PATO:0000586,increased size,A size quality which is relatively high.,v2022-03-01,Mondo Disease Ontology
218558,GO:0007399,nervous system development,The process whose specific outcome is the prog...,v2022-03-01,Mondo Disease Ontology
218678,GO:0007398,ectoderm development,The process whose specific outcome is the prog...,v2022-03-01,Mondo Disease Ontology
