## Install required packages
Run the following command in a terminal:

       pip install -r requirements_score.txt

In [None]:
from typing import Optional

import inflect
import networkx as nx
import obonet

# URL of the Cell Ontology (CL) OBO file; passed to load_ontology below.
CO_URL = 'http://purl.obolibrary.org/obo/CL.obo'
OBO_FILE_PATH = 'oboNet/cl.obo'  # path to a local copy of the OBO file; it can be downloaded from the URL above
def load_ontology(url):
    """Parse the OBO source at ``url`` with ``obonet.read_obo``.

    Args:
        url: Location of the OBO file, handed to ``obonet.read_obo`` as-is.

    Returns:
        The graph object produced by ``obonet.read_obo``.
    """
    return obonet.read_obo(url)

# Shared inflect engine; lookup_clid_by_name uses it to singularize names
# so that plural and singular spellings compare equal.
inflector = inflect.engine()

def lookup_clid_by_name(graph: nx.Graph, cell_name: str) -> Optional[str]:
    """Return the ID of the ontology term whose name matches ``cell_name``.

    Matching is case-insensitive and ignores plural forms: the query and each
    node's ``name`` attribute are lower-cased and singularized with the
    module-level ``inflector`` before being compared.

    Args:
        graph: Ontology graph whose nodes may carry a ``name`` attribute.
        cell_name: Cell type name to look up, e.g. ``'T cells'``. Leading and
            trailing whitespace is ignored.

    Returns:
        The key of the first node (in graph iteration order) whose normalized
        name equals the normalized query, or ``None`` when nothing matches or
        the query is blank.
    """
    query = cell_name.strip().lower()
    if not query:
        # Nothing to match. This also avoids handing inflect an empty word,
        # which some inflect releases appear to reject (assumption - confirm).
        return None

    # singular_noun() returns False when the word is not recognised as a
    # plural, hence the `or` fallback to the unchanged text.
    normalized_name = inflector.singular_noun(query) or query

    for node, data in graph.nodes(data=True):
        name = data.get('name')
        if not name:
            # Node has no usable name; it can never match a non-blank query.
            continue
        graph_name = name.lower()
        graph_name = inflector.singular_noun(graph_name) or graph_name
        if graph_name == normalized_name:
            return node  # first match wins

    return None

# Load the Cell Ontology graph from CO_URL (parsed by obonet inside load_ontology).
graph = load_ontology(CO_URL)


In [None]:
# Undirected copy of the ontology graph.
# NOTE(review): the example calls in this notebook pass `graph`, not `ugraph` -
# confirm whether this copy is still needed.
ugraph = graph.to_undirected()

def calculate_difference(graph: nx.Graph, clid_1: str, clid_2: str) -> int:
    """Return the shortest-path length from ``clid_1`` to ``clid_2``.

    Args:
        graph: Graph to search; on a directed graph the path is followed from
            ``clid_1`` towards ``clid_2``.
        clid_1: Source node ID.
        clid_2: Target node ID.

    Returns:
        The number of edges on the shortest path, or ``-1`` when either ID is
        ``None`` or not a node of ``graph``, or when no path exists.
    """
    # Reject missing endpoints up front. networkx interprets a None source or
    # target as "all nodes" and would return a mapping of lengths instead of
    # an int, so None must never reach shortest_path_length.
    if clid_1 is None or clid_2 is None:
        return -1
    if clid_1 not in graph or clid_2 not in graph:
        return -1

    try:
        return nx.shortest_path_length(graph, source=clid_1, target=clid_2)
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        # Only the "no route" outcomes map to -1; anything else is a real
        # error and is allowed to propagate.
        return -1
def calculate_difference_name(graph: nx.Graph, type_1: str, type_2: str) -> int:
    """Return the shortest-path length between two cell types given by name.

    Each name is resolved to a term ID with ``lookup_clid_by_name`` and the
    IDs are then passed to ``calculate_difference``.

    Args:
        graph: Ontology graph to search.
        type_1: Name of the source cell type.
        type_2: Name of the target cell type.

    Returns:
        The path length reported by ``calculate_difference``, or ``-1`` when
        either name cannot be resolved to a term ID.
    """
    clid_1 = lookup_clid_by_name(graph, type_1)
    clid_2 = lookup_clid_by_name(graph, type_2)
    if clid_1 is None or clid_2 is None:
        # An unresolved name must not reach the path search: networkx treats
        # a None endpoint as "all nodes" and returns a mapping, not an int.
        return -1
    return calculate_difference(graph, clid_1, clid_2)


### Usage 
Use `calculate_difference` to get the distance between two CL IDs.\
Use `calculate_difference_name` to get the distance between two cell types given by name.\
Note that the broader cell type should be the last argument (`clid_2` or `type_2`).\
The result is -1 if no path is found or a node is not found.


In [4]:
# Distance between the same two terms, queried in both argument orders.
print(calculate_difference(graph, 'CL:0002250', 'CL:0009016'))
print(calculate_difference(graph,  'CL:0009016','CL:0002250'))
# CL:0002250 = intestinal crypt stem cell
# CL:0009016 = intestinal crypt stem cell of large intestine
# The broader type (i.e. the LLM-annotated one) should be the last argument.
print(calculate_difference_name(graph, 'T cells', 'Cytotoxic T cells'))
print(calculate_difference_name(graph, 'Cytotoxic T cells', 'T cells'))

-1
1
-1
3
