## Install required packages
Run the following command in a terminal:

       pip install -r requirements_score.txt

In [8]:
import networkx as nx
import obonet
import inflect
import re
import requests

# URL of the Cell Ontology (CO) OBO file
CO_URL = 'http://purl.obolibrary.org/obo/CL.obo'
OBO_FILE_PATH = 'oboNet/cl.obo' ## set this to the path of your local OBO file; it can be downloaded from the link above
def load_ontology(url):
    """Read the OBO ontology found at ``url`` with obonet and return the graph."""
    return obonet.read_obo(url)

# Shared inflect engine; search_ontology uses it to singularize cell-type names.
inflector = inflect.engine()

def search_ontology(cell_name: str, ontology='cl') -> "tuple[str | None, str | None]":
    """Resolve a cell-type name to an ontology id and label.

    The name is lower-cased, singularized and stripped of a leading list
    marker such as ``1.`` or ``(2).``. It is first compared against the
    ``name`` of every node in the module-level ``graph``; if nothing matches,
    the ontology search API is queried and its first hit is used.

    Args:
        cell_name: free-text cell-type name, e.g. ``"T cells"``.
        ontology: ontology code sent to the search API; it is not used for
            the local graph lookup.

    Returns:
        ``(id, label)`` of the match, or ``(None, None)`` when the name is
        empty, nothing is found, or the API request fails.
    """
    lowered = cell_name.strip().lower()
    if not lowered:
        # Nothing to look up; also avoids handing inflect an empty word.
        return None, None

    # singular_noun returns a falsy value when the word is already singular.
    normalized_name = inflector.singular_noun(lowered) or lowered
    normalized_name = re.sub(r"^\(?\d+\)?\.", "", normalized_name).strip()
    if not normalized_name:
        # The input consisted only of a list marker.
        return None, None

    for node_id, data in graph.nodes(data=True):
        if 'name' in data:
            # Singularize and normalize the graph's 'name' the same way.
            node_name = data['name'].lower()
            label = inflector.singular_noun(node_name) or node_name
            if label == normalized_name:
                return node_id, label  # id and label (name) of the matching node

    # No matching name in the graph: fall back to the OLS search API.
    # Passing the query via ``params`` lets requests URL-encode it, so names
    # containing '+', '&' or '#' (e.g. "cd4+ t cell") are sent intact.
    try:
        response = requests.get(
            "https://www.ebi.ac.uk/ols/api/search",
            params={"q": normalized_name, "ontology": ontology},
            timeout=30,  # do not hang forever on an unresponsive server
        )
    except requests.RequestException as exc:
        # Report network failures the same way as HTTP errors below.
        print(f"Error: {exc}")
        return None, None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None, None

    docs = response.json()['response']['docs']  # parse the payload once
    if not docs:
        return None, None
    best = docs[0]
    # .get: tolerate a hit that lacks one of the fields instead of raising.
    return best.get('obo_id'), best.get('label')

# Load the Cell Ontology from CO_URL into the module-level `graph` that search_ontology reads
graph = load_ontology(CO_URL)


In [9]:
def calculate_difference(graph: nx.Graph, clid_1: str, clid_2: str) -> int:
    """Return the shortest-path length from ``clid_1`` to ``clid_2`` in ``graph``.

    Args:
        graph: graph to search.
        clid_1: id of the start node.
        clid_2: id of the end node.

    Returns:
        The path length reported by networkx, or -1 when either id is
        ``None``, is not a node of ``graph``, or no path connects the two.
    """
    # networkx interprets a None source/target as "all nodes" and would
    # return a dict or iterator instead of an int, so reject missing ids here.
    if clid_1 is None or clid_2 is None:
        return -1
    try:
        return nx.shortest_path_length(graph, source=clid_1, target=clid_2)
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        # Only the expected lookup failures map to -1; anything else
        # (including KeyboardInterrupt) propagates.
        return -1
def calculate_difference_name(graph: nx.Graph, type_1: str, type_2: str) -> int:
    """Return the distance between two cell types given by name.

    Both names are resolved to ids with ``search_ontology`` (which is called
    without ``graph``), and the ids are then passed to
    ``calculate_difference``.

    Args:
        graph: graph in which the distance is measured.
        type_1: name of the first cell type.
        type_2: name of the second cell type.

    Returns:
        The result of ``calculate_difference`` for the resolved ids, or -1
        when either name cannot be resolved to an id.
    """
    clid_1, _ = search_ontology(type_1)
    clid_2, _ = search_ontology(type_2)
    # An unresolved name yields None; report it as "not found" rather than
    # passing None on as a node id.
    if clid_1 is None or clid_2 is None:
        return -1
    return calculate_difference(graph, clid_1, clid_2)


### Usage 
Use `calculate_difference` to get the distance between two CL ids.\
Use `calculate_difference_name` to get the distance between two cell types given by name.\
Note that the broader cell type should be the last argument (`clid_2` / `type_2`).\
The result is -1 if no path is found or a node is not found.


In [None]:
print(calculate_difference(graph, 'CL:0002250', 'CL:0009016'))
print(calculate_difference(graph,  'CL:0009016','CL:0002250'))
# CL:0002250 = intestinal crypt stem cell
# CL:0009016 = intestinal crypt stem cell of large intestine
# The broader type (i.e. the LLM annotation) should be passed as the last argument.
print(calculate_difference_name(graph, 'T cells', 'Cytotoxic T cells'))
print(calculate_difference_name(graph, 'Cytotoxic T cells', 'T cells'))
# "T-helper cells" is not a name in the graph, so the lookup falls back to a request to the ontology search API.
print(calculate_difference_name(graph, 'T-helper cells', 'T cells'))


-1
1
-1
3
3
