In [6]:
from backend.wmg.data.rollup import rollup_across_cell_type_descendants
import owlready2
import json
import tiledb
from backend.wmg.data.ontology_labels import ontology_term_label, ontology_term_id_labels
import pandas as pd
import numpy as np

def traverse(node):
    """Recursively convert an ontology class into a nested, JSON-serialisable dict.

    Each dict carries the node's raw name under ``"id"``, a display name, and
    four cell-count figures. The figures are read from the module-level series
    ``id_to_name``, ``cell_counts_df``, ``cell_counts_df_norm``,
    ``cell_counts_df_rollup`` and ``cell_counts_df_rollup_norm``, which must be
    defined before this function is called.

    Parameters
    ----------
    node
        Object exposing ``.name`` (underscore-separated ID such as
        ``CL_0000000``) and ``.subclasses()``.

    Returns
    -------
    dict
        Node record. A ``"children"`` key (list of child records) is present
        only when the node has at least one subclass.
    """
    # The count tables are keyed by colon-separated IDs ("CL:0000000").
    term_id = node.name.replace('_',':')

    def _value_or_zero(series, cast):
        # Terms absent from a table are reported as zero.
        return cast(series[term_id] if term_id in series else 0)

    record = {
        "id": node.name,
        # Fall back to the ID itself when no label is known.
        "name": id_to_name[term_id] if term_id in id_to_name else term_id,
        "n_cells_rollup": _value_or_zero(cell_counts_df_rollup, int),
        "n_cells_rollup_normalized": _value_or_zero(cell_counts_df_rollup_norm, float),
        "n_cells": _value_or_zero(cell_counts_df, int),
        "n_cells_normalized": _value_or_zero(cell_counts_df_norm, float),
    }

    child_nodes = list(node.subclasses())
    if child_nodes:
        # Leaves deliberately carry no "children" key at all.
        record["children"] = [traverse(child) for child in child_nodes]
    return record
def _descendants(cell_type):
    """Return the colon-separated IDs of every descendant of ``cell_type``.

    The term is looked up in the module-level ``ontology`` by its OBO IRI.
    The result is whatever ``entity.descendants()`` yields (owlready2
    presumably includes the class itself by default - verify against the
    installed version). If the term is not found, ``[cell_type]`` is returned.
    """
    iri = "http://purl.obolibrary.org/obo/" + cell_type.replace(":", "_")
    entity = ontology.search_one(iri=iri)
    if not entity:
        return [cell_type]
    return [cls.name.replace("_", ":") for cls in entity.descendants()]

def _ancestors(cell_type):
    """Return the colon-separated IDs of every ancestor of ``cell_type``.

    The term is looked up in the module-level ``ontology`` by its OBO IRI.
    The class named ``Thing`` is excluded from the result. If the term is not
    found, ``[cell_type]`` is returned.
    """
    iri = "http://purl.obolibrary.org/obo/" + cell_type.replace(":", "_")
    entity = ontology.search_one(iri=iri)
    if not entity:
        return [cell_type]
    return [
        cls.name.replace("_", ":")
        for cls in entity.ancestors()
        if cls.name != "Thing"
    ]

def _children(cell_type):
    """Return the colon-separated IDs of the direct subclasses of ``cell_type``.

    The term is looked up in the module-level ``ontology`` by its OBO IRI.
    If the term is not found, ``[cell_type]`` is returned.
    """
    iri = "http://purl.obolibrary.org/obo/" + cell_type.replace(":", "_")
    entity = ontology.search_one(iri=iri)
    if not entity:
        return [cell_type]
    return [cls.name.replace("_", ":") for cls in entity.subclasses()]

def _parents(cell_type):
    """Return the colon-separated IDs of the direct named parents of ``cell_type``.

    The term is looked up in the module-level ``ontology`` by its OBO IRI and
    its ``is_a`` list is filtered down to named classes
    (``owlready2.ThingClass``), excluding the class named ``Thing``.

    Parameters
    ----------
    cell_type : str
        Colon-separated term ID, e.g. ``"CL:0000000"``.

    Returns
    -------
    list of str
        Parent term IDs. Empty when the term is not present in the ontology.
    """
    cell_type_iri = cell_type.replace(":", "_")
    entity = ontology.search_one(iri=f"http://purl.obolibrary.org/obo/{cell_type_iri}")
    if entity is None:
        # An unknown term has no parents. Do NOT fall back to [cell_type] as
        # the sibling helpers do: a term listed as its own parent would make
        # the recursive path search in `dfs` loop forever.
        return []
    return [
        parent.name.replace("_", ":")
        for parent in entity.is_a
        if isinstance(parent, owlready2.ThingClass) and parent.name != "Thing"
    ]

def dfs(parents, end, start, node=None, path=None, all_paths=None):
    """Collect every path that leads from ``end`` up to ``start`` via parent links.

    Parameters
    ----------
    parents : dict
        Maps a node to the list of its parents. Nodes missing from the
        mapping are treated as having no parents.
    end
        Node the search starts from (the first element of every path).
    start
        Node the search is trying to reach (the last element of every path).
    node, path
        Recursion state; leave both as ``None`` on the initial call.
    all_paths : list, optional
        Output accumulator. Every complete path found (ordered ``end`` ->
        ``start``) is appended to it. Pass your own list to receive the
        results; when omitted a fresh list is used for this call only.

    Returns
    -------
    list or None
        ``path`` when ``node`` already equals ``start``, otherwise ``None``.
        On the initial call that means ``[end]`` is returned only when
        ``end == start`` - and in that case nothing is appended to
        ``all_paths``.
    """
    if all_paths is None:
        # A mutable default ([]) would be shared across calls and keep
        # accumulating paths from earlier, unrelated searches.
        all_paths = []

    if path is None and node is None:
        path = [end]
        node = end

    if node == start:
        return path

    for parent in parents.get(node, []):
        full_path = dfs(parents, end, start, node=parent, path=path + [parent], all_paths=all_paths)
        # Only a direct hit on `start` returns a path; deeper hits have
        # already been recorded by the recursive call itself.
        if full_path:
            all_paths.append(full_path)
    return None
            
            
def truncate_graph(graph,visited_nodes_in_paths):
    """Prune a nested node dict in place so only selected nodes stay expanded.

    A node whose colon-separated ID is in ``visited_nodes_in_paths`` keeps its
    ``"children"`` and the ID is removed from the set, so any later occurrence
    of the same ID is collapsed. Every other node has its ``"children"`` key
    dropped. The function then recurses into whatever children remain.

    Both arguments are mutated; nothing is returned.
    """
    term_id = graph['id'].replace('_',':')
    if term_id in visited_nodes_in_paths:
        # Consume the ID: only the first occurrence met in the traversal
        # stays expanded.
        visited_nodes_in_paths.remove(term_id)
    else:
        graph.pop('children', None)

    for subtree in graph.get("children",[]):
        truncate_graph(subtree, visited_nodes_in_paths)
    

# Build ontology tree JSON

In [7]:
# Load the per-cell-type counts and collapse them to one row per cell type.
cell_counts = tiledb.open('prod-snapshot/cell_counts')
cell_counts_df = cell_counts.df[:]
cell_counts_df = cell_counts_df.groupby('cell_type_ontology_term_id').sum(numeric_only=True).reset_index()

# Every CL term known to the label table, whether or not it has any cells.
all_cell_types = [{k: ontology_term_label(k)} for k in ontology_term_id_labels if k.startswith('CL:')]
all_cell_types_ids = [list(i.keys())[0] for i in all_cell_types]

# Add a zero-count row for each CL term that has no cells in the snapshot.
# The observed IDs are collected into a set once so the membership test does
# not rescan the whole column for every term.
observed_cell_type_ids = set(cell_counts_df['cell_type_ontology_term_id'])
to_attach = pd.DataFrame()
to_attach['cell_type_ontology_term_id']=[i for i in all_cell_types_ids if i not in observed_cell_type_ids]
to_attach['n_cells']=0

cell_counts_df = pd.concat([cell_counts_df,to_attach],axis=0)
# NOTE(review): presumably the rollup adds each term's descendant counts to
# the term itself - confirm against rollup_across_cell_type_descendants.
cell_counts_df_rollup = rollup_across_cell_type_descendants(cell_counts_df).set_index('cell_type_ontology_term_id')['n_cells']
cell_counts_df = cell_counts_df.set_index('cell_type_ontology_term_id')['n_cells']

# Normalise each series by its own maximum.
cell_counts_df_rollup_norm = cell_counts_df_rollup/cell_counts_df_rollup.max()
cell_counts_df_norm = cell_counts_df/cell_counts_df.max()
id_to_name = pd.Series(index=cell_counts_df.index,data=[ontology_term_label(i) for i in cell_counts_df.index])


ontology = owlready2.get_ontology("https://github.com/obophenotype/cell-ontology/releases/latest/download/cl-basic.owl")
ontology.load()

root_node = ontology.world["http://purl.obolibrary.org/obo/CL_0000000"]

a = traverse(root_node)

# Use a context manager so the file is flushed and closed deterministically.
with open('ontologyRawTree.json','w') as raw_tree_file:
    json.dump(a, raw_tree_file)

In [8]:
# Index every CL term by each of the four ontology relations. The relations
# are evaluated one after another, in the order listed.
all_children, all_parents, all_descendants, all_ancestors = (
    {term_id: relation(term_id) for term_id in all_cell_types_ids}
    for relation in (_children, _parents, _descendants, _ancestors)
)

## What are the visualization rules implied by the code below?

1. All ontology graph depictions per cell type are static. Users cannot alter the structure of the displayed ontology by collapsing or expanding nodes.
1. The root node of a cell type's ontology view is the most recent common ancestor (MRCA) of all instances of that node in the unrolled tree.
1. Note that if a node has only one parent, that parent will be its MRCA regardless of the MRCA's ancestry. We are not treating each instance of a cell type term in the unrolled tree as distinct. If we were, then a node's MRCA would need to consider all possible paths to get to that node from the root of the tree.
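1. The direct children of every node that lies on a path from the MRCA root to the cell type term, including the cell type term itself, are displayed. Each such node is expanded only at its **first** occurrence in the depth-first traversal of the tree; any later occurrence of the same node is shown without children.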

In [None]:
# Build one truncated ontology view per cell type, rooted at the deepest
# ancestor shared by every path from the ontology root to that cell type.
all_graphs_per_cell_type = {}
for end_node in all_cell_types_ids:
    print(end_node)

    # All root -> end_node paths (dfs yields them end -> root, hence the flip).
    start_node = 'CL:0000000'
    all_paths = []
    dfs(all_parents,end_node,start_node,all_paths=all_paths)
    all_paths = [i[::-1] for i in all_paths]

    # Reset per iteration. Without this, a term whose only ancestor is itself
    # (e.g. the root) never assigns `mrca`, so the loop either raises
    # NameError or silently reuses the previous cell type's MRCA.
    mrca = end_node
    if all_paths:
        ancestors = all_ancestors[end_node]
        scores = []
        for ancestor in ancestors:
            # Position of the ancestor in each path; -1 when it is absent.
            nums = [path.index(ancestor) if ancestor in path else -1 for path in all_paths]
            if ancestor == end_node or -1 in nums:
                # The term itself, or an ancestor missing from some path, can
                # never be the common ancestor.
                scores.append(-1)
            else:
                # Mean depth over all paths: larger means closer to end_node.
                scores.append(sum(nums)/len(nums))
        # Keep the end_node fallback when no ancestor lies on every path.
        if scores and max(scores) >= 0:
            mrca = ancestors[np.argmax(scores)]

    mrca_obo=mrca.replace(":","_")
    root_node = ontology.world[f"http://purl.obolibrary.org/obo/{mrca_obo}"]
    a_mrca = traverse(root_node)

    # All MRCA -> end_node paths.
    # NOTE: when mrca == end_node, dfs records no path, so the set below is
    # empty and the view collapses to the single node without children.
    start_node = mrca
    all_paths = []
    dfs(all_parents,end_node,start_node,all_paths=all_paths)
    all_paths = [i[::-1] for i in all_paths]

    # truncate_graph consumes this set, so it is rebuilt for every cell type.
    visited_nodes_in_paths = {node for path in all_paths for node in path}
    truncate_graph(a_mrca,visited_nodes_in_paths)
    all_graphs_per_cell_type[end_node] = a_mrca

In [13]:
# Write the per-cell-type views; the context manager guarantees the file is
# flushed and closed even if serialisation fails part-way.
with open('allOntologyViewsPerCellType.json','w') as views_file:
    json.dump(all_graphs_per_cell_type, views_file)