In [3]:
from backend.wmg.data.rollup import rollup_across_cell_type_descendants
import owlready2
import json
import tiledb
from backend.wmg.data.ontology_labels import ontology_term_label, ontology_term_id_labels
import pandas as pd


In [56]:
# Build per-cell-type cell counts, both raw and rolled up, plus max-normalized
# versions and an id -> label lookup. Later cells read these module-level names.

# Read the whole array into memory inside a context manager so the TileDB handle
# is released as soon as the DataFrame has been materialized.
with tiledb.open('prod-snapshot/cell_counts') as cell_counts:
    cell_counts_df = cell_counts.df[:]
# Collapse all other dimensions: one row per cell type, numeric columns summed.
cell_counts_df = cell_counts_df.groupby('cell_type_ontology_term_id').sum(numeric_only=True).reset_index()

# Every known ontology term whose id starts with 'CL:', as single-entry {id: label} dicts.
all_cell_types = [{k: ontology_term_label(k)} for k in ontology_term_id_labels if k.startswith('CL:')]
all_cell_types_ids = [list(i.keys())[0] for i in all_cell_types]

# Cell types absent from the snapshot get an explicit zero-count row so that they
# still take part in the rollup. A set makes each membership test O(1) instead of
# scanning the whole column for every candidate id.
observed_cell_type_ids = set(cell_counts_df['cell_type_ontology_term_id'])
to_attach = pd.DataFrame()
to_attach['cell_type_ontology_term_id'] = [i for i in all_cell_types_ids if i not in observed_cell_type_ids]
to_attach['n_cells'] = 0

cell_counts_df = pd.concat([cell_counts_df, to_attach], axis=0)
# NOTE(review): presumably adds each cell type's descendants' counts onto it —
# confirm against rollup_across_cell_type_descendants.
cell_counts_df_rollup = rollup_across_cell_type_descendants(cell_counts_df).set_index('cell_type_ontology_term_id')['n_cells']
# From here on `cell_counts_df` is the n_cells column indexed by cell type id,
# not the full frame.
cell_counts_df = cell_counts_df.set_index('cell_type_ontology_term_id')['n_cells']

# Scale each set of counts by its own maximum.
cell_counts_df_rollup_norm = cell_counts_df_rollup / cell_counts_df_rollup.max()
cell_counts_df_norm = cell_counts_df / cell_counts_df.max()
# Human-readable label for every cell type id present in the counts.
id_to_name = pd.Series(index=cell_counts_df.index, data=[ontology_term_label(i) for i in cell_counts_df.index])

In [68]:
# Sanity check: a made-up id is not among the rolled-up labels, so lookups keyed
# by node id must guard for absence (membership is tested against the index labels).
"CL:00000asdsad01" in cell_counts_df_rollup.index

False

In [81]:
def _node_payload(node_id):
    """Return the label and cell-count fields shared by every tree node.

    Looks ``node_id`` (colon form, e.g. ``CL:0000000``) up in the module-level
    lookups ``id_to_name``, ``cell_counts_df_rollup``, ``cell_counts_df_rollup_norm``,
    ``cell_counts_df`` and ``cell_counts_df_norm``. Ids missing from a lookup fall
    back to the id itself for the name and to 0 for every count.
    """
    return {
        "name": id_to_name.get(node_id, node_id),
        "n_cells_rollup": int(cell_counts_df_rollup.get(node_id, 0)),
        "n_cells_rollup_normalized": float(cell_counts_df_rollup_norm.get(node_id, 0)),
        "n_cells": int(cell_counts_df.get(node_id, 0)),
        "n_cells_normalized": float(cell_counts_df_norm.get(node_id, 0)),
    }


def traverse(node):
    """Recursively convert an ontology class and its subclasses into nested dicts.

    Args:
        node: object exposing ``name`` (underscore form, e.g. ``CL_0000000``) and
            ``subclasses()`` returning an iterable of child nodes of the same kind.

    Returns:
        dict with keys ``id`` (the node's name, unchanged), ``name``,
        ``n_cells_rollup``, ``n_cells_rollup_normalized``, ``n_cells`` and
        ``n_cells_normalized``. Nodes that have at least one subclass also carry
        ``children``, a list of the same structure; leaf nodes have no
        ``children`` key at all.
    """
    # The count lookups are keyed by the colon form of the id.
    node_id = node.name.replace('_', ':')
    entry = {"id": node.name, **_node_payload(node_id)}

    children = [traverse(child) for child in node.subclasses()]
    if children:
        # Added last so the key order matches for leaf and internal nodes.
        entry["children"] = children
    return entry

In [82]:

# Load the Cell Ontology and convert it into a nested dict rooted at CL_0000000.
# NOTE(review): the URL points at the "latest" release, so the downloaded ontology
# (and therefore the generated tree) can change between runs — consider pinning a
# specific release tag.
CL_BASIC_OWL_URL = "https://github.com/obophenotype/cell-ontology/releases/latest/download/cl-basic.owl"
CL_ROOT_IRI = "http://purl.obolibrary.org/obo/CL_0000000"

ontology = owlready2.get_ontology(CL_BASIC_OWL_URL)
ontology.load()

root_node = ontology.world[CL_ROOT_IRI]
if root_node is None:
    # Fail here with a clear message instead of an AttributeError inside traverse().
    raise LookupError(f"Root class {CL_ROOT_IRI} not found in {CL_BASIC_OWL_URL}")

a = traverse(root_node)

In [83]:
# Write the tree to disk; the context manager guarantees the file is flushed and
# closed even if serialization fails part-way through.
with open('ontologyRawTree.json', 'w') as tree_file:
    json.dump(a, tree_file)