In [2]:
cd ../

/Users/atarashansky/Desktop/czi/single-cell-data-portal


In [3]:
from backend.wmg.data.rollup import rollup_across_cell_type_descendants
import owlready2
import json
import tiledb
from backend.wmg.data.ontology_labels import ontology_term_label, ontology_term_id_labels
import pandas as pd
import numpy as np

def traverse(node):
    """Recursively convert an ontology class and its subclasses into a nested dict.

    Reads the notebook-level lookups ``id_to_name``, ``cell_counts_df``,
    ``cell_counts_df_norm``, ``cell_counts_df_rollup`` and
    ``cell_counts_df_rollup_norm``; a term missing from a count lookup is
    reported as 0, and a term missing from ``id_to_name`` falls back to its id.

    Args:
        node: Object exposing ``.name`` (underscore form, e.g. ``CL_0000000``)
            and ``.subclasses()``.

    Returns:
        dict with keys ``id``, ``name``, ``n_cells_rollup``,
        ``n_cells_rollup_normalized``, ``n_cells``, ``n_cells_normalized`` and,
        only when the node has at least one subclass, ``children`` (a list of
        the same structure, one entry per subclass).
    """
    child_nodes = list(node.subclasses())
    # Lookups are keyed by the colon form (CL:0000000); node names use underscores.
    term_id = node.name.replace('_', ':')

    def _value_or_zero(lookup):
        # Terms absent from a lookup contribute zero.
        return lookup[term_id] if term_id in lookup else 0

    entry = {
        "id": node.name,
        "name": id_to_name[term_id] if term_id in id_to_name else term_id,
        "n_cells_rollup": int(_value_or_zero(cell_counts_df_rollup)),
        "n_cells_rollup_normalized": float(_value_or_zero(cell_counts_df_rollup_norm)),
        "n_cells": int(_value_or_zero(cell_counts_df)),
        "n_cells_normalized": float(_value_or_zero(cell_counts_df_norm)),
    }
    # Leaves carry no "children" key at all (not an empty list).
    if child_nodes:
        entry["children"] = [traverse(child) for child in child_nodes]
    return entry
def _descendants(cell_type):
    cell_type_iri = cell_type.replace(":", "_")
    entity = ontology.search_one(iri=f"http://purl.obolibrary.org/obo/{cell_type_iri}")
    descendants = [i.name.replace("_", ":") for i in entity.descendants()] if entity else [cell_type]
    return descendants

def _ancestors(cell_type):
    cell_type_iri = cell_type.replace(":", "_")
    entity = ontology.search_one(iri=f"http://purl.obolibrary.org/obo/{cell_type_iri}")
    ancestors = [i.name.replace("_", ":") for i in entity.ancestors() if i.name!= "Thing"] if entity else [cell_type]
    return ancestors

def _children(cell_type):
    cell_type_iri = cell_type.replace(":", "_")
    entity = ontology.search_one(iri=f"http://purl.obolibrary.org/obo/{cell_type_iri}")
    children = [i.name.replace("_", ":") for i in entity.subclasses()] if entity else [cell_type]
    return children

def _parents(cell_type):
    cell_type_iri = cell_type.replace(":", "_")
    entity = ontology.search_one(iri=f"http://purl.obolibrary.org/obo/{cell_type_iri}")    
    parent_names = [parent.name.replace("_",":") for parent in entity.is_a if isinstance(parent, owlready2.ThingClass) and parent.name!= "Thing"]
    return parent_names

def dfs(parents, end, start, node=None, path=None, all_paths=None):
    """Collect every path from ``end`` up to ``start`` by following parent links.

    Each discovered path is appended to ``all_paths`` as a list ordered from
    ``end`` to ``start``. Callers that want the results must pass their own
    list as ``all_paths``.

    Args:
        parents: Mapping of node id -> list of parent node ids. Nodes missing
            from the mapping are treated as having no parents.
        end: Node the walk starts from.
        start: Node at which a path is considered complete.
        node: Current node; internal to the recursion, leave as ``None``.
        path: Path accumulated so far; internal to the recursion, leave as ``None``.
        all_paths: List that receives the completed paths. A fresh list is
            used when omitted, so results never leak between calls.

    Returns:
        ``path`` when ``node == start`` (for a top-level call this means
        ``[end]`` when ``end == start``); otherwise ``None``.
    """
    # A mutable default would be shared by every call that omits the argument.
    if all_paths is None:
        all_paths = []

    if path is None and node is None:
        path = [end]
        node = end

    if node == start:
        return path

    for parent in parents.get(node, []):
        full_path = dfs(parents, end, start, node=parent, path=path + [parent], all_paths=all_paths)
        # Only the frame directly below `start` gets a path back; deeper frames return None.
        if full_path:
            all_paths.append(full_path)
            
            
def truncate_graph(graph, visited_nodes_in_paths):
    """Prune a nested ontology dict in place so only nodes on the given paths stay expanded.

    A node that has a ``children`` key is tagged ``hasChildren = True``. If the
    node's id (colon form) is in ``visited_nodes_in_paths`` it keeps its
    children and the id is removed from the set, so a later occurrence of the
    same id is collapsed. Otherwise its ``children`` key is dropped.

    Args:
        graph: Nested dict with an ``id`` key (underscore form) and an optional
            ``children`` list of dicts of the same shape. Mutated in place.
        visited_nodes_in_paths: Set of colon-form ids that should stay
            expanded. Mutated in place.
    """
    if 'children' in graph:
        graph['hasChildren'] = True

    term_id = graph['id'].replace('_', ':')
    if term_id in visited_nodes_in_paths:
        # Consume the id so only its first occurrence is expanded.
        visited_nodes_in_paths.remove(term_id)
    else:
        graph.pop('children', None)

    for child in graph.get("children", []):
        truncate_graph(child, visited_nodes_in_paths)
    



# Build ontology tree JSON

In [4]:
# Read the cell-count array and sum the numeric columns per cell type.
# The context manager closes the TileDB array once the frame is in memory.
with tiledb.open('prod-snapshot/cell_counts') as cell_counts:
    cell_counts_df = cell_counts.df[:]
cell_counts_df = cell_counts_df.groupby('cell_type_ontology_term_id').sum(numeric_only=True).reset_index()

# Every CL term known to the label lookup, whether or not it has cells.
all_cell_types = [{k: ontology_term_label(k)} for k in ontology_term_id_labels if k.startswith('CL:')]
all_cell_types_ids = [list(i.keys())[0] for i in all_cell_types]

# Add zero-count rows for CL terms absent from the snapshot so every term gets an entry.
# A set keeps the membership test O(1) per term.
observed_cell_type_ids = set(cell_counts_df['cell_type_ontology_term_id'])
to_attach = pd.DataFrame(
    {'cell_type_ontology_term_id': [i for i in all_cell_types_ids if i not in observed_cell_type_ids]}
)
to_attach['n_cells'] = 0

cell_counts_df = pd.concat([cell_counts_df, to_attach], axis=0)
# NOTE(review): the rollup presumably adds descendant counts onto each term —
# confirm against backend.wmg.data.rollup.
cell_counts_df_rollup = rollup_across_cell_type_descendants(cell_counts_df).set_index('cell_type_ontology_term_id')['n_cells']
cell_counts_df = cell_counts_df.set_index('cell_type_ontology_term_id')['n_cells']

# Scale both count series by their own maximum.
cell_counts_df_rollup_norm = cell_counts_df_rollup / cell_counts_df_rollup.max()
cell_counts_df_norm = cell_counts_df / cell_counts_df.max()
id_to_name = pd.Series(index=cell_counts_df.index, data=[ontology_term_label(i) for i in cell_counts_df.index])


ontology = owlready2.get_ontology("https://github.com/obophenotype/cell-ontology/releases/latest/download/cl-basic.owl")
ontology.load()

root_node = ontology.world["http://purl.obolibrary.org/obo/CL_0000000"]

a = traverse(root_node)

# Close the output file deterministically instead of leaking the handle.
with open('ontologyRawTree.json', 'w') as raw_tree_file:
    json.dump(a, raw_tree_file)

In [5]:
# Precompute the relationship lookups once per CL term id so later cells can
# use plain dict access instead of querying the ontology repeatedly.
all_children = {term_id: _children(term_id) for term_id in all_cell_types_ids}
all_parents = {term_id: _parents(term_id) for term_id in all_cell_types_ids}
all_descendants = {term_id: _descendants(term_id) for term_id in all_cell_types_ids}
all_ancestors = {term_id: _ancestors(term_id) for term_id in all_cell_types_ids}

## What are the visualization rules implied by the code below?

1. All ontology graph depictions per cell type are static. Users cannot alter the structure of the displayed ontology by collapsing or expanding nodes.
1. The root node of a cell type's ontology view is the most recent common ancestor (MRCA) of all instances of that node in the unrolled tree.
1. Note that if a node has only one parent, that parent will be its MRCA regardless of the MRCA's ancestry. We are not treating each instance of a cell type term in the unrolled tree as distinct. If we were, then a node's MRCA would need to consider all possible paths to get to that node from the root of the tree.
1. The direct children of all nodes in the **first** path from the MRCA root to the cell type term, including the cell type term, are displayed.

In [7]:
all_graphs_per_cell_type = {}
for end_node in all_cell_types_ids:
    print(end_node)

    # Every parent-link path between the ontology root and this term.
    # dfs records paths from end_node upward, so reverse them to read root -> term.
    start_node = 'CL:0000000'
    all_paths = []
    dfs(all_parents, end_node, start_node, all_paths=all_paths)
    all_paths = [i[::-1] for i in all_paths]

    # Pick the MRCA: among ancestors that lie on *every* root path, take the one
    # with the largest mean position along those paths. The term itself and any
    # ancestor missing from a path score -1. With no root paths at all, the term
    # is its own MRCA.
    mrca = end_node
    if all_paths:
        scores = []
        for ancestor in all_ancestors[end_node]:
            if ancestor == end_node:
                scores.append(-1)
                continue
            nums = [path.index(ancestor) if ancestor in path else -1 for path in all_paths]
            scores.append(-1 if -1 in nums else sum(nums) / len(nums))
        # argmax is evaluated once, after all scores are known.
        if scores:
            mrca = all_ancestors[end_node][np.argmax(scores)]

    # Build the full subtree under the MRCA.
    mrca_obo = mrca.replace(":", "_")
    root_node = ontology.world[f"http://purl.obolibrary.org/obo/{mrca_obo}"]
    a_mrca = traverse(root_node)

    # Paths between the MRCA and the term decide which nodes stay expanded.
    start_node = mrca
    all_paths = []
    dfs(all_parents, end_node, start_node, all_paths=all_paths)
    all_paths = [i[::-1] for i in all_paths]

    # Set union flattens the paths without the quadratic list concatenation of sum(..., []).
    visited_nodes_in_paths = set().union(*all_paths)
    truncate_graph(a_mrca, visited_nodes_in_paths)
    all_graphs_per_cell_type[end_node] = a_mrca

CL:0000000
CL:0000540
CL:0000101
CL:0000001
CL:0000010
CL:0000002
CL:0000003
CL:0000004
CL:0000005
CL:0000057
CL:0000333
CL:0000008
CL:0000006
CL:0000197
CL:0000007
CL:0002321
CL:0000009
CL:0000578
CL:0000011
CL:0000012
CL:0000013
CL:0000014
CL:0000039
CL:0000034
CL:0000015
CL:0000586
CL:0000016
CL:0000017
CL:0000020
CL:0000018
CL:0000413
CL:0000657
CL:0000019
CL:0000408
CL:0000021
CL:0000022
CL:0000023
CL:0000548
CL:0000024
CL:0000670
CL:0000025
CL:0000675
CL:0000026
CL:0000412
CL:4029002
CL:0000722
CL:0000027
CL:0000192
CL:0000028
CL:0000338
CL:0000029
CL:0002676
CL:0000030
CL:0000055
CL:0000133
CL:0000031
CL:0000032
CL:0000114
CL:0000033
CL:0000151
CL:0011115
CL:0000035
CL:0000723
CL:0000036
CL:0000037
CL:0000988
CL:0008001
CL:0011026
CL:0000566
CL:0000038
CL:0000764
CL:0000839
CL:0000050
CL:0002371
CL:0000040
CL:0002194
CL:0017503
CL:0000763
CL:0002009
CL:0000041
CL:0000771
CL:0000145
CL:0000774
CL:0000042
CL:0000835
CL:0000834
CL:0000043
CL:0000767
CL:0002274
CL:0000770
CL:0000044

CL:0000601
CL:0000602
CL:1000082
CL:0000603
CL:0000604
CL:0000606
CL:0000607
CL:0000608
CL:0000609
CL:0002374
CL:0000610
CL:0000611
CL:0002191
CL:0000612
CL:0000772
CL:0000613
CL:0002028
CL:0000614
CL:0000768
CL:0000615
CL:0000616
CL:0000617
CL:0000620
CL:0000621
CL:0008018
CL:0000622
CL:0000623
CL:0001067
CL:0000825
CL:0000791
CL:0000625
CL:0000626
CL:0012001
CL:0000627
CL:0000628
CL:0000629
CL:0000631
CL:0000632
CL:0000633
CL:0002490
CL:0000634
CL:0002315
CL:0000635
CL:0002165
CL:0000636
CL:0000681
CL:0000637
CL:2000004
CL:0000638
CL:0000640
CL:0000641
CL:0000642
CL:0000643
CL:0002085
CL:0000644
CL:0000645
CL:0012000
CL:0000646
CL:0000647
CL:1000618
CL:0000649
CL:0000650
CL:0000651
CL:0000652
CL:0000653
CL:0002522
CL:1000450
CL:0000654
CL:0000655
CL:0000656
CL:0000659
CL:0000660
CL:0000661
CL:0000662
CL:0000663
CL:0000664
CL:0000665
CL:0000666
CL:0000668
CL:0008034
CL:0000671
CL:0000672
CL:0000673
CL:0000674
CL:0000676
CL:0000678
CL:0000679
CL:0000682
CL:0000683
CL:0000684
CL:0000685

CL:0002269
CL:0002270
CL:0002272
CL:0002271
CL:0002273
CL:0002275
CL:0002276
CL:0002277
CL:0002278
CL:0002279
CL:0002280
CL:0002281
CL:0002282
CL:0002283
CL:0002284
CL:0005018
CL:0002285
CL:0002286
CL:0002287
CL:0002288
CL:0002289
CL:0002290
CL:0002291
CL:0002292
CL:0002293
CL:0002294
CL:0002364
CL:0002295
CL:0002365
CL:0002296
CL:0002297
CL:0002298
CL:0002299
CL:0002300
CL:0002301
CL:0002302
CL:0002303
CL:0002304
CL:0002305
CL:1000494
CL:1000615
CL:0002306
CL:0002307
CL:0002308
CL:0002309
CL:0002310
CL:0002311
CL:0002312
CL:0002313
CL:0002314
CL:0002316
CL:0005014
CL:0002317
CL:0002318
CL:0002322
CL:0002323
CL:0002324
CL:0002327
CL:1001586
CL:0002326
CL:0009116
CL:0002329
CL:0002330
CL:0002331
CL:1000349
CL:0002332
CL:0002333
CL:0002337
CL:0002338
CL:0002344
CL:0002339
CL:0002342
CL:0002343
CL:0002345
CL:0002346
CL:0002347
CL:0002349
CL:0002348
CL:0002350
CL:0010008
CL:0002352
CL:0002353
CL:0002354
CL:0002355
CL:0002417
CL:0002361
CL:0002356
CL:0002357
CL:0002358
CL:0002359
CL:0002360

CL:1000333
CL:1000334
CL:1000335
CL:1000336
CL:1000346
CL:1000337
CL:1000338
CL:1000339
CL:1000340
CL:1000341
CL:1000342
CL:1000343
CL:1000344
CL:1000345
CL:1000347
CL:1000348
CL:1000352
CL:1000353
CL:1000354
CL:1000355
CL:1000356
CL:1000357
CL:1000358
CL:1000359
CL:1000361
CL:2000022
CL:1000362
CL:1000363
CL:1000480
CL:1000364
CL:1000365
CL:1000366
CL:1000367
CL:1000368
CL:1000370
CL:1000369
CL:1000481
CL:1000371
CL:1000372
CL:1000373
CL:1000374
CL:1000375
CL:2000046
CL:1000376
CL:1000377
CL:1000378
CL:1000379
CL:1000380
CL:1000381
CL:1000382
CL:1000383
CL:1000384
CL:1000385
CL:1000386
CL:1000387
CL:1000388
CL:1000389
CL:1000390
CL:1000391
CL:1000393
CL:1000394
CL:1000395
CL:1000396
CL:1000397
CL:2000053
CL:1000399
CL:1000400
CL:1000401
CL:1000402
CL:1000403
CL:1000404
CL:1000406
CL:1000407
CL:1000408
CL:1000409
CL:1000410
CL:1000411
CL:1000412
CL:1000415
CL:1000416
CL:1000417
CL:1000418
CL:1000419
CL:1000420
CL:1000421
CL:1000422
CL:1000423
CL:1000424
CL:1000425
CL:1000427
CL:1000429

In [9]:
# Write the per-cell-type views; the context manager flushes and closes the file.
with open('allOntologyViews.json', 'w') as views_file:
    json.dump(all_graphs_per_cell_type, views_file)