In [17]:
from cellxgene_ontology_guide.entities import Ontology
from cellxgene_ontology_guide.ontology_parser import OntologyParser
from cellxgene_ontology_guide.supported_versions import load_supported_versions

In [18]:
# Get versions of each supported Ontology associated with package-supported CellXGene schema_versions.
# The result is keyed by schema_version, then by ontology name; each entry records the
# pinned 'version', the download 'source', and the 'filename' of that ontology.
load_supported_versions()

{'v5.0.0': {'CL': {'version': 'v2024-01-04',
   'source': 'https://github.com/obophenotype/cell-ontology/releases/download',
   'filename': 'cl.owl'},
  'EFO': {'version': 'v3.62.0',
   'source': 'https://github.com/EBISPOT/efo/releases/download',
   'filename': 'efo.owl'},
  'HANCESTRO': {'version': '3.0',
   'source': 'https://github.com/EBISPOT/hancestro/raw',
   'filename': 'hancestro.owl'},
  'HsapDv': {'version': '11',
   'source': 'http://aber-owl.net/media/ontologies/HSAPDV',
   'filename': 'hsapdv.owl'},
  'MONDO': {'version': 'v2024-01-03',
   'source': 'https://github.com/monarch-initiative/mondo/releases/download',
   'filename': 'mondo.owl'},
  'MmusDv': {'version': '9',
   'source': 'http://aber-owl.net/media/ontologies/MMUSDV',
   'filename': 'mmusdv.owl'},
  'NCBITaxon': {'version': 'v2023-06-20',
   'source': 'https://github.com/obophenotype/ncbitaxon/releases/download',
   'filename': 'ncbitaxon.owl.gz'},
  'UBERON': {'version': 'v2024-01-18',
   'source': 'https://gi

In [19]:
# Init object to parse information from ontologies associated with a given CellXGene schema_version.
# "v5.0.0" is one of the schema_version keys shown in the load_supported_versions() output.
ontology_parser = OntologyParser(schema_version="v5.0.0")

In [20]:
# Build download URL for an ontology file associated with this schema_version.
# The URL shown below corresponds to the CL entry's 'source', 'version', and 'filename'
# joined with "/".
ontology_parser.get_ontology_download_url(Ontology.CL)

'https://github.com/obophenotype/cell-ontology/releases/download/v2024-01-04/cl.owl'

In [21]:
# Same lookup for the UBERON ontology pinned by this schema_version
ontology_parser.get_ontology_download_url(Ontology.UBERON)

'https://github.com/obophenotype/uberon/releases/download/v2024-01-18/uberon.owl'

In [22]:
# Get ancestors for a given term per schema_version associated ontologies.
# Note: in the printed result the queried term itself appears alongside its ancestors.
# `ancestors` is reused by the label lookup in the next cell.
ancestors = ontology_parser.get_term_ancestors("CL:0000021")  # female germ cell
print(ancestors)

['CL:0000586', 'CL:0000039', 'CL:0000000', 'CL:0000021']


In [23]:
# Translate every term ID collected in `ancestors` into its human-readable label
list(map(ontology_parser.get_term_label, ancestors))

['germ cell', 'germ line cell', 'cell', 'female germ cell']

In [24]:
# Get descendants for female germ cell (CL:0000021) and germ cell (CL:0000586).
# The result is a dict keyed by each queried term ID, mapping to a list of term IDs.
ontology_parser.map_term_descendants(["CL:0000021", "CL:0000586"])

{'CL:0000021': ['CL:0000021',
  'CL:0000022',
  'CL:0000023',
  'CL:0000024',
  'CL:0000025',
  'CL:0000675',
  'CL:0000654',
  'CL:0000655',
  'CL:0002090',
  'CL:0002091',
  'CL:0002093'],
 'CL:0000586': ['CL:0000015',
  'CL:0000016',
  'CL:0000017',
  'CL:0000020',
  'CL:0000018',
  'CL:0000657',
  'CL:0000019',
  'CL:0000408',
  'CL:0000021',
  'CL:0000022',
  'CL:0000023',
  'CL:0000024',
  'CL:0000025',
  'CL:0000675',
  'CL:0000722',
  'CL:0000300',
  'CL:0000654',
  'CL:0000655',
  'CL:0000656',
  'CL:0002090',
  'CL:0002091',
  'CL:0002093',
  'CL:0002290',
  'CL:0002291',
  'CL:0011013',
  'CL:0011014',
  'CL:0011015',
  'CL:0011016',
  'CL:4030036',
  'CL:4030037']}

In [25]:
# Get other info about ontology term graph: here, the distance between two terms
ontology_parser.get_distance_between_terms("CL:0000021", "CL:0000015")  # female, male germ cell terms

2

In [26]:
# Lowest common ancestor(s) of the female (CL:0000021) and male (CL:0000015) germ cell terms
ontology_parser.get_lowest_common_ancestors("CL:0000021", "CL:0000015")

['CL:0000586']

In [27]:
# Map terms to a curated list, based on matching ancestor terms.
# CELL_CLASSES is the curated list of high-level cell class term IDs used by the
# mapping calls in the following cells. Each ID must appear only once.
CELL_CLASSES = [
    "CL:0002494",  # cardiocyte
    "CL:0002320",  # connective tissue cell
    "CL:0000473",  # defensive cell
    "CL:0000066",  # epithelial cell
    "CL:0000988",  # hematopoietic cell
    "CL:0000183",  # NEW contractile cell
    "CL:0002319",  # neural cell
    "CL:0011115",  # precursor cell
    "CL:0000151",  # secretory cell
    "CL:0000039",  # NEW germ line cell
    "CL:0000064",  # NEW ciliated cell
    "CL:0000219",  # NEW motile cell
    "CL:0000188",  # NEW cell of skeletal muscle
    "CL:0000325",  # NEW stuff accumulating cell
    "CL:0000349",  # NEW extraembryonic cell
    "CL:0000586",  # NEW germ cell
    "CL:0000630",  # NEW supporting cell
    "CL:0001035",  # NEW bone cell
    "CL:0001061",  # NEW abnormal cell
    "CL:0002321",  # NEW embryonic cell (metazoa)
    "CL:0009010",  # NEW transit amplifying cell
    "CL:1000600",  # NEW lower urinary tract cell
    "CL:4033054",  # NEW perivascular cell
    "CL:0000677",  # NEW gut absorptive cell
]

In [28]:
# Terms to classify against CELL_CLASSES: female germ cell and male germ cell.
# `cells_to_map` is reused by the map_highest_level_term call in the next cell.
cells_to_map = ["CL:0000021", "CL:0000015"]
# For each input term, list the CELL_CLASSES entries it maps to
ontology_parser.map_high_level_terms(cells_to_map, CELL_CLASSES)

{'CL:0000021': ['CL:0000586', 'CL:0000039'],
 'CL:0000015': ['CL:0000586', 'CL:0000039']}

In [29]:
# Map to highest level, most general term (i.e. term closest to root node).
# In the output, CL:0000039 (germ line cell) is chosen over CL:0000586 (germ cell)
# for both input terms.
ontology_parser.map_highest_level_term(cells_to_map, CELL_CLASSES)

{'CL:0000021': 'CL:0000039', 'CL:0000015': 'CL:0000039'}

In [30]:
# Determine if a term is deprecated -- the following cells retrieve metadata for how to replace it
ontology_parser.is_term_deprecated("CL:0000003")

True

In [31]:
# Look up the replacement term ID recorded for the deprecated term
ontology_parser.get_term_replacement("CL:0000003")

'CL:0000000'

In [32]:
# Retrieve the remaining metadata for the deprecated term; the result carries
# 'term_tracker', 'consider', and 'comments' fields
ontology_parser.get_term_metadata("CL:0000003")

{'term_tracker': None,
 'consider': None,
 'comments': ['https://github.com/obophenotype/cell-ontology/issues/2124']}