# Mondo RD subset analysis

In [1]:
## Settings

# Location of the Mondo database file that the adapter selector below points at.
mondo_path = "../../ontology/tmp/mondo_paper.db"
# Location of the Mondo SSSOM mappings table.
# NOTE(review): this path climbs one directory ("../") while mondo_path climbs
# two ("../../") - confirm both resolve from the notebook's working directory.
mappings_path = "../ontology/mappings/mondo.sssom.tsv"

# Selector string in "<scheme>:<path>" form, passed to oaklib's get_adapter()
# in the imports cell.
oak_adapter = f"sqlite:{mondo_path}"


In [2]:
# Install Python packages
#!pip install upsetplot
!pip install oaklib

Collecting oaklib
  Using cached oaklib-0.6.6-py3-none-any.whl.metadata (9.2 kB)
Collecting SPARQLWrapper (from oaklib)
  Using cached SPARQLWrapper-2.0.0-py3-none-any.whl.metadata (2.0 kB)
Collecting SQLAlchemy>=1.4.32 (from oaklib)
  Using cached SQLAlchemy-2.0.30-cp311-cp311-macosx_11_0_arm64.whl.metadata (9.6 kB)
Collecting airium>=0.2.5 (from oaklib)
  Using cached airium-0.2.6-py3-none-any.whl.metadata (15 kB)
Collecting appdirs>=1.4.4 (from oaklib)
  Using cached appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting class-resolver>=0.4.2 (from oaklib)
  Using cached class_resolver-0.4.3-py3-none-any.whl.metadata (13 kB)
Collecting click (from oaklib)
  Using cached click-8.1.7-py3-none-any.whl.metadata (3.0 kB)
Collecting curies>=0.6.6 (from oaklib)
  Using cached curies-0.7.9-py3-none-any.whl.metadata (9.3 kB)
Collecting defusedxml<0.8.0,>=0.7.1 (from oaklib)
  Using cached defusedxml-0.7.1-py2.py3-none-any.whl.metadata (32 kB)
Collecting eutils>=0.6.0 (from oaklib)
 

In [33]:
# Load Imports
import pandas as pd
from pathlib import Path
from oaklib import get_adapter
from oaklib.mappers import OntologyMetadataMapper
from oaklib.datamodels.vocabulary import IS_A, PART_OF, SEMAPV, OWL_OBJECT_PROPERTY, OWL_ANNOTATION_PROPERTY
from typing import Tuple, List

# Configure dataframe display: never truncate the column list or the text
# inside a cell. The row limit stays at the pandas default; uncomment the
# next line to lift it as well.
# pd.set_option("display.max_rows", None)
for display_option in ("display.max_columns", "display.max_colwidth"):
    pd.set_option(display_option, None)

# Open the ontology through the selector defined in the settings cell.
adapter = get_adapter(oak_adapter)


[Back](#overview)

---
<a id="download"></a>
### Load Mondo using the OAK SQLite adapter


In [19]:
# Explore the ontology: for each top-level Mondo grouping, collect the full set
# of is-a descendants (not just the direct children) of its root class.

# Root CURIE of each grouping, keyed by the group label used in the tables below.
mondo_group_roots = {
    "diseases": "MONDO:0000001",
    "characteristics": "MONDO:0021125",
    "susceptibilities": "MONDO:0042489",
    "injuries": "MONDO:0021178",
}

# One descendant set per group; dict order follows mondo_group_roots.
all_mondo_terms = {
    group: set(adapter.descendants([root_curie], predicates=[IS_A]))
    for group, root_curie in mondo_group_roots.items()
}

# Per-group aliases; each refers to the same set object stored in all_mondo_terms.
all_diseases = all_mondo_terms["diseases"]
all_characteristics = all_mondo_terms["characteristics"]
all_susceptibilities = all_mondo_terms["susceptibilities"]
all_injuries = all_mondo_terms["injuries"]



def get_subsets(adapter, mondo_terms_by_group: dict[str, List]) -> pd.DataFrame:
    """Tabulate the subset memberships of every term, tagged with its group.

    Args:
        adapter: Object exposing ``terms_subsets(terms)``, which yields
            ``(term_id, subset)`` pairs for the given terms.
        mondo_terms_by_group: Mapping from a group label to the collection of
            term IDs that belong to that group.

    Returns:
        A DataFrame with the columns ``id``, ``subset`` and ``group`` and one
        row per (term, subset) pair. Groups appear in the order of the input
        mapping; within a group, rows are sorted by (id, subset). The three
        columns are present even when no pair is found.
    """
    columns = ["id", "subset", "group"]
    rows = [
        {"id": term_id, "subset": subset, "group": mondo_group}
        for mondo_group, mondo_terms in mondo_terms_by_group.items()
        for term_id, subset in sorted(adapter.terms_subsets(mondo_terms))
    ]
    # Passing the columns explicitly keeps the schema stable for empty results,
    # so downstream column access and the CSV header do not disappear.
    return pd.DataFrame(rows, columns=columns)

# Build the subset table for every group collected above.
df_subsets = get_subsets(adapter, all_mondo_terms)
# Save the table without the index column.
# NOTE(review): the file is tab-separated despite its ".csv" extension -
# confirm that downstream readers pass sep="\t".
df_subsets.to_csv("mondo_subsets.csv", sep="\t", index=False)
# Last expression of the cell, so the frame is rendered as the cell output.
df_subsets


Unnamed: 0,id,subset,group
0,MONDO:0000004,otar,diseases
1,MONDO:0000005,inferred_rare,diseases
2,MONDO:0000005,rare,diseases
3,MONDO:0000009,inferred_rare,diseases
4,MONDO:0000009,otar,diseases
...,...,...,...
68912,MONDO:0043797,ordo_disease,injuries
68913,MONDO:0043797,orphanet_rare,injuries
68914,MONDO:0043797,rare,injuries
68915,MONDO:0044745,otar,injuries


In [34]:
def get_synonyms(adapter, mondo_terms_by_group: dict[str, List]) -> pd.DataFrame:
    """Tabulate the synonyms of every term, tagged with its group.

    Args:
        adapter: Object exposing ``synonym_property_values(terms)``, which
            yields ``(term_id, synonym_property_value)`` pairs. Each value is
            read through its ``val``, ``synonymType``, ``pred`` and ``xrefs``
            attributes.
        mondo_terms_by_group: Mapping from a group label to the collection of
            term IDs that belong to that group.

    Returns:
        A DataFrame with the columns ``id``, ``synonym``, ``synonym_type``,
        ``xrefs``, ``predicate`` and ``group`` and one row per synonym, in the
        order the adapter yields them. Multiple xrefs are joined with ``|``;
        a synonym without xrefs gets an empty string. The columns are present
        even when no synonym is found.
    """
    columns = ["id", "synonym", "synonym_type", "xrefs", "predicate", "group"]
    rows = []
    for mondo_group, mondo_terms in mondo_terms_by_group.items():
        for entity, spv in adapter.synonym_property_values(mondo_terms):
            rows.append({
                "id": entity,
                "synonym": spv.val,
                "synonym_type": spv.synonymType,
                # Tolerate a missing (None) xref list instead of raising.
                "xrefs": "|".join(spv.xrefs or []),
                "predicate": spv.pred,
                "group": mondo_group,
            })
    # Passing the columns explicitly keeps the schema stable for empty results.
    return pd.DataFrame(rows, columns=columns)

# NOTE(review): only the root term MONDO:0000001 is queried here, so the table
# holds the synonyms of that single class. Passing all_mondo_terms instead
# would cover every term of every group - confirm which is intended.
df_synonyms = get_synonyms(adapter, {"disease":["MONDO:0000001"]})
# Save the table without the index column.
# NOTE(review): the file is tab-separated despite its ".csv" extension -
# confirm that downstream readers pass sep="\t".
df_synonyms.to_csv("mondo_synonyms.csv", sep="\t", index=False)
# Last expression of the cell, so the frame is rendered as the cell output.
df_synonyms

Unnamed: 0,id,synonym,synonym_type,xrefs,predicate,group
0,MONDO:0000001,condition,,NCIT:C2991,hasExactSynonym,disease
1,MONDO:0000001,disease,,NCIT:C2991,hasExactSynonym,disease
2,MONDO:0000001,disease or disorder,,NCIT:C2991,hasExactSynonym,disease
3,MONDO:0000001,"disease or disorder, non-neoplastic",,NCIT:C2991,hasExactSynonym,disease
4,MONDO:0000001,diseases,,NCIT:C2991,hasExactSynonym,disease
5,MONDO:0000001,diseases and disorders,,NCIT:C2991,hasExactSynonym,disease
6,MONDO:0000001,disorder,,NCIT:C2991,hasExactSynonym,disease
7,MONDO:0000001,disorders,,NCIT:C2991,hasExactSynonym,disease
8,MONDO:0000001,medical condition,,,hasExactSynonym,disease
9,MONDO:0000001,other disease,,NCIT:C2991,hasExactSynonym,disease
