In [1]:
import cython
from owlready2 import get_namespace, get_ontology, Thing

Download:

https://owlready2.readthedocs.io/en/latest/intro.html

https://owlready2.readthedocs.io/en/latest/namespace.html#accessing-entities-defined-in-another-namespace

Accessing content:

https://owlready2.readthedocs.io/en/latest/onto.html#accessing-the-content-of-an-ontology

Methods TODO:

https://owlready2.readthedocs.io/en/latest/class.html

ChEBI:

https://ftp.ebi.ac.uk/pub/databases/chebi/ontology/

GO-Plus comes bundled with ChEBI:

http://geneontology.org/docs/download-ontology/


ChEBI ontology variants: Lite (ID, name, definition, and relationships), Core (Lite plus synonyms), and Full (Core plus database accessions).

In [362]:
from owlready2 import get_namespace, get_ontology, Restriction

# NOTE: frozenset is a builtin, so no import from collections is needed.
# TODO: convert ":" to "_" in identifiers (e.g. "GO:0022857" -> "GO_0022857")?

# Relative paths to the local OWL files used in this notebook.
CHEBI_FILE = "../data/raw/ontologies/chebi.owl"

GO_FILE = "../data/raw/ontologies/go.owl"


class Ontology:
    def __init__(
        self, owl_file_path: str, namespace_url: str = "http://purl.obolibrary.org/obo/"
    ):
        self.ontology = get_ontology(owl_file_path).load()
        self.namespace = self.ontology.get_namespace(namespace_url)

    def get_identifier(self, label: str) -> str:
        return self.ontology.search_one(label=label).name

    def get_label(self, identifier: str) -> str:
        labels = self.namespace[identifier].label
        return "" if len(labels) == 0 else list(labels)[0]

    def __to_set(self, classes) -> set:
        # classes is a generator in case of subclasses and a set for ancestors/descendants
        return {cl.name for cl in classes if len(cl.label) > 0}

    def get_ancestors(self, identifier: str) -> set:
        return self.__to_set(self.namespace[identifier].ancestors())

    def get_descendants(self, identifier: str) -> set:
        return self.__to_set(self.namespace[identifier].descendants())

    def get_subclasses(self, identifier: str) -> set:
        return self.__to_set(self.namespace[identifier].subclasses())

    def get_superclasses(
        self,
        identifier: str,
        include_restrictions: bool = False,
        default_relationship: str = "is_a",
    ) -> set | list:
        classes = {
            cl
            for cl in self.namespace[identifier].is_a
            if not isinstance(cl, Restriction)
        }
        classes_set = self.__to_set(classes)
        if include_restrictions:
            supercl_with_restr = [
                (default_relationship, class_id) for class_id in classes_set
            ]
            restrictions = [
                cl
                for cl in self.namespace[identifier].is_a
                if isinstance(cl, Restriction)
            ]
            assert all(
                [len(restriction.property.label) == 1 for restriction in restrictions]
            )
            supercl_with_restr.extend(
                [
                    (list(restriction.property.label)[0], restriction.value.name)
                    for restriction in restrictions
                ]
            )
            return supercl_with_restr
        else:
            return classes_set

    def get_class(self, identifier: str):
        # get owlready class object
        return self.namespace[identifier]

    def get_properties(self, identifier: str) -> dict:
        # get all properties of class
        properties = {}
        cl = self.get_class(identifier)
        for property in cl.get_class_properties():
            labels = list(property.label)
            # should be checked once, to see if the additional labels are different.
            # not necessary for chebi, go
            # if len(labels) >= 2:
            #     print(f"Warning: more than one label for a property: {labels}")
            label = property.name if len(labels) == 0 else labels[0]
            values = list(getattr(cl, property.name))
            values = values[0] if len(values) == 1 else values
            if label in properties.keys():
                print(f"Warning: label {label} occurred more than once")
            properties[label] = values

        return properties


In [363]:
# Load ChEBI; this instance is reused by the cells below.
ont = Ontology(CHEBI_FILE)



# NOTE(review): several later cells use `identifier`, whose only visible
# assignment is the commented-out line below -- confirm and re-enable if needed.
# identifier = "GO_0022857"
# label = "transmembrane transporter activity"


# ont.get_identifier(label)
# ont.get_label(identifier)
# list(ont.identifier_to_label(test))

In [364]:

# Debugging aid: list the attributes available on the loaded ontology object.
dir(ont.ontology)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abbreviate',
 '_add_annotation_axiom',
 '_add_data_triple_raw_spod',
 '_add_data_triple_raw_spodsd',
 '_add_data_triple_spod',
 '_add_obj_triple_raw_spo',
 '_add_obj_triple_spo',
 '_base_iri',
 '_bnodes',
 '_del_annotation_axiom',
 '_del_data_triple_raw_spod',
 '_del_data_triple_raw_spodsd',
 '_del_data_triple_spod',
 '_del_list',
 '_del_obj_triple_raw_spo',
 '_del_obj_triple_spo',
 '_destroy_cached_entities',
 '_entity_destroyed',
 '_get_annotation_axioms',
 '_get_data_triple_sp_od',
 '_get_data_triples_s_pod',
 '_get_data_triples_sp_od',
 '_get_data_triples_sp

In [365]:
# Identifier -> label lookup.
ont.get_label("CHEBI_27732")

'caffeine'

In [366]:
# Label -> identifier lookup.
ont.get_identifier("caffeine")

'CHEBI_27732'

In [373]:
# Direct subclasses; the stored output is an empty set for this class.
ont.get_subclasses("CHEBI_27732")

set()

In [374]:
# Ancestor identifiers; the stored output includes CHEBI_27732 itself.
ont.get_ancestors("CHEBI_27732")


{'CHEBI_22315',
 'CHEBI_23367',
 'CHEBI_24431',
 'CHEBI_24532',
 'CHEBI_25348',
 'CHEBI_25367',
 'CHEBI_26385',
 'CHEBI_26401',
 'CHEBI_27134',
 'CHEBI_27171',
 'CHEBI_27732',
 'CHEBI_33285',
 'CHEBI_33302',
 'CHEBI_33579',
 'CHEBI_33582',
 'CHEBI_33595',
 'CHEBI_33635',
 'CHEBI_33636',
 'CHEBI_33655',
 'CHEBI_33659',
 'CHEBI_33671',
 'CHEBI_33672',
 'CHEBI_33675',
 'CHEBI_33832',
 'CHEBI_33833',
 'CHEBI_35352',
 'CHEBI_35875',
 'CHEBI_36357',
 'CHEBI_38101',
 'CHEBI_38166',
 'CHEBI_50860',
 'CHEBI_51143',
 'CHEBI_5686',
 'CHEBI_72695'}

In [375]:
# Descendant identifiers; the stored output contains only the class itself.
ont.get_descendants("CHEBI_27732")

{'CHEBI_27732'}

In [371]:
# Superclasses as (relationship, identifier) pairs, including restrictions such as "has role".
ont.get_superclasses("CHEBI_27732", include_restrictions=True)

[('is_a', 'CHEBI_27134'),
 ('is_a', 'CHEBI_26385'),
 ('has role', 'CHEBI_176497'),
 ('has role', 'CHEBI_25435'),
 ('has role', 'CHEBI_35337'),
 ('has role', 'CHEBI_35471'),
 ('has role', 'CHEBI_35498'),
 ('has role', 'CHEBI_35703'),
 ('has role', 'CHEBI_50218'),
 ('has role', 'CHEBI_50925'),
 ('has role', 'CHEBI_53121'),
 ('has role', 'CHEBI_60809'),
 ('has role', 'CHEBI_64047'),
 ('has role', 'CHEBI_67114'),
 ('has role', 'CHEBI_71232'),
 ('has role', 'CHEBI_75771'),
 ('has role', 'CHEBI_76924'),
 ('has role', 'CHEBI_76946'),
 ('has role', 'CHEBI_78298'),
 ('has role', 'CHEBI_85234')]

In [368]:
# Class-level properties as a {label: value(s)} dict.
ont.get_properties("CHEBI_27732")

{'mass': '194.19076',
 'formula': 'C8H10N4O2',
 'charge': '0',
 'monoisotopicmass': '194.08038',
 'definition': 'A trimethylxanthine in which the three methyl groups are located at positions 1, 3, and 7. A purine alkaloid that occurs naturally in tea and coffee.',
 'in_subset': chebi.3_STAR,
 'database_cross_reference': ['Beilstein:17705',
  'CAS:58-08-2',
  'DrugBank:DB00201',
  'Drug_Central:463',
  'Gmelin:103040',
  'HMDB:HMDB0001847',
  'KEGG:C07481',
  'KEGG:D00528',
  'KNApSAcK:C00001492',
  'LINCS:LSM-2026',
  'MetaCyc:1-3-7-TRIMETHYLXANTHINE',
  'PDBeChem:CFF',
  'PMID:10510174',
  'PMID:10796597',
  'PMID:10803761',
  'PMID:10822912',
  'PMID:10884512',
  'PMID:10924888',
  'PMID:10983026',
  'PMID:11014293',
  'PMID:11022879',
  'PMID:11209966',
  'PMID:11312039',
  'PMID:11410911',
  'PMID:11431501',
  'PMID:11815511',
  'PMID:11949272',
  'PMID:12397877',
  'PMID:12457274',
  'PMID:12574990',
  'PMID:12915014',
  'PMID:12943586',
  'PMID:14521986',
  'PMID:14607010',
  'PM

In [149]:
# Raw owlready2 descendants (class objects rather than identifier strings).
# NOTE(review): `identifier` has no visible assignment in this notebook, and the
# stored output lists GO terms although `ont` is created from CHEBI_FILE --
# presumably a leftover from an earlier session; confirm and re-run.
set(ont.namespace[identifier].descendants())

{obo.GO_0005384,
 obo.GO_0140161,
 obo.GO_0015379,
 obo.GO_0015575,
 obo.GO_0090582,
 obo.GO_0035381,
 obo.GO_0106421,
 obo.GO_0015493,
 obo.GO_0005366,
 obo.GO_0008510,
 obo.GO_0140800,
 obo.GO_0015505,
 obo.GO_0015611,
 obo.GO_1901513,
 obo.GO_0042898,
 obo.GO_0008511,
 obo.GO_0000064,
 obo.GO_0015366,
 obo.GO_0046715,
 obo.GO_0090583,
 obo.GO_0015100,
 obo.GO_0004931,
 obo.GO_0140481,
 obo.GO_0015541,
 obo.GO_0033288,
 obo.GO_0005367,
 obo.GO_0015519,
 obo.GO_0015209,
 obo.GO_0005253,
 obo.GO_0015624,
 obo.GO_1901974,
 obo.GO_0015205,
 obo.GO_0008324,
 obo.GO_0015101,
 obo.GO_0008512,
 obo.GO_0015367,
 obo.GO_0015450,
 obo.GO_0090584,
 obo.GO_0015665,
 obo.GO_0015196,
 obo.GO_0042959,
 obo.GO_0015108,
 obo.GO_0015626,
 obo.GO_0140108,
 obo.GO_0042901,
 obo.GO_0005345,
 obo.GO_1901682,
 obo.GO_0010328,
 obo.GO_0090482,
 obo.GO_0008564,
 obo.GO_0015369,
 obo.GO_0015577,
 obo.GO_0015104,
 obo.GO_0090585,
 obo.GO_0015439,
 obo.GO_0008493,
 obo.GO_1990760,
 obo.GO_0031458,
 obo.GO_190151

In [114]:
# TODO: part_of relations do not count as subclasses -- presumably why this
# membership test is False; confirm.
"GO_1902495" in ont.get_subclasses("GO_0022857")

False

In [88]:
# Second entry of the raw `is_a` list; the stored output is a restriction.
# NOTE(review): `identifier` has no visible assignment in this notebook -- confirm.
ont.get_class(identifier).is_a[1]

obo.BFO_0000050.some(obo.GO_0055085)

In [77]:
# NOTE(review): the stored output (a list of owlready2 objects) does not match
# the current Ontology.get_superclasses, which returns a set of identifier
# strings by default -- presumably stale; re-run to refresh. `identifier` also
# has no visible assignment in this notebook.
t = ont.get_superclasses(identifier)
t

[obo.GO_0005215, obo.BFO_0000050.some(obo.GO_0055085)]

In [113]:
# Target class of the restriction stored at index 1 of `t`.
# NOTE(review): relies on `t` being a list of owlready2 objects; the current
# get_superclasses returns a set by default -- confirm.
t[1].value

obo.GO_0055085

In [112]:
# Label(s) of the restriction's property (stored output: ['part of']).
t[1].property.label

['part of']

In [13]:
# GO identifiers written with ":"; the Ontology calls elsewhere in this notebook
# use the "_" form (e.g. "GO_0022857"), so these need converting before lookup.
transmembrane_transport_activity = "GO:0022857"

molecular_function = "GO:0003674"

test_list = [transmembrane_transport_activity, molecular_function]

In [52]:
# Labels of the raw owlready2 class object (a list, unlike Ontology.get_label).
ont.get_class("GO_0022857").label


['transmembrane transporter activity']