In [1]:
import owlready2 as owl
# Load the NCI Thesaurus OWL file, then expose owlready2's default world as an
# rdflib graph so it can be queried with SPARQL in the cells below.
ncit = owl.get_ontology('../data/ncit/ncit_20.09d.owl').load()
graph = owl.default_world.as_rdflib_graph()



get_ontology("http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#")

In [66]:
# Select every subject whose P106 (Semantic_Type) annotation is the literal
# "Pharmacologic Substance"; the result rows are de-duplicated into a set.
query_str = (
    'SELECT ?x WHERE {?x <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#P106> '
    '"Pharmacologic Substance" }'
)
results = {row for row in graph.query(query_str)}

def parse_uri(uri):
    """Return the fragment (the text after ``#``) of a URI term.

    Args:
        uri: An object exposing ``toPython()`` that yields the URI as a
            string (presumably an rdflib ``URIRef`` from a query result row;
            verify against the caller).

    Returns:
        The segment following the first ``#`` in the URI string, e.g.
        ``"C1909"`` for ``".../Thesaurus.owl#C1909"``.

    Raises:
        IndexError: If the URI string contains no ``#``.
    """
    # The original computed this expression but never returned it, so every
    # call produced None.
    return uri.toPython().split('#')[1]

# Resolve each query row's first column (the class URI) to the ontology entity
# with the matching fragment identifier.
semtype_ps = {ncit[parse_uri(row[0])] for row in results}

In [73]:
# get the leaf classes found beneath C1909 ("Pharmacologic Substance")
# NOTE(review): only classes whose descendants() holds a single element are
# collected, so intermediate (non-leaf) classes are left out -- confirm that
# this is the intended definition of "descendants" for the comparison below.
def build_descendants(node, descendants):
    """Add every leaf class reachable from ``node`` to ``descendants``.

    A class counts as a leaf when its ``descendants()`` collection has exactly
    one element (with owlready2 this is presumably the class itself; verify).
    Classes with more than one element are expanded: each member whose
    ``name`` differs from the current class is examined in turn.

    The walk is iterative and remembers which classes were already examined.
    The original recursion re-expanded every class each time it was reached;
    if ``descendants()`` returns the full transitive set (as owlready2's
    appears to), that repeats each subtree once per ancestor and can exhaust
    the recursion limit. The collected set is the same.

    Args:
        node: Starting class; must be hashable and provide ``descendants()``
            and ``name``.
        descendants: Set that is mutated in place to receive the leaves.

    Returns:
        None. Results are delivered through ``descendants``.

    Raises:
        Exception: If any examined class reports an empty ``descendants()``
            collection (the empty collection is the exception argument).
    """
    pending = [node]
    visited = set()
    while pending:
        current = pending.pop()
        if current in visited:
            continue
        visited.add(current)

        children = current.descendants()
        if len(children) == 0:
            raise Exception(children)
        if len(children) == 1:
            descendants.add(current)
            continue
        # Skip the entry that stands for the current class itself.
        pending.extend(child for child in children if child.name != current.name)

# Accumulator that build_descendants fills in place; it returns nothing, so the
# collected classes are read from this set afterwards. The walk starts at
# ncit.C1909 ("Pharmacologic Substance").
descendants = set()
build_descendants(ncit.C1909, descendants)

In [93]:
# Compare the two collections: classes found only via the semantic-type query,
# classes found only via the hierarchy walk, and the size of the overlap.
semtype_not_desc = semtype_ps.difference(descendants)
desc_not_semtype = descendants.difference(semtype_ps)
print(f"# of common elements: {len(semtype_ps & descendants)}")
print(f"# of elements with semantic type but not in descendants: {len(semtype_not_desc)}")
print(f"# of elements in descendant but don't have semantic type: {len(desc_not_semtype)}")

# of common elements: 15512
# of elements with semantic type but not in descendants: 1637
# of elements in descendant but don't have semantic type: 2954


In [106]:
def print_info(owl_class):
    """Return a ``(label, code)`` tuple describing an ontology class.

    Despite its name, this function prints nothing; it returns the tuple.

    The label is the first value of the first non-empty property among
    ``P107``, ``P108`` and ``P90``, tried in that order (presumably NCIt's
    display name, preferred name and synonym annotations; verify). If all
    three are empty the label is ``None``; the original left ``name``
    unassigned in that case and failed with ``UnboundLocalError``.

    Args:
        owl_class: Object exposing list-like ``P107``, ``P108``, ``P90`` and
            ``NHC0`` attributes.

    Returns:
        tuple: ``(label_or_None, owl_class.NHC0[0])``.

    Raises:
        IndexError: If ``owl_class.NHC0`` is empty.
    """
    name = None
    # Properties are read lazily, in priority order, stopping at the first hit.
    for prop in ("P107", "P108", "P90"):
        values = getattr(owl_class, prop)
        if values:
            name = values[0]
            break
    return (name, owl_class.NHC0[0])

In [107]:
# Classes typed as "Pharmacologic Substance" that the hierarchy walk did not
# collect, shown as (label, code) tuples.
[print_info(owl_class) for owl_class in semtype_not_desc]

[('Pyran Polymer', 'C29343'),
 ('Amifampridine', 'C87424'),
 ('d-Alpha-Tocopherol', 'C2832'),
 ('Soy Isoflavones', 'C65152'),
 ('Propisergide', 'C66490'),
 ('Etanidazole', 'C1092'),
 ('Levomenol', 'C80904'),
 ('Magnesium Malate', 'C87336'),
 ('Aluminum Sesquichlorohydrate', 'C142918'),
 ('Copal', 'C74358'),
 ('Quercetin', 'C792'),
 ('Cathepsin-Activatable Cy5 Fluorescent Imaging Probe LUM015', 'C105147'),
 ('Anti-CD38 Monoclonal Antibody', 'C155321'),
 ('Alpha-Tocopherol', 'C74960'),
 ('PS IL-10/Placebo', 'C29336'),
 ('Sodium Ferulate', 'C84155'),
 ('DOTMP', 'C1496'),
 ('Bisbentiamine', 'C81630'),
 ('Elm', 'C65510'),
 ('Paricalcitol', 'C38693'),
 ('Rutin', 'C819'),
 ('Dendritic Cell Vaccine', 'C28310'),
 ('ALVAC-CEA Vaccine', 'C1648'),
 ('Coniine Hydrochloride', 'C87478'),
 ('dl-alpha-Tocopherol', 'C74578'),
 ('Ferric Phosphate', 'C83714'),
 ('Probiotic Acidophilus', 'C79797'),
 ('Starch, Rice', 'C80780'),
 ('Telomerase:540-548 Peptide', 'C2813'),
 ('Visilizumab', 'C2687'),
 ('OPCs/Gre

In [108]:
# Classes collected by the hierarchy walk that lack the "Pharmacologic
# Substance" semantic type, shown as (label, code) tuples.
[print_info(owl_class) for owl_class in desc_not_semtype]

[('Pipoxolan Hydrochloride', 'C170323'),
 ('Elbasvir', 'C166892'),
 ('CA19-9 Antigen', 'C327'),
 ('Futuximab', 'C152971'),
 ('Meralluride', 'C166688'),
 ('Drocinonide', 'C166963'),
 ('Bordetella pertussis Antigen, D', 'C75775'),
 ('Monoclonal Antibody 48.7 F(ab)', 'C29200'),
 ('Anti-BCMA/CD3 BiTE Antibody AMG 701', 'C147028'),
 ('Limiglidole', 'C170123'),
 ('Tigemonam Dicholine', 'C152620'),
 ('Elismetrep ', 'C169937'),
 ('Sodium Phosphate, Radioactive', 'C2126'),
 ('Penimepicycline', 'C166691'),
 ('Alicapistat', 'C171868'),
 ('Diazooxonorleucine', 'C427'),
 ('Coblopasvir', 'C166912'),
 ('Donanemab', 'C166484'),
 ('Mycobacterium w', 'C70969'),
 ('Vanilloid', 'C1955'),
 ('NA17-A Antigen', 'C37450'),
 ('Mercaptomerin', 'C166828'),
 ('Fluperolone Acetate', 'C169995'),
 ('Bordetella pertussis Antigen, E', 'C75776'),
 ('Monoclonal Antibody 528', 'C29201'),
 ('Pegapamodutide', 'C170294'),
 ('Pheneticillin', 'C170316'),
 ('Igmesine', 'C170052'),
 ('Betahistine Hydrochloride', 'C73794'),
 ('Yt