In [1]:
import owlready2 as owl
ncit = owl.get_ontology('../data/ncit/ncit_20.09d.owl')
ncit.load()
graph = owl.default_world.as_rdflib_graph()



In [15]:
# get all classes with Semantic_Type = "Pharmacologic Substance"
query_str = """SELECT ?x WHERE {
    ?x <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#P106> "Pharmacologic Substance"
}"""
results = set(graph.query(query_str))

semtype_ps = set()
for r in results:
    semtype_ps.add(ncit[r[0].toPython().split('#')[1]])

In [16]:
# get all classes that are descendants of C1909 ("Pharmacologic Substance")
def build_descendants(node, descendants):
    children = node.descendants()
    if len(children) == 0:
        raise Exception(children)
    elif len(children) == 1:
        descendants.add(node)
    else:
        for child in children:
            if child.name != node.name:
                build_descendants(child, descendants)

descendants = set()
build_descendants(ncit.C1909, descendants)        

In [17]:
semtype_not_desc = semtype_ps - descendants
desc_not_semtype = descendants - semtype_ps
print(f"# of common elements: {len(semtype_ps.intersection(descendants))}")
print(f"# of elements with semantic type but not in descendants: {len(semtype_not_desc)}")
print(f"# of elements in descendant but don't have semantic type: {len(desc_not_semtype)}")

# of common elements: 15512
# of elements with semantic type but not in descendants: 1637
# of elements in descendant but don't have semantic type: 2954


In [19]:
def print_info(owl_class):
    if owl_class.P107:
        name = owl_class.P107[0]
    elif owl_class.P108:
        name = owl_class.P108[0]
    elif owl_class.P90:
        name = owl_class.P90[0]
    return (name, owl_class.NHC0[0])

In [27]:
# sample of elements with semantic type Pharmacologic Substance but not in descendants
list(map(print_info, semtype_not_desc))[0:10]

[('Levocarnitine', 'C26657'),
 ('Therapeutic Corticotropin-Releasing Factor', 'C394'),
 ('Turpentine', 'C84233'),
 ('Largetrifoliolious Bugbane Rhizome Supplement', 'C103271'),
 ('D-Serine', 'C61739'),
 ('Delta-Tocopherol', 'C63645'),
 ('Pomegranate Liquid Extract', 'C78866'),
 ('Deferiprone', 'C73030'),
 ('Ferric Ferrocyanide', 'C47532'),
 ('Sulfuric Acid', 'C28191')]

In [28]:
# sample of elements in descendants but not with semantic type Pharmacologic Substance
list(map(print_info, desc_not_semtype))[0:10]

[('Zirconium Zr 89 Desferrioxamine B Monoclonal Antibody huJ591', 'C101133'),
 ('Transfer Factor', 'C93256'),
 ('Teprasiran Sodium', 'C174706'),
 ('Clostridium Tetani Antigen, A', 'C75952'),
 ('Beta-Sitosterol', 'C63662'),
 ('Spiramycin', 'C839'),
 ('Smoking Tobacco', 'C100079'),
 ('Furaprevir', 'C166437'),
 ('Serclutamab', 'C174909'),
 ('Ferric Derisomaltose', 'C171661')]

In [30]:
neg_query_str = """SELECT ?x WHERE {
    ?x <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#P106> "Pharmacologic Substance" .
}"""
neg_results = set(graph.query(neg_query_str))
# C77550 = retired apricoxib
# len = 16857 means failure
neg_results

{(rdflib.term.URIRef('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C84021')),
 (rdflib.term.URIRef('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C148416')),
 (rdflib.term.URIRef('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C83890')),
 (rdflib.term.URIRef('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C29119')),
 (rdflib.term.URIRef('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C63698')),
 (rdflib.term.URIRef('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C77357')),
 (rdflib.term.URIRef('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C61980')),
 (rdflib.term.URIRef('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C80325')),
 (rdflib.term.URIRef('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C154295')),
 (rdflib.term.URIRef('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C162749')),
 (rdflib.term.URIRef('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C77367')),
 (rdflib.term.URIRef('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#