In [1]:
import owlready2 as owl
ncit = owl.get_ontology('../data/ncit/ncit_20.09d.owl')
ncit.load()
graph = owl.default_world.as_rdflib_graph()



get_ontology("http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#")

In [66]:
# get all classes with Semantic_Type = "Pharmacologic Substance"
query_str = """SELECT ?x WHERE {?x <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#P106> "Pharmacologic Substance" }"""
results = set(graph.query(query_str))

def parse_uri(uri):
    uri.toPython().split('#')[1]

semtype_ps = set()
for r in results:
    semtype_ps.add(ncit[parse_uri(r[0])])

In [73]:
# get all classes that are descendants of C1909 ("Pharmacologic Substance")
def build_descendants(node, descendants):
    children = node.descendants()
    if len(children) == 0:
        raise Exception(children)
    elif len(children) == 1:
        descendants.add(node)
    else:
        for child in children:
            if child.name != node.name:
                build_descendants(child, descendants)

descendants = set()
build_descendants(ncit.C1909, descendants)        

In [93]:
semtype_not_desc = semtype_ps - descendants
desc_not_semtype = descendants - semtype_ps
print(f"# of common elements: {len(semtype_ps.intersection(descendants))}")
print(f"# of elements with semantic type but not in descendants: {len(semtype_not_desc)}")
print(f"# of elements in descendant but don't have semantic type: {len(desc_not_semtype)}")

# of common elements: 15512
# of elements with semantic type but not in descendants: 1637
# of elements in descendant but don't have semantic type: 2954


In [90]:
def print_info(owl_class):
    if owl_class.P107:
        name = owl_class.P107
    elif owl_class.P108:
        name = owl_class.P108
    elif owl_class.P90:
        name = owl_class.P90[0]
    print(f"{name} {owl_class.NHC0}")

In [92]:
# 
list(map(print_info, semtype_not_desc))

['Pyran Polymer'] ['C29343']
['Amifampridine'] ['C87424']
['d-Alpha-Tocopherol'] ['C2832']
['Soy Isoflavones'] ['C65152']
['Propisergide'] ['C66490']
['Etanidazole'] ['C1092']
['Levomenol'] ['C80904']
['Magnesium Malate'] ['C87336']
['Aluminum Sesquichlorohydrate'] ['C142918']
['Copal'] ['C74358']
['Quercetin'] ['C792']
['Cathepsin-Activatable Cy5 Fluorescent Imaging Probe LUM015'] ['C105147']
['Anti-CD38 Monoclonal Antibody'] ['C155321']
['Alpha-Tocopherol'] ['C74960']
['PS IL-10/Placebo'] ['C29336']
['Sodium Ferulate'] ['C84155']
['DOTMP'] ['C1496']
['Bisbentiamine'] ['C81630']
['Elm'] ['C65510']
['Paricalcitol'] ['C38693']
['Rutin'] ['C819']
['Dendritic Cell Vaccine'] ['C28310']
['ALVAC-CEA Vaccine'] ['C1648']
['Coniine Hydrochloride'] ['C87478']
['dl-alpha-Tocopherol'] ['C74578']
['Ferric Phosphate'] ['C83714']
['Probiotic Acidophilus'] ['C79797']
['Starch, Rice'] ['C80780']
['Telomerase:540-548 Peptide'] ['C2813']
['Visilizumab'] ['C2687']
['OPCs/Green Tea/Spirullina/Curcumin/Antr

['N-oleyl-phosphatidylethanolamine/Epigallocatechin Gallate Supplement'] ['C168614']
['Antihemophilic Factor, Human Recombinant'] ['C81123']
['Ferrous Iodide'] ['C87333']
['Orphan Drug'] ['C48201']
['Efaproxiral'] ['C37453']
['Ancestim'] ['C1646']
['Pantethine'] ['C87342']
['Aluminum Stearate'] ['C61631']
['Erythrocytes'] ['C23455']
['Human Cells, Tissues, and Cellular and Tissue-Based Products from Musculoskeletal Tissue'] ['C133364']
['Dibromannitol'] ['C28983']
['Gamma Interferon-SCH'] ['C29066']
['Lerociclib'] ['C170109']
['Niu Xi'] ['C129587']
['Pediatric Liquid Dosage Form'] ['C69042']
['Autologous-Cell Leukemia Vaccine'] ['C2023']
['Pre-1938 Product'] ['C99176']
['Iodine I-123'] ['C92223']
['Asparagine'] ['C29607']
['Lactobacillus rhamnosus/L. jensenii/L. crispatus/L. gasseri Oral Supplement'] ['C113645']
['pan FGFR Inhibitor PRN1371'] ['C125188']
['Recombinant Viral Vaccine'] ['C1572']
['Bryostatin 1'] ['C1026']
['2-Hydroxyethyl Methacrylate'] ['C47791']
['Recombinant CD40-Liga

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [86]:
desc_not_semtype

{ncit_20.09d.C170323,
 ncit_20.09d.C166892,
 ncit_20.09d.C327,
 ncit_20.09d.C152971,
 ncit_20.09d.C166688,
 ncit_20.09d.C166963,
 ncit_20.09d.C75775,
 ncit_20.09d.C29200,
 ncit_20.09d.C147028,
 ncit_20.09d.C170123,
 ncit_20.09d.C152620,
 ncit_20.09d.C169937,
 ncit_20.09d.C2126,
 ncit_20.09d.C166691,
 ncit_20.09d.C171868,
 ncit_20.09d.C427,
 ncit_20.09d.C166912,
 ncit_20.09d.C166484,
 ncit_20.09d.C70969,
 ncit_20.09d.C1955,
 ncit_20.09d.C37450,
 ncit_20.09d.C166828,
 ncit_20.09d.C169995,
 ncit_20.09d.C75776,
 ncit_20.09d.C29201,
 ncit_20.09d.C170294,
 ncit_20.09d.C170316,
 ncit_20.09d.C170052,
 ncit_20.09d.C73794,
 ncit_20.09d.C38679,
 ncit_20.09d.C169957,
 ncit_20.09d.C170251,
 ncit_20.09d.C118877,
 ncit_20.09d.C166501,
 ncit_20.09d.C166876,
 ncit_20.09d.C2007,
 ncit_20.09d.C364,
 ncit_20.09d.C166736,
 ncit_20.09d.C169774,
 ncit_20.09d.C166485,
 ncit_20.09d.C39782,
 ncit_20.09d.C174870,
 ncit_20.09d.C170007,
 ncit_20.09d.C75777,
 ncit_20.09d.C29202,
 ncit_20.09d.C152140,
 ncit_20.09d.C

In [87]:
ncit.C170323 in semtype_not_desc

False