In [1]:
import owlready2 as owl
ncit = owl.get_ontology('../data/ncit/ncit_20.09d.owl')
ncit.load()
graph = owl.default_world.as_rdflib_graph()



get_ontology("http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#")

In [66]:
# get all classes with Semantic_Type = "Pharmacologic Substance"
query_str = """SELECT ?x WHERE {?x <http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#P106> "Pharmacologic Substance" }"""
results = set(graph.query(query_str))

def parse_uri(uri):
    uri.toPython().split('#')[1]

semtype_ps = set()
for r in results:
    semtype_ps.add(ncit[parse_uri(r[0])])

In [73]:
# get all classes that are descendants of C1909 ("Pharmacologic Substance")
def build_descendants(node, descendants):
    children = node.descendants()
    if len(children) == 0:
        raise Exception(children)
    elif len(children) == 1:
        descendants.add(node)
    else:
        for child in children:
            if child.name != node.name:
                build_descendants(child, descendants)

descendants = set()
build_descendants(ncit.C1909, descendants)        

In [93]:
semtype_not_desc = semtype_ps - descendants
desc_not_semtype = descendants - semtype_ps
print(f"# of common elements: {len(semtype_ps.intersection(descendants))}")
print(f"# of elements with semantic type but not in descendants: {len(semtype_not_desc)}")
print(f"# of elements in descendant but don't have semantic type: {len(desc_not_semtype)}")

# of common elements: 15512
# of elements with semantic type but not in descendants: 1637
# of elements in descendant but don't have semantic type: 2954


In [90]:
def print_info(owl_class):
    if owl_class.P107:
        name = owl_class.P107
    elif owl_class.P108:
        name = owl_class.P108
    elif owl_class.P90:
        name = owl_class.P90[0]
    print(f"{name} {owl_class.NHC0}")

In [92]:
# elements with semantic type Pharmacologic Substance but not in descendants
list(map(print_info, semtype_not_desc))

['Pyran Polymer'] ['C29343']
['Amifampridine'] ['C87424']
['d-Alpha-Tocopherol'] ['C2832']
['Soy Isoflavones'] ['C65152']
['Propisergide'] ['C66490']
['Etanidazole'] ['C1092']
['Levomenol'] ['C80904']
['Magnesium Malate'] ['C87336']
['Aluminum Sesquichlorohydrate'] ['C142918']
['Copal'] ['C74358']
['Quercetin'] ['C792']
['Cathepsin-Activatable Cy5 Fluorescent Imaging Probe LUM015'] ['C105147']
['Anti-CD38 Monoclonal Antibody'] ['C155321']
['Alpha-Tocopherol'] ['C74960']
['PS IL-10/Placebo'] ['C29336']
['Sodium Ferulate'] ['C84155']
['DOTMP'] ['C1496']
['Bisbentiamine'] ['C81630']
['Elm'] ['C65510']
['Paricalcitol'] ['C38693']
['Rutin'] ['C819']
['Dendritic Cell Vaccine'] ['C28310']
['ALVAC-CEA Vaccine'] ['C1648']
['Coniine Hydrochloride'] ['C87478']
['dl-alpha-Tocopherol'] ['C74578']
['Ferric Phosphate'] ['C83714']
['Probiotic Acidophilus'] ['C79797']
['Starch, Rice'] ['C80780']
['Telomerase:540-548 Peptide'] ['C2813']
['Visilizumab'] ['C2687']
['OPCs/Green Tea/Spirullina/Curcumin/Antr

['N-oleyl-phosphatidylethanolamine/Epigallocatechin Gallate Supplement'] ['C168614']
['Antihemophilic Factor, Human Recombinant'] ['C81123']
['Ferrous Iodide'] ['C87333']
['Orphan Drug'] ['C48201']
['Efaproxiral'] ['C37453']
['Ancestim'] ['C1646']
['Pantethine'] ['C87342']
['Aluminum Stearate'] ['C61631']
['Erythrocytes'] ['C23455']
['Human Cells, Tissues, and Cellular and Tissue-Based Products from Musculoskeletal Tissue'] ['C133364']
['Dibromannitol'] ['C28983']
['Gamma Interferon-SCH'] ['C29066']
['Lerociclib'] ['C170109']
['Niu Xi'] ['C129587']
['Pediatric Liquid Dosage Form'] ['C69042']
['Autologous-Cell Leukemia Vaccine'] ['C2023']
['Pre-1938 Product'] ['C99176']
['Iodine I-123'] ['C92223']
['Asparagine'] ['C29607']
['Lactobacillus rhamnosus/L. jensenii/L. crispatus/L. gasseri Oral Supplement'] ['C113645']
['pan FGFR Inhibitor PRN1371'] ['C125188']
['Recombinant Viral Vaccine'] ['C1572']
['Bryostatin 1'] ['C1026']
['2-Hydroxyethyl Methacrylate'] ['C47791']
['Recombinant CD40-Liga

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [95]:
# elements in descendants but not with semantic type Pharmacologic Substance
list(map(print_info, desc_not_semtype))

['Pipoxolan Hydrochloride'] ['C170323']
['Elbasvir'] ['C166892']
['CA19-9 Antigen'] ['C327']
['Futuximab'] ['C152971']
['Meralluride'] ['C166688']
['Drocinonide'] ['C166963']
['Bordetella pertussis Antigen, D'] ['C75775']
['Monoclonal Antibody 48.7 F(ab)'] ['C29200']
['Anti-BCMA/CD3 BiTE Antibody AMG 701'] ['C147028']
['Limiglidole'] ['C170123']
['Tigemonam Dicholine'] ['C152620']
['Elismetrep '] ['C169937']
['Sodium Phosphate, Radioactive'] ['C2126']
['Penimepicycline'] ['C166691']
['Alicapistat'] ['C171868']
['Diazooxonorleucine'] ['C427']
['Coblopasvir'] ['C166912']
['Donanemab'] ['C166484']
['Mycobacterium w'] ['C70969']
['Vanilloid'] ['C1955']
['NA17-A Antigen'] ['C37450']
['Mercaptomerin'] ['C166828']
['Fluperolone Acetate'] ['C169995']
['Bordetella pertussis Antigen, E'] ['C75776']
['Monoclonal Antibody 528'] ['C29201']
['Pegapamodutide'] ['C170294']
['Pheneticillin'] ['C170316']
['Igmesine'] ['C170052']
['Betahistine Hydrochloride'] ['C73794']
['Yttrium Y 90 Glass Microspheres'

['Potassium Chloride K-42'] ['C174608']
['Attenuated Corynebacterium granulosum'] ['C1644']
['Gallium Ga 68-labeled RM2'] ['C121773']
['Hydrotalcite'] ['C166469']
['Cinodine Gamma (Sub 2) Hydrochloride'] ['C90682']
['Solithromycin'] ['C152386']
['Influenza A Virus Antigen, G'] ['C75686']
['Frovocimab'] ['C175068']
['Pozelimab'] ['C166423']
['Firivumab '] ['C169982']
['Belumosudil Mesylate'] ['C175056']
['Runcaciguat'] ['C166842']
['Oxycodegol Phosphate'] ['C170276']
['Relebactam Monohydrate'] ['C152176']
['Sustained-release Mitomycin C Hydrogel Formulation UGN-101'] ['C126642']
['Neisseria meningitidis Group W-135 Capsular Polysaccharide Antigen'] ['C77760']
['Nitroglycerin/Sodium Citrate/Ethanol Solution'] ['C124659']
['Florbetapir F-18'] ['C171884']
['Dovanvetmab'] ['C171832']
['Bremelanotide Acetate'] ['C171917']
['Briobacept'] ['C97707']
['Meciadanol'] ['C166571']
['Urelumab'] ['C62449']
['Actaplanin'] ['C171812']
['GM2-KLH Vaccine/QS21'] ['C11571']
['Streptovarycin'] ['C152439']
[

['Aselizumab'] ['C171823']
['Enmetazobactam'] ['C171696']
['Streptococcus Pneumoniae Antigen, Y'] ['C75850']
['Folitixorin Calcium'] ['C170001']
['Anti-CD45 Monoclonal Antibody'] ['C70798']
['Suvecaltamide'] ['C174710']
['Cinfenoac'] ['C171849']
['Morinda Citrifolia Fruit Extract'] ['C26662']
['Umbilical Cord Blood-Derived Mesenchymal Stem Cells'] ['C82688']
['Epaminurad'] ['C166441']
['In 111 Monoclonal Antibody M170'] ['C2533']
['Venglustat Malate'] ['C171783']
['Penicillin V Benzathine'] ['C166695']
['Catridecacog '] ['C174766']
['Olpasiran'] ['C175176']
['Influenza B Virus Antigen, A'] ['C75705']
['Dostarlimab'] ['C126799']
['Monoclonal Antibody A27.15'] ['C2447']
['Ensifentrine'] ['C166674']
['Olaflur'] ['C175173']
['Oleclumab'] ['C123914']
['Hydrargaphen'] ['C170045']
['Astegolimab'] ['C171824']
['Sibofimloc'] ['C171697']
['Streptococcus pneumoniae Type 11A Capsular Polysaccharide Antigen'] ['C77769']
['Carlumab'] ['C71010']
['Indoximod'] ['C71535']
['Cenobamate'] ['C174837']
['C

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,