In [22]:
# Ontology Parsing to the Graph

In [2]:
import csv

from pathlib import Path

from owlready2 import *

import pandas as pd

In [3]:
#Counters and Constants
or_cnt = 1
and_cnt = 1
blank_cnt = 1

In [4]:
# set up out path
data_p = Path("../../ontologies")

out_p = data_p / "out/"
out_p.mkdir(exist_ok=True)

node_p = out_p / "nodes.csv"
with open(node_p, 'w') as csvfile:
    fieldnames = [':ID', 'descriptive_label:string[]', 'iri', ':LABEL']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()

rel_p  = out_p / "rel.csv"
with open(rel_p, 'w') as csvfile:
    fieldnames = [':START_ID', ':END_ID', ':TYPE', 'restriction']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()

In [5]:
def create_transitionary_node(node_type, node_counter):
    # pass
    # AND_00000001
    # OR_00000001
    # BLANK_00000001
    
    node = f"{node_type.upper()}_{node_counter}"
    node_counter = node_counter + 1
    return node, node_counter

def append_node(node, node_p:Path, *, node_type:str=''):
    '''
    input:
        node:
            case 1: owlready2.entity.ThingClass
            case 2: Type: str: node = string name
        node_p: path to node file to append node to
        onto_type: node type for and/or
    '''
    # construct node row
    if isinstance(node, owlready2.entity.ThingClass):
        # ThingClass node
        try:
            node_label = ';'.join(node.label)
        except:
            node_label = ''
        node_line = [str(node.name), node_label, str(node.iri), node_type]
    elif isinstance(node, str):
        # Case: blank, and/or nodes
        node_line = [node, '', '', node_type]
    else:
        raise ValueError
    # write node to file
    with open(node_p, 'a') as f:
        node_writer = csv.writer(f, delimiter=',')
        node_writer.writerow(node_line)
        
def append_relation(source_id:str, target_id:str,
                    edge_type:str, *, restriction:str=''):
    # construct relation row
    rel_line = [source_id, target_id, edge_type, restriction]
    # write to file
    with open(rel_p, 'a') as f:
        rel_writer = csv.writer(f, delimiter=',')
        rel_writer.writerow(rel_line)

### foodon.owl

In [6]:
#!pip install owlready2

In [7]:
# set paths
ontology_p = "../../ontologies"

foodon_p   = ontology_p + "/foodon_filtered.owl"

In [8]:
# IRIS["https://raw.githubusercontent.com/FoodOntology/foodon/master/foodon.owl#obo"]

In [9]:
# load ontology
onto = get_ontology(foodon_p).load()

##### Exploring foodonto classes

In [10]:
## Get the namespace for testing
obo = get_namespace("http://purl.obolibrary.org/obo/")

In [11]:
def get_details_of_restriction(res):
#     print("\t\tEdge Label:",res.property)
#     print("\t\tType:",res.type)
#     print("\t\tBlank connected Node:",res.value)
    return res.property,res.type,res.value

In [12]:

def traverse_is_a_or_equivalent_to(parent,child_class,child_relation,and_cnt,or_cnt,blank_cnt):
    #print("\tParent Entity:",parent)
    append_node(child_class,node_p,node_type='THING_CLASS')
    
    if type(parent)==owlready2.class_construct.And:
        
        ## AND Node creation
        and_node,and_cnt = create_transitionary_node('AND',and_cnt)
        append_node(and_node,node_p,node_type='AND')
        append_relation(child_class,and_node,child_relation)
        
        for connected_node in parent.is_a:
            if type(connected_node)==owlready2.class_construct.Restriction:
                #print("\tBlank Node Connected Entity:",connected_node)
                
                ## Blank Node creation
                blank_node,blank_cnt = create_transitionary_node('BLANK',blank_cnt)
                append_node(blank_node,node_p,node_type='BLANK')
                
                # Blank to AND
                append_relation(and_node,blank_node,'member_of')
                edge_label,restriction,target_node = get_details_of_restriction(connected_node)
                #assert restriction == 24
                
                # Blank to Final Node
                append_node(target_node,node_p,node_type='THING_CLASS')
                append_relation(blank_node,target_node,edge_label,restriction=restriction)
            
            elif isinstance(connected_node,owlready2.entity.ThingClass):
                #print("\tAND Connected Node:",connected_node)
                append_node(connected_node,node_p,node_type='THING_CLASS')
                append_relation(and_node,connected_node,'member_of')
            else:
                raise TypeError("Type Erro with connected node:",connected_node.name)


    elif type(parent)==owlready2.class_construct.Or:
        
        ## OR Node creation
        or_node,or_cnt = create_transitionary_node('OR',or_cnt)
        append_node(or_node,node_p,node_type='OR')
        
        # Child to OR
        append_relation(child_class,or_node,child_relation)
        
        for connected_node in parent.is_a:
            if type(connected_node)==owlready2.class_construct.Restriction:
                #print("\tBlank Node Connected Entity:",connected_node)
                
                ## Blank Node creation
                blank_node,blank_cnt = create_transitionary_node('BLANK',blank_cnt)
                append_node(blank_node,node_p,node_type='BLANK')
                
                # Blank to OR
                append_relation(or_node,blank_node,'member_of')
                edge_label,restriction,target_node = get_details_of_restriction(connected_node)
                #assert restriction == 24
                
                # Blank to Final Node
                append_node(target_node,node_p,node_type='THING_CLASS')
                append_relation(blank_node,target_node,edge_label,restriction=restriction)
            
            elif isinstance(connected_node,owlready2.entity.ThingClass):
                #print("\tOR Connected Node:",connected_node)
                append_node(connected_node,node_p,node_type='THING_CLASS')
                append_relation(or_node,connected_node,'member_of')
            else:
                raise TypeError("Type Erro with connected node:",connected_node.name)
                
    elif isinstance(parent,owlready2.class_construct.Restriction):
        blank_node,blank_cnt = create_transitionary_node('BLANK',blank_cnt)
        append_node(blank_node,node_p,node_type='BLANK')
        append_relation(child_class,blank_node,child_relation)
        edge_label,restriction,target_node = get_details_of_restriction(parent)
        #assert restriction == 24
        append_relation(blank_node,target_node,edge_label,restriction=restriction)
        
    elif isinstance(parent,owlready2.entity.ThingClass):
        append_node(parent,node_p,node_type='THING_CLASS')
        append_relation(child_class,parent,child_relation)
        

In [13]:
def class_details(klass):
    #print("label:",klass.label)
    if list(klass.equivalent_to):
        #print("equivalent_to:",list(klass.equivalent_to))
        for parent in klass.equivalent_to:
            traverse_is_a_or_equivalent_to(parent,klass,'equivalent_to',and_cnt,or_cnt,blank_cnt)
    else:
        #print("is_a:",list(klass.is_a))
        for parent in klass.is_a:
            traverse_is_a_or_equivalent_to(parent,klass,'is_a',and_cnt,or_cnt,blank_cnt)

In [14]:
#c=obo.FOODON_00002114 #is_a example
#c=obo.FOODON_00002403 #is_equivalent example
#c=obo.FOODON_00002403

In [15]:
#class_details(c)


In [16]:
%%time
cnt = 0
for klass in onto.classes(): 
#     if cnt==10:
#         break
    #if re.search('FOODON', klass.name):
    try:
        class_details(klass)
    except:
        cnt+=1
print("Broken Classes:",cnt)

  http://purl.obolibrary.org/obo/FOODON_00002511

  http://purl.obolibrary.org/obo/FOODON_03315876



Broken Classes: 1525
CPU times: user 15.3 s, sys: 12.1 s, total: 27.3 s
Wall time: 27.6 s


In [17]:
klass.equivalent_to

[]

In [18]:
klass.iri

'https://www.w3.org/TR/owl-time/#time:TemporalUnit'

In [19]:
node_df = pd.read_csv(node_p)
node_df.drop_duplicates(inplace=True)
node_df.shape

(32031, 4)

In [20]:
rel_df = pd.read_csv(rel_p)
rel_df.drop_duplicates(inplace=True)
rel_df.shape

(42913, 4)

In [21]:
def class_details(klass):
    print("label:",klass.label)
    if list(klass.equivalent_to):
        print("equivalent_to:",list(klass.equivalent_to))
        for parent in klass.equivalent_to:
            print("\tParent Entity:",parent)
            if type(parent)==owlready2.class_construct.And:
                    for connected_node in parent.is_a:
                        if type(connected_node)==owlready2.class_construct.Restriction:
                            print("\tBlank Node Connected Entity:",connected_node)
                            get_details_of_restriction(connected_node)
                        else:
                            print("\tAnd Connected Node:",connected_node)
    else:
        print("is_a:",list(klass.is_a))
        for parent in klass.is_a:
            print("\tParent Entity:",parent)
            if type(parent)==owlready2.class_construct.And:
                for connected_node in parent.is_a:
                    if type(connected_node)==owlready2.class_construct.Restriction:
                        print("\tBlank Node Connected Entity:",connected_node)
                        get_details_of_restriction(connected_node)
                    else:
                        print("\tAnd Connected Node:",connected_node)