In [1]:
foodon_parsing = True

In [2]:
import csv

from pathlib import Path

from owlready2 import *

import pandas as pd



In [3]:
# set up out path
data_p = Path("../../ontologies")

out_p = data_p / "out/"
out_p.mkdir(exist_ok=True)

In [4]:
# set paths
ontology_p = "../../ontologies"

if foodon_parsing:
    # load foodon ontology
    foodon_p   = ontology_p + "/foodon_filtered.owl"
    onto = get_ontology(foodon_p).load()
    node_prefix = "foodon"

    # create write paths
    node_p = out_p / "nodes.csv"
    rel_p  = out_p / "rel.csv"
else:
    # load upper ontology
    # upper_p   = ontology_p + "/test_nourishUpperOntology.owl"
    upper_p   = ontology_p + "/nourishUpperOntology.owl"
    onto = get_ontology(upper_p).load()
    node_prefix = "upper"

    # create write paths
    node_p = out_p / "upper_nodes.csv"
    rel_p  = out_p / "upper_rel.csv"

In [5]:
# create node files
with open(node_p, 'w') as csvfile:
    fieldnames = ['node_id:ID', 'descriptive_label:string[]', 'iri', ':LABEL']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()


with open(rel_p, 'w') as csvfile:
    fieldnames = [':START_ID', ':END_ID', ':TYPE', 'restriction', 'label']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()

In [6]:
def create_transitionary_node(node_type, node_counter):
    # pass
    # AND_00000001
    # OR_00000001
    # BLANK_00000001
    
    node = f"{node_type.upper()}_{node_counter}"
    node_counter = node_counter + 1
    return node, node_counter

def append_node(node, node_p:Path, *, node_type:str=''):
    '''
    input:
        node:
            case 1: owlready2.entity.ThingClass
            case 2: Type: str: node = string name
        node_p: path to node file to append node to
        onto_type: node type for and/or
    '''
    # construct node row
    if isinstance(node, owlready2.entity.ThingClass) or isinstance(node, owlready2.prop.ObjectPropertyClass):
        # ThingClass node
        try:
            node_label = ';'.join(node.label)
        except:
            node_label = ''
        node_line = [str(node), node_label, str(node.iri), node_type]
    elif isinstance(node, str):
        # Case: blank, and/or nodes
        node_line = [node, '', '', node_type]
    else:
        raise ValueError
    # write node to file
    with open(node_p, 'a') as f:
        node_writer = csv.writer(f, delimiter=',')
        node_writer.writerow(node_line)
        
def append_relation(source_id:str, target_id:str,
                    edge_type:str, restriction:str=''):
    try:
        edge_label = ';'.join(edge_type.label)
    except:
        edge_label = ''
        
    # construct relation row
    rel_line = [source_id, target_id, edge_type, restriction,edge_label]
    # write to file
    with open(rel_p, 'a') as f:
        rel_writer = csv.writer(f, delimiter=',')
        rel_writer.writerow(rel_line)

def get_details_of_restriction(res):
    return res.property,res.type,res.value

In [7]:
def parse_logic(unknown_node, known_node, edge_type, restriction_type, and_count, or_count, blank_count, not_count, oo_count, node_prefix):
    match type(unknown_node):
        case owlready2.entity.ThingClass:
            # case: stop rule
            append_node(unknown_node,node_p,node_type="Concept")
            if isinstance(known_node, str) and ("AND" in known_node or "OR" in known_node):
                # AND/OR nodes are the targets of classes in conjunctons
                append_relation(unknown_node, known_node, edge_type, restriction_type)
            else:
                append_relation(known_node, unknown_node, edge_type, restriction_type)
        
        case owlready2.prop.ObjectPropertyClass:
            append_node(unknown_node,node_p,node_type="Property")
            if isinstance(known_node, str) and ("AND" in known_node or "OR" in known_node):
                # AND/OR nodes are the targets of classes in conjunctons
                append_relation(unknown_node, known_node, edge_type, restriction_type)
            else:
                append_relation(known_node, unknown_node, edge_type, restriction_type)

        case owlready2.class_construct.And:
            ## AND Node creation
            and_node, and_count = create_transitionary_node(f'{node_prefix}_AND',and_count)
            append_node(and_node,node_p,node_type='AND')
            # made edge between AND and known
            append_relation(known_node, and_node, edge_type, restriction_type)
            
            # Iterate through AND list
            for connected_node in unknown_node.is_a:
                # make recursion call on connected node
                and_count, or_count, blank_count, not_count, oo_count = parse_logic(connected_node, and_node, "member_of",
                    restriction_type, and_count, or_count, blank_count, not_count, oo_count, node_prefix)
                
        case owlready2.class_construct.Or:
            ## AND Node creation
            or_node, or_count = create_transitionary_node(f'{node_prefix}_OR', or_count)
            append_node(or_node,node_p,node_type='OR')
            # made edge between OR and known
            append_relation(known_node, or_node, edge_type, restriction_type)
            
            # Iterate through OR list
            for connected_node in unknown_node.Classes:
                # make recursion call on connected node
                and_count, or_count, blank_count, not_count, oo_count = parse_logic(connected_node, or_node, "member_of",
                    restriction_type, and_count, or_count, blank_count, not_count, oo_count, node_prefix)

        case owlready2.class_construct.Restriction:
            ## BLANK Node Creatiom
            blank_node, blank_count = create_transitionary_node(f'{node_prefix}_BLANK', blank_count)
            append_node(blank_node, node_p, node_type='BLANK')
            # make edge between known and blank
            append_relation(blank_node, known_node, edge_type, restriction_type)
            
            # Get values out of restriction
            edge_label, restriction, new_unknown_type = get_details_of_restriction(unknown_node)
            
            match restriction:
                case 24: # SOME
                    restriction_name = "SOME"
                case 25: # ONLY
                    restriction_name = "ONLY"
                case 26: # EXACTLY
                    restriction_name = "EXACTLY"
                case 27: # MIN
                    restriction_name = "MIN"
                case 28:
                    restriction_name = "MAX"
                case 29:
                    restriction_name = "VALUE"
                case _:
                    print(restriction)
                    print(unknown_node)
                    raise NotImplementedError
            
            assert isinstance(edge_label, owlready2.prop.ObjectPropertyClass) or \
                    isinstance(edge_label, owlready2.prop.DataPropertyClass), f"{edge_label} {type(edge_label)}"
            and_count, or_count, blank_count, not_count, oo_count = parse_logic(new_unknown_type, blank_node, edge_label,
                restriction_name, and_count, or_count, blank_count, not_count, oo_count, node_prefix)
            
        case owlready2.class_construct.Not:
            ## Not Node Creatiom
            not_node, not_count = create_transitionary_node(f'{node_prefix}_NOT', not_count)
            append_node(not_node, node_p, node_type='NOT')
            # make edge between known and blank
            append_relation(not_node, known_node, edge_type, restriction_type)
            
            # Get values out of restriction
            and_count, or_count, blank_count, not_count, oo_count = parse_logic(unknown_node.Class, not_node, "member_of",
                restriction_type, and_count, or_count, blank_count, not_count, oo_count, node_prefix)
            
        case owlready2.class_construct.OneOf:
            ## OneOf Node creation
            oo_node, oo_count = create_transitionary_node(f'{node_prefix}_OneOf', oo_count)
            append_node(oo_node,node_p,node_type='OneOf')
            # made edge between OneOf and known
            append_relation(known_node, oo_node, edge_type, restriction_type)
            
            # Iterate through AND list
            # print(unknown_node.instances)
            for connected_node in unknown_node.instances:
                # OneOf instance returns list of instances of type OneOf node
                # therefore, we need to temporarily cast connected node to type thing class
                # types.new_class(connected_node.name, (Thing,))
                
                # make recursion call on connected node
                and_count, or_count, blank_count, not_count, oo_count = parse_logic(
                    types.new_class(connected_node.name, (Thing,)), oo_node, 
                    "member_of", restriction_type, and_count, or_count, 
                    blank_count, not_count, oo_count, node_prefix)
            

        case _:
            # print out type
            raise TypeError(f"Unknown type: {type(unknown_node)}")
    return and_count, or_count, blank_count, not_count, oo_count

In [8]:
# main loop
and_count = 0
or_count = 0
blank_count = 0
not_count = 0
oo_count = 0

for c in onto.classes():
    # if c.name == "FOODON_00002396":
    # if c.name == "FOODON_00002403":
    # if c.name == "FOODON_00002114":
    #     break
        
    # add class to node file
    append_node(c,node_p,node_type="Concept")

    # check if equivalence is not empty
    if list(c.equivalent_to):
        for sc in c.equivalent_to:
            and_count, or_count, blank_count, not_count, oo_count = parse_logic(
                sc, c, "equivalent_to", "", and_count, or_count, blank_count, not_count, oo_count, node_prefix)
    elif list(c.is_a):
        for sc in c.is_a:
            and_count, or_count, blank_count, not_count, oo_count = parse_logic(
                sc, c, "is_a", "", and_count, or_count, blank_count, not_count, oo_count, node_prefix)
    else:
        print(":(")

for p in onto.object_properties():

    # add class to node file
    append_node(p,node_p,node_type="Property")

    # check if equivalence is not empty
    if list(p.subclasses()):
        for sp in p.subclasses():
            and_count, or_count, blank_count, not_count, oo_count = parse_logic(
                sp, p, "is_a", "", and_count, or_count, blank_count, not_count, oo_count, node_prefix)

  http://purl.obolibrary.org/obo/FOODON_00002511

  http://purl.obolibrary.org/obo/FOODON_03315876



In [9]:
node_df = pd.read_csv(node_p)
print(node_df.shape)
node_df.drop_duplicates(inplace=True)
node_df.to_csv(node_p,index=False)
node_df.shape

(91653, 4)


(46044, 4)

In [10]:
rel_df = pd.read_csv(rel_p)
print(rel_df.shape)
rel_df.drop_duplicates(inplace=True)
rel_df.to_csv(rel_p,index=False)
rel_df.shape

(59566, 5)


(59566, 5)

In [11]:
rel_df

Unnamed: 0,:START_ID,:END_ID,:TYPE,restriction,label
0,obo.FOODON_00002403,FOODON_AND_0,equivalent_to,,
1,obo.ENVO_00010483,FOODON_AND_0,member_of,,
2,FOODON_BLANK_0,FOODON_AND_0,member_of,,
3,FOODON_BLANK_0,obo.CHEBI_33290,obo.RO_0000087,SOME,has role
4,obo.FOODON_00001002,obo.FOODON_00002403,is_a,,
...,...,...,...,...,...
59561,obo.RO_0000056,obo.OBI_0000312,is_a,,
59562,obo.RO_0000052,obo.RO_0000079,is_a,,
59563,obo.RO_0000052,obo.RO_0000080,is_a,,
59564,obo.RO_0000052,obo.RO_0000081,is_a,,


In [12]:
rel_df.restriction.unique()

array([nan, 'SOME', 'ONLY', 'MIN', 'EXACTLY'], dtype=object)

# DO NOT PASS

## YE HAVE BEEN WARNED

### SERIOUSLY DONT DO IT

#### I SEE YOU DONT LISTEN, GOOD LUCK BRAVE ADVENTURER

In [9]:
c

obo.IAO_0000078

In [10]:
c.equivalent_to

[OneOf([obo.IAO_0000002, obo.IAO_0000120, obo.IAO_0000121, obo.IAO_0000122, obo.IAO_0000123, obo.IAO_0000124, obo.IAO_0000125, obo.IAO_0000423, obo.IAO_0000428])]

In [11]:
type(c.equivalent_to[0])

owlready2.class_construct.OneOf

In [14]:
c.equivalent_to[0].instances

[obo.IAO_0000002,
 obo.IAO_0000120,
 obo.IAO_0000121,
 obo.IAO_0000122,
 obo.IAO_0000123,
 obo.IAO_0000124,
 obo.IAO_0000125,
 obo.IAO_0000423,
 obo.IAO_0000428]

In [15]:
c.iri

'http://purl.obolibrary.org/obo/IAO_0000078'

In [11]:
type(c.equivalent_to[0].instances)

owlready2.util.CallbackList

In [15]:
for a in c.equivalent_to[0].instances:
    pass
a

obo.IAO_0000428

In [19]:
a.name

'IAO_0000428'

In [21]:
type(types.new_class(a.name, (Thing,)))

owlready2.entity.ThingClass

In [23]:
len(list(onto.classes()))

331

In [40]:
set([len(p.is_a) for p in onto.object_properties()])
for p in onto.object_properties():
    if len(p.is_a) > 1:
        break
p

obo.BFO_0000050

In [44]:
p.label

['part of', 'part of', 'part_of']

In [45]:
p.is_a

[owl.ObjectProperty, owl.ObjectProperty]

In [47]:
list(p.subclasses())

[]

In [48]:
set([len(p.is_a) for p in onto.object_properties()])
for p in onto.object_properties():
    if len(list(p.subclasses())) > 1:
        break
p

obo.RO_0000057

In [39]:
p.iri

'http://purl.obolibrary.org/obo/RO_0000057'

In [37]:
p.label

['has participant', 'has_participant']

In [38]:
[(pc, pc.label) for pc in p.subclasses()]

[(obo.OBI_0000293, ['has_specified_input']),
 (obo.OBI_0000299, ['has_specified_output'])]

In [29]:
set([len(list(p.subclasses())) for p in onto.object_properties()])

{0, 1, 2, 4}

In [27]:
list(p.subclasses())

[]

In [13]:
p.is_a[0]

owl.ObjectProperty

In [14]:
c

obo.FOODON_00002396

In [11]:
c.equivalent_to

[Not(obo.RO_0001000.some(obo.FOODON_03411355)) & obo.BFO_0000051.some(obo.CHEBI_18346 | obo.CHEBI_48408)]

In [12]:
type(c.equivalent_to[0].is_a[0])

owlready2.class_construct.Not

In [19]:
c.iri

'http://purl.obolibrary.org/obo/FOODON_00002396'

In [26]:
c.equivalent_to[0].is_a[0]

Not(obo.RO_0001000.some(obo.FOODON_03411355))

In [37]:
c.equivalent_to[0].is_a[0].subclasses()

[]

In [39]:
c.equivalent_to[0].is_a[0].is_a

()

In [43]:
c.equivalent_to[0].is_a[0].__invert__()

Not(Not(obo.RO_0001000.some(obo.FOODON_03411355)))

In [45]:
type(c.equivalent_to[0].is_a[0].Class)

owlready2.class_construct.Restriction

In [21]:
c.equivalent_to[0].is_a[-1].value

obo.CHEBI_18346 | obo.CHEBI_48408

In [22]:
type(c.equivalent_to[0].is_a[-1].value)

owlready2.class_construct.Or

In [29]:
c.equivalent_to[0].is_a[-1].value.Classes[0]

obo.CHEBI_18346

In [6]:
# get types
rel_types = set()
for c in onto.classes():
    # break
    # if c.name == "FOODON_03420103":
    if c.name == "CDNO_0200657":
    # if c.name == "FOODON_00002403":
        break

    if list(c.equivalent_to):
        for sc in c.equivalent_to:
            rel_types.add(type(sc))
            # if isinstance(sc, owlready2.class_construct.Or):
            #     print(c.iri)
    elif list(c.is_a):
        for sc in c.is_a:
            rel_types.add(type(sc))
            # if isinstance(sc, owlready2.class_construct.Or):
            #     print(c.iri)
rel_types
c

obo.CDNO_0200657

In [25]:
c.equivalent_to[-1].is_a[-1]

obo.RO_0000087.some(obo.CHEBI_33290)

In [24]:
c.equivalent_to[-1].is_a[-1].value

obo.CHEBI_33290

In [29]:
c.equivalent_to[0].is_a[-1]

obo.RO_0000052.some(obo.CDNO_0000027 & obo.BFO_0000050.some(obo.BFO_0000040))

In [32]:
type(c.equivalent_to[0].is_a[-1].value)

owlready2.class_construct.And

In [36]:
type(c.equivalent_to[0].is_a[-1].property)

owlready2.prop.ObjectPropertyClass

In [31]:
c

.time:TemporalUnit

In [7]:
for c in onto.classes():
    # if c.name == "FOODON_00002114":
    #     break
    break
print(c.iri)
print(f"object: {c} label: {c.label}")
print(f"equivalent to: {c.equivalent_to}")
print(f"is a: {c.is_a}")

http://purl.obolibrary.org/obo/FOODON_00002403
object: obo.FOODON_00002403 label: ['food material']
equivalent to: [obo.ENVO_00010483 & obo.RO_0000087.some(obo.CHEBI_33290)]
is a: [obo.ENVO_00010483, obo.RO_0000087.some(obo.CHEBI_33290)]


In [8]:
c.equivalent_to

[obo.ENVO_00010483 & obo.RO_0000087.some(obo.CHEBI_33290)]

In [32]:
# and_count, or_count, blank_count = parse_logic(sc, c, "equivalent_to")
and_count = 0
or_count = 0
blank_count = 0
parse_logic(c.equivalent_to[-1], c, "equivalent_to", and_count, or_count, blank_count)

(2, 0, 2)

In [55]:
restriction

NameError: name 'restriction' is not defined

In [9]:
type(c.equivalent_to)

owlready2.entity._EquivalentToList

In [8]:
type(list(c.equivalent_to)[0])

owlready2.class_construct.And

In [8]:
#TODO need to verify against a class with different is_a and equivalent_to
c.equivalent_to[0].get_Classes() # same as doing is_a below
c.equivalent_to[0].get_is_a() # also same as doing is_a

[obo.ENVO_00010483, obo.RO_0000087.some(obo.CHEBI_33290)]

In [9]:
c.equivalent_to[0].is_a

[obo.ENVO_00010483, obo.RO_0000087.some(obo.CHEBI_33290)]

In [10]:
type(c.equivalent_to[0].is_a[0])

owlready2.entity.ThingClass

In [11]:
c.equivalent_to[0].is_a[-1]

obo.RO_0000087.some(obo.CHEBI_33290)

In [12]:
type(c.equivalent_to[0].is_a[-1]) # if type restriction

owlready2.class_construct.Restriction

In [13]:
c.equivalent_to[0].is_a[-1].property # grab property as edge

obo.RO_0000087

In [14]:
c.equivalent_to[0].is_a[-1].value # grab value as node

obo.CHEBI_33290

In [15]:
c.equivalent_to[0].is_a[-1].type #TODO: is this specific to some? if so, what do the other map to?

24

In [16]:
type(list(c.is_a)[0])

owlready2.entity.ThingClass

In [25]:
rel_list = c.is_a
# isinstance(rel_list[-1], owlready2.class_construct.And)
type(rel_list[-1])

owlready2.class_construct.And

In [39]:
len(list(c.equivalent_to))

0

In [10]:
for c in onto.classes():
    if c.name == "FOODON_00002114":
        break
print(c.iri)
print(f"object: {c} label: {c.label}")
print(f"equivalent to: {c.equivalent_to}")
print(f"is a: {c.is_a}")

http://purl.obolibrary.org/obo/FOODON_00002114
object: obo.FOODON_00002114 label: ['pie food product']
equivalent to: []
is a: [obo.FOODON_00001180, obo.FOODON_00002347, obo.FOODON_00002644, obo.BFO_0000051.some(obo.FOODON_03306766) & obo.BFO_0000051.some(obo.FOODON_03306773) & obo.RO_0000086.some(obo.FOODON_00002407)]


In [11]:
[type(sc) for sc in c.is_a]

[owlready2.entity.ThingClass,
 owlready2.entity.ThingClass,
 owlready2.entity.ThingClass,
 owlready2.class_construct.And]

In [14]:
c.is_a[-1]

obo.BFO_0000051.some(obo.FOODON_03306766) & obo.BFO_0000051.some(obo.FOODON_03306773) & obo.RO_0000086.some(obo.FOODON_00002407)

In [15]:
c.is_a[-1].is_a

[obo.BFO_0000051.some(obo.FOODON_03306766),
 obo.BFO_0000051.some(obo.FOODON_03306773),
 obo.RO_0000086.some(obo.FOODON_00002407)]

In [21]:
c.is_a[-1].is_a
[type(sc_restriction) for sc_restriction in c.is_a[-1].is_a]

[owlready2.class_construct.Restriction,
 owlready2.class_construct.Restriction,
 owlready2.class_construct.Restriction]