# Comments provided by Garrett

Any comment prefixed with `GM` was added by Garrett.

In [1]:
import csv

from pathlib import Path

from owlready2 import *

import pandas as pd

#GM 
#FYI the pink warning below can be solved by:
#pip uninstall owlready2
#pip install cython (requires c compiler)
#pip install owlready2



In [2]:
# set up out path
data_p = Path("../../ontologies")

out_p = data_p / "out/"
out_p.mkdir(exist_ok=True)

# Both files are (re)created here with only a header row; rows are appended
# later. The header names (":ID", ":LABEL", ":START_ID", ...) look like
# neo4j bulk-import headers -- TODO confirm the intended consumer.
#
# newline='' is what the csv module requires of its file objects (otherwise
# every row ends in "\r\r\n" on Windows); the explicit utf-8 encoding keeps the
# files readable by pd.read_csv regardless of the platform's default encoding.
node_p = out_p / "nodes.csv"
with open(node_p, 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['node_id:ID', 'descriptive_label:string[]', 'iri', ':LABEL']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()

rel_p  = out_p / "rel.csv"
with open(rel_p, 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = [':START_ID', ':END_ID', ':TYPE', 'restriction', 'label']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()

In [3]:
# locations of the ontology inputs (kept as plain strings)
ontology_p = "../../ontologies"

foodon_p = "/".join([ontology_p, "foodon_filtered.owl"])

In [4]:
# load ontology: build the handle for the filtered FoodOn file first, then
# keep whatever .load() hands back as the working ontology object
foodon_handle = get_ontology(foodon_p)
onto = foodon_handle.load()

In [5]:
def create_transitionary_node(node_type, node_counter):
    """Build the id of a synthetic node and advance its counter.

    The id is the upper-cased ``node_type`` and the current counter joined by
    an underscore (e.g. ``AND_0``, ``BLANK_12``).

    returns:
        (node id, node_counter + 1)
    """
    prefix = node_type.upper()
    return "{}_{}".format(prefix, node_counter), node_counter + 1

def append_node(node, node_p:Path, *, node_type:str=''):
    '''
    Append one node row (id, label, iri, type) to the node CSV file.

    input:
        node:
            case 1: owlready2.entity.ThingClass
            case 2: Type: str: node = string name (blank, and/or/not nodes)
        node_p: path to node file to append node to
        node_type: value written to the last column of the row
    raises:
        ValueError: node is neither a str nor a ThingClass
    '''
    # construct node row
    if isinstance(node, str):
        # Case: blank, and/or nodes -- a bare string id has no label and no iri
        node_line = [node, '', '', node_type]
    elif isinstance(node, owlready2.entity.ThingClass):
        # ThingClass node
        try:
            node_label = ';'.join(node.label)
        except Exception:
            # best effort: a class whose labels cannot be joined is still
            # written, just with an empty label column (Exception rather than
            # a bare except so Ctrl-C / SystemExit are not swallowed)
            node_label = ''
        node_line = [str(node), node_label, str(node.iri), node_type]
    else:
        raise ValueError(f"unsupported node type: {type(node)!r}")
    # write node to file; newline='' is required by the csv module and utf-8
    # matches what pd.read_csv assumes when the file is read back
    with open(node_p, 'a', newline='', encoding='utf-8') as f:
        node_writer = csv.writer(f, delimiter=',')
        node_writer.writerow(node_line)
        
def append_relation(source_id, target_id,
                    edge_type, restriction:str='', *, rel_path=None):
    '''
    Append one relation row (start, end, type, restriction, label) to the
    relation CSV file.

    input:
        source_id: start of the edge; a string id or any object -- the csv
            writer stringifies non-string values
        target_id: end of the edge; same conventions as source_id
        edge_type: either a plain string edge name, or an object exposing a
            ``label`` list (callers pass ontology properties) whose labels
            are joined with ';' into the label column
        restriction: restriction name for the edge, '' when there is none
        rel_path: file to append to; defaults to the module-level ``rel_p``
    '''
    try:
        edge_label = ';'.join(edge_type.label)
    except Exception:
        # plain-string edge types have no .label; leave the column empty
        edge_label = ''

    if rel_path is None:
        rel_path = rel_p

    # construct relation row
    rel_line = [source_id, target_id, edge_type, restriction, edge_label]
    # write to file; newline='' is required by the csv module and utf-8
    # matches what pd.read_csv assumes when the file is read back
    with open(rel_path, 'a', newline='', encoding='utf-8') as f:
        rel_writer = csv.writer(f, delimiter=',')
        rel_writer.writerow(rel_line)

def get_details_of_restriction(res):
    """Unpack a restriction into the tuple ``(property, type, value)``."""
    prop, kind, filler = res.property, res.type, res.value
    return prop, kind, filler

In [6]:
def parse_logic(unknown_node, known_node, edge_type, restriction_type, and_count, or_count, blank_count, not_count):
    '''
    Recursively write the node and relation rows for the class expression
    ``unknown_node``, which hangs off the already-written ``known_node``.

    Dispatch is on the exact ``type(unknown_node)``:
        ThingClass:  stop rule. Writes the class as a "Concept" node and one
                     relation. If known_node is a string containing "AND" or
                     "OR" the edge runs class -> known_node, otherwise
                     known_node -> class.
        And / Or:    creates an AND_n / OR_n node, writes known_node -> that
                     node, then recurses into every operand with edge type
                     "member_of".
        Restriction: creates a BLANK_n node, writes BLANK_n -> known_node,
                     then recurses into the restriction's value using the
                     restriction's property as edge type and the restriction
                     name (SOME, ONLY, ...) as restriction_type.
        Not:         creates a NOT_n node, writes NOT_n -> known_node, then
                     recurses into the negated class with edge type
                     "member_of".

    input:
        unknown_node: class expression still to be expanded
        known_node: node the expression is attached to (a class object or a
            synthetic node id string)
        edge_type: edge type for the relation written at this level
        restriction_type: restriction name for the relation written at this level
        and_count, or_count, blank_count, not_count: next free suffix for
            each kind of synthetic node
    returns:
        (and_count, or_count, blank_count, not_count) after any increments
    raises:
        TypeError: unknown_node is of none of the handled types
        NotImplementedError: restriction type code outside 24..29
        AssertionError: the restriction's property is not an ObjectPropertyClass

    Nodes are written to the module-level ``node_p``; relations go through
    ``append_relation``.
    '''
    match type(unknown_node):
        case owlready2.entity.ThingClass:
            # case: stop rule
            append_node(unknown_node,node_p,node_type="Concept")
            if isinstance(known_node, str) and ("AND" in known_node or "OR" in known_node):
                # AND/OR nodes are the targets of classes in conjunctions
                append_relation(unknown_node, known_node, edge_type, restriction_type)
            else:
                append_relation(known_node, unknown_node, edge_type, restriction_type)
        case owlready2.class_construct.And:
            ## AND node creation
            and_node, and_count = create_transitionary_node('AND',and_count)
            append_node(and_node,node_p,node_type='AND')
            # make edge between known and AND
            append_relation(known_node, and_node, edge_type, restriction_type)
            
            # Iterate through AND list
            # NOTE(review): operands are read via .is_a here but via .Classes in
            # the Or branch -- confirm both yield the operand list
            for connected_node in unknown_node.is_a:
                # make recursion call on connected node
                and_count, or_count, blank_count, not_count = parse_logic(connected_node, and_node, "member_of", restriction_type,
                                                               and_count, or_count, blank_count, not_count)
                
        case owlready2.class_construct.Or:
            ## OR node creation
            or_node, or_count = create_transitionary_node('OR', or_count)
            append_node(or_node,node_p,node_type='OR')
            # make edge between known and OR
            append_relation(known_node, or_node, edge_type, restriction_type)
            
            # Iterate through OR list
            for connected_node in unknown_node.Classes:
                # make recursion call on connected node
                and_count, or_count, blank_count, not_count = parse_logic(connected_node, or_node, "member_of", restriction_type,
                                                               and_count, or_count, blank_count, not_count)

        case owlready2.class_construct.Restriction:
            ## BLANK node creation
            blank_node, blank_count = create_transitionary_node('BLANK', blank_count)
            append_node(blank_node, node_p, node_type='BLANK')
            # make edge between blank and known (note: blank is the source here)
            append_relation(blank_node, known_node, edge_type, restriction_type)
            
            # Get values out of restriction
            edge_label, restriction, new_unknown_type = get_details_of_restriction(unknown_node)
            
            # Translate the integer restriction type code into its name
            # (presumably owlready2's SOME/ONLY/... constants -- TODO confirm)
            match restriction:
                case 24: # SOME
                    restriction_name = "SOME"
                case 25: # ONLY
                    restriction_name = "ONLY"
                case 26: # EXACTLY
                    restriction_name = "EXACTLY"
                case 27: # MIN
                    restriction_name = "MIN"
                case 28: # MAX
                    restriction_name = "MAX"
                case 29: # VALUE
                    restriction_name = "VALUE"
                case _:
                    # unknown code: show what was encountered before failing
                    print(restriction)
                    print(unknown_node)
                    raise NotImplementedError
            
            # only object properties are supported as restriction edges
            assert isinstance(edge_label, owlready2.prop.ObjectPropertyClass), f"{edge_label} {type(edge_label)}"
            and_count, or_count, blank_count, not_count = parse_logic(new_unknown_type, blank_node, edge_label, restriction_name,
                                                           and_count, or_count, blank_count, not_count)
            
        case owlready2.class_construct.Not:
            ## NOT node creation
            not_node, not_count = create_transitionary_node('NOT', not_count)
            append_node(not_node, node_p, node_type='NOT')
            # make edge between NOT and known (note: NOT is the source here)
            append_relation(not_node, known_node, edge_type, restriction_type)
            
            # Recurse into the negated class
            and_count, or_count, blank_count, not_count = parse_logic(unknown_node.Class, not_node, "member_of", restriction_type,
                                                           and_count, or_count, blank_count, not_count)           
            

        case _:
            # unsupported expression type
            raise TypeError
    return and_count, or_count, blank_count, not_count

In [7]:
# skeleton loop: write every ontology class as a node, then expand its axioms
and_count = or_count = blank_count = not_count = 0
for onto_class in onto.classes():
    # add class to node file
    append_node(onto_class, node_p, node_type="Concept")

    # Pick the axiom list to expand. equivalent_to wins when it is non-empty.
    # NOTE(review): this means is_a axioms are never written for a class that
    # also has equivalent_to axioms -- confirm that is intended.
    if list(onto_class.equivalent_to):
        axioms, edge_name = onto_class.equivalent_to, "equivalent_to"
    elif list(onto_class.is_a):
        axioms, edge_name = onto_class.is_a, "is_a"
    else:
        print(":(")
        continue

    for axiom in axioms:
        and_count, or_count, blank_count, not_count = parse_logic(
            axiom, onto_class, edge_name, "",
            and_count, or_count, blank_count, not_count,
        )

  http://purl.obolibrary.org/obo/FOODON_00002511

  http://purl.obolibrary.org/obo/FOODON_03315876



In [8]:
# Re-read the node file, report its raw size, rewrite it without duplicate
# rows, and show the de-duplicated size as the cell output
node_df = pd.read_csv(node_p)
print(node_df.shape)
node_df = node_df.drop_duplicates()
node_df.to_csv(node_p, index=False)
node_df.shape

(91576, 4)


(45979, 4)

In [9]:
# Re-read the relation file, report its raw size, rewrite it without duplicate
# rows, and show the de-duplicated size as the cell output
rel_df = pd.read_csv(rel_p)
print(rel_df.shape)
rel_df = rel_df.drop_duplicates()
rel_df.to_csv(rel_p, index=False)
rel_df.shape

(59554, 5)


(59554, 5)

In [10]:
# Display the relation table as the cell output
rel_df

Unnamed: 0,:START_ID,:END_ID,:TYPE,restriction,label
0,obo.FOODON_00002403,AND_0,equivalent_to,,
1,obo.ENVO_00010483,AND_0,member_of,,
2,BLANK_0,AND_0,member_of,,
3,BLANK_0,obo.CHEBI_33290,obo.RO_0000087,SOME,has role
4,obo.FOODON_00001002,obo.FOODON_00002403,is_a,,
...,...,...,...,...,...
59549,obo.UO_0010047,obo.UO_0000095,is_a,,
59550,obo.UO_0010051,obo.UO_1010051,is_a,,
59551,obo.UO_1010051,obo.UO_0000111,is_a,,
59552,.time:Duration,obo.BFO_0000038,is_a,,


In [11]:
# Distinct values found in the "restriction" column
rel_df["restriction"].unique()

array([nan, 'SOME', 'ONLY', 'MIN', 'EXACTLY'], dtype=object)

# DO NOT PASS

## YE HAVE BEEN WARNED

### SERIOUSLY DONT DO IT

#### I SEE YOU DONT LISTEN, GOOD LUCK BRAVE ADVENTURER

In [None]:
#GM
#section below deleted