In [26]:
import gzip
import os
import io
from IPython.display import clear_output

class OntologyBranch:
    __slots__ = ["parents", "children", "name", "lookup_table", "id"]
    
    def __init__(self, name):
        self.name = name
        self.parents = []
        self.children = []
        self.lookup_table = None
        
    def add_parent(self, parent):
        self.parents.append(parent)
        parent.add_child(self)
        
    def add_child(self, child):
        self.children.append(child)
        
    def __str__(self):
        return "<Branch name=\"%s\", num_children=%d, num_parents=%d>" % (self.name, len(self.children), len(self.parents)) 
    
    def __repr__(self):
        return str(self)

def add_lattice_edge(parent, child, branch_map, parentless):
    if type(child) is str:
        if child not in branch_map:
            branch_map[child] = OntologyBranch(child)
        child = branch_map[child]
    if type(parent) is str:
        if parent not in branch_map:
            branch_map[parent] = OntologyBranch(parent)
            parentless.append(branch_map[parent])
        parent = branch_map[parent]
    child.add_parent(parent)    
    return (parent, child)

def load_roots_from_stream(fp, roots, total_size):
    text_fin = io.TextIOWrapper(fp, newline='')
    
    parentless = []
    branch_map = {}
    right_arrow = "->"
    left_arrow = "<-"
    
    marked_branch = None
    last_edge_is_right_arrow = True
    
    for k, line in enumerate(text_fin):
        tokens = line.split(right_arrow, 1)
        if len(tokens) >= 2:
            for i in range(len(tokens)-1):
                marked_branch = add_lattice_edge(tokens[i], tokens[i+1].strip(), branch_map, parentless)[0]
                last_edge_is_right_arrow = True
        else:
            tokens = line.split(left_arrow, 1)
            if len(tokens) >= 2:
                for i in range(len(tokens)-1):
                    marked_branch = add_lattice_edge(tokens[i+1].strip(), tokens[i], branch_map, parentless)[1]
                    last_edge_is_right_arrow = False
            elif marked_branch is not None:
                if last_edge_is_right_arrow:
                    add_lattice_edge(marked_branch, tokens[0].strip(), branch_map, parentless)
                else:
                    add_lattice_edge(tokens[0].strip(), marked_branch, branch_map, parentless)
        if k % 2000 == 0:
            progress = fp.tell() / total_size
            clear_output(wait=True)
            print("█" * (int(20 * progress)) + " %.1f%%" % (100 * progress,))
                        
    for k in parentless:
        if len(k.parents) == 0:
            roots.append(k)
            k.lookup_table = branch_map
    

def load_abstract_trees(path):
    roots = []
    total_size = os.stat(path).st_size
     
    try:
        fp = gzip.open(path, "rb")
        load_roots_from_stream(fp, roots, total_size)
        fp.close()
    except OSError:
        fpalt = open(path, "rb")
        load_roots_from_stream(fpalt, roots, total_size)
        fpalt.close()
    finally:
        fp.close()
    
    return roots

In [27]:
roots = load_abstract_trees("ontology.txt.gz")

██████████████████████████████████████████████████████████████████████████████████████████████████████ 513.2%


In [52]:
c = 0
for root in roots:
    if not root.name.startswith("fp:"):
        c+=1


In [54]:
root.children[0].parents[0].parents[0].parents[0].parents[0].parents[0].parents[0].parents[0].parents[0].parents[0].parents[0].parents[0].parents





1232

In [59]:
node = root.children[0]

i = 0
path = ""
while i < 13:
    path += " -> " + node.name
    node = node.parents[0]
    i+=1
print(path)

 -> fp:Concepts -> fp:Cognition -> fp:Mental processes -> fp:Mind -> fp:Metaphysics -> fp:Philosophy -> fp:Humanities -> fp:Academic disciplines -> fp:Academia -> fp:Knowledge -> fp:Perception -> fp:Mental processes -> fp:Mind


In [39]:
parentless_concepts = []
for key, val in root.lookup_table.items():
    if not key.startswith("fp:") and key != root:
        if len(val.parents) == 0:
            parentless_concepts.append(val)
            

In [48]:
print(open("../lattice2.txt", "rt").read())

root->Joe
Bob
Max
Mary
Jane
Goodwin
root 2->Joe
Bob
Max
Mary
Jane
Goodwin

