In [11]:
from typing import Dict
from queue import LifoQueue

In [12]:
from typing import Dict
from queue import LifoQueue


class TrieNode:
    """One node of a character trie.

    Attributes are plain public fields:
      * ``character``   - the character on the edge leading into this node
                          (``""`` for a node created with the defaults, e.g. a root).
      * ``children``    - child nodes keyed by their character.
      * ``node_depth``  - depth passed in by the creator; ``add_child`` gives each
                          new child ``node_depth + 1``.
      * ``parent_node`` - the node this one hangs under (``None`` by default).
      * ``link_node``   - optional link to another node; ``add_child`` fills it in
                          on the nodes of the ``sibling_node`` path.
    """

    def __init__(self, character: str = "", parent_node: 'TrieNode' = None, node_depth: int = 0) -> None:
        super().__init__()
        self.character: str = character
        self.children: Dict[str, TrieNode] = dict()
        self.node_depth: int = node_depth
        self.parent_node: TrieNode = parent_node
        self.link_node: TrieNode = None

    def add_child(self, text: str, sibling_node: 'TrieNode' = None) -> 'TrieNode':
        """Insert ``text`` below this node and return the node reached at its end.

        Missing nodes are created on the way; existing ones are reused. With an
        empty ``text`` the node itself is returned.

        If ``sibling_node`` is given it is advanced in lockstep over the same
        characters, and every sibling node visited gets its ``link_node`` set to
        the matching node on this path. The characters must already exist below
        ``sibling_node``; a missing one raises ``KeyError``.
        """
        node = self
        # Iterate the string directly; no intermediate list copy is needed.
        for current_character in text:
            child = node.children.get(current_character)
            if child is None:
                child = TrieNode(current_character, node, node.node_depth + 1)
                node.children[current_character] = child
            node = child
            if sibling_node is not None:
                sibling_node = sibling_node.children[current_character]
                sibling_node.link_node = node
        return node

    def __contains__(self, item):
        """Return True if ``item`` is a string spelled by a path starting at this node.

        Non-string items are never contained; the empty string always is.
        """
        if not isinstance(item, str):
            return False
        node = self
        # Walk character by character instead of re-slicing the remainder each
        # step, which made the lookup quadratic in the length of ``item``.
        for current_character in item:
            node = node.children.get(current_character)
            if node is None:
                return False
        return True


class Trie:
    """Trie holding the suffixes of ``text``, built with the help of node links.

    The whole text is inserted first; then, for every later start position, the
    constructor uses ``up_link_down`` to find where the next suffix already
    exists in the trie and only adds the missing tail with
    ``TrieNode.add_child``.
    """

    def __init__(self, text: str) -> None:
        """Build the trie for ``text``. An empty text yields a trie with only a root."""
        self.root_node: TrieNode = TrieNode()
        if not text:
            # Nothing to insert; without this guard ``text[0]`` below raises IndexError.
            return
        leaf_node = self.root_node.add_child(text)
        # The node for the first character links back to the root.
        self.root_node.children[text[0]].link_node = self.root_node
        for i in range(1, len(text)):
            head_node, sibling_node = self.up_link_down(leaf_node)
            if head_node is None:
                # No linked ancestor was found: restart from the root and link
                # the depth-1 node of the previous suffix to the root.
                sibling_node = self.root_node.children[text[i - 1]]
                sibling_node.link_node = self.root_node
                head_node = self.root_node
            # head_node already spells the first ``node_depth`` characters of
            # the suffix starting at i, so only the remainder is added.
            leaf_node = head_node.add_child(text[i + head_node.node_depth:], sibling_node)

    def __contains__(self, item):
        """Return True if ``item`` is a string spelled by a path from the root."""
        return isinstance(item, str) and item in self.root_node

    def up_link_down(self, sibling_node: TrieNode) -> (TrieNode, TrieNode):
        """Climb to the nearest linked ancestor, follow its link, and walk back down.

        Starting at ``sibling_node``, parents are followed until a node with a
        ``link_node`` is found, remembering the characters passed on the way. If
        the climb runs past the top of the trie, ``(None, None)`` is returned.

        Otherwise the walk continues from the linked node, replaying the
        remembered characters for as long as matching children exist. Each step
        also moves down the original path and sets that node's ``link_node``.

        Returns ``(node, sibling_node)``: the deepest node reached on the linked
        side and the corresponding node on the original path.
        """
        # A plain list is enough for a local stack; no thread-safe queue is needed.
        letters = []
        while sibling_node and not sibling_node.link_node:
            letters.append(sibling_node.character)
            sibling_node = sibling_node.parent_node
        if not sibling_node:
            return None, None
        node = sibling_node.link_node
        while letters:
            current_character = letters.pop()
            if current_character not in node.children:
                break
            node = node.children[current_character]
            sibling_node = sibling_node.children[current_character]
            sibling_node.link_node = node
        return node, sibling_node


In [38]:
class SuffixTreeNode:
    """Node of a compressed suffix tree; its incoming edge is a slice of the text.

    Attributes:
      * ``full_text``     - the complete text every node indexes into.
      * ``start``/``end`` - the edge label is ``full_text[start:end]`` (see ``label``).
      * ``depth``         - value supplied by the creator; ``graft`` and
                            ``break_path`` set it to the parent's depth plus the
                            length of the new edge label.
      * ``children``      - child nodes keyed by the first character of their label.
      * ``parent``        - the node this one hangs under (``None`` by default).
      * ``link``          - optional link to another node; not set by this class.
    """

    def __init__(self, text: str, start: int = 0, end: int = 0, depth: int = 0, parent: 'SuffixTreeNode' = None) -> None:
        super().__init__()
        self.depth = depth
        self.start = start
        self.end = end
        self.full_text = text
        self.children: Dict[str, SuffixTreeNode] = dict()
        self.parent: SuffixTreeNode = parent
        self.link: SuffixTreeNode = None

    def graft(self, start) -> 'SuffixTreeNode':
        """Attach a new leaf for the suffix beginning at index ``start`` and return it.

        The leaf's label is the part of that suffix not already covered by this
        node, i.e. ``full_text[start + self.depth:]``. Raises ``IndexError`` if
        that remainder is empty.
        """
        start = start + self.depth
        text = self.full_text[start:]
        child = SuffixTreeNode(self.full_text, start, len(self.full_text), self.depth + len(text), self)
        self.children[text[0]] = child
        return child

    def break_path(self, text: str) -> 'SuffixTreeNode':
        """Split the edge to the child keyed by ``text[0]`` after ``len(text)`` characters.

        A new node taking the first ``len(text)`` characters of the child's label
        is inserted between this node and the child; the child keeps the rest.
        Only ``text[0]`` and ``len(text)`` are used. Returns the new node.
        """
        length = len(text)
        child = self.children[text[0]]
        new_node = SuffixTreeNode(self.full_text, child.start, child.start + length, self.depth + length, self)
        child.start = child.start + length
        child.parent = new_node
        # The child is re-keyed by the first character of its shortened label.
        new_node.children[child.label[0]] = child
        self.children[text[0]] = new_node
        return new_node

    def fast_find(self, text: str) -> 'SuffixTreeNode':
        """Return the node at the end of ``text`` below this node, by length only.

        Only the first character of each edge is looked up; the rest of each
        label is skipped by its length without comparison. If ``text`` ends
        inside an edge, that edge is split and the new node is returned.
        Raises ``KeyError`` when no child starts with the next character.
        """
        if len(text) == 0:
            return self
        child = self.children[text[0]]
        if len(child.label) < len(text):
            return child.fast_find(text[len(child.label):])
        elif len(child.label) == len(text):
            return child
        else:
            return self.break_path(text)

    def slow_find(self, text: str) -> 'SuffixTreeNode':
        """Follow ``text`` character by character and return where the match stops.

        Returns this node when ``text`` is empty or no child starts with its
        first character. On a mismatch inside an edge the edge is split at that
        point and the new node is returned.

        NOTE(review): ``text[i]`` is read for every position of the child's label,
        so a ``text`` that ends inside an edge without a mismatch raises
        ``IndexError``. The notebook's inputs all end in a character that does
        not occur earlier in the text, which avoids this case.
        """
        if len(text) == 0 or text[0] not in self.children:
            return self
        child = self.children[text[0]]
        # Index 0 already matched through the dictionary key.
        for i in range(1, len(child.label)):
            if child.label[i] != text[i]:
                return self.break_path(text[:i])
        return child.slow_find(text[len(child.label):])

    @property
    def label(self):
        """The edge label leading into this node: ``full_text[start:end]``."""
        return self.full_text[self.start:self.end]

    def __contains__(self, item):
        """Return True if ``item`` is a string spelled by a path starting at this node.

        Non-string items are never contained. The type check now runs before any
        ``len`` call, so unsized objects such as ``5`` return False instead of
        raising ``TypeError``. The empty string is always contained.
        """
        if not isinstance(item, str):
            return False
        node = self
        # Iterative descent: one edge per round, so deep trees cannot hit the
        # recursion limit.
        while item:
            child = node.children.get(item[0])
            if child is None:
                return False
            label = child.label
            overlap = min(len(label), len(item))
            # Position 0 matched through the dictionary key; compare the rest
            # of the overlapping part.
            if label[1:overlap] != item[1:overlap]:
                return False
            if len(item) < len(label):
                return True
            node = child
            item = item[len(label):]
        return True

    def __repr__(self) -> str:
        return f"[{self.start}:{self.end}] {self.full_text[self.start:self.end]}"


class SuffixTree:
    """Suffix tree of ``text`` built from ``SuffixTreeNode`` objects.

    Suffixes are inserted from the longest to the shortest. With ``slow_mode``
    every suffix is located from the root with ``slow_find``. Otherwise the
    position found for the previous suffix (``last_head``) and the nodes'
    ``link`` fields are used to shorten the search, in the manner of
    McCreight's algorithm.

    NOTE(review): ``SuffixTreeNode.slow_find`` can raise ``IndexError`` when a
    suffix is a prefix of an earlier one. The notebook's inputs end in a
    character that does not occur earlier in the text, which avoids this.
    """

    def __init__(self, text: str, slow_mode = False) -> None:
        """Build the tree for ``text``. An empty text yields a tree with only a root."""
        self.root = last_head = SuffixTreeNode(text)
        if not text:
            # Nothing to insert; without this guard graft(0) raises IndexError.
            return
        leaf = self.root.graft(0)
        if slow_mode:
            for i in range(1, len(text)):
                head = self.root.slow_find(text[i:])
                head.graft(i)
        else:
            for i in range(1, len(text)):
                if last_head is self.root:
                    # The previous leaf hangs directly off the root, so its
                    # label minus the first character is the suffix at i.
                    last_head = self.root.slow_find(leaf.label[1:])
                    leaf = last_head.graft(i)
                    continue
                parent = last_head.parent
                if parent is self.root:
                    # No link to follow from the root: drop the first character.
                    link = parent.fast_find(last_head.label[1:])
                else:
                    link = parent.link.fast_find(last_head.label)
                if len(link.children) == 1:
                    # Presumably a node with one child is one fast_find has just
                    # created by splitting an edge; it is used as the head.
                    head = link
                else:
                    head = link.slow_find(leaf.label)
                leaf = head.graft(i)
                last_head.link = link
                last_head = head

    def __contains__(self, item):
        """Return True if ``item`` is a string spelled by a path from the root."""
        return isinstance(item, str) and item in self.root

In [39]:
def check_correctness(structure, text):
    """Return True if every suffix of ``text`` is reported as contained in ``structure``.

    ``structure`` is anything that supports the ``in`` operator. Each suffix
    ``text[i:]`` is tested exactly once; the previous nested loop repeated the
    same test for every ``j`` without ever using ``j``. An empty ``text`` has
    no suffixes to test and yields True.
    """
    return all(text[i:] in structure for i in range(len(text)))

In [40]:
# Run the same three checks on each sample text; every line is expected to
# print True. ``text`` ends up holding the last sample, as before.
for text in ("bbb$", "abbbabd", "ababcd", "abcbccd"):
    print(f"Trie {check_correctness(Trie(text), text)}")
    print(f"McCreight {check_correctness(SuffixTree(text), text)}")
    print(f"Slow McCreight {check_correctness(SuffixTree(text, True), text)}")

Trie True
McCreight True
Slow McCreight True
Trie True
McCreight True
Slow McCreight True
Trie True
McCreight True
Slow McCreight True
Trie True
McCreight True
Slow McCreight True


In [45]:
from time import perf_counter
def runing_time(func, args, w_print=False, name=None, count=10):
    """Call ``func(*args)`` ``count`` times and report the mean wall-clock time.

    The time is measured with ``perf_counter`` around the whole loop and
    divided by ``count``. When ``w_print`` is true the average is printed,
    prefixed with ``name``, and nothing is returned; otherwise the average
    is returned in seconds.
    """
    started_at = perf_counter()
    for _ in range(count):
        func(*args)
    finished_at = perf_counter()
    avg = (finished_at - started_at) / count
    if not w_print:
        return avg
    print(f"{name} average time: {avg}")

In [46]:

# Average construction time over 50 runs for each structure; runing_time
# prints the result itself when w_print is true.
text = "bbb$"
runing_time(Trie, [text], w_print=True, name="Trie", count=50)
runing_time(SuffixTree, [text], w_print=True, name="McCreight", count=50)
runing_time(SuffixTree, [text, True], w_print=True, name="Slow McCreight", count=50)

Trie average time: 0.00022557253999821113
McCreight average time: 3.0449480000243057e-05
Slow McCreight average time: 2.574407999873074e-05


In [47]:
# Average construction time over 50 runs for each structure; runing_time
# prints the result itself when w_print is true.
text = "abbbabd"
runing_time(Trie, [text], w_print=True, name="Trie", count=50)
runing_time(SuffixTree, [text], w_print=True, name="McCreight", count=50)
runing_time(SuffixTree, [text, True], w_print=True, name="Slow McCreight", count=50)

Trie average time: 0.0005542132400023547
McCreight average time: 7.371772000169585e-05
Slow McCreight average time: 4.994230000193056e-05


In [48]:
# Average construction time over 50 runs for each structure; runing_time
# prints the result itself when w_print is true.
text = "abcbccd"
runing_time(Trie, [text], w_print=True, name="Trie", count=50)
runing_time(SuffixTree, [text], w_print=True, name="McCreight", count=50)
runing_time(SuffixTree, [text, True], w_print=True, name="Slow McCreight", count=50)


Trie average time: 0.00040603762000046117
McCreight average time: 5.050356000083412e-05
Slow McCreight average time: 4.170698000052653e-05


In [50]:
# Read the benchmark text first so the file is closed before the long
# timing runs start, instead of staying open for their whole duration.
with open('1997_714_head.txt', 'r') as file:
    text = file.read()
# text = text[:2000]
# Append a NUL character as the end marker; presumably it does not occur in
# the file itself - verify against the data.
text += '\0'
runing_time(Trie, [text], True, "Trie",count=1)
runing_time(SuffixTree, [text], True, "McCreight",count=1)
runing_time(SuffixTree, [text, True], True, "Slow McCreight",count=1)

Trie average time: 21.252170428
McCreight average time: 0.042717566999954215
Slow McCreight average time: 0.06623011900001075
