# Dane wejściowe    

In [235]:
# Sample inputs used by the cells below; every entry ends with the "$" marker.
data = [
    "bbb$",
    "aabbabd$",
    "ababcd$",
    "abaababaabaabaabab$",
    "Jest piękna pogoda na spacer po parku. Słońce świeci, ptaki śpiewają, a ludzie się uśmiechają. Wspan$",
]
# The last sample is the whole content of a text file with "$" appended.
# Alternative location of the same file, left disabled:
# with open("lab6 - suffix tree/1997_714_head.txt", "r") as file:
# NOTE(review): no `encoding=` is passed, so the platform default encoding is
# used when decoding the file — confirm that it matches the file.
with open("1997_714_head.txt", "r") as file:
    data.append(file.read() + "$")

# Trie zawierające wszystkie sufiksy

In [236]:
class SufixesTrie:
    """Uncompressed trie of all suffixes of a string (one character per edge).

    The constructor inserts ``data`` and every shorter non-empty suffix of it.
    ``pattern in trie`` is True when ``pattern`` can be spelled by walking down
    from the root, which for a suffix trie means it is a substring of ``data``.
    """

    def __add_string(self, string: str) -> None:
        """Insert ``string`` as a root-to-node path, reusing existing nodes.

        Each letter either follows an existing edge or creates a new one; in
        both cases the walk then moves INTO that child. (Previously the walk
        stayed on the parent after creating the first new edge, so the rest of
        the string was attached to the wrong node and inputs such as "bbb$"
        raised "SUCH CHILD ALREADY EXIST".)
        """
        walker = self.root
        for letter in string:
            if not walker.is_conected(letter):
                walker.add_child(letter)
            walker = walker.get_child(letter)

    def __init__(self, data: str) -> None:
        """Build the trie from all suffixes of ``data`` (nothing is added for "")."""
        self.root = SufixesTrieNode()
        for start in range(len(data)):
            self.__add_string(data[start:])

    def __contains__(self, item):
        """Return True when ``item`` labels a path starting at the root."""
        walker = self.root
        for letter in item:
            if not walker.is_conected(letter):
                return False
            walker = walker.get_child(letter)
        return True

class SufixesTrieNode:
    """One node of the suffix trie; outgoing edges are keyed by their label."""

    def __init__(self) -> None:
        # edge label -> child node
        self.children = {}

    def is_conected(self, branch_str: str):
        """Tell whether an edge labelled ``branch_str`` leaves this node."""
        return branch_str in self.children

    def get_child(self, branch_str: str):
        """Return the child behind edge ``branch_str``.

        Raises:
            ValueError: when no such edge exists.
        """
        if not self.is_conected(branch_str):
            raise ValueError(f"NOT SUCH BRANCH: {branch_str}")
        return self.children[branch_str]

    def add_child(self, branch_str: str):
        """Attach a fresh, empty child under edge ``branch_str``.

        Raises:
            ValueError: when the edge is already present.
        """
        if not self.is_conected(branch_str):
            self.children[branch_str] = SufixesTrieNode()
            return
        raise ValueError("SUCH CHILD ALREADY EXIST")

In [237]:
# Smoke test: build the suffix trie for the third sample and look up one pattern.
print(data[2])
trie = SufixesTrie(data[2])

print("ab" in trie)


ababcd$
True


# Trie budowane on-line (z suffix linkami)

In [238]:
class OnLineTrie:
    """Suffix trie built on-line, one letter at a time, using suffix links.

    After each letter the trie contains every suffix of the prefix read so far.
    ``pattern in trie`` is True when ``pattern`` labels a path from the root.
    """

    def __build_tree(self, string: str):
        """Extend the trie with each letter of ``string`` in order.

        For every letter the walk starts at the deepest node (the whole prefix
        read so far) and follows suffix links towards the root, adding a child
        for the letter at each node that lacks one. Consecutive new children
        are chained with suffix links (longer suffix -> next shorter one).
        """
        deepest = self.root

        for letter in string:
            # `deepest` spells the whole prefix read so far, so it has no
            # children yet and the loop below runs at least once.
            node = deepest
            deepest = None
            prev = None

            while node is not None and not node.is_conected(letter):
                node.add_child(letter)
                child = node.get_child(letter)

                if deepest is None:
                    deepest = child
                else:
                    prev.link(child)

                prev = child
                # The root is the only node without a link; stepping past it
                # ends the walk with `node is None`.
                node = node.get_link() if node.is_linked() else None

            if node is None:
                # Walked past the root: the last child hangs off the root and
                # spells a single letter, so its suffix link is the root.
                prev.link(self.root)
            else:
                # Stopped at a node that already has the letter: link to that
                # EXISTING child (the original tried to add it again, which
                # raises as soon as suffix links actually work).
                prev.link(node.get_child(letter))

    def __init__(self, string: str) -> None:
        """Create the root and build the trie for ``string``."""
        self.root = OnLineTrieNode()
        self.__build_tree(string)

    def __contains__(self, item):
        """Return True when ``item`` labels a path starting at the root."""
        walker = self.root
        for letter in item:
            if walker.is_conected(letter):
                walker = walker.get_child(letter)
            else:
                return False
        return True


class OnLineTrieNode:
    """Node of the on-line trie: labelled children plus an optional suffix link."""

    def __init__(self) -> None:
        # edge label -> child node
        self.children = dict()
        # suffix link target; None until `link` is called
        self.up_link = None

    def get_child(self, branch_str: str):
        """Return the child behind edge ``branch_str``; ValueError if it is missing."""
        if self.is_conected(branch_str):
            return self.children[branch_str]
        else:
            raise ValueError(f"NOT SUCH BRANCH: {branch_str}")

    def add_child(self, branch_str: str):
        """Attach a fresh child under edge ``branch_str``; ValueError if it exists."""
        if self.is_conected(branch_str):
            raise ValueError("SUCH CHILD ALREADY EXIST")
        else:
            self.children[branch_str] = OnLineTrieNode()

    def is_conected(self, branch_str):
        """Tell whether an edge labelled ``branch_str`` leaves this node."""
        return branch_str in self.children

    def link(self, node):
        """Set the suffix link of this node to ``node``.

        The target is stored in ``up_link``. (Storing it in ``self.link``, as
        before, replaced this method on the instance and left ``up_link``
        unset, so ``is_linked`` never became True.)

        Raises:
            ValueError: when ``node`` is not an ``OnLineTrieNode``.
        """
        if isinstance(node, OnLineTrieNode):
            self.up_link = node
        else:
            raise ValueError(f"WRONG ISTANCE OF UP_LINK: {type(node)}")

    def is_linked(self):
        """Tell whether a suffix link has been set."""
        return self.up_link is not None

    def get_link(self):
        """Return the suffix link target; RuntimeError when none is set."""
        if self.is_linked():
            return self.up_link
        else:
            raise RuntimeError("THIS NODE DONT HAVE LINK")

In [239]:
# Smoke test: build the on-line trie for the third sample and look up one pattern.
print(data[2])
trie = OnLineTrie(data[2])

print("ab" in trie)


ababcd$
True


# suffix tree

In [264]:
class ActivePoint:
    """Cursor used while building the suffix tree.

    Holds the active node, the first letter of the active edge, the offset
    along that edge and a counter of pending suffixes. Every method logs what
    it does with ``print``.

    NOTE(review): the check cell recorded in this notebook ends with a
    RuntimeError after building a tree with this class, so the construction
    logic should be treated as unverified.
    """
    def __init__(self, node) -> None:
        # node we are currently working at
        self.node = node
        # first letter of the active edge; None -> a new edge is going to be added
        self.edge = None
        # distance along the active edge at which a branch may be created
        self.length = 0
        # number of skipped (not yet inserted) suffixes
        self.reminder = 0
    
    def __change_node(self, node, text):
        """Log and set the active node; ValueError if it is not a SuffixTreeNode."""
        # The log line calls `node.string_edge_above` before the type check
        # below runs, so an object without that method fails here first.
        print(f"\t\tnew_node: {node},{node.string_edge_above(text)}")
        if isinstance(node, SuffixTreeNode):
            self.node = node
        else:
            raise ValueError(f"WRONG NODE TYPE: {type(node)}")
        
    def __change_edge(self, edge):
        """Log and set the active edge; accepts None or a str, otherwise ValueError."""
        print(f"\t\tedge changed {edge}")
        if edge is None:
            self.edge = None
        elif isinstance(edge, str):
            self.edge = edge
        else:
            raise ValueError(f"WRONG EDGE TYPE {type(edge)}")

    def __line_indexes(self):
        """Return (start, end) text indexes of the part of the active edge covered so far."""
        if self.edge is None:
            return (0, 0)
        child = self.node.get_child(self.edge)
        return (child.parent_index, child.parent_index + self.length)

    def __line_str(self, text):
        """Return a printable description of the active position ("" when no edge is active)."""
        if self.edge is None:
            return ""
        start, end = self.__line_indexes()
        return f"{self.node} -> {self.edge}:{text[start: end]}"

    def on_path(self, letter, text):
        """Tell whether ``letter`` continues the path at the active position.

        Side effect: when no edge is active and the active node has a child
        keyed by ``letter``, that edge becomes the active edge.
        """
        # NOTE(review): this membership test looks at the node's children even
        # when an active edge is already selected; presumably it should apply
        # only when `self.edge is None` — verify.
        if letter not in self.node.children:
            return False
        else:
            if self.edge is None:
                self.__change_edge(letter)
            # compare with the text character `length` positions into the active edge
            return letter == text[self.node.get_child(self.edge).parent_index + self.length]
    
    def extend(self, letter, text, i):
        """Move one character further along the path and count one more pending suffix."""
        if self.edge is None:
            self.__change_edge(letter)
        self.length += 1
        self.reminder += 1
        self.__adapt(text, i)
        print(f"\textend\tcurrent line: {self.__line_str(text)}")

    def __adapt(self, text, i):
        """Move the active node down while ``length`` reaches past the active edge."""
        if self.edge is None:
            return
        child = self.node.get_child(self.edge)
        child_length_from_parent = child.length_from_parent(text)
        # print(f"\t\t adapt\t{child_length_from_parent}\t{self.__line_str(text)}")
        # The offset is longer than the edge: descend into the child and pick
        # the next edge from the text.
        while child_length_from_parent < self.length:
            print(f"\t\tjump")
            self.__change_edge(text[i - self.length + child_length_from_parent])
            self.node = child
            self.length = self.length - child_length_from_parent

            child = self.node.get_child(self.edge)
            child_length_from_parent = child.length_from_parent(text)

        # The offset ends exactly on the child: make it the active node and
        # clear the edge and the offset.
        if child_length_from_parent == self.length:
            print(f"\t\tjump0")
            self.__change_node(child, text)
            self.__change_edge(None)
            self.length = 0

    def add_child(self, letter, text, i):
        """Insert a leaf for ``letter`` starting at text index ``i``.

        When the active node has a child keyed by the active edge, that edge
        is split at ``length`` and the new internal node is returned.
        Otherwise a leaf is added directly under the active node and the
        active node itself (not the new leaf) is returned.
        """
        # NOTE(review): the split branch is taken whenever an edge is selected,
        # including with `length == 0` — confirm that case cannot occur.
        if self.node.is_contain(self.edge):
            child = self.node.divine_edge(letter, self.length, self.edge, text, i)
        else:
            self.node.add_child(letter, -1, i)
            child = self.node
            print(f"\tchild:{letter}, {self.node.get_child(letter)} added to {self.node}")
        return child
    
    def step(self, text, i):
        """Advance to the next pending suffix and return the node active right after the move.

        Decrements ``reminder``. When the active node's suffix link points to
        itself, the offset is shortened by one and the edge is re-selected
        from the text; otherwise the suffix link is followed.
        """
        self.reminder -= 1
        if self.node.suffix_link == self.node:
            self.length -= 1
            new_edge = text[i-self.length]
            if self.node.is_contain(new_edge):
                self.__change_edge(new_edge)
            else:
                self.__change_edge(None)
            # captured before __adapt, which may move the active node further down
            ans = self.node
            self.__adapt(text, i)
            print(f"\tstep\tcurrent line: {self.__line_str(text)}")
        else:
            # NOTE(review): `suffix_link` starts as None on new nodes; if it was
            # never set, __change_node fails on its log line — verify.
            self.__change_node(self.node.suffix_link, text)
            ans = self.node
            self.__adapt(text, i)
        return ans

class SuffixTree:
    """Compressed suffix tree of ``text`` with substring lookup via ``in``.

    Edges are stored in the child nodes as index ranges into ``text``. The
    tree is built in the constructor by driving an ``ActivePoint`` over the
    text; the build logs its progress with ``print``.
    """

    def __init__(self, text: str) -> None:
        """Store ``text``, create a root whose suffix link is itself, and build the tree."""
        self.text = text
        self.root = SuffixTreeNode(0,None)
        self.root.link(self.root)
        self.__build_tree()

    def __build_tree(self):
        """Insert the text one letter at a time, moving the active point as it goes."""
        active_point = ActivePoint(self.root)
        for i in range(len(self.text)):
            print(self.text[i:])
            letter = self.text[i]

            # The letter is already on the active path: only move the cursor.
            if active_point.on_path(letter, self.text):
                active_point.extend(letter, self.text, i)
                continue

            # Otherwise add a leaf here, then for each pending suffix either
            # add another leaf or stop once the letter is found on the path.
            prev_child = active_point.add_child(letter, self.text, i)
            while active_point.reminder > 0:
                active_point.step(self.text, i)
                if active_point.on_path(letter, self.text):
                    if prev_child != active_point.node:
                        print(1)
                        prev_child.link(active_point.node)
                    active_point.extend(letter, self.text, i)
                    break
                child = active_point.add_child(letter, self.text, i)
                print(2)
                prev_child.link(child)

                prev_child = child

    def __contains__(self, item):
        """Return True when ``item`` can be read along a path from the root.

        Each step picks the child keyed by the next letter and compares the
        rest of that child's edge label with the pattern. The comparison is
        limited to the edge's own length, and a mismatch inside an edge is a
        definite miss. (Previously the comparison ran past the end of the
        edge, which could raise IndexError on leaf edges, and a mismatch in
        the middle of an edge fell through to the child's children, which
        could report patterns that are not in the tree.)
        """
        text = self.text
        n = len(item)
        i = 0
        walker = self.root
        while i < n:
            edge_letter = item[i]
            if not walker.is_contain(edge_letter):
                return False
            walker = walker.get_child(edge_letter)
            # The edge key already matched item[i]; consuming it first also
            # guarantees progress on every iteration.
            i += 1
            rest_start = walker.parent_index + 1
            rest_length = max(walker.length_from_parent(text) - 1, 0)
            span = min(rest_length, n - i)
            if item[i:i + span] != text[rest_start:rest_start + span]:
                return False
            i += span
        return True

class SuffixTreeNode:
    """Suffix tree node that also describes the edge leading to it from its parent.

    The edge label is the slice ``text[parent_index:index]``; ``index == -1``
    stands for "up to the end of the text". The root is created with
    ``parent_index=None``.
    """

    def __init__(self, index, parent_index) -> None:
        # first letter of an outgoing edge -> child node
        self.children = {}
        # Edges carry the strings; each node stores which piece of the text
        # labels the edge between itself and its parent.
        # End index of that piece (exclusive); -1 means the last index.
        self.index = index
        # Start index of that piece; None marks the root.
        self.parent_index = parent_index

        self.suffix_link = None

    def is_contain(self, first_letter: str):
        """Tell whether an outgoing edge starts with ``first_letter``."""
        return first_letter in self.children

    def get_child(self, first_letter: str):
        """Return the child whose edge starts with ``first_letter``; ValueError if absent."""
        if not self.is_contain(first_letter):
            raise ValueError(f"NOT SUCH CHILD: {first_letter}")
        return self.children[first_letter]

    def add_child(self, first_letter: str, index, parent_index):
        """Attach a new child keyed by ``first_letter``; ValueError if the key is taken."""
        # print(f"\t\tadded child {first_letter}, [{parent_index}, {index})")
        if not self.is_contain(first_letter):
            self.children[first_letter] = SuffixTreeNode(index, parent_index)
            return
        raise ValueError(F"SUCH CHILD ALREADY IN TREE: {first_letter}")

    def divine_edge(self, letter, length, edge, text, i):
        """Split the edge keyed by ``edge`` after ``length`` characters.

        A new internal node takes over the first ``length`` characters and
        replaces the old child under this node. It receives a new leaf for
        ``letter`` starting at text index ``i``, and the old child (now
        starting at the split position) is re-attached below it. Returns the
        new internal node.
        """
        lower = self.get_child(edge)
        edge_start = lower.parent_index
        split_at = edge_start + length
        upper = SuffixTreeNode(split_at, edge_start)

        lower.parent_index = split_at
        self.children[edge] = upper
        # Insertion order matters for the log line below: new leaf first,
        # then the old child under the letter found at the split position.
        upper.add_child(letter, -1, i)
        upper.children[text[split_at]] = lower

        print(f"\tdivined edge\tfrom: {self} created: {edge}, {upper}, with children: {upper.children}")
        return upper

    def link(self, other):
        """Log and set the suffix link; ValueError if ``other`` is not a SuffixTreeNode."""
        print(f"\t\t{self} linked to {other}")
        if not isinstance(other, SuffixTreeNode):
            raise ValueError(f"WRONG INSTANCE OF LINKING ITEM {type(other)}")
        self.suffix_link = other

    # returns the text labelling the edge from the parent to this node
    def string_edge_above(self, text):
        if self.index == -1:
            return text[self.parent_index:]
        if self.parent_index is None:
            return ""
        return text[self.parent_index:self.index]

    # returns the length of the edge between this node and its parent
    def length_from_parent(self, text):
        if self.index == -1:
            return len(text) - self.parent_index
        if self.parent_index is None:
            return None
        return self.index - self.parent_index

    def __str__(self) -> str:
        return "[{}, {})".format(self.parent_index, self.index)

    def __repr__(self) -> str:
        return str(self)



In [265]:
text = "cdddcdcx"
first_lecture_tree = SuffixTree(text)

# Every suffix and every non-empty prefix of the text must be found in the tree.
for i in range(len(text)):
    # The prefix end is computed explicitly: `text[:-i]` is "" for i == 0
    # (because -0 == 0), which would make the first prefix check vacuous.
    if not(text[i:] in first_lecture_tree and text[:len(text) - i] in first_lecture_tree):
        raise RuntimeError(f"{(text[i:], text[:len(text) - i])} this patterns should be in tree")
print("done")

		[None, 0) linked to [None, 0)
cdddcdcx
	child:c, [0, -1) added to [None, 0)
dddcdcx
	child:d, [1, -1) added to [None, 0)
ddcdcx
		edge changed d
	extend	current line: [None, 0) -> d:d
dcdcx
	extend	current line: [None, 0) -> d:dd
cdcx
	divined edge	from: [None, 0) created: d, [1, 3), with children: {'c': [4, -1), 'd': [3, -1)}
		edge changed d
	step	current line: [None, 0) -> d:d
	divined edge	from: [None, 0) created: d, [1, 2), with children: {'c': [4, -1), 'd': [2, 3)}
2
		[2, 3) linked to [1, 2)
		edge changed c
	step	current line: [None, 0) -> c:
1
		[1, 2) linked to [None, 0)
	extend	current line: [None, 0) -> c:c
dcx
	extend	current line: [None, 0) -> c:cd
cx
	divined edge	from: [None, 0) created: c, [0, 2), with children: {'c': [6, -1), 'd': [2, -1)}
		edge changed d
		jump0
		new_node: [1, 2),d
		edge changed None
	step	current line: 
		edge changed c
1
		[0, 2) linked to [1, 2)
	extend	current line: [1, 2) -> c:c
x
	divined edge	from: [1, 2) created: c, [4, 5), with children

RuntimeError: ('cdcx', 'cddd') this patterns should be in tree

In [None]:
# Build the suffix tree for the fourth sample; construction logs every step.
# NOTE(review): the output recorded under this cell ends with
# "IndexError: string index out of range" and contains log lines that the
# current code does not print, so it appears to come from an earlier run.
SuffixTree(data[3])

abaababaabaabaabab$
	child:a, [0, -1) added to [None, 0)
baababaabaabaabab$
	child:b, [1, -1) added to [None, 0)
aababaabaabaabab$
		edge changed a
	extend	current line: a
ababaabaabaabab$
	divined edge	from: [None, 0) created: a, [0, 1), with children: {'a': [3, -1), 'b': [1, -1)}
		edge changed a
	step	current line: 
	extend	current line: a
babaabaabaabab$
		jump0
		new_node: ab
		edge changed None
	extend	current line: 
abaabaabaabab$
		edge changed a
	extend	current line: a
baabaabaabab$
	extend	current line: ab
aabaabaabab$
	extend	current line: aba
abaabaabab$
	divined edge	from: [0, 1) created: a, [3, 6), with children: {'a': [8, -1), 'b': [6, -1)}
	step	 move on link [None, 0)
		new_node: 
		jump
		edge changed a
	divined edge	from: [0, 1) created: a, [3, 4), with children: {'a': [8, -1), 'b': [4, 6)}
	step	 move on link [None, 0)
		new_node: 
	divined edge	from: [None, 0) created: a, [0, 1), with children: {'a': [8, -1), 'b': [1, 1)}
		edge changed a
	step	current line: 
	exte

IndexError: string index out of range