# Dane wejściowe    

In [10]:
# Sample inputs for the suffix structures below; every entry ends with "$".
data = [
    "bbb$",
    "aabbabd$",
    "ababcd$",
    "abaababaabaabaabab$",
    "Jest piękna pogoda na spacer po parku. Słońce świeci, ptaki śpiewają, a ludzie się uśmiechają. Wspan$",
]
# The last sample comes from disk.  The encoding is pinned so that decoding
# does not depend on the platform's locale default.
# NOTE(review): assumes the file is stored as UTF-8 - confirm.
with open("lab6 - suffix tree/1997_714_head.txt", "r", encoding="utf-8") as file:
    data.append(file.read() + "$")

# Trie zawierające wszystkie sufiksy

In [11]:
class SufixesTrie:
    """Uncompressed trie that stores every suffix of a text.

    Each edge carries a single character, so a string is a substring of the
    text exactly when it can be spelled along a path starting at the root.
    """

    def __add_string(self, string: str) -> None:
        """Insert ``string`` as a path from the root, reusing existing nodes.

        Nodes are created only for the characters that are missing; the walker
        always moves onto the node for the current character before handling
        the next one, so every new node hangs below its predecessor.
        """
        walker = self.root
        for letter in string:
            if not walker.is_conected(letter):
                walker.add_child(letter)
            walker = walker.get_child(letter)

    def __init__(self, data: str) -> None:
        """Build the trie by inserting all non-empty suffixes of ``data``."""
        self.root = SufixesTrieNode()
        for start in range(len(data)):
            self.__add_string(data[start:])

    def __contains__(self, item):
        """Return True when ``item`` can be spelled from the root.

        The empty string is always contained.
        """
        walker = self.root
        for letter in item:
            if not walker.is_conected(letter):
                return False
            walker = walker.get_child(letter)
        return True

class SufixesTrieNode:
    """Trie node whose outgoing edges are keyed by the edge's label."""

    def __init__(self) -> None:
        # label -> child node
        self.children = {}

    def is_conected(self, branch_str: str):
        """Tell whether an edge labelled ``branch_str`` leaves this node."""
        return branch_str in self.children

    def get_child(self, branch_str: str):
        """Return the child behind ``branch_str``.

        Raises:
            ValueError: when no such edge exists.
        """
        if not self.is_conected(branch_str):
            raise ValueError(f"NOT SUCH BRANCH: {branch_str}")
        return self.children[branch_str]

    def add_child(self, branch_str: str):
        """Attach a fresh, empty child behind ``branch_str``.

        Raises:
            ValueError: when the edge is already present.
        """
        if self.is_conected(branch_str):
            raise ValueError("SUCH CHILD ALREADY EXIST")
        self.children[branch_str] = SufixesTrieNode()

In [12]:
# Echo the third sample and build the suffix trie for it.
print(data[2])
trie = SufixesTrie(data[2])

# ``in`` dispatches to SufixesTrie.__contains__ (a walk from the root).
print("ab" in trie)


ababcd$
True


# Trie budowane online (z suffix linkami)

In [13]:
class OnLineTrie:
    """Suffix trie built online, one character at a time, with suffix links.

    Every node's suffix link points at the node spelling the same string
    without its first character; nodes for single characters link to the
    root, and the root itself has no link.
    """

    def __build_tree(self, string: str):
        """Extend the trie with each character of ``string`` in turn.

        For every character the chain of suffix links is followed from the
        deepest node (the whole text read so far) towards the root, adding the
        missing edge at each visited node and linking the freshly created
        nodes to one another.
        """
        deepest = self.root

        for letter in string:
            node = deepest
            deepest = None
            prev = None

            # Stop at the first node that already has the edge: all shorter
            # suffixes have it as well.
            while node is not None and not node.is_conected(letter):
                node.add_child(letter)
                child = node.get_child(letter)

                if prev is None:
                    # first node created in this round spells the whole text
                    deepest = child
                else:
                    prev.link(child)
                prev = child

                # the root has no link, which ends the walk
                node = node.get_link() if node.is_linked() else None

            if node is None:
                # Walked past the root: the last node created is a direct
                # child of the root, so its suffix is the empty string.
                prev.link(self.root)
            else:
                existing = node.get_child(letter)
                if prev is None:
                    deepest = existing
                else:
                    prev.link(existing)

    def __init__(self, string: str) -> None:
        """Create the root and build the trie for ``string``."""
        self.root = OnLineTrieNode()
        self.__build_tree(string)

    def __contains__(self, item):
        """Return True when ``item`` can be spelled from the root.

        The empty string is always contained.
        """
        walker = self.root
        for letter in item:
            if not walker.is_conected(letter):
                return False
            walker = walker.get_child(letter)
        return True


class OnLineTrieNode:
    """Trie node with single-character edges and an optional suffix link."""

    def __init__(self) -> None:
        # label -> child node
        self.children = dict()
        # suffix link target; None until ``link`` is called
        self.up_link = None

    def get_child(self, branch_str: str):
        """Return the child behind ``branch_str``; ValueError if it is missing."""
        if self.is_conected(branch_str):
            return self.children[branch_str]
        else:
            raise ValueError(f"NOT SUCH BRANCH: {branch_str}")

    def add_child(self, branch_str: str):
        """Attach a new empty child behind ``branch_str``; ValueError if present."""
        if self.is_conected(branch_str):
            raise ValueError("SUCH CHILD ALREADY EXIST")
        else:
            self.children[branch_str] = OnLineTrieNode()

    def is_conected(self, branch_str):
        """Tell whether an edge labelled ``branch_str`` leaves this node."""
        return branch_str in self.children

    def link(self, node):
        """Set this node's suffix link to ``node``.

        The target is stored in ``up_link``; storing it under the name
        ``link`` would replace this method on the instance.

        Raises:
            ValueError: when ``node`` is not an OnLineTrieNode.
        """
        if isinstance(node, OnLineTrieNode):
            self.up_link = node
        else:
            raise ValueError(f"WRONG ISTANCE OF UP_LINK: {type(node)}")

    def is_linked(self):
        """Tell whether a suffix link has been set."""
        return self.up_link is not None

    def get_link(self):
        """Return the suffix link target; RuntimeError when none is set."""
        if self.is_linked():
            return self.up_link
        else:
            raise RuntimeError("THIS NODE DONT HAVE LINK")

In [14]:
# Echo the third sample and build the online suffix trie for it.
print(data[2])
trie = OnLineTrie(data[2])

# ``in`` dispatches to OnLineTrie.__contains__ (a walk from the root).
print("ab" in trie)


ababcd$
True


# suffix tree

In [31]:
class ActivePoint:
    """Cursor of Ukkonen's algorithm: the place where the next suffix goes.

    The point lies ``length`` characters below ``node`` on the edge whose
    label starts with ``edge``.  It is kept canonical: ``length`` is always
    smaller than the length of that edge, and ``edge`` is ``None`` exactly
    when ``length`` is 0 (the point sits on ``node`` itself).
    """

    def __init__(self, node) -> None:
        # root of the tree (the point starts there); fallback target when a
        # node carries no suffix link
        self.root = node
        # node the point hangs below
        self.node = node
        # first letter of the active edge, None -> the point sits on ``node``
        self.edge = None
        # number of characters walked down the active edge
        self.length = 0
        # number of suffixes that are in the tree only implicitly
        self.reminder = 0
        # index in the text of the character being inserted (set by the builder)
        self.position = 0

    def on_path(self, letter, text):
        """Tell whether ``letter`` already continues the path at the point."""
        if self.length == 0:
            return self.node.is_contain(letter)
        child = self.node.get_child(self.edge)
        return letter == text[child.parent_index + self.length]

    def extend(self, leter, text):
        """Move one character down along ``leter``.

        Must only be called when ``on_path`` is true for that letter.  The
        suffix ending here stays implicit, so ``reminder`` grows by one.
        """
        if self.edge is None:
            self.edge = leter
        self.length += 1
        self.reminder += 1
        # the active string now ends at ``position`` inclusive
        self.__adapt(text, self.position + 1)

    def __adapt(self, text, end):
        """Re-canonicalise the point by walking down whole edges.

        ``end`` is the exclusive text index at which the active string ends;
        the part below ``node`` is therefore ``text[end - length:end]``.
        """
        while self.length > 0:
            child = self.node.get_child(self.edge)
            span = child.length_from_parent(text)
            if self.length < span:
                return
            self.node = child
            self.length -= span
            self.edge = text[end - self.length] if self.length else None
        self.edge = None

    def add_child(self, letter, text):
        """Insert the leaf for the suffix starting at ``position``.

        Returns the node the new leaf hangs from: ``node`` itself when the
        point sits on it, otherwise the node created by splitting the edge.
        """
        if self.length == 0:
            self.node.add_child(letter, -1, self.position)
            return self.node
        return self.node.divine_branch(
            letter, self.length, self.edge, text, self.position
        )

    def step(self, letter, text):
        """Move to the next shorter pending suffix after an insertion.

        At the root the first character of the active string is dropped;
        elsewhere the suffix link is followed.  ``letter`` is not needed for
        that and is accepted only to keep the call signature.
        """
        self.reminder -= 1
        if self.node is self.root:
            if self.length > 0:
                self.length -= 1
        elif self.node.suffix_link is not None:
            self.node = self.node.suffix_link
        else:
            self.node = self.root
        # the active string ends just before the character being inserted
        self.edge = text[self.position - self.length] if self.length else None
        self.__adapt(text, self.position)


class SuffixTree:
    """Suffix tree of ``text`` built with Ukkonen's online algorithm.

    Edge labels are stored as index ranges into ``text``.  When ``text`` ends
    with a character that occurs nowhere else, every suffix ends in its own
    leaf; without such a terminator some suffixes end inside an edge, which
    does not affect substring queries.
    """

    def __init__(self, text: str) -> None:
        self.text = text
        self.root = SuffixTreeNode(0, None)
        self.root.link(self.root)
        self.__build_tree()

    def __build_tree(self):
        """Insert the characters of ``self.text`` one by one."""
        active_point = ActivePoint(self.root)
        for i, letter in enumerate(self.text):
            active_point.position = i
            # node that received a leaf in this round and still has to be
            # linked to the node handled next
            pending = None
            while True:
                if active_point.on_path(letter, self.text):
                    # Every remaining suffix is already present implicitly.
                    if pending is not None:
                        pending.link(active_point.node)
                    active_point.extend(letter, self.text)
                    break
                branching = active_point.add_child(letter, self.text)
                if pending is not None:
                    pending.link(branching)
                pending = branching
                if active_point.reminder == 0:
                    break
                active_point.step(letter, self.text)

    def __contains__(self, item):
        """Return True when ``item`` is a substring of the text.

        The empty string is always contained.
        """
        node = self.root
        matched = 0
        while matched < len(item):
            first = item[matched]
            if not node.is_contain(first):
                return False
            child = node.get_child(first)
            span = child.length_from_parent(self.text)
            piece = item[matched:matched + span]
            if not self.text.startswith(piece, child.parent_index):
                return False
            matched += len(piece)
            node = child
        return True


class SuffixTreeNode:
    """Node of the suffix tree; it also describes the edge from its parent."""

    def __init__(self, index, parent_index) -> None:
        # first letter of an edge label -> child node
        self.children = dict()
        # The label of the edge from the parent is text[parent_index:index + 1].
        # index: last text index of the label (inclusive);
        #   index == -1 -> the label runs to the end of the text (leaf)
        self.index = index
        # parent_index: first text index of the label;
        #   parent_index is None -> this node is the root
        self.parent_index = parent_index

        self.suffix_link = None

    def add_child(self, first_letter: str, index, parent_index):
        """Attach a new child behind ``first_letter``; ValueError if present."""
        if self.is_contain(first_letter):
            raise ValueError(F"SUCH CHILD ALREADY IN TREE: {first_letter}")
        else:
            self.children[first_letter] = SuffixTreeNode(index, parent_index)

    def get_child(self, first_letter: str):
        """Return the child behind ``first_letter``; ValueError if missing."""
        if self.is_contain(first_letter):
            return self.children[first_letter]
        else:
            raise ValueError(f"NOT SUCH CHILD: {first_letter}")

    def divine_branch(self, letter, length, edge, text, leaf_start=None):
        """Split the edge starting with ``edge`` and hang a new leaf there.

        The first ``length`` characters of the edge stay above a new inner
        node; the old child keeps the rest.  The inner node also receives a
        leaf labelled ``text[leaf_start:]`` behind ``letter``.

        Returns:
            The newly created inner node.

        Raises:
            ValueError: when ``leaf_start`` is missing, ``length`` does not
                fall strictly inside the edge, or the edge already continues
                with ``letter`` at the split position.
        """
        if leaf_start is None:
            raise ValueError("leaf_start IS REQUIRED TO LABEL THE NEW LEAF")
        old_child = self.get_child(edge)
        if not 0 < length < old_child.length_from_parent(text):
            raise ValueError(f"CANNOT SPLIT EDGE {edge} AT LENGTH {length}")
        # first label index that remains with the old child
        split_at = old_child.parent_index + length
        if text[split_at] == letter:
            raise ValueError(F"SUCH CHILD ALREADY IN TREE: {letter}")

        child = SuffixTreeNode(split_at - 1, old_child.parent_index)
        self.children[edge] = child
        old_child.parent_index = split_at
        child.children[text[split_at]] = old_child
        child.add_child(letter, -1, leaf_start)

        return child

    def link(self, other):
        """Set the suffix link; ValueError when ``other`` is not a node."""
        if isinstance(other, SuffixTreeNode):
            self.suffix_link = other
        else:
            raise ValueError(f"WRONG INSTANCE OF LINKING ITEM {type(other)}")

    def is_contain(self, first_letter: str):
        """Tell whether an edge starting with ``first_letter`` leaves this node."""
        return first_letter in self.children

    # prints the text carried by the edge from the parent to this node
    def print_branch_above(self, text):
        if self.index == -1:
            print(text[self.parent_index:])
        elif self.parent_index is None:
            print("")
        else:
            print(text[self.parent_index:self.index+1])

    # returns the length of the edge between this node and its parent
    def length_from_parent(self, text):
        """Return the label length of the edge from the parent (None for the root).

        Raises:
            RuntimeError: when the stored indices do not describe a slice of
                ``text`` of that length.
        """
        if self.index == -1:
            ans = len(text) - self.parent_index
            if len(text[self.parent_index:]) != ans:
                raise RuntimeError(f"length form parent wrong, index = {self.index}")
        elif self.parent_index is None:
            return None
        else:
            ans = self.index - self.parent_index + 1
            if len(text[self.parent_index:self.index+1]) != ans:
                raise RuntimeError(f"length form parent wrong, index = {self.index}")
        return ans



In [29]:
# Build a suffix tree for a short sample; as the last expression of the cell
# the resulting object's repr is what the notebook displays.
SuffixTree("abcabx")

0 abcabx
1 bcabx
2 cabx
3 abx
4 bx
5 x


<__main__.SuffixTree at 0x7f0a8ac8c0d0>