In [None]:
# 1. suffix trie
def TrieConstruction(Patterns: List[str]) -> List[Tuple[int, int, str]]:
    """Build a trie over ``Patterns`` and return it as an edge list.

    The root is node 0; every other node receives the next unused integer
    id at the moment it is created.

    Args:
        Patterns: The strings to insert, processed in the given order.

    Returns:
        One ``(parent, child, symbol)`` tuple per trie edge, in creation
        order.
    """
    edges = []
    # outgoing[node] maps an edge symbol to the node that edge leads to.
    outgoing = [{}]

    for word in Patterns:
        node = 0
        for ch in word:
            target = outgoing[node].get(ch)
            if target is None:
                # No edge for this symbol yet: grow the trie by one node.
                target = len(outgoing)
                outgoing.append({})
                outgoing[node][ch] = target
                edges.append((node, target, ch))
            node = target

    return edges

# 2. suffix tree
def DFS(start, graph, visited=None, path=None, all_paths=None):
    """Follow the non-branching path that begins at ``start``.

    Starting at ``start``, the walk keeps moving to the only child of the
    current node until it reaches a node that is missing from ``graph`` or
    that does not have exactly one child.  The list of nodes walked is
    then appended to ``all_paths``.  If the walk runs into a node that is
    already in ``visited`` it stops without recording anything.

    Args:
        start: Node to start from.
        graph: Mapping ``node -> {symbol: child}``.
        visited: Set of already-seen nodes; updated in place.  A fresh set
            is used when omitted.
        path: Nodes that precede ``start`` on the path.  It is copied, so
            the caller's list is never modified.
        all_paths: List that receives the finished path; updated in place.
            A fresh list is used when omitted.

    Returns:
        ``all_paths`` (the list passed in, or the newly created one).
    """
    # Defaults are created per call: mutable default arguments would be
    # shared between calls and leak nodes/paths from one call to the next.
    if visited is None:
        visited = set()
    if all_paths is None:
        all_paths = []
    walked = [] if path is None else list(path)

    # At most one child is ever followed, so a loop is equivalent to the
    # recursion and cannot hit the interpreter's recursion limit.
    node = start
    while True:
        visited.add(node)
        walked.append(node)
        successors = graph.get(node) if node in graph else None
        if successors is None or len(successors) != 1:
            # Leaf or branching node: the path ends here.
            all_paths.append(walked)
            break
        only_child = next(iter(successors.values()))
        if only_child in visited:
            break
        node = only_child

    return all_paths

# Please do not remove package declarations because these are used by the autograder.
def suffix_tree(text: str) -> List[str]:
    """Return the edge labels of the suffix tree of ``text``.

    The suffix trie of ``text`` is built first (root is node 0, other
    nodes are numbered in creation order).  Every maximal non-branching
    path of the trie is then collapsed into a single edge whose label is
    the concatenation of the symbols along that path.

    Args:
        text: The string to index.  It is used as given; no terminator is
            appended here.

    Returns:
        One label per edge of the collapsed tree, ordered by the trie id
        of the first node of each collapsed path.  Empty for empty text.
    """
    # children[v] maps a symbol to the child of v reached by that symbol;
    # incoming[v] is the symbol on the single edge entering v.  Keeping the
    # incoming symbol per node avoids searching all edges for a node's
    # parent edge later on.
    children = [{}]
    incoming = [""]

    size = len(text)
    for start in range(size):
        node = 0
        for j in range(start, size):
            symbol = text[j]
            child = children[node].get(symbol)
            if child is None:
                child = len(children)
                children.append({})
                incoming.append(symbol)
                children[node][symbol] = child
            node = child

    # Collapse non-branching paths.  A parent always has a smaller id than
    # its children, so scanning ids in increasing order reaches the top of
    # every path before any node further down it.
    labels = []
    visited = set()
    for first in range(1, len(children)):
        if first in visited:
            continue
        node = first
        visited.add(node)
        symbols = [incoming[node]]
        # Walk down iteratively while the current node has exactly one
        # child; a leaf or a branching node ends the collapsed edge.
        while len(children[node]) == 1:
            node = next(iter(children[node].values()))
            visited.add(node)
            symbols.append(incoming[node])
        labels.append("".join(symbols))

    return labels


# 3. longest_repeat
def suffix_array(text: str) -> List[int]:
    """Return the suffix array of ``text``.

    Entry ``r`` of the result is the start position of the suffix that
    comes ``r``-th when all suffixes are sorted lexicographically.
    """
    starts = list(range(len(text)))
    # Order the start positions by the suffix each one begins.
    starts.sort(key=lambda begin: text[begin:])
    return starts


def build_lcp_array(s, suffix_array):
    """Compute the LCP array of ``s`` from its suffix array.

    Args:
        s: The indexed string.
        suffix_array: Start positions of the suffixes of ``s`` in sorted
            order.

    Returns:
        A list of ``len(s) - 1`` integers; entry ``r`` is the length of
        the longest common prefix of the suffixes ranked ``r`` and
        ``r + 1``.
    """
    length = len(s)

    # order_of[p] is the rank of the suffix starting at position p.
    order_of = [0] * length
    for order, begin in enumerate(suffix_array):
        order_of[begin] = order

    result = [0] * (length - 1)
    matched = 0
    for begin in range(length):
        order = order_of[begin]
        if order == length - 1:
            # The last-ranked suffix has no successor to compare with.
            matched = 0
            continue

        follower = suffix_array[order + 1]
        # Extend the match carried over from the previous position.
        while (
            begin + matched < length
            and follower + matched < length
            and s[begin + matched] == s[follower + matched]
        ):
            matched += 1
        result[order] = matched

        # Dropping the first character shortens the known match by one.
        if matched:
            matched -= 1

    return result
# Insert your longest_repeat function here, along with any subroutines you need
def longest_repeat(text: str) -> str:
    """Find the longest substring that occurs at least twice in ``text``.

    The answer is the longest common prefix of two suffixes that are
    adjacent in the suffix array.  When several repeats share the maximum
    length, the one whose pair ranks last in the suffix array is returned.

    Args:
        text: The string to search.

    Returns:
        The longest repeated substring, or ``""`` when nothing repeats.
    """
    # Fewer than two characters means fewer than two suffixes, so the LCP
    # array would be empty and ``max`` would raise ValueError.
    if len(text) < 2:
        return ""

    array = suffix_array(text)
    lcp_array = build_lcp_array(text, array)
    max_length, index = max((lcp, idx) for idx, lcp in enumerate(lcp_array))

    if max_length == 0:
        return ""
    begin = array[index]
    return text[begin:begin + max_length]


# 4. longest shared substring
# Please do not remove package declarations because these are used by the autograder.
def suffix_array(text) -> List[int]:
    """Return the start positions of the suffixes of ``text`` in sorted order."""
    # Pair every suffix with the position it starts at, sort the pairs
    # lexicographically, then keep only the start positions.
    ranked = sorted((text[begin:], begin) for begin in range(len(text)))
    return [begin for _, begin in ranked]

# Stores the length of the longest common prefix of each pair of consecutive suffixes in the sorted suffix array, keeping only the pairs whose suffixes start on opposite sides of the delimiter.
def build_lcp_array(text, suffix_array, delimiter=None):
    """Compute LCP values for suffixes that are adjacent in the suffix array.

    Args:
        text: The indexed string.
        suffix_array: Start positions of the suffixes of ``text`` in sorted
            order.
        delimiter: Position of the separator character in ``text``.  When
            given, a value is recorded only for pairs whose two suffixes
            start on opposite sides of that position; every other entry
            stays 0.  When ``None``, the value is recorded for every pair.

    Returns:
        A list of ``len(text) - 1`` integers; entry ``r`` refers to the
        suffixes ranked ``r`` and ``r + 1``.
    """
    n = len(text)
    lcp = [0] * (n - 1)

    # rank[p] is the position in the suffix array of the suffix starting at p.
    rank = [0] * n
    for position, start in enumerate(suffix_array):
        rank[start] = position

    k = 0
    for i in range(n):
        # The last-ranked suffix has no successor to compare with.
        if rank[i] == n - 1:
            k = 0
            continue

        # Next suffix in the sorted order.
        j = suffix_array[rank[i] + 1]

        # The match length must be brought up to date for EVERY pair, even
        # the ones that are not recorded: the starting value of k is only a
        # valid lower bound if it comes from the immediately preceding
        # position.  Skipping this step for filtered pairs would carry a
        # stale k forward and overestimate later entries.
        while i + k < n and j + k < n and text[i + k] == text[j + k]:
            k += 1

        if delimiter is None or (i - delimiter) * (j - delimiter) < 0:
            lcp[rank[i]] = k

        if k > 0:
            k -= 1  # dropping the first character shortens the match by one

    return lcp

# Insert your longest_shared_substring function here, along with any subroutines you need
def longest_shared_substring(text1: str, text2: str) -> str:
    """Find the longest substring that occurs in both ``text1`` and ``text2``.

    The two texts are joined as ``text1 + "$" + text2``.  A shared
    substring is a common prefix of two suffixes of the joined text that
    are adjacent in its suffix array and start on opposite sides of the
    ``"$"``.  When several candidates share the maximum length, the one
    whose pair ranks last in the suffix array is returned.

    Args:
        text1: First text.  Assumed not to contain ``"$"``.
        text2: Second text.  Assumed not to contain ``"$"``.

    Returns:
        The longest shared substring, or ``""`` if the texts share nothing.
    """
    # An empty text shares nothing.  Returning early also avoids calling
    # ``max`` on an empty LCP array when both texts are empty.
    if not text1 or not text2:
        return ""

    text = text1 + "$" + text2
    array = suffix_array(text)
    # Position of the separator inside the joined text.
    delimiter = len(text1)
    lcp_array = build_lcp_array(text, array, delimiter)
    max_length, index = max((lcp, idx) for idx, lcp in enumerate(lcp_array))

    if max_length == 0:
        return ""
    begin = array[index]
    return text[begin:begin + max_length]




# 5. suffix array
def suffix_array(text: str) -> List[int]:
    """Return the suffix array of ``text``: suffix start positions in
    lexicographic order of the suffixes they begin.
    """
    def suffix_from(begin):
        # The suffix that starts at ``begin`` is the sort key for ``begin``.
        return text[begin:]

    return sorted(range(len(text)), key=suffix_from)