Naive Code to Create a list of Ranked suffixes

In [9]:
from pprint import pprint
def return_ranked_suffixes(s):
    suffixes = []
    for i in range(len(s)):
        suffixes.append(s[i:])
    suffixes.sort()
    return suffixes

def return_suffixes(s):
    suffixes = []
    for i in range(len(s)):
        suffixes.append(s[i:])
    return suffixes

# string = "mississippi$"
string = "abcabxabcd$"
pprint(list(zip([i+1 for i in range(len(string))], return_suffixes(string))))
pprint(list(zip([i+1 for i in range(len(string))], return_ranked_suffixes(string))))

[(1, 'abcabxabcd$'),
 (2, 'bcabxabcd$'),
 (3, 'cabxabcd$'),
 (4, 'abxabcd$'),
 (5, 'bxabcd$'),
 (6, 'xabcd$'),
 (7, 'abcd$'),
 (8, 'bcd$'),
 (9, 'cd$'),
 (10, 'd$'),
 (11, '$')]
[(1, '$'),
 (2, 'abcabxabcd$'),
 (3, 'abcd$'),
 (4, 'abxabcd$'),
 (5, 'bcabxabcd$'),
 (6, 'bcd$'),
 (7, 'bxabcd$'),
 (8, 'cabxabcd$'),
 (9, 'cd$'),
 (10, 'd$'),
 (11, 'xabcd$')]


In [16]:
class Edge:
    def __init__(self, start, string, end = -1):
        self.start_value = string[start]
        self.start = start
        self.end = end

    def value(self, string, global_end):
        if self.end == -1: ## If not a leaf edge
            return string[self.start : global_end + 1]
        else:
            return string[self.start : self.end + 1]
    
    def length(self, global_end):
        if self.end == -1: ## If not a leaf edge
            return global_end - self.start
        else:
            return self.end - self.start
        
    def __hash__(self):
        # Hash the edge object based on the value attribute
        return hash(self.value[0])

    def __eq__(self, other):
        # Define equality based on the value attribute
        return self.start_value == other.start_value if isinstance(other, Edge) else False

    def __repr__(self):
        # This method helps in giving a printable representation of the object
        return f"Edge({self.value})"



In [1]:

class Node:
    def __init__(self, incoming_edge = None):
        self.incoming_edge = incoming_edge
        self.link_to = None
        self.children = {} ## HashMap<Edge: Node>

class SuffixTree:
    def __init__(self, string):
        self.root = Node()
        self.root.link_to = self.root

        ## Triple representing the active point
        self.aNode = self.root 
        self.aEdge = None ## TODO: Should this hold an edge object later on
        self.aLength = 0 
    
        self.remainder = 0
        self.text = string
        self.end = -1 ## Represents "#"
        self.unresolved_internal_node = None ## Holds the last created and unresolved internal node


    def _edge_insert(self, j):
        '''
        Insert a new node at the active point which lies along an edge
        
        If we are inserting an internal node at position x along edge RP then must make the following changes
        x = active length
        j = position, index of current phase
        R = active node
        P = leaf node

        1. Update R to remove P: {RP:P}
        2. Update RP edge to become RG edge by setting end = x
        3. Create new internal node G, incoming edge = RG
        4. Update R to add G: {RG:G}

        5. Create new edge GO, start = j end = -1
        6. Create new leaf node O, incoming edge = GO
        7. Update G to add O: {GO:O}

        8. Create new edge GP, start = x, end = -1
        9. Update leaf node P, incoming edge = GP

        PREVIOUS:    (R)---x---(P) 

                          (O)
        NEW:               |     
                    (R)---(G)---(P)
        '''
        x = self.aLength
        R = self.aNode
        RP = R.incoming_edge

        P = R.children.pop(RP) ## 1
        RP.end = x ## 2

        G = Node(incoming_edge = RP) ## 3
        R.children[RP] = G ## 4

        GO = Edge(start = j, end = -1, string = self.text) ##5
        O = Node(incoming_edge = G0) ## 6
        G.children[GO] = O ## 7

        GP = Edge(start = x, end = -1, string = self.text) ## 8
        P.incoming_edge = GP ## 9

        ## Set unresolved_internal_node to G
        self.unresolved_internal_node = G


    def _node_insert(self, parent, position):
        '''
        Insert a new node at the active node
        '''
        new_edge = Edge(start = position, string = self.text)
        new_leaf_node = Node(incoming_edge = new_edge)
        self.aNode.children[new_edge] = new_leaf_node



    def _check_or_insert(self, position): ## --> Returns True if an insertion was made, False if it already existed
        '''
        Checks if character exists at current active point, and if it doesn't, then insert it
        position = str[j-k] where j is the index of current phase, and k is obtained from iterating through range(remainder)
        str[j-k] where j is the index of current phase, and k is obtained from iterating through range(remainder)
        '''
        ## If we are on a node
        if self.aLength == 0: 

            ## If the character exists as a child node, it is implicitly represented, so return
            if self.text[position] in self.aNode.children.keys():
                return False
            
            ## If it doesn't exist, add the character as a new edge and node
            else:
                self._node_insert()
                return True

        ## Else we are on an edge
        else:
            edge_length = self.aEdge.length(self.end)

            ## If length of the edge <= the remaining length we must traverse (aLength), then recurse deeper into tree
            if self.aLength >= edge_length:
                self.aNode = self.aNode.children[str[position - edge_length]] ## aNode points to node traversed to 
                self.aEdge = self.aNode.incoming ## aEdge points to edge just traversed
                self.aLength -= edge_length ## Decrement aLength to be remaining length 
                return self._check_or_insert(position)
            
            ## Else we don't need to recurse deeper, so check/insert on the current edge
            else:
                edge_value = self.aEdge.value(self.end)

                ## If the remainder suffix has already been inserted, return False
                if edge_value[self.aLength] == self.text[position]:
                    return False
                
                ## Else insert it as a new internal node
                else:
                    self._edge_insert()
                    return True

    def _suffix_link(self, from_node, to_node):
        from_node.link_to = to_node

    def extend_suffix_tree(self, j):
        '''
        Performs a single phase of the suffix tree given. Extends the suffix tree from s[0...j-1] to s[0...j]
        '''

        # Increment `remainder` for each new character
        self.remainder += 1
        self.end += 1 ## Increment end denoter for each leaf edge
        self.unresolved_internal_node = None
        
        # Deal with all remaining suffixes. Using for loop to prevent infinite looping
        for k in range(self.remainder):
            
            ## Insert the remaining suffix at the active point `abx``
            inserted = self._insert_at_active_point(j)

            ## If an insertion did not occur, then str[j] exists at the active point so skip to next phase
            if not inserted:
                return 
            
            ## If an insertion did occur
            else: 
                ## Just inserted a new internal node, and it was not the first in this phase
                if self.unresolved_internal_node is not None:
                    ## TODO: aNode should point to most recent internal node encountered while inserting new remainder
                    self._suffix_link(self.unresolved_internal_node, self.aNode) 
                    self.unresolved_internal_node = None ## Set to None as have resolved suffix link
                

                ## If an insertion was made from the root node
                if self.aNode == self.root:
                    aLength -= 1
                    aEdge = self.text[]
                ## TODO: If no suffix link exists, go to root
                if self.aNode.link_to is None:
                    pass
                ## TODO: If a suffix link exists, follow it and update active point
                else:
                    aNode = aNode.link_to

        

                ## TODO: If at root already

        ## Reset unresolved_internal_node to None for next phase
        self.unresolved_internal_node = None
    
    def build_suffix_tree(self):
        for j in range(len(self.text)):
            self.extend_suffix_tree(j)

    def print_tree(self, node=None, indent=""):
        if node is None:
            node = self.root
        for key, (start, end, next_node) in node.edges.items():
            text = self.text[start:min(end + 1, self.end + 1)]
            print(f"{indent}{key}: {text} ({start}, {end})")
            self.print_tree(next_node, indent + "  ")

# Example usage
s = "abcabxabcd"
stree = SuffixTree(s)
stree.build_suffix_tree()
stree.print_tree()


SyntaxError: invalid syntax (477438342.py, line 159)