In [1]:
class Trie():
    """Prefix tree that reports how many added sequences start with a prefix.

    Each node is itself a ``Trie``. Sequences are expected to be sized
    sequences of hashable elements (``str``, ``list``, ``tuple``).
    """

    def __init__(self):
        # Element -> child Trie holding everything that follows that element.
        self.nodes = {}
        # Number of added sequences whose path reaches this node, including
        # the ones that end exactly here. On the root this is the number of
        # non-empty sequences added.
        self.count = 0

    def add(self, seq):
        """Insert ``seq`` and count it on every node along its path.

        Empty sequences are ignored. The walk is iterative, so the length of
        ``seq`` is not limited by the interpreter's recursion depth and no
        intermediate slices are created.
        """
        if not seq:
            return

        node = self
        node.count += 1
        for item in seq:
            child = node.nodes.get(item)
            if child is None:
                # type(self) instead of the global name: the children stay of
                # this class even if the name ``Trie`` is rebound later.
                child = node.nodes[item] = type(self)()
            # Counting here (also for the last element) is what lets find()
            # answer for the exact prefix rather than for its parent.
            child.count += 1
            node = child

    def find(self, seq):
        """Return how many added sequences start with ``seq``.

        Returns 0 for an empty ``seq`` and for a prefix that was never added.
        """
        if not seq:
            return 0

        node = self
        for item in seq:
            node = node.nodes.get(item)
            if node is None:
                return 0

        # The counter of the node reached by the last element, not of the
        # node before it.
        return node.count

In [8]:
# Build a trie from two words that share the prefix "hack".
trie_root = Trie()
trie_root.add("hack")
trie_root.add("hackerrank")

# Print one count per line for each queried prefix.
print(*map(trie_root.find, ("hack", "ha", "hacker")), sep="\n")

2
2
1


In [3]:
class Trie:
    """Prefix-counting trie stored as nested ``[children, count]`` lists."""

    def __init__(self):
        # data[0]: dict mapping an element to the child ``[children, count]``
        # pair; data[1]: number of add() calls that went through this node.
        self.data = [{}, 0]

    def add(self, seq):
        """Record ``seq``, bumping the counter of the root and of every node on its path."""
        node = self.data
        node[1] += 1

        for item in seq:
            # Reuse the existing child or create a fresh empty one.
            node = node[0].setdefault(item, [{}, 0])
            node[1] += 1

    def find(self, seq):
        """Return how many added sequences begin with ``seq`` (0 for an empty ``seq``)."""
        if not seq:
            return 0

        node = self.data
        for item in seq:
            children = node[0]
            if item not in children:
                return 0
            node = children[item]

        return node[1]

In [7]:
# Build a trie from two words that share the prefix "hack".
trie_root = Trie()
trie_root.add("hack")
trie_root.add("hackerrank")

# Print one count per line for each queried prefix.
print(*map(trie_root.find, ("hack", "ha", "hacker")), sep="\n")

2
2
1


In [11]:
from itertools import zip_longest, islice


def to_int_keys_best(l):
    """
    Replace every key by its rank among the distinct keys.

    l: iterable of hashable, mutually comparable keys; it is materialised
       once, so single-pass iterables (generators, iterators) work too
    returns: a list with integer keys, one per input key: the smallest
             distinct key becomes 0, the next one 1, and so on; equal keys
             get the same integer
    """
    # The keys are needed twice (to rank them and to translate them), so take
    # a copy instead of iterating the argument a second time.
    keys = list(l)
    # dict.fromkeys drops duplicates while keeping first-seen order.
    distinct = sorted(dict.fromkeys(keys))
    ranking = {key: rank for rank, key in enumerate(distinct)}
    return [ranking[key] for key in keys]


def suffix_matrix_best(s):
    """
    suffix matrix of s
    O(n * log(n)^2)

    s: sized sequence of hashable, mutually comparable elements (e.g. a str)
    returns: a list of rows of length len(s). Row t holds, for every start
             position i, the rank of the suffix s[i:] when only its first
             2**t elements are compared (ties share a rank, a suffix that
             runs out of elements sorts first). Rows are added until all
             ranks in the last row are distinct. For an empty s the result
             is [[]].
    """
    n = len(s)
    if n == 0:
        # max() on an empty rank row would raise ValueError below.
        return [[]]
    k = 1
    line = to_int_keys_best(s)
    ans = [line]
    # The largest rank reaches n - 1 exactly when all n ranks are distinct.
    while max(line) < n - 1:
        # Combine the rank at i (first k elements) with the rank at i + k
        # (next k elements) into one integer; b + 1 lies in 0..n, so the pair
        # is encoded without collisions in base n + 1. Positions past the end
        # are filled with -1 and therefore encode as 0.
        line = to_int_keys_best(
            [a * (n + 1) + b + 1
             for (a, b) in
             zip_longest(line, islice(line, k, None),
                         fillvalue=-1)])
        ans.append(line)
        k <<= 1
    return ans


def suffix_array_best(s):
    """
    suffix ranks of s (inverse of the conventional suffix array)
    O(n * log(n)^2)

    s: sized sequence of hashable, mutually comparable elements (e.g. a str)
    returns: a list r of length len(s) where r[i] is the position of the
             suffix s[i:] among all suffixes in sorted order. Note that the
             list is indexed by start position; it is not the list of start
             positions in sorted order. For an empty s the result is [].
    """
    n = len(s)
    if n == 0:
        # max() on an empty rank row would raise ValueError below.
        return []
    k = 1
    line = to_int_keys_best(s)
    # The largest rank reaches n - 1 exactly when all n ranks are distinct.
    while max(line) < n - 1:
        # Combine the rank at i (first k elements) with the rank at i + k
        # (next k elements) into one integer; b + 1 lies in 0..n, so the pair
        # is encoded without collisions in base n + 1. Positions past the end
        # are filled with -1 and therefore encode as 0.
        line = to_int_keys_best(
            [a * (n + 1) + b + 1
             for (a, b) in
             zip_longest(line, islice(line, k, None),
                         fillvalue=-1)])
        k <<= 1
    return line


def lcp(sm, i, j):
    """
    longest common prefix
    O(log(n))

    sm: suffix matrix (non-empty list of rank rows, row t ranking suffixes
        by their first 2**t elements, last row with all ranks distinct)
    i, j: start positions of the two suffixes
    returns: length of the longest common prefix of the suffixes starting at
             i and j; for i == j this is the suffix length n - i
    """
    n = len(sm[-1])
    if i == j:
        return n - i
    ans = 0
    # Walk the rows from the second-to-last down to the first. The last row
    # is skipped because its ranks are all distinct. With a single-row matrix
    # the range is empty and the answer is 0 (no negative shift is computed).
    for level in range(len(sm) - 2, -1, -1):
        if i >= n or j >= n:
            break
        line = sm[level]
        if line[i] == line[j]:
            # Equal ranks in this row: the next 2**level elements agree, so
            # count them and continue the comparison behind them.
            k = 1 << level
            ans += k
            i += k
            j += k
    return ans

In [13]:
# Show the final rank row first, then every row of the suffix matrix.
print(suffix_array_best('hungphan'), suffix_matrix_best('hungphan'), sep="\n")



[3, 7, 5, 1, 6, 2, 0, 4]
[[2, 5, 3, 1, 4, 2, 0, 3], [3, 7, 5, 1, 6, 2, 0, 4]]
