In [77]:
"""
proper prefix/suffix: a prefix/suffix that is not empty and not equal to the whole string

e.g. kmp(ABABX, ABX); kmp(ABABY, ABABX)

s     a b c a b c f
lcps  0 0 0 1 2 3 0

[x]aababcxabcaabcabxabcabcx
[a]bcabcf -> match=0, lcps=0 -> starti+=1, startj=0

xa[a]babcxabcaabcabxabcabcx
 a[b]cabcf -> match=1, lcps=0 -> starti+=1, startj=0

xaab[a]bcxabcaabcabxabcabcx
  ab[c]abcf -> match=2, lcps=0 -> starti+=2, startj=0

xaababc[x]abcaabcabxabcabcx
    abc[a]bcf -> match=3, lcps=0 -> starti+=3, startj=0
    
xaababc[x]abcaabcabxabcabcx
       [a]bcabcf -> match=0, lcps=0 -> starti+=1, startj=0
    
xaababcxabca[a]bcabxabcabcx
        abca[b]cf -> match=4, lcps=1 -> starti+=3, startj=1
    
xaababcxabca[a]bcabxabcabcx
           a[b]cabcf -> match=1, lcps=0 -> starti+=1, startj=0
    
xaababcxabcaabcab[x]abcabcx
            abcab[c]f -> match=5, lcps=2 -> starti+=3, startj=2
    
xaababcxabcaabcab[x]abcabcx
               ab[c]abcf -> match=2, lcps=0 -> starti+=2, startj=0
    
xaababcxabcaabcab[x]abcabcx
                 [a]bcabcf -> match=0, lcps=0 -> starti+=1, startj=0
    
xaababcxabcaabcabxabcabc[x]
                  abcabc[f] -> match=6, lcps=3 -> starti+=3, startj=3
    
xaababcxabcaabcabxabcabc[x]
                     abc[a]bcf -> match=3, lcps=0 -> starti+=3, startj=0
    
xaababcxabcaabcabxabcabc[x]
                        [a]bcabcf -> match=0, lcps=0 -> starti+=1, startj=0
"""
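# match  = number of pattern characters matched in this attempt (counted on the pattern, not on the text); the mismatched character is excluded
# starti = how far the pattern's start position shifts along the text
# startj = index in the pattern at which comparison resumes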

0

In [104]:
def get_lcpsLx(s): # brute force # O(m^3)
    """Reference implementation of the prefix/suffix table, by exhaustive search.

    For every end position, try each candidate length k (1 .. end) and keep
    the largest k for which the first k items of s[:end+1] equal its last k
    items. k never reaches the full length, so only proper prefixes count.

    Returns a list with one int per item of s ([] for empty input).
    """
    table = []
    for end in range(len(s)):
        head = s[:end+1]
        # max() over the matching lengths; default=0 covers "no border at all"
        best = max((k for k in range(1, end+1) if head[:k]==head[-k:]), default=0)
        table.append(best)
    return table

# https://www.codingninjas.com/codestudio/library/kmp-string-matching-algorithm
def get_lcpsL(s): # O(m) amortized # e.g. aabaaac
    """Build the KMP prefix/suffix (failure) table for `s`.

    Entry k of the result is the length of the longest proper prefix of
    s[:k+1] that is also a suffix of s[:k+1] (0 when there is none).

    Args:
        s: the pattern; only len() and indexing are used.

    Returns:
        A list of len(s) ints. Empty input yields [], so the table always has
        exactly one entry per character, the same as the brute-force get_lcpsLx.

    Why this is linear even though the loop may revisit the same i:
    prev grows by at most 1 each time i advances, and every fall-back
    (prev = L[prev-1]) strictly shrinks it, so the number of fall-backs can
    never exceed the number of advances.
    """
    if len(s)==0: # no characters -> no table entries (previously returned [0])
        return []
    L    = [0] # a single character has no proper prefix
    prev = 0 # length of the last longest proper prefix that is also a suffix; it can grow, stay, or shrink
    i    = 1 # i only moves forward, or stays put during a fall-back
    while i<len(s): # while, not for: after a fall-back the same i is retried with a shorter prev
        #print("i=", i, "prev=", prev, "s[:i]=", s[:i+1], "L=", L, end=" | ")
        if s[i]==s[prev]: # matched: the current border extends by one character
            #print("match")
            prev+=1
            L.append(prev)
            i+=1
        elif prev==0: # not matched and nothing accumulated: no border ends at i
            #print("un1")
            L.append(0)
            i+=1
        else: # not matched but something accumulated
            #print("un2")
            prev = L[prev-1] # SPECIAL: fall back to the next shorter border and retry the same i
    return L

#print( get_lcpsL("aabaaac") ) # [0, 1, 0, 1, 2, 2, 0]
"""
# Uncomment the print statements inside get_lcpsL to reproduce the trace below for "aabaaac"
i= 1 prev= 0 s[:i]= aa L= [0] | match
i= 2 prev= 1 s[:i]= aab L= [0, 1] | un2
i= 2 prev= 0 s[:i]= aab L= [0, 1] | un1
i= 3 prev= 0 s[:i]= aaba L= [0, 1, 0] | match
i= 4 prev= 1 s[:i]= aabaa L= [0, 1, 0, 1] | match
i= 5 prev= 2 s[:i]= aabaaa L= [0, 1, 0, 1, 2] | un2 # aa[b]aa[a] not matched -> check a[a]baa[a], so turn back to L[2-1]=L[1]=1
i= 5 prev= 1 s[:i]= aabaaa L= [0, 1, 0, 1, 2] | match
i= 6 prev= 2 s[:i]= aabaaac L= [0, 1, 0, 1, 2, 2] | un2 # aa[b]aaa[c] not matched -> check a[a]baaa[c], so turn back to L[2-1]=L[1]=1
i= 6 prev= 1 s[:i]= aabaaac L= [0, 1, 0, 1, 2, 2] | un2 # a[a]baaa[c] not matched -> check [a]abaaa[c], so turn back to L[1-1]=L[0]=0
i= 6 prev= 0 s[:i]= aabaaac L= [0, 1, 0, 1, 2, 2] | un1
"""
#raise
#print( get_lcpsL("abcabcf"), get_lcpsL("ababcaabc"), get_lcpsL("ll"), get_lcpsL("a") )

def kmp(haystack, needle): # O(n+m) # m>=1 and n>=1
    """Return the index of the first occurrence of `needle` in `haystack`, or -1.

    `start` is where the pattern is currently aligned in the text and
    `matched` is how many leading pattern characters already agree there.
    On a mismatch the pattern is shifted so that its longest border (taken
    from the get_lcpsL table) lines up with the text just compared, which
    means start+matched never moves backwards.

    Written for non-empty inputs; an empty needle returns 0.
    """
    table = get_lcpsL(needle)
    n, m = len(haystack), len(needle)
    start, matched = 0, 0
    # start+m<=n (rather than start+matched<n) also covers a needle longer than the haystack
    while start+m<=n:
        # extend the current partial match as far as it goes
        while matched<m and haystack[start+matched]==needle[matched]:
            matched+=1
        if matched==m:
            return start
        if matched==0:
            # nothing matched: step one position; there is no table[-1] to consult,
            # and a zero shift would loop forever
            start+=1
        else:
            keep = table[matched-1] # border length of the matched part; always < matched
            start+=matched-keep
            matched = keep
    return -1

# Self-check: each case is (text, pattern, expected index of the first occurrence).
# Silent when every case agrees; a disagreement prints "<got> <expected>".
for haystack, needle, ans in (
    ("xaababcxabcaabcabxabcabcf", "abcabcf", 18),
    ("ababcaababcaabc", "ababcaabc", 6),
    ("hello", "ll", 2),
    ("a", "a", 0),
):
    pd = kmp(haystack, needle) # NOTE: `pd` is the index kmp returned, not the pandas alias
    if pd==ans:
        continue
    print( pd, ans )

In [75]:
"""
Note: the lcps (longest proper prefix that is also a suffix) table used by KMP above is not related to the suffix array and LCP array
# https://www.codingninjas.com/codestudio/library/longest-common-prefix-from-suffix-array
"""
0

0