## Strings

In [None]:
# Print every 7-bit ASCII code next to the character it encodes.
# print(...) with a single parenthesised argument produces the same output
# under Python 2 (print statement) and Python 3 (print function).
for i in range(128):
    print("%d %c" % (i, i))

## Knuth-Morris-Pratt (KMP) string search

In [1]:
def jump_table(pattern):
    """Build the fallback table used by ``kmp`` for ``pattern``.

    The returned list has ``len(pattern) + 1`` entries.  Entry ``k`` (for
    ``k >= 1``) is the length of the longest proper prefix of
    ``pattern[:k]`` that is also a suffix of it.  Entry 0 is ``None``
    because there is no fallback for a zero-length match.
    """
    table = [None]

    for pos, symbol in enumerate(pattern):
        # Walk down the chain of shorter borders until one can be extended
        # by ``symbol`` or the chain is exhausted (width reaches 0).
        width = pos
        while width != 0 and pattern[table[width]] != symbol:
            width = table[width]

        table.append(0 if width == 0 else table[width] + 1)

    return table

In [4]:
# Example: build the jump table for a sample pattern.
jump_table("12332112")

[None, 0, 0, 0, 0, 0, 1, 1, 2]

In [None]:
def kmp(P, T):
    """Return the index of the first occurrence of pattern P in text T.

    Uses the table from ``jump_table(P)`` so that, after a mismatch, the
    search resumes without re-reading text characters that already matched.

    Returns the start index of the first match, or ``None`` when P does not
    occur in T.  An empty pattern matches at index 0 (the same convention as
    ``str.find``).
    """
    # Without this guard P[0] would raise IndexError for any non-empty T,
    # while an empty T would fall through to None.
    if len(P) == 0:
        return 0

    jump = jump_table(P)

    index = 0  # start of the current candidate match in T
    match = 0  # number of characters of P matched so far

    while index + match < len(T):
        if T[index + match] == P[match]:
            match = match + 1

            if match == len(P):
                return index
        else:
            if match == 0:
                # Nothing matched at this position: try the next one.
                index = index + 1
            else:
                # Slide the candidate forward so that the longest border of
                # the matched prefix stays aligned, and keep that many
                # characters as already matched.
                index = index + match - jump[match]
                match = jump[match]

    return None

In [None]:
# Example search: the pattern first occurs at index 6 of the text.
kmp("ababac", "abbdrfababacasdfababacaadasd")

## Levenshtein distance

In [4]:
def call_counter(func):
    """Decorator that counts how many times ``func`` is invoked.

    The returned wrapper forwards all positional and keyword arguments to
    ``func`` and returns its result.  The running total is exposed as the
    wrapper's ``calls`` attribute, which starts at 0 and may be reset by
    assigning to it.
    """
    import functools

    # functools.wraps copies __name__, __doc__, __module__ and the wrapped
    # function's attributes, instead of only __name__.
    @functools.wraps(func)
    def helper(*args, **kwargs):
        # Count before delegating so calls that raise are counted too.
        helper.calls += 1
        return func(*args, **kwargs)

    # Set after wraps() so a ``calls`` attribute copied from ``func``
    # (e.g. when decorating twice) cannot leak into the fresh counter.
    helper.calls = 0
    return helper

In [5]:
@call_counter
def levenshtein(s, t):
    """Return the edit distance between ``s`` and ``t``.

    Plain recursion on the last characters with no caching: every call with
    two non-empty strings spawns three further calls.
    """
    if s == "":
        return len(t)
    if t == "":
        return len(s)

    # Replacing the last character is free when the two already agree.
    substitution = 0 if s[-1] == t[-1] else 1

    drop_from_s = levenshtein(s[:-1], t) + 1
    drop_from_t = levenshtein(s, t[:-1]) + 1
    replace_last = levenshtein(s[:-1], t[:-1]) + substitution
    return min(drop_from_s, drop_from_t, replace_last)

In [6]:
# Example: the two strings differ by one swap of adjacent characters ("am" vs "ma").
levenshtein("My name is", "My nmae is")

2

In [7]:
# Total number of levenshtein invocations recorded by call_counter so far.
levenshtein.calls

12146179

In [8]:
# Cache of already-solved sub-problems, keyed by the (s, t) argument pair.
memo = {}
@call_counter
def levenshtein2(s, t):
    """Return the edit distance between ``s`` and ``t``, caching sub-results.

    Each of the three sub-problems is looked up in the module-level ``memo``
    dict and only computed (recursively) when it is missing.  The result of
    the outermost call itself is not stored.
    """
    if s == "":
        return len(t)
    if t == "":
        return len(s)

    if s[-1] == t[-1]:
        cost = 0
    else:
        cost = 1

    shorter_s = (s[:-1], t)
    shorter_t = (s, t[:-1])
    shorter_both = (s[:-1], t[:-1])

    # Resolve the sub-problems in a fixed order so the cache fills the same
    # way on every run.
    for key in (shorter_s, shorter_t, shorter_both):
        if key not in memo:
            memo[key] = levenshtein2(*key)

    return min(memo[shorter_s] + 1,
               memo[shorter_t] + 1,
               memo[shorter_both] + cost)



In [12]:
# Reset the call counter, then compute a distance.  ``memo`` is not cleared
# here, so entries left by earlier runs are reused.
levenshtein2.calls = 0 
levenshtein2("Test", "Toast")

2

In [10]:
# Number of levenshtein2 invocations recorded by call_counter since the last reset.
levenshtein2.calls

121

In [11]:
# Number of distinct (s, t) sub-problems currently stored in the cache.
len(memo.keys())

120

## Longest common subsequence (LCS)

In [13]:
@call_counter
def lcs(xstr, ystr):
    """Return a longest common subsequence of ``xstr`` and ``ystr``.

    Plain recursion on the first characters with no caching.  When both
    alternatives are equally long, the one obtained by dropping the first
    character of ``ystr`` is returned.
    """
    if not (xstr and ystr):
        return ""

    if xstr[0] == ystr[0]:
        # A shared first character is always part of some longest answer.
        return xstr[0] + lcs(xstr[1:], ystr[1:])

    without_y_head = lcs(xstr, ystr[1:])
    without_x_head = lcs(xstr[1:], ystr)
    if len(without_y_head) >= len(without_x_head):
        return without_y_head
    return without_x_head

In [14]:
# Reset the call counter, then compute the LCS of two sample strings.
lcs.calls = 0 
lcs("1a234rt","c1b23xd4")

'1234'

In [15]:
# Number of lcs invocations recorded by call_counter since the last reset.
lcs.calls

3866

In [16]:
def diff(xs, ys):
    """Print an element-by-element diff that turns ``xs`` into ``ys``.

    The longest common subsequence from ``lcs`` serves as the set of
    unchanged elements; ``comp`` does the printing.  Returns ``None``.
    """
    comp(xs, ys, lcs(xs, ys))

def comp(xs, ys, cs):
    """Print a diff of ``xs`` against ``ys`` given a common subsequence ``cs``.

    One line is printed per element:
      ``-e``  element ``e`` occurs only in ``xs``,
      ``+e``  element ``e`` occurs only in ``ys``,
      ``=e``  element ``e`` is the next element of ``cs`` (kept).

    ``cs`` must be a subsequence of both ``xs`` and ``ys``; otherwise an
    IndexError is raised once one of them is exhausted.

    Implemented iteratively with indices, so long inputs neither hit the
    recursion limit nor pay for re-slicing the sequences on every step.
    print(...) with a single parenthesised argument produces the same output
    under Python 2 and Python 3.
    """
    i = 0  # next unread position in xs
    j = 0  # next unread position in ys

    for c in cs:
        # Everything in xs before the next common element was removed ...
        while xs[i] != c:
            print("-%s" % xs[i])
            i += 1
        # ... and everything in ys before it was added.
        while ys[j] != c:
            print("+%s" % ys[j])
            j += 1
        print("=%s" % c)
        i += 1
        j += 1

    # Once the common subsequence is used up, the tails are pure
    # removals followed by pure additions.
    for x in xs[i:]:
        print("-%s" % x)
    for y in ys[j:]:
        print("+%s" % y)

In [17]:
# Example: print the element-wise diff between two sample strings.
diff("1a234rt","c1b23xd4")

+c
=1
-a
+b
=2
=3
+x
+d
=4
-r
-t
