# Edit Distance

## Recursive Brute Force Solution

In [2]:
def edit_distance(s: str, t: str) -> int:
    """Return the minimum number of single-character edits turning ``s`` into ``t``.

    An edit is a substitution, a deletion from ``s`` or an insertion into
    ``s``.  This is the plain recursive formulation: every call on two
    non-empty prefixes spawns three further calls and nothing is cached,
    so it is only practical for short strings.
    """

    def solve(s_len: int, t_len: int) -> int:
        """Edit distance between the prefixes ``s[:s_len]`` and ``t[:t_len]``."""
        # An empty prefix of s needs one insertion per remaining char of t.
        if s_len == 0:
            return t_len
        # An empty prefix of t needs one deletion per remaining char of s.
        if t_len == 0:
            return s_len

        # Substitution is free when the last characters already agree.
        mismatch = 0 if s[s_len - 1] == t[t_len - 1] else 1
        via_change = mismatch + solve(s_len - 1, t_len - 1)

        # Drop the last character of s.
        via_delete = 1 + solve(s_len - 1, t_len)

        # Append the last character of t.
        via_insert = 1 + solve(s_len, t_len - 1)

        return min(via_change, via_insert, via_delete)

    return solve(len(s), len(t))
        

In [3]:
# Demo: edit distance from "snowy" to "sunny" (the recorded output below is 3).
s = "snowy"
t = "sunny"
print(edit_distance(s,t))

3


In [4]:
# Demo: a longer pair of words (the recorded output below is 6).
s = 'polynomial'
t = 'exponential'
print(edit_distance(s,t))

6


## Edit Distance - Dynamic Programming

In [7]:
import numpy as np
# let m = len(s)   n = len(t)
def edit_distance(s: str, t: str) -> np.ndarray:
    """Build the dynamic-programming table of edit distances between prefixes.

    Args:
        s: Source string (its length is ``m``).
        t: Target string (its length is ``n``).

    Returns:
        A float array of shape ``(m + 1, n + 1)`` in which entry ``[i][j]``
        is the edit distance between ``s[:i]`` and ``t[:j]``.  The distance
        between the full strings is the bottom-right entry ``[m][n]``.
        The whole table is returned (not just that scalar) so callers can
        trace an alignment back through it.
    """
    m, n = len(s), len(t)
    dist = np.zeros((m + 1, n + 1))  # O(mn) space

    # Row 0: the empty prefix of s becomes t[:j] with j insertions.
    dist[0, :] = np.arange(n + 1)

    # Column 0: s[:i] becomes the empty prefix of t with i deletions.
    dist[:, 0] = np.arange(m + 1)

    for i in range(1, m + 1):          # rows    -> O(mn) time overall
        for j in range(1, n + 1):      # columns
            # Substituting is free when the two characters already match.
            substitution = 0 if s[i - 1] == t[j - 1] else 1
            dist[i, j] = min(
                1 + dist[i - 1, j],                # deletion
                1 + dist[i, j - 1],                # insertion
                dist[i - 1, j - 1] + substitution, # change / keep
            )

    return dist
        
    

In [8]:
# Print the full table returned by the DP version; the bottom-right entry
# is the edit distance between the two complete words.
print(edit_distance("repeal", "treat"))

[[0. 1. 2. 3. 4. 5.]
 [1. 1. 1. 2. 3. 4.]
 [2. 2. 2. 1. 2. 3.]
 [3. 3. 3. 2. 2. 3.]
 [4. 4. 4. 3. 3. 3.]
 [5. 5. 5. 4. 3. 4.]
 [6. 6. 6. 5. 4. 4.]]


In [10]:
from typing import Tuple
def align(s: str, t: str) -> Tuple[str,str,str]:
    """Align ``s`` and ``t`` by tracing back through the edit-distance table.

    Args:
        s: Source string.
        t: Target string.

    Returns:
        A tuple ``(sa, ta, c)`` of three equal-length strings:
        ``sa`` is ``s`` with ``'-'`` where a character was inserted,
        ``ta`` is ``t`` with ``'-'`` where a character was deleted, and
        ``c`` holds ``'*'`` under every substituted column and ``' '``
        everywhere else.
    """
    dist = edit_distance(s, t)
    i = len(s)  # row
    j = len(t)  # column

    # The walk runs from the bottom-right corner back to the origin, so the
    # columns are produced last-to-first.  They are appended to lists and
    # reversed once at the end instead of prepending to strings each step.
    s_cols = []
    t_cols = []
    marks = []

    while i > 0 and j > 0:
        s_ch = s[i - 1]
        t_ch = t[j - 1]
        same = s_ch == t_ch

        # Diagonal move: keep the character (cost 0) or change it (cost 1).
        if dist[i][j] == dist[i-1][j-1] + (0 if same else 1):
            s_cols.append(s_ch)
            t_cols.append(t_ch)
            marks.append(' ' if same else '*')
            i -= 1
            j -= 1

        # Move up: s_ch was deleted.
        elif dist[i][j] == 1 + dist[i-1][j]:
            s_cols.append(s_ch)
            t_cols.append('-')
            marks.append(' ')
            i -= 1

        # Move left: t_ch was inserted.  Each cell is the minimum of the
        # three candidate moves, so this is the only one remaining.
        else:
            s_cols.append('-')
            t_cols.append(t_ch)
            marks.append(' ')
            j -= 1

    # Reached the top row: the rest of t is inserted.
    while j > 0:
        j -= 1
        s_cols.append('-')
        t_cols.append(t[j])
        marks.append(' ')

    # Reached the left column: the rest of s is deleted.
    while i > 0:
        i -= 1
        s_cols.append(s[i])
        t_cols.append('-')
        marks.append(' ')

    sa = ''.join(reversed(s_cols))
    ta = ''.join(reversed(t_cols))
    c = ''.join(reversed(marks))
    return (sa, ta, c)
    
    

In [11]:
# Align the two words: sa and ta are the gapped strings, c is the marker row.
(sa, ta, c) = align('polynomial', 'exponential')

In [12]:
# Show the alignment one string per line; '*' in the last line marks a
# column where the two characters differ (a substitution).
print(sa)
print(ta)
print(c)

--polynomial
exponen-tial
    **  *   
