### 2.6

Implement minimum edit distance algorithm.

Steps of algorithm summarized in english:

1. 

In [23]:
import numpy as np
from typing import Callable

def min_edit_dist(src: str, 
                  tgt: str, 
                  del_cost: Callable[[str], int] = lambda x: 1, 
                  ins_cost: Callable[[str], int] = lambda x: 1,
                  sub_cost: Callable[[str, str], int] = lambda x,y: 0 if x==y else 2) -> int:
    """returns minium edit distance between two strings with Levenshtein distance as default"""
    n, m = len(src), len(tgt)
    D = np.zeros((n+1,m+1))
    for i in range(n):
        D[i+1,0] = D[i,0] + del_cost(src[i])
    for j in range(m):
        D[0,j+1] = D[0,j] + ins_cost(tgt[j])
        
#     for i,j in ((a,b) for a in range(1,n+1) for b in range(1,m+1)):
#         D[i,j] = min(D[i-1,j] + del_cost(src[i-1]),
#                      D[i-1,j-1] + sub_cost(src[i-1], tgt[j-1]),
#                      D[i,j-1] + ins_cost(tgt[j-1]))
    for i,j in ((a,b) for a in range(n) for b in range(m)):
        D[i+1,j+1] = min(D[i,j+1] + del_cost(src[i]),
                         D[i,j] + sub_cost(src[i], tgt[j]),
                         D[i+1,j] + ins_cost(tgt[j]))
        
    return D[n,m]

In [24]:
import unittest

class TestMinEditDist(unittest.TestCase):
    
    def test_trivial_base_case(self):
        src, tgt = "", ""
        src1, tgt1 = "a", "a"
        self.assertEqual(min_edit_dist(src, tgt), 0)
        self.assertEqual(min_edit_dist(src1, tgt1), 0)
        
    def test_all_delete_case(self):
        src1, tgt1 = "a", ""
        src2, tgt2 = "ab", ""
        self.assertEqual(min_edit_dist(src1, tgt1), 1)
        self.assertEqual(min_edit_dist(src2, tgt2), 2)
        
    def test_all_ins_case(self):
        src1, tgt1 = "", "a"
        src2, tgt2 = "", "ab"
        self.assertEqual(min_edit_dist(src1, tgt1), 1)
        self.assertEqual(min_edit_dist(src2, tgt2), 2)
        
    def test_all_sub_case(self):
        src1, tgt1 = "a", "b"
        src2, tgt2 = "aa", "bb"
        self.assertEqual(min_edit_dist(src1, tgt1), 2)
        self.assertEqual(min_edit_dist(src2, tgt2), 4)
        
test = TestMinEditDist()
test_suite = unittest.TestLoader().loadTestsFromModule(test)

unittest.TextTestRunner().run(test_suite)

....
----------------------------------------------------------------------
Ran 4 tests in 0.005s

OK


<unittest.runner.TextTestResult run=4 errors=0 failures=0>

In [100]:
import numpy as np
from typing import Callable

def aug_min_edit_dist(src: str, 
                      tgt: str, 
                      del_cost: Callable[[str], int] = lambda x: 1, 
                      ins_cost: Callable[[str], int] = lambda x: 1,
                      sub_cost: Callable[[str, str], int] = lambda x,y: 0 if x==y else 2) -> (int,str):
    """returns augmented edit distance and alignment of strings with Levenshtein as default metric"""
    alginment = ""
    n, m = len(src), len(tgt)
    D = np.zeros((n+1,m+1))
    A = np.chararray((n+1,m+1), unicode=True)
    A[:] = ""
    for i in range(n):
        D[i+1,0] = D[i,0] + del_cost(src[i])
    for j in range(m):
        D[0,j+1] = D[0,j] + ins_cost(tgt[j])
        
#     for i,j in ((a,b) for a in range(1,n+1) for b in range(1,m+1)):
#         D[i,j] = min(D[i-1,j] + del_cost(src[i-1]),
#                      D[i-1,j-1] + sub_cost(src[i-1], tgt[j-1]),
#                      D[i,j-1] + ins_cost(tgt[j-1]))
    for i,j in ((a,b) for a in range(n) for b in range(m)):
        cost = [D[i,j+1] + del_cost(src[i]),
                D[i,j] + sub_cost(src[i], tgt[j]),
                D[i+1,j] + ins_cost(tgt[j])]
        D[i+1,j+1] = min(cost)
        a = cost.index(min(cost))
        print(cost, src, tgt)
        if a == 1 and src[i] == tgt[i]:
            A[i+1,j+1] = A[i,j] + "_"
        elif a == 0:
            A[i+1,j+1] = A[i,j+1] + "D"
        elif a == 1:
            A[i+1,j+1] = A[i,j] + "S"
        else:
            A[i+1,j+1] = A[i+1,j] + "I"
    print(A)
    return D[n,m], A[n,m]

In [101]:
import unittest

class TestAugMinEditDist(unittest.TestCase):

    def test_trivial_base_case(self):
        src, tgt = "", ""
        src1, tgt1 = "a", "a"
        self.assertEqual(aug_min_edit_dist(src, tgt), (0,""))
        self.assertEqual(aug_min_edit_dist(src1, tgt1), (0,"_"))
        
    def test_all_delete_case(self):
        src1, tgt1 = "a", ""
        src2, tgt2 = "ab", ""
        self.assertEqual(aug_min_edit_dist("a", ""), (1,"D"))
        self.assertEqual(aug_min_edit_dist(src2, tgt2), (2,"DD"))
        
    def test_all_ins_case(self):
        src1, tgt1 = "", "a"
        src2, tgt2 = "", "ab"
        self.assertEqual(min_edit_dist(src1, tgt1), 1)
        self.assertEqual(min_edit_dist(src2, tgt2), 2)
        
    def test_all_sub_case(self):
        src1, tgt1 = "a", "b"
        src2, tgt2 = "aa", "bb"
        self.assertEqual(min_edit_dist(src1, tgt1), 2)
        self.assertEqual(min_edit_dist(src2, tgt2), 4)
        
test = TestMinEditDist()
test_suite = unittest.TestLoader().loadTestsFromModule(test)

unittest.TextTestRunner().run(test_suite)

F...

[['']
 ['']]
[['']]
[2.0, 0.0, 2.0] a a
[['' '']
 ['' '_']]



FAIL: test_all_delete_case (__main__.TestMinEditDist)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-98-52c6241acad9>", line 14, in test_all_delete_case
    self.assertEqual(aug_min_edit_dist("a", ""), (1,"D"))
AssertionError: Tuples differ: (1.0, '') != (1, 'D')

First differing element 1:
''
'D'

- (1.0, '')
?   --

+ (1, 'D')
?      +


----------------------------------------------------------------------
Ran 4 tests in 0.007s

FAILED (failures=1)


<unittest.runner.TextTestResult run=4 errors=0 failures=1>