https://web.stanford.edu/class/cs124/lec/med.pdf

In [3]:
import math

"""
Input: string A and B
Output: the smallest number of operations (insertion, deletion and substitution) needed to transform A to B

This number is usually called the edit distance. eg, good->ood->bod->bad

We will approach this problem using dynamic programming. 
Let |A|=n and |B|=m. We will fill a (n+1) x (m+1) table D, 
such that D[i][j] will be the edit distance between A[:i] and B[:j]. 
We will derive a recurrence to compute D[i][j].

Consider two cases:

1. A[i-1] = B[j-1]. Then we can focus on beginnings of both strings, 
so D[i][j] = D[i-1][j-1], 
then we can move to A[i-2] and B[j-2], if they are not equal, it fall in case 2

2. A[i-1] <> B[j-1]. Since we want ultimately make the strings equal, there are three options

- We delete A[i-1]. The remainning number of operations is D[i-1][j]
- We delete B[j-1]. The remainning number of operations is D[i][j-1]
- We substitute A[i-1] for B[j-1]. The remainning number of operations is D[i-1][j-1]

Therefore, in this case D[i][j] = 1+min(D[i-1][j], D[i][j-1], D[i-1][j-1]). THIS IS OUR RECURRENCE LOGIC
"""

def edistance(A, B):
    """
    using above recurrence, the complexity of the function should be O(|A||B|)
    """
    if A == B:
        return 0

    n = len(A)+1
    m = len(B)+1

    tbl=[[0] * m for _ in range(n)]

    # counter = 0

    for i in range(n):
        for j in range(m):
            if i == 0:
                tbl[i][j] = j
            elif j == 0:
                tbl[i][j] = i
            elif A[i-1] == B[j-1]:
                tbl[i][j] = tbl[i-1][j-1]
            else:
                tbl[i][j] = 1+min(
                    tbl[i-1][j],
                    tbl[i][j-1],
                    tbl[i-1][j-1]
                )
    # print("\n".join([str(l) for l in tbl]))
    return tbl[-1][-1]

def weighted_edistance(A, B, wdel, wins, wsub):
    """
    Apply weight to each operation, compute the total weighted operation
    
    wdel: weight of deletion
    wins: weight of insertion
    wsub: weight of substitution
    
    """
    if A == B:
        return 0

    n = len(A)+1
    m = len(B)+1

    tbl=[[0] * m for _ in range(n)]

    # counter = 0

    for i in range(n):
        for j in range(m):
            if i == 0:
                tbl[i][j] = j * wins
            elif j == 0:
                tbl[i][j] = i * wdel
            # counter+=1
            # print(tbl[i+1][j+1])
            # print(tbl)
            elif A[i-1] == B[j-1]:
                tbl[i][j] = tbl[i-1][j-1]
            else:
                tbl[i][j] = min(
                    tbl[i-1][j] + wdel,
                    tbl[i][j-1] + wins,
                    tbl[i-1][j-1] + wsub
                )
    # print("\n".join([str(l) for l in tbl]))
    return tbl[-1][-1]

def edistance_substring(A, B):
    """
    Write a function edistance_substring(A, B) that returns the smallest edit distance 
    between a substring of A (that is, a string of the from A[i:j]A[i:j]) and B. 
    In other words, it is allowed to delete some symbols at the beginning and at the end of A for free.

    For example, if A = good and B = bad, then the output should be 2, 
    because one can transform ood into bad with two subtitutions, 
    and at least two operations are necessary because the letters b and a are not present in A.
    """
    if A == B:
        return 0

    n = len(A)+1
    m = len(B)+1

    tbl=[[0] * m for _ in range(n)]

    wdel = 1

    for i in range(n):
        for j in range(m):
            if i == 0:
                tbl[i][j] = j
            elif j == 0:
                tbl[i][j] = 0
            elif A[i-1] == B[j-1]:
                tbl[i][j] = tbl[i-1][j-1]
                # mark first same
                # start counting deletion
                # start counting samed character, 
                # when samed character same as j
                # disable counting deletion
            else:
                if i > j:
                    wdel= 0

                tbl[i][j] = min(
                        tbl[i-1][j] + wdel,
                        tbl[i][j-1]+1,
                        tbl[i-1][j-1]+1
                    )

    print("\n".join([str(l) for l in tbl]))
    return tbl[-1][-1]

if __name__ == "__main__":
    # should print 3
    # print(edistance("good", "bad"))
    # # should print 3
    # print(edistance("", "bade"))
    #     # should print 1
    # print(edistance("geek", "gesek"))
    # #     # should print 3
    # print(edistance("sunday", "saturday"))
    # #     # should print 4
    # print(edistance("acdfjasd", "ldsfjaxd"))
    # #     # should print 3
    # print(edistance("qwefftyuiop", "asdfghjkl"))
    # #     # should print 3
    # # print(edistance("good", "bad"))
    # #     # should print 3
    # # print(edistance("good", "bad"))

    # # should print 2
    # print(edistance_substring("good", "bad"))
    # should print 0
    print(edistance_substring("goodxx", "good"))
    # # should print 0
    print(edistance_substring("xxgood", "good"))
    # # should print  1
    print(edistance_substring("xxgoiodxx", "good"))
    # # should print  2
    print(edistance_substring("xaibcxe", "acx"))

    
    
    

    # a='abc'
    # b='efg'

    # a = a[:-1] + b[-1]

    # print("abcd"[4:1], "abcd"[:-1])

2
1
