In [25]:
import numpy as np
import random

# Exploring the Impact of Evaluation Order on the Wagner–Fischer Algorithm for Levenshtein Edit Distance

In [26]:
def algo_v0(s1, s2) -> "tuple[int, np.ndarray]":
    """Compute the Levenshtein distance with the classic row-by-row scan.

    Args:
        s1: First sequence; its prefixes index the rows of the matrix.
        s2: Second sequence; its prefixes index the columns of the matrix.

    Returns:
        A `(distance, matrix)` pair. `matrix` has shape
        `(len(s1) + 1, len(s2) + 1)` and `matrix[i, j]` holds the edit
        distance between `s1[:i]` and `s2[:j]`. `distance` is the
        bottom-right entry, converted to a plain Python `int`.
    """
    rows, cols = len(s1) + 1, len(s2) + 1
    matrix = np.zeros((rows, cols), dtype=int)

    # Turning a prefix into the empty string (or back) costs one edit per
    # character, which fixes the first column and the first row.
    matrix[:, 0] = np.arange(rows)
    matrix[0, :] = np.arange(cols)

    # Fill the remaining cells row by row; every cell depends on its top,
    # left, and top-left neighbours, all of which are already known.
    for i in range(1, rows):
        for j in range(1, cols):
            substitution_cost = s1[i - 1] != s2[j - 1]
            matrix[i, j] = min(
                matrix[i - 1, j] + 1,  # Deletion
                matrix[i, j - 1] + 1,  # Insertion
                matrix[i - 1, j - 1] + substitution_cost,  # Substitution
            )

    # Convert the NumPy scalar so callers get a built-in `int`.
    return int(matrix[-1, -1]), matrix

Accelerating this exact algorithm isn't trivial, as the `matrix[i, j]` value has a dependency on the `matrix[i, j-1]` value computed in the previous step of the same inner loop.
So we can't simply vectorize or parallelize the inner loop.
Instead, we can show that the matrix can be evaluated in a different order, along its skew diagonals (anti-diagonals), and still produce the same result.

![](https://mathworld.wolfram.com/images/eps-svg/SkewDiagonal_1000.svg)

In [27]:
def algo_v1(s1, s2, verbose: bool = False) -> "tuple[int, np.ndarray]":
    """Compute the Levenshtein distance by walking the matrix along skew diagonals.

    Every cell on a skew diagonal depends only on cells of the two preceding
    skew diagonals, so the cells of one diagonal are independent of each other.

    Args:
        s1: First sequence; its prefixes index the rows of the matrix.
        s2: Second sequence; must have the same length as `s1`.
        verbose: When true, print the coordinates of every evaluated cell.

    Returns:
        A `(distance, matrix)` pair: the full `(len(s1) + 1) x (len(s2) + 1)`
        matrix of prefix distances and its bottom-right entry as a plain
        Python `int`.

    Raises:
        AssertionError: If `s1` and `s2` differ in length.
    """
    assert len(s1) == len(s2), "First define an algo for square matrices!"
    # Create a matrix of size (len(s1)+1) x (len(s2)+1), pre-filled with a
    # placeholder value; every cell is overwritten below.
    matrix = np.zeros((len(s1) + 1, len(s2) + 1), dtype=int)
    matrix[:, :] = 99

    # Initialize the first column and first row of the matrix
    for i in range(len(s1) + 1):
        matrix[i, 0] = i
    for j in range(len(s2) + 1):
        matrix[0, j] = j

    # Number of rows and columns in the square matrix.
    n = len(s1) + 1
    skew_diagonals_count = 2 * n - 1
    # Skew diagonals 0 and 1 consist only of pre-populated border cells.
    for skew_diagonal_idx in range(2, skew_diagonals_count):
        in_top_left_triangle = skew_diagonal_idx < n
        if in_top_left_triangle:
            # We have not passed the main skew diagonal yet, so the first and
            # the last cells of this diagonal lie in the first column and the
            # first row of the matrix; both are pre-populated, so only the
            # interior offsets are visited.
            skew_diagonal_length = skew_diagonal_idx + 1
            offsets = range(1, skew_diagonal_length - 1)
        else:
            # Past the main skew diagonal every cell is an interior cell.
            skew_diagonal_length = 2 * n - skew_diagonal_idx - 1
            offsets = range(skew_diagonal_length)

        # Offsets run from the bottom-left end of the diagonal to its top-right end.
        for offset_within_skew_diagonal in offsets:
            if in_top_left_triangle:
                i = skew_diagonal_idx - offset_within_skew_diagonal
                j = offset_within_skew_diagonal
                if verbose:
                    print(f"top left triangle: {skew_diagonal_idx=}, {skew_diagonal_length=}, {i=}, {j=}")
            else:
                i = n - offset_within_skew_diagonal - 1
                j = skew_diagonal_idx - n + offset_within_skew_diagonal + 1
                if verbose:
                    print(f"bottom right triangle: {skew_diagonal_idx=}, {skew_diagonal_length=}, {i=}, {j=}")
            substitution_cost = s1[i - 1] != s2[j - 1]
            matrix[i, j] = min(
                matrix[i - 1, j] + 1,  # Deletion
                matrix[i, j - 1] + 1,  # Insertion
                matrix[i - 1, j - 1] + substitution_cost,  # Substitution
            )

    # Convert the NumPy scalar so callers get a built-in `int`.
    return int(matrix[len(s1), len(s2)]), matrix

Let's generate some random strings and make sure we produce the right result.

In [28]:
# Cross-check both traversal orders on random equal-length strings.
# A dedicated, seeded generator keeps the inputs identical on every run, so a
# failing pair can be reproduced, without touching the global `random` state.
rng = random.Random(0)
for _ in range(10):
    s1 = ''.join(rng.choices("ab", k=50))
    s2 = ''.join(rng.choices("ab", k=50))
    # Only the distances are compared; the matrices are discarded.
    d0 = algo_v0(s1, s2)[0]
    d1 = algo_v1(s1, s2)[0]
    assert d0 == d1, f"{d0 = } != {d1 = } for {s1 = }, {s2 = }"

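Going further, we can avoid storing the whole matrix: every skew diagonal depends only on the two skew diagonals before it, so it is enough to keep those two around while computing the next one.
For an N×N matrix (N is the string length plus one), the longer of the two never exceeds N entries. The shorter one has at most N-1 entries, and is always shorter by exactly one.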

In [29]:
# A pair of short, equal-length words serves as the worked example.
s1, s2 = "listen", "silent"
# Larger random inputs can be swapped in instead:
# s1 = ''.join(random.choices("abcd", k=100))
# s2 = ''.join(random.choices("abcd", k=100))

# Reference distance and full matrix from the row-by-row implementation.
distance, baseline = algo_v0(s1, s2)
(s1, s2, f"{distance = }", baseline)

('listen',
 'silent',
 'distance = 4',
 array([[0, 1, 2, 3, 4, 5, 6],
        [1, 1, 2, 2, 3, 4, 5],
        [2, 2, 1, 2, 3, 4, 5],
        [3, 2, 2, 2, 3, 4, 5],
        [4, 3, 3, 3, 3, 4, 4],
        [5, 4, 4, 4, 3, 4, 5],
        [6, 5, 5, 5, 4, 3, 4]]))

In [30]:
assert len(s1) == len(s2), "First define an algo for square matrices!"
# Side length of the square Levenshtein matrix.
n = len(s1) + 1

# Only three skew diagonals are kept, each in a buffer of `n` entries.
# As an illustration, take two words of length 4, which give a 5x5 matrix:
#
#         B C D E    << characters of s2: BCDE
#     + ---------
#     | a b c d e
#   F | f g h i j
#   K | k l m n o
#   P | p q r s t
#   U | u v w x y
#   ^
#   ^ characters of s1: FKPU
#
# While the main skew diagonal is being computed, the buffers hold:
#   following: [u, q, m, i, e]  (the diagonal being computed)
#   current:   [p, l, h, d]     (one diagonal back)
#   previous:  [k, g, c]        (two diagonals back)
previous, current, following = (np.zeros(n, dtype=np.uint) for _ in range(3))

# Seed the first two skew diagonals:
# `previous` starts as [a] and `current` starts as [f, b].
previous[0] = 0
current[:2] = 1
previous, current, following

(array([0, 0, 0, 0, 0, 0, 0], dtype=uint64),
 array([1, 1, 0, 0, 0, 0, 0], dtype=uint64),
 array([0, 0, 0, 0, 0, 0, 0], dtype=uint64))

To feel safer, while designing our alternative traversal algorithm, let's define an extraction function, that will get the values of a certain skewed diagonal.

In [31]:
def get_skewed_diagonal(matrix: np.ndarray, index: int):
    """Extract one skew diagonal (anti-diagonal) of a 2D matrix.

    Args:
        matrix: Two-dimensional array to read from.
        index: Which skew diagonal to take; it consists of the cells whose
            row and column indices sum to `index`, so `0` is the top-left
            corner cell.

    Returns:
        The cells of that skew diagonal, ordered from its bottom-left end
        to its top-right end.
    """
    # After mirroring the columns, skew diagonals become ordinary diagonals.
    mirrored = np.fliplr(matrix)
    diagonal_offset = matrix.shape[1] - index - 1
    top_to_bottom = np.diag(mirrored, k=diagonal_offset)
    # `np.diag` walks downwards; reverse to start from the bottom-left end.
    return top_to_bottom[::-1]

In [32]:
# Sanity-check the extraction helper on a 3x3 matrix holding 1..9 row by row.
matrix = np.arange(1, 10).reshape(3, 3)
for diagonal_index, expected_values in (
    (2, [7, 5, 3]),  # main skew diagonal
    (1, [4, 2]),     # one step towards the top-left corner
    (4, [9]),        # bottom-right corner
):
    assert np.all(get_skewed_diagonal(matrix, diagonal_index) == expected_values)

In [33]:
# To evaluate every subsequent entry:
following_skew_diagonal_idx = 2
while following_skew_diagonal_idx < n:
    following_skew_diagonal_length = following_skew_diagonal_idx + 1

    old_substitution_costs = previous[:following_skew_diagonal_length - 2]
    added_substitution_costs = [s1[following_skew_diagonal_idx - i - 2] != s2[i] for i in range(following_skew_diagonal_length - 2)]
    substitution_costs = old_substitution_costs + added_substitution_costs

    following[1:following_skew_diagonal_length-1] = np.minimum(current[1:following_skew_diagonal_length-1] + 1, current[:following_skew_diagonal_length-2] + 1) # Insertions or deletions
    following[1:following_skew_diagonal_length-1] = np.minimum(following[1:following_skew_diagonal_length-1], substitution_costs) # Substitutions
    following[0] = following_skew_diagonal_idx
    following[following_skew_diagonal_length-1] = following_skew_diagonal_idx
    assert np.all(following[:following_skew_diagonal_length] == get_skewed_diagonal(baseline, following_skew_diagonal_idx))
    
    previous[:] = current[:]
    current[:] = following[:]
    following_skew_diagonal_idx += 1

previous, current, following # Log the state

(array([5, 3, 2, 2, 3, 5, 0], dtype=uint64),
 array([6, 4, 3, 2, 3, 4, 6], dtype=uint64),
 array([6, 4, 3, 2, 3, 4, 6], dtype=uint64))

By now we've scanned through the upper triangle of the matrix, where each subsequent iteration results in a larger diagonal. From now onwards, we will be shrinking. Instead of adding value equal to the skewed diagonal index on either side, we will be cropping those values out.

In [34]:
# Shrink through the lower-right triangle: each new skew diagonal is one entry
# shorter than the one before it and no longer touches the matrix border.
while following_skew_diagonal_idx < 2 * n - 1:
    following_skew_diagonal_length = 2 * n - 1 - following_skew_diagonal_idx
    head = slice(0, following_skew_diagonal_length)
    shifted = slice(1, following_skew_diagonal_length + 1)
    first_s2_position = following_skew_diagonal_idx - n

    # Substitution candidates: `previous` is already aligned with this diagonal.
    mismatches = [s1[len(s1) - k - 1] != s2[first_s2_position + k] for k in range(following_skew_diagonal_length)]
    substitution_costs = previous[head] + mismatches

    # Insertion/deletion candidates: each cell and its right-hand neighbour on
    # the diagonal one step back.
    gap_costs = np.minimum(current[head] + 1, current[shifted] + 1)
    following[head] = np.minimum(gap_costs, substitution_costs)
    assert np.all(following[:following_skew_diagonal_length] == get_skewed_diagonal(baseline, following_skew_diagonal_idx)), f"\n{following[:following_skew_diagonal_length]} not equal to \n{get_skewed_diagonal(baseline, following_skew_diagonal_idx)}"

    # Rotate the buffers. `current` must be read (shifted by one, dropping its
    # first entry) before it is overwritten with the new diagonal.
    previous[head] = current[shifted]
    current[head] = following[head]
    following_skew_diagonal_idx += 1

previous, current, following # Log the state

(array([5, 4, 5, 5, 5, 6, 0], dtype=uint64),
 array([4, 5, 4, 5, 5, 5, 6], dtype=uint64),
 array([4, 5, 4, 5, 5, 5, 6], dtype=uint64))

In [35]:
# The last skew diagonal has a single cell, the bottom-right corner of the
# matrix, so the first entry of `following` must match the reference distance.
assert following[0] == distance, f"{distance = } != {following[0] = }"