# Edit Distance, Assembly, and Overlaps



In [36]:
%matplotlib inline

In [60]:
from typing import Callable
import random

import matplotlib.pyplot as plt
import numpy as np

In [64]:
def edit_distance(
    s1: str,
    s2: str
) -> int:
    """Calculate the edit distance between two strings.

    The edit distance is defined as the minimum number of single-character
    substitutions, insertions, and deletions required to turn one string
    into the other. Every operation has a cost of 1.

    Parameters
    ----------
    s1 : str
        First string
    s2 : str
        Second string

    Returns
    -------
    int
        The edit distance, as a built-in ``int``

    >>> s1 = "ABCD"
    >>> s2 = "EFG"
    >>> edit_distance(s1, s2)
    4
    >>> edit_distance("kitten", "sitting")
    3
    >>> edit_distance("", "abc")
    3
    """
    # Only the previous row of the dynamic-programming table is ever read,
    # so keep two rows instead of the full (len(s1)+1) x (len(s2)+1) matrix.
    # Row 0: turning the empty prefix of s1 into s2[:j] takes j insertions.
    previous_row = list(range(len(s2) + 1))
    for index_i, char_1 in enumerate(s1, start=1):
        # Column 0: turning s1[:index_i] into "" takes index_i deletions.
        current_row = [index_i]
        for index_j, char_2 in enumerate(s2, start=1):
            dist_hor = current_row[index_j - 1] + 1
            dist_ver = previous_row[index_j] + 1
            # The diagonal step is free when the characters match.
            dist_diag = previous_row[index_j - 1] + (char_1 != char_2)
            current_row.append(min(dist_hor, dist_ver, dist_diag))
        previous_row = current_row
    # Plain Python ints throughout, so the result matches the ``-> int``
    # annotation and the doctest output regardless of the NumPy version.
    return previous_row[-1]

In [65]:
# Fix the RNG seed for reproducibility. NOTE(review): edit_distance itself
# uses no randomness; presumably the seed matters for later cells - confirm.
random.seed(42)
# Small example: strings of different lengths that share no characters.
s1 = "ABCD"
s2 = "EFG"
# Bare trailing expression so the notebook displays the result.
edit_distance(s1, s2)

4