In [4]:
# This version uses plain recursion only; the catch is that it recomputes subproblems it has already solved.
# Let X be the first sequence and Y the second sequence.
# Write X = alpha + x and Y = beta + y, where x and y are the last bases of the two sequences and alpha and beta are the prefixes before them.

def edist(X, Y):
    """
    Edit distance between X and Y, computed by plain recursion.

    The last elements of both sequences are compared and the problem is
    reduced to three smaller ones (drop the last element of both, of Y only,
    or of X only). No results are cached, so the same sub-problems are
    solved repeatedly.

    Parameters
    ----------
    X, Y : sequences supporting len(), indexing and slicing (e.g. str)

    Returns
    -------
    int
        Minimum number of unit-cost substitutions, insertions and deletions.
    """
    len_x, len_y = len(X), len(Y)

    # Once one sequence is exhausted, the distance is the length of the other.
    if len_x == 0 or len_y == 0:
        return max(len_x, len_y)

    # Substituting the last elements is free when they already match.
    if X[-1] == Y[-1]:
        mismatch_cost = 0
    else:
        mismatch_cost = 1

    alpha, beta = X[:-1], Y[:-1]
    via_substitution = edist(alpha, beta) + mismatch_cost
    via_insertion = edist(X, beta) + 1
    via_deletion = edist(alpha, Y) + 1
    return min(via_substitution, via_insertion, via_deletion)

In [5]:
# This is the optimized (memoized) version of the recursive approach: it stores results in a matrix so that they can be reused.
import numpy as np
def get_edist(X, Y):
    """
    Edit distance between X and Y using the memoised recursion.

    Allocates a (len(X) + 1) x (len(Y) + 1) table filled with -1, the marker
    edist_dynamic uses for "not computed yet", and asks edist_dynamic for the
    cell that corresponds to the two full sequences.

    Parameters
    ----------
    X, Y : sequences supporting len() and indexing (e.g. str)

    Returns
    -------
    The value stored in the last cell of the table (a NumPy integer scalar).
    """
    len_x, len_y = len(X), len(Y)
    memo = np.full((len_x + 1, len_y + 1), -1)
    return edist_dynamic(X, Y, len_x, len_y, memo)

def edist_dynamic(X, Y, i, j, matrix):
    """
    Edit distance between the prefixes X[:i] and Y[:j], with memoisation.

    Parameters
    ----------
    X, Y : sequences supporting indexing (e.g. str)
    i, j : prefix lengths of X and Y to compare
    matrix : 2-D table indexed as matrix[i, j]; a cell equal to -1 means the
        value has not been computed yet. The table is updated in place.

    Returns
    -------
    The value stored in matrix[i, j] after it has been filled in.
    """
    if i == 0 or j == 0:
        # An empty prefix: the distance is the length of the other prefix.
        matrix[i, j] = max(i, j)
    elif matrix[i, j] == -1:
        # Not computed yet: substituting the last elements is free on a match.
        if X[i - 1] == Y[j - 1]:
            mismatch_cost = 0
        else:
            mismatch_cost = 1

        via_substitution = edist_dynamic(X, Y, i - 1, j - 1, matrix) + mismatch_cost
        via_insertion = edist_dynamic(X, Y, i, j - 1, matrix) + 1
        via_deletion = edist_dynamic(X, Y, i - 1, j, matrix) + 1
        matrix[i, j] = min(via_substitution, via_insertion, via_deletion)

    # Either freshly computed above or already present in the table.
    return matrix[i, j]

In [52]:
# Demo of the memoised version; the recorded output of this cell is 3.
X, Y = "kitten", "sitting"

distance = get_edist(X, Y)
print(f"Edit distance between '{X}' and '{Y}': {distance}")

Edit distance between 'kitten' and 'sitting': 3


In [40]:
# Another version that uses no recursion at all; it simply loops over every cell of the matrix.
def edist_loopmatrix(X, Y):
    """
    Edit distance between X and Y, computed bottom-up without recursion.

    Cell (ix, iy) of the table holds the edit distance between the prefixes
    X[:ix] and Y[:iy]; substitutions, insertions and deletions each cost 1.

    Parameters
    ----------
    X, Y : sequences supporting len() and indexing (e.g. str)

    Returns
    -------
    int
        Edit distance between the two full sequences.
    """
    n_rows, n_cols = len(X) + 1, len(Y) + 1

    # A typed integer table: every cell is overwritten below, so there is no
    # need for an object array pre-filled with None.
    matrix = np.zeros((n_rows, n_cols), dtype=int)

    # Base cases: against an empty prefix the distance is the prefix length.
    matrix[:, 0] = np.arange(n_rows)
    matrix[0, :] = np.arange(n_cols)

    for ix in range(1, n_rows):
        for iy in range(1, n_cols):
            # Substituting the last elements is free when they already match.
            delta = 0 if X[ix - 1] == Y[iy - 1] else 1
            matrix[ix, iy] = min(
                matrix[ix - 1, iy - 1] + delta,  # Substitution
                matrix[ix - 1, iy] + 1,          # Deletion
                matrix[ix, iy - 1] + 1,          # Insertion
            )

    # Hand back a built-in int rather than a NumPy scalar.
    return int(matrix[len(X), len(Y)])

In [42]:
# Demo of the iterative version; the recorded output of this cell is 3.
X, Y = "kitten", "sitting"

distance = edist_loopmatrix(X, Y)
print(f"Edit distance between '{X}' and '{Y}': {distance}")

Edit distance between 'kitten' and 'sitting': 3


In [21]:
# Global alignment
def galign_penalty(x, y):
    """
    Penalty for aligning symbol x against symbol y in a global alignment.

    Only approves A, T, C, G, or -

    Scoring:
        0  match (same base)
        2  transition (purine <-> purine or pyrimidine <-> pyrimidine)
        4  transversion (purine <-> pyrimidine)
        8  gap (exactly one of the two symbols is "-")

    Parameters
    ----------
    x, y : single symbols, each one of 'A', 'T', 'C', 'G' or '-'

    Returns
    -------
    int
        One of 0, 2, 4 or 8.

    Raises
    ------
    ValueError
        If either symbol is not an approved symbol, or if both are gaps.
    """
    purines = {'A', 'G'}
    pyrimidines = {'T', 'C'}
    gap = "-"
    allowed = purines | pyrimidines | {gap}

    # Reject the pair as soon as EITHER symbol is unknown; checking that both
    # are unknown would let pairs such as ('N', 'A') through unscored.
    if x not in allowed or y not in allowed:
        raise ValueError("Only approves A, T, C, G, or -")
    # A gap aligned against a gap has no defined penalty.
    if x == gap and y == gap:
        raise ValueError("Cannot align a gap against a gap")

    # Gaps (exactly one side is a gap at this point)
    if x == gap or y == gap:
        return 8
    # Matches
    if x == y:
        return 0
    # Transitions: both bases belong to the same chemical class
    if (x in purines) == (y in purines):
        return 2
    # Transversions: the only remaining possibility
    return 4
    
def galign(X, Y):
    """
    Global-alignment score of X against Y, computed bottom-up.

    Cell (ix, iy) of the table holds the minimum total penalty for aligning
    the prefixes X[:ix] and Y[:iy]; every individual penalty comes from
    galign_penalty.

    Parameters
    ----------
    X, Y : sequences of symbols accepted by galign_penalty (e.g. str)

    Returns
    -------
    The value in the last cell of the table, i.e. the minimum total penalty
    for aligning the two full sequences.
    """
    gap = "-"
    n_rows, n_cols = len(X) + 1, len(Y) + 1

    # Object-dtype table: cells hold whatever numeric type galign_penalty returns.
    matrix = np.full((n_rows, n_cols), None)

    # Base cases: aligning a prefix against nothing costs one gap per symbol.
    matrix[0, 0] = 0
    for ix in range(1, n_rows):
        matrix[ix, 0] = matrix[ix - 1, 0] + galign_penalty(X[ix - 1], gap)
    for iy in range(1, n_cols):
        matrix[0, iy] = matrix[0, iy - 1] + galign_penalty(gap, Y[iy - 1])

    for ix, x_sym in enumerate(X, start=1):
        for iy, y_sym in enumerate(Y, start=1):
            via_substitution = matrix[ix - 1, iy - 1] + galign_penalty(x_sym, y_sym)
            via_deletion = matrix[ix - 1, iy] + galign_penalty(x_sym, gap)
            via_insertion = matrix[ix, iy - 1] + galign_penalty(gap, y_sym)
            matrix[ix, iy] = min(via_substitution, via_deletion, via_insertion)

    return matrix[len(X), len(Y)]

In [27]:
# Demo of the global alignment; the recorded output of this cell is 10.
X, Y = "TACCAGATTCGA", "TACCAAATTGA"
print(galign(X, Y))

10
