In [1]:
import pandas as pd

word1 = 'dom'
word2 = 'come'

# Hand-entered substitution costs between the letters of word1 (rows) and the
# letters of word2 (columns). Pairs of identical letters cost 0.0.
edit_distance = pd.DataFrame(
    [
        # c    o    m    e
        [0.1, 0.7, 0.5, 0.1],  # d
        [0.8, 0.0, 0.4, 0.6],  # o
        [0.4, 0.4, 0.0, 0.6],  # m
    ],
    index=list(word1),
    columns=list(word2),
    dtype=float,
)

print(edit_distance)

     c    o    m    e
d  0.1  0.7  0.5  0.1
o  0.8  0.0  0.4  0.6
m  0.4  0.4  0.0  0.6


In [2]:
def manhattan_distance(a, b):
    """Return the Manhattan (L1) distance between two 2-D points.

    Only the first two components of ``a`` and ``b`` are read; each argument
    must support indexing with ``[0]`` and ``[1]``.
    """
    first_axis_gap = abs(a[0] - b[0])
    second_axis_gap = abs(a[1] - b[1])
    return first_axis_gap + second_axis_gap

# Letter rows of a QWERTY keyboard, top to bottom. Each row is treated as
# left-aligned, so a key's position is simply (row index, index within row).
keyboard = [
    'qwertyuiop',
    'asdfghjkl',
    'zxcvbnm'
]

# Map every key to its (row, column) position on the grid above.
letter_positions = {
    letter: (i, j)
    for i, row in enumerate(keyboard)
    for j, letter in enumerate(row)
}

letters = 'abcdefghijklmnopqrstuvwxyz'

# Substitution cost for every ordered pair of letters: Manhattan distance
# between the two keys divided by 10.0. The whole matrix is built in one pass
# and handed to pandas once, instead of allocating a NaN frame and filling it
# with one scalar .loc write per cell.
edit_distance = pd.DataFrame(
    [
        [
            manhattan_distance(letter_positions[letter1], letter_positions[letter2]) / 10.0
            if letter1 in letter_positions and letter2 in letter_positions
            else 1.0  # Default to 1.0 for letters not on the keyboard
            for letter2 in letters
        ]
        for letter1 in letters
    ],
    index=list(letters),
    columns=list(letters),
    dtype=float,
)

print(edit_distance)


     a    b    c    d    e    f    g    h    i    j  ...    q    r    s    t  \
a  0.0  0.5  0.3  0.2  0.3  0.3  0.4  0.5  0.8  0.6  ...  0.1  0.4  0.1  0.5   
b  0.5  0.0  0.2  0.3  0.4  0.2  0.1  0.2  0.5  0.3  ...  0.6  0.3  0.4  0.2   
c  0.3  0.2  0.0  0.1  0.2  0.2  0.3  0.4  0.7  0.5  ...  0.4  0.3  0.2  0.4   
d  0.2  0.3  0.1  0.0  0.1  0.1  0.2  0.3  0.6  0.4  ...  0.3  0.2  0.1  0.3   
e  0.3  0.4  0.2  0.1  0.0  0.2  0.3  0.4  0.5  0.5  ...  0.2  0.1  0.2  0.2   
f  0.3  0.2  0.2  0.1  0.2  0.0  0.1  0.2  0.5  0.3  ...  0.4  0.1  0.2  0.2   
g  0.4  0.1  0.3  0.2  0.3  0.1  0.0  0.1  0.4  0.2  ...  0.5  0.2  0.3  0.1   
h  0.5  0.2  0.4  0.3  0.4  0.2  0.1  0.0  0.3  0.1  ...  0.6  0.3  0.4  0.2   
i  0.8  0.5  0.7  0.6  0.5  0.5  0.4  0.3  0.0  0.2  ...  0.7  0.4  0.7  0.3   
j  0.6  0.3  0.5  0.4  0.5  0.3  0.2  0.1  0.2  0.0  ...  0.7  0.4  0.5  0.3   
k  0.7  0.4  0.6  0.5  0.6  0.4  0.3  0.2  0.1  0.1  ...  0.8  0.5  0.6  0.4   
l  0.8  0.5  0.7  0.6  0.7  0.5  0.4  0.

In [3]:
# Show only the substitution costs relevant to the two example words: rows are
# the letters of word1, columns are the letters of word2. The labels are derived
# from the words themselves so this view stays in sync if the words change.
print(edit_distance.loc[list(word1), list(word2)])


     c    o    m    e
d  0.1  0.7  0.5  0.1
o  0.8  0.0  0.4  0.6
m  0.4  0.4  0.0  0.6


In [4]:
# Weighted edit distance between word1 and word2 via dynamic programming.
# The label '-' stands for the empty prefix, so entry (r, c) of the table is the
# cheapest cost of turning the first r letters of word1 into the first c letters
# of word2.
row_labels = ['-'] + list(word1)
col_labels = ['-'] + list(word2)

# Base cases: going to or from the empty prefix costs one unit per letter.
table = [[0.0] * len(col_labels) for _ in row_labels]
table[0] = [float(c) for c in range(len(col_labels))]
for r in range(len(row_labels)):
    table[r][0] = float(r)

for r, source_letter in enumerate(word1, start=1):
    for c, target_letter in enumerate(word2, start=1):
        candidates = (
            table[r - 1][c] + 1,  # delete source_letter
            table[r][c - 1] + 1,  # insert target_letter
            # substitute source_letter with target_letter, priced by edit_distance
            table[r - 1][c - 1] + edit_distance.loc[source_letter, target_letter],
        )
        # Keep the cheapest of the three moves
        table[r][c] = min(candidates)

cost = pd.DataFrame(table, index=row_labels, columns=col_labels, dtype=float)

print(cost)

     -    c    o    m    e
-  0.0  1.0  2.0  3.0  4.0
d  1.0  0.1  1.1  2.1  3.1
o  2.0  1.1  0.1  1.1  2.1
m  3.0  2.1  1.1  0.1  1.1
