## 1.Edit distance with matrix
> 参考视频：https://www.youtube.com/watch?v=We3YDTzNXEk

In [4]:
import numpy as np

In [9]:
class EditDistance:
    """Levenshtein edit distance computed with a dynamic-programming matrix.

    ``edit_distance_with_matrix`` builds the matrix; ``get_matrix_solution``
    walks it backwards to recover one optimal sequence of edits that turns
    ``str1`` into ``str2``; ``get_solution`` returns those edits in order.
    """

    def __init__(self):
        # Maps step number (1..distance) to a textual description of the edit.
        self.solution = {}

    @staticmethod
    def edit_distance_with_matrix(str1, str2):
        """Return ``(distance, matrix)`` for transforming ``str1`` into ``str2``.

        The matrix has ``len(str2) + 1`` rows and ``len(str1) + 1`` columns;
        cell ``[i, j]`` holds the edit distance between ``str1[:j]`` and
        ``str2[:i]``.  Empty inputs are handled by the same code path, so the
        return value is always a 2-tuple (the matrix then has a single row
        and/or a single column).
        """
        len_1, len_2 = len(str1) + 1, len(str2) + 1
        # Builtin ``int`` instead of the ``np.int`` alias, which newer NumPy
        # releases no longer provide.
        matrix = np.zeros((len_2, len_1), dtype=int)
        matrix[0, :] = np.arange(len_1)
        matrix[:, 0] = np.arange(len_2)
        for i, char2 in enumerate(str2):
            for j, char1 in enumerate(str1):
                if char1 == char2:
                    matrix[i + 1, j + 1] = matrix[i, j]
                else:
                    matrix[i + 1, j + 1] = min(
                        matrix[i, j + 1], matrix[i, j], matrix[i + 1, j]
                    ) + 1
        return int(matrix[len_2 - 1, len_1 - 1]), matrix

    def get_matrix_solution(self, matrix):
        """Trace ``matrix`` back from its bottom-right cell and record the edits.

        Moving up means inserting a character of str2, moving left means
        deleting a character of str1, and moving diagonally means substituting
        (or keeping the character when the value does not change).

        The result is stored in ``self.solution`` (replacing whatever an
        earlier call left there) and nothing is returned.  The walk is
        iterative, so long strings do not hit the recursion limit.
        """
        self.solution = {}
        if matrix.size == 0:
            return

        n_rows, n_cols = matrix.shape
        row, column = n_rows - 1, n_cols - 1

        while row > 0 and column > 0:
            target = matrix[row, column]
            up = matrix[row - 1, column]
            left = matrix[row, column - 1]
            diag = matrix[row - 1, column - 1]

            if target == min(diag, up, left) + 1:
                # The order of these checks must not change: up -> left -> diag.
                if target == up + 1:
                    self.solution[int(target)] = f"str1.({column - 1}) + str2.({row - 1})"
                    row -= 1
                    continue
                if target == left + 1:
                    self.solution[int(target)] = f"delete str1.({column - 1})"
                    column -= 1
                    continue
                # Neither up nor left explains the cost, so it is a substitution.
                self.solution[int(target)] = f"str1.({column - 1}) -> str2.({row - 1})"
            # Substitution or plain match: step diagonally.
            row -= 1
            column -= 1

        if row == 0:
            # Only the first row is left: the remaining prefix of str1 is deleted.
            for index in range(column):
                self.solution[index + 1] = f"delete str1.({index})"
        else:
            # Only the first column is left: the remaining prefix of str2 is
            # inserted at the front of str1.
            for index in range(row):
                self.solution[index + 1] = f"str1.({0}) + str2.({index})"

    def get_solution(self):
        """Return the recorded edits as a list of ``(step, description)`` sorted by step."""
        return sorted(self.solution.items())

In [10]:
test = EditDistance()
step, matrix_ = test.edit_distance_with_matrix('welecome', 'stinfwelcom')
test.get_matrix_solution(matrix_)
test.get_solution()

[(1, 'str1.(0) + str2.(0)'),
 (2, 'str1.(0) + str2.(1)'),
 (3, 'str1.(0) + str2.(2)'),
 (4, 'str1.(0) + str2.(3)'),
 (5, 'str1.(0) + str2.(4)'),
 (6, 'delete str1.(3)'),
 (7, 'delete str1.(7)')]

In [11]:
test_ = EditDistance()
step, matrix_ = test_.edit_distance_with_matrix('stinfwelcom', 'welecome')
test_.get_matrix_solution(matrix_)
print(matrix_)
print(step)
test_.get_solution()

[[ 0  1  2  3  4  5  6  7  8  9 10 11]
 [ 1  1  2  3  4  5  5  6  7  8  9 10]
 [ 2  2  2  3  4  5  6  5  6  7  8  9]
 [ 3  3  3  3  4  5  6  6  5  6  7  8]
 [ 4  4  4  4  4  5  6  6  6  6  7  8]
 [ 5  5  5  5  5  5  6  7  7  6  7  8]
 [ 6  6  6  6  6  6  6  7  8  7  6  7]
 [ 7  7  7  7  7  7  7  7  8  8  7  6]
 [ 8  8  8  8  8  8  8  7  8  9  8  7]]
7


[(1, 'delete str1.(0)'),
 (2, 'delete str1.(1)'),
 (3, 'delete str1.(2)'),
 (4, 'delete str1.(3)'),
 (5, 'delete str1.(4)'),
 (6, 'str1.(7) + str2.(3)'),
 (7, 'str1.(10) + str2.(7)')]

## 2.Edit distance with recursion

In [308]:
from functools import lru_cache

In [314]:
# For every (string1, string2) sub-problem evaluated by edit_distance, the last
# edit chosen for it ('' when the tails already match).  Rebinding the name is
# enough to reset it; an unconditional ``del`` would raise NameError when the
# name has not been defined yet.
solution_ = {}


@lru_cache(maxsize=2**10)
def edit_distance(string1, string2):
    """Return the edit distance between ``string1`` and ``string2``.

    The recursion compares the last characters of both strings and tries
    deleting the tail of ``string1``, adding the tail of ``string2``, or
    consuming both tails (free when they are equal, a substitution otherwise).
    As a side effect the chosen operation for each non-trivial sub-problem is
    written to the module-level ``solution_`` dict.  Results are memoised, so
    a sub-problem served from the cache does not write to ``solution_`` again.
    """
    if not (string1 and string2):
        # One side is empty: the distance is the length of the other side.
        return len(string1) if string1 else len(string2)

    tail_s1 = string1[-1]
    tail_s2 = string2[-1]

    candidates = [
        # string1 deletes its tail
        (edit_distance(string1[:-1], string2) + 1, f'DEL {tail_s1}'),
        # string1 gains the tail of string2
        (edit_distance(string1, string2[:-1]) + 1, f'ADD {tail_s2}'),
    ]

    if tail_s1 == tail_s2:
        candidates.append((edit_distance(string1[:-1], string2[:-1]) + 0, ''))
    else:
        candidates.append(
            (edit_distance(string1[:-1], string2[:-1]) + 1, f'SUB {tail_s1} => {tail_s2}')
        )

    # On ties ``min`` keeps the earliest candidate: DEL, then ADD, then both-forward.
    min_distance, operation = min(candidates, key=lambda x: x[0])

    solution_[(string1, string2)] = operation

    return min_distance

In [315]:
edit_distance('abcdef', 'ab')

4

In [316]:
solution_

{('a', 'a'): '',
 ('a', 'ab'): 'ADD b',
 ('ab', 'a'): 'DEL b',
 ('ab', 'ab'): '',
 ('abc', 'a'): 'DEL c',
 ('abc', 'ab'): 'DEL c',
 ('abcd', 'a'): 'DEL d',
 ('abcd', 'ab'): 'DEL d',
 ('abcde', 'a'): 'DEL e',
 ('abcde', 'ab'): 'DEL e',
 ('abcdef', 'a'): 'DEL f',
 ('abcdef', 'ab'): 'DEL f'}

In [243]:
edit_distance('*executio123', 'ies*')

11

In [263]:
edit_distance('iest', '#executeio123')

11

## 3.Longest common substring with matrix
> 最长公共子串

In [244]:
def longest_substring_matrix(str1, str2):
    """Return ``(length, substrings)`` for the longest common substrings.

    ``substrings`` holds one entry per place where a longest common substring
    ends (ordered by end position in ``str1``, then in ``str2``), so the same
    text appears more than once when it occurs several times.  When the
    strings share no character, or either one is empty, ``(0, [])`` is
    returned.
    """
    row, column = len(str1), len(str2)
    if not row or not column:
        return 0, []

    # One extra leading row and column of zeros, so the diagonal look-up for
    # the first character of either string reads 0 instead of wrapping around
    # to the last row/column through negative indexing.
    matrix = np.zeros((row + 1, column + 1), dtype=int)
    for i, char1 in enumerate(str1, start=1):
        for j, char2 in enumerate(str2, start=1):
            if char1 == char2:
                matrix[i, j] = matrix[i - 1, j - 1] + 1

    max_ = int(matrix.max())
    if max_ == 0:
        return 0, []

    result = []
    for end, _ in np.argwhere(matrix == max_):
        # ``end`` is the 1-based position in str1 where the match ends.
        result.append(str1[end - max_:end])
    return max_, result

In [245]:
longest_substring_matrix('hellowel', 'hellfadellolflowedlfalowefsello')

(4, ['hell', 'ello', 'ello', 'lowe', 'lowe'])

In [246]:
longest_substring_matrix('fish', 'fhish')

(3, ['ish'])

## 4.Longest common subsequence with matrix
> 最长公共子序列

In [2]:
def longest_subsequence_matrix(str1, str2):
    """Return ``(characters, length)`` of a longest common subsequence.

    ``characters`` is the subsequence as a list of single-character strings in
    their original order.  When several subsequences share the maximum length,
    one of them is returned.  Empty input yields ``([], 0)``.
    """
    len_1, len_2 = len(str1), len(str2)

    # matrix[i, j] is the LCS length of str1[:i] and str2[:j]; row 0 and
    # column 0 stand for the empty prefix.
    matrix = np.zeros((len_1 + 1, len_2 + 1), dtype=int)
    for i, char1 in enumerate(str1, start=1):
        for j, char2 in enumerate(str2, start=1):
            if char1 == char2:
                matrix[i, j] = matrix[i - 1, j - 1] + 1
            else:
                matrix[i, j] = max(matrix[i, j - 1], matrix[i - 1, j])

    target = int(matrix[len_1, len_2])

    # Walk back from the bottom-right corner.  Each character is taken on a
    # diagonal step, so the collected positions are strictly increasing in
    # both strings, which is what makes the result a real common subsequence.
    result = []
    i, j = len_1, len_2
    while i > 0 and j > 0:
        if str1[i - 1] == str2[j - 1]:
            result.append(str1[i - 1])
            i -= 1
            j -= 1
        elif matrix[i - 1, j] >= matrix[i, j - 1]:
            i -= 1
        else:
            j -= 1
    result.reverse()

    return result, target

In [7]:
longest_subsequence_matrix('h1e2ll3o2y34o234u', 'helloyou')

(['h', 'e', 'l', 'l', 'o', 'y', 'o', 'u'], 8)

In [8]:
longest_subsequence_matrix('ABCDEFG', 'BCDAFG')

(['B', 'C', 'D', 'F', 'G'], 5)

## 5. Dijkstra 算法
![](./graph.png)

In [12]:
# Positive infinity; used below as the initial cost of 'fin' in ``costs``.
inf = float('inf')

In [13]:
# Weighted directed graph: node -> {neighbor: edge weight}.
graph = {
    'start': {'a': 6, 'b': 2},
    'a': {'fin' : 1},
    'b': {'a': 3, 'fin': 5},
    'fin': {}
}
# Cheapest cost found so far for each node other than 'start'; the initial
# values equal the direct edge weights from 'start', and 'fin' starts at
# infinity.  dijsktra_algorithm lowers these values in place.
costs = {
    'a': 6,
    'b': 2,
    'fin': inf
}
# For each node, the node it is currently reached from on its cheapest known
# route; dijsktra_algorithm updates this in place.
parents = {
    'a': 'start',
    'b': 'start',
    'fin': None
}
# Nodes that are already finished; read by find_lowest_cost_node and appended
# to by dijsktra_algorithm.
processed = []

In [19]:
# Accumulator that get_next_ appends to and returns; it starts with the source
# node.  Rebinding the name resets it; an unconditional ``del r`` would raise
# NameError when ``r`` has not been defined yet.
r = ['start']
def find_lowest_cost_node(costs):
    """Return the cheapest node of ``costs`` that is not in ``processed``.

    Nodes are examined in ascending order of cost; ``None`` is returned when
    every node is already listed in the module-level ``processed``.
    """
    by_ascending_cost = sorted(costs.items(), key=lambda pair: pair[1])
    for name, _ in by_ascending_cost:
        if name not in processed:
            return name
    return None

def get_next_(result, key='start'):
    """Follow ``result`` (a node -> next-node mapping) starting at ``key``.

    Every node reached is appended to the module-level list ``r``, and that
    same list is returned.
    """
    followers = [nxt for node, nxt in result.items() if node == key]
    for nxt in followers:
        r.append(nxt)
        # Continue the chain from the node that was just appended.
        get_next_(result, nxt)
    return r
        
def dijsktra_algorithm(graph=graph, costs=costs, parents=parents):
    """Run Dijkstra's algorithm and return ``(path, cost)`` for reaching 'fin'.

    ``graph`` maps each node to ``{neighbor: edge weight}``, ``costs`` holds
    the cheapest known cost per node and ``parents`` the node each one is
    reached from.  ``costs`` and ``parents`` are updated in place, and every
    finished node is appended to the module-level ``processed`` list.

    The path is returned as a string such as ``'start -> b -> a -> fin'``.  It
    is rebuilt by following ``parents`` backwards from 'fin', which stays
    correct when one node is the parent of several others.  If 'fin' has no
    parent, the path consists of 'fin' alone and the cost is whatever
    ``costs['fin']`` holds.
    """
    node = find_lowest_cost_node(costs)
    while node is not None:
        cost = costs[node]
        for neighbor, weight in graph[node].items():
            new_cost = cost + weight
            if costs[neighbor] > new_cost:
                # A cheaper route to this neighbor goes through ``node``.
                costs[neighbor] = new_cost
                parents[neighbor] = node
        processed.append(node)
        node = find_lowest_cost_node(costs)

    # Walk the parent chain back from the target.  The source node has no
    # entry in ``parents``, so ``get`` returns None there and ends the walk;
    # the membership test only guards against a malformed, cyclic mapping.
    path = []
    node = 'fin'
    while node is not None and node not in path:
        path.append(node)
        node = parents.get(node)
    path.reverse()

    return ' -> '.join(path), costs['fin']

In [20]:
dijsktra_algorithm()

('start -> b -> a -> fin', 6)