In [1]:
import numpy as np

In [9]:
seqa = 'ALH'
seqb = 'AGH'

class NW():
    def __init__(
            self, seqa:str, seqb:str, 
            match, mismatch, gap
    ):
        self.seqa = seqa
        self.seqb = seqb
        self.matrix = np.zeros((len(seqa)+1, len(seqb)+1))
        self.matrix.fill(np.inf)
        self.matrix[:, 0] = -np.arange(len(seqa)+1)
        self.matrix[0, :] = -np.arange(len(seqb)+1)
        self.match_score = match
        self.mismatch_score = mismatch
        self.gap_score = gap
                
    def _scoring(self):
        for row_idx, i in enumerate(self.matrix):
            for col_idx, j in enumerate(i):
                
                if j != np.inf:
                    pass
                else:
                    '''
                    lt: right top (diagonal)
                    lt_: rt + match or mismatch
                    rb: right bottom
                    lt: left top
                    '''
                    lt = self.matrix[row_idx-1, col_idx-1]
                    lt_ = lt + self.match_score if self.seqa[row_idx-1] == self.seqb[col_idx-1] else lt + self.mismatch_score
                    
                    lb = self.matrix[row_idx, col_idx-1]
                    rt = self.matrix[row_idx-1, col_idx]

                    score = np.max([lt_, lb+self.gap_score, rt+self.gap_score])

                    self.matrix[row_idx, col_idx] = score

        return self.matrix
    
    def _traceback(self,):
        '''
        Initialize traceback procedure: 
        row: the row coordinate of the current value
        col: the column coordinate of the current value
        '''
        row = self.matrix.shape[0] - 1
        col = self.matrix.shape[1] - 1
        '''
        tv: traceback value(score)
        '''
        traceback_list = []
        traceback = self.matrix[row, col]
        traceback_list.append(traceback)

        while row != 1 or col != 1:
            '''
            window_state: score as key, coordinates(tuple) as value
            lb: left bottom
            lt: left top
            rt: right top
            '''
            window_state = {}
            lb = self.matrix[row, col-1]
            lt = self.matrix[row-1, col-1]
            rt = self.matrix[row-1, col]
            window_state[lb] = (row, col-1)
            window_state[lt] = (row-1, col-1)
            window_state[rt] = (row-1, col)

            print(window_state)

            traceback = max(lb, lt, rt)
            traceback_list.append(traceback)
            for i in window_state:
                if traceback == i:
                    row = window_state[i][0]
                    col = window_state[i][1]

        return traceback_list

    
    def align(self):
        score_matrix = self._scoring()
        print(score_matrix)
        output = self._traceback()
        return output

            

    
nw = NW(seqa, seqb, 1, -1, -1)
m = nw.align()
print(m)
                    

[[ 0. -1. -2. -3.]
 [-1.  1.  0. -1.]
 [-2.  0.  0. -1.]
 [-3. -1. -1.  1.]]
{-1.0: (2, 3), 0.0: (2, 2)}
{0.0: (1, 2), 1.0: (1, 1)}
[1.0, 0.0, 1.0]


In [11]:
seqa1 = 'AEGHK'
seqb1 = 'AELHK'
nw1 = NW(seqa1, seqb1, 1, -1, -1)
m1 = nw1.align()
print(m1)

[[ 0. -1. -2. -3. -4. -5.]
 [-1.  1.  0. -1. -2. -3.]
 [-2.  0.  2.  1.  0. -1.]
 [-3. -1.  1.  1.  0. -1.]
 [-4. -2.  0.  0.  2.  1.]
 [-5. -3. -1. -1.  1.  3.]]
{1.0: (4, 5), 2.0: (4, 4)}
{0.0: (3, 4), 1.0: (3, 3)}
{1.0: (2, 3), 2.0: (2, 2)}
{0.0: (1, 2), 1.0: (1, 1)}
[3.0, 2.0, 1.0, 2.0, 1.0]
