## Needleman-Wunsch algorithm implementation

Implementation of the Needleman-Wunsch algorithm for global pairwise sequence alignment.

In [15]:
from Bio.Align import substitution_matrices
import numpy as np

In [16]:
def getGlobalTables(s,t,m,gapPenalty):
    """Build the Needleman-Wunsch score table and pointer table for s and t.

    Args:
        s: First sequence; row i of the tables corresponds to s[i-1].
        t: Second sequence; column j of the tables corresponds to t[j-1].
        m: Either the name of a substitution matrix to load with
            ``substitution_matrices.load`` (e.g. "BLOSUM62"), or an
            already-loaded matrix / mapping supporting ``m[a, b]`` lookups.
        gapPenalty: Score added for each gap position, both on the borders
            and inside the table.

    Returns:
        A tuple ``(dynamic_matrix, pointers_matrix)`` of nested lists with
        ``len(s)+1`` rows and ``len(t)+1`` columns. Pointer codes are:
        0 = origin, 1 = diagonal (s[i-1] paired with t[j-1]),
        2 = from the cell above (s[i-1] paired with a gap),
        3 = from the cell to the left (t[j-1] paired with a gap).
    """
    # Accept a matrix name (loaded here) or a ready-to-use scoring table
    subst = substitution_matrices.load(m) if isinstance(m, str) else m

    rows, cols = len(s) + 1, len(t) + 1
    # Independent row lists so that writing one cell never aliases another
    dynamic_matrix = [[0] * cols for _ in range(rows)]
    pointers_matrix = [[0] * cols for _ in range(rows)]

    # First column / first row: prefixes aligned entirely against gaps
    for i in range(1, rows):
        dynamic_matrix[i][0] = dynamic_matrix[i-1][0] + gapPenalty
        pointers_matrix[i][0] = 2
    for j in range(1, cols):
        dynamic_matrix[0][j] = dynamic_matrix[0][j-1] + gapPenalty
        pointers_matrix[0][j] = 3

    # Fill the interior row by row
    for i in range(1, rows):
        for j in range(1, cols):
            # Gap moves are scored with gapPenalty, the same value used on
            # the borders, rather than with the matrix's "*" entries.
            candidates = (dynamic_matrix[i-1][j-1] + subst[s[i-1], t[j-1]],
                          dynamic_matrix[i-1][j] + gapPenalty,
                          dynamic_matrix[i][j-1] + gapPenalty)
            best = max(candidates)
            dynamic_matrix[i][j] = best
            # index() returns the first maximum, so ties resolve in the
            # order diagonal, up, left
            pointers_matrix[i][j] = candidates.index(best) + 1

    return (dynamic_matrix, pointers_matrix)


In [17]:
# Example: build the score and pointer tables for two sequences using BLOSUM62 and a gap penalty of -4
getGlobalTables('VIVALAVIDA','VIVALADAVIS',"BLOSUM62",-4)

([[0, -4, -8, -12, -16, -20, -24, -28, -32, -36, -40, -44],
  [-4, 4.0, 0.0, -4.0, -8.0, -12.0, -16.0, -20.0, -24.0, -28.0, -32.0, -36.0],
  [-8, 0.0, 8.0, 4.0, 0.0, -4.0, -8.0, -12.0, -16.0, -20.0, -24.0, -28.0],
  [-12, -4.0, 4.0, 12.0, 8.0, 4.0, 0.0, -4.0, -8.0, -12.0, -16.0, -20.0],
  [-16, -8.0, 0.0, 8.0, 16.0, 12.0, 8.0, 4.0, 0.0, -4.0, -8.0, -12.0],
  [-20, -12.0, -4.0, 4.0, 12.0, 20.0, 16.0, 12.0, 8.0, 4.0, 0.0, -4.0],
  [-24, -16.0, -8.0, 0.0, 8.0, 16.0, 24.0, 20.0, 16.0, 12.0, 8.0, 4.0],
  [-28, -20.0, -12.0, -4.0, 4.0, 12.0, 20.0, 21.0, 20.0, 20.0, 16.0, 12.0],
  [-32, -24.0, -16.0, -8.0, 0.0, 8.0, 16.0, 17.0, 20.0, 23.0, 24.0, 20.0],
  [-36, -28.0, -20.0, -12.0, -4.0, 4.0, 12.0, 22.0, 18.0, 19.0, 20.0, 24.0],
  [-40, -32.0, -24.0, -16.0, -8.0, 0.0, 8.0, 18.0, 26.0, 22.0, 18.0, 21.0]],
 [[0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
  [2, 1, 3, 1, 3, 3, 3, 3, 3, 1, 3, 3],
  [2, 2, 1, 3, 3, 3, 3, 3, 3, 3, 1, 3],
  [2, 1, 2, 1, 3, 3, 3, 3, 3, 1, 3, 3],
  [2, 2, 2, 2, 1, 3, 1, 3, 1, 3,

In [18]:
def getGlobalTrace(p):
    """Walk the pointers matrix (p) backwards and return the list of moves.

    The walk starts at the bottom-right cell and stops at the first cell
    holding 0. The returned list holds the pointer codes met along the way,
    ordered from the stopping cell towards the bottom-right cell.
    """
    row = len(p) - 1
    col = len(p[0]) - 1
    steps = []

    while True:
        move = p[row][col]
        if move == 0:
            # Reached the terminating cell
            break
        steps.append(move)
        if move == 1:
            # Diagonal: step back in both dimensions
            row -= 1
            col -= 1
        elif move == 2:
            # Up: step back one row
            row -= 1
        else:
            # Any other code: step back one column
            col -= 1

    # Moves were collected end-to-start; hand them back start-to-end
    return steps[::-1]


In [19]:
# Example: compute both tables (d = scores, p = pointers), then derive the trace from the pointers
d,p = getGlobalTables('VIVALAVIDA','VIVALADAVIS',"BLOSUM62",-4)    
getGlobalTrace(p)

[1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1]

In [20]:
def getGlobalAlignment(s, t, trace):
    """Turn a trace into the two aligned strings for sequences s and t.

    Each trace entry consumes characters as follows: 1 takes the next
    character of both s and t; 2 takes the next character of s and puts
    "-" opposite it; any other value takes the next character of t and
    puts "-" opposite it. Returns the pair (aligned_s, aligned_t).
    """
    s_pos = 0
    t_pos = 0
    aligned_s = []
    aligned_t = []

    for step in trace:
        if step == 1:
            aligned_s.append(s[s_pos])
            aligned_t.append(t[t_pos])
            s_pos += 1
            t_pos += 1
        elif step == 2:
            aligned_s.append(s[s_pos])
            aligned_t.append("-")
            s_pos += 1
        else:
            aligned_s.append("-")
            aligned_t.append(t[t_pos])
            t_pos += 1

    return ("".join(aligned_s), "".join(aligned_t))


In [21]:
# Example: full pipeline — tables, then trace, then the aligned pair of strings
d,p = getGlobalTables('VIVALAVIDA','VIVALADAVIS',"BLOSUM62",-4)    
trace = getGlobalTrace(p)
getGlobalAlignment('VIVALAVIDA','VIVALADAVIS',trace)    

('VIVALA-VIDA', 'VIVALADAVIS')