In [12]:
import numpy as np

In [14]:
#BLOSUM50 substitution matrix: the log odds of one amino acid being substituted by another,
#read from the text file "blosum50.txt" into an integer array.
#The alignment functions below index it with the letter order
#A R N D C Q E G H I L K M F P S T W Y V -- presumably the row/column order of the file; verify.
blosum50 = np.loadtxt("blosum50.txt", dtype = 'i')

In [149]:
from termcolor import colored

In [222]:
#assumes that the blosum50 field is accessible in its environment
def needleman_wunsch(str2, str1, gap_penalty = -8):
    """Globally align two amino-acid strings (Needleman-Wunsch) and print the result.

    Fills a score matrix F that has one row per character of ``str1`` and one
    column per character of ``str2`` (plus a leading gap row and column), using
    the module-level ``blosum50`` for substitution scores and a linear gap
    penalty. It then traces back from the bottom-right cell to (0, 0) and
    prints the visited cells followed by F with those cells coloured red.

    Args:
        str2: sequence laid out along the columns of F.
        str1: sequence laid out along the rows of F.
        gap_penalty: score added for every gap; defaults to -8, the value
            that used to be hard-coded.

    Returns:
        None. The path and the matrix are printed.

    Raises:
        KeyError: if a character outside the 20 upper-case amino-acid letters
            A R N D C Q E G H I L K M F P S T W Y V is looked up.
    """

    #our penalty score
    d = gap_penalty

    height = len(str1) + 1
    width = len(str2) + 1

    #map the characters with their indices in the blosum50 matrix
    blosum_index_map = {aa: idx for idx, aa in enumerate("ARNDCQEGHILKMFPSTWYV")}

    #given the residues to consider swapping, get the blosum log probability score of swapping them
    #by looking up the corresponding index of the chars and then accessing that index of the matrix
    def get_blosum_score(char1, char2):
        #int() keeps the cell scores as plain Python integers
        return int(blosum50[blosum_index_map[char1]][blosum_index_map[char2]])

    #every cell holds a (score, direction) tuple, a nested list stores tuples directly
    matrix = [[None] * width for _ in range(height)]

    #initialization step, the top row and left column accumulate the gap penalty,
    #the top row points west and the left column (including the origin) points north
    for j in range(width):
        matrix[0][j] = (j * d, "w")
    for i in range(height):
        matrix[i][0] = (i * d, "n")

    for i in range(1, height):
        char1 = str1[i - 1]

        for j in range(1, width):
            char2 = str2[j - 1]
            score = get_blosum_score(char2, char1)

            #direct implementation of the dynamic programming step,
            #max() keeps the first best candidate so ties prefer nw, then n, then w
            matrix[i][j] = max([
                (matrix[i-1][j-1][0] + score, "nw"),
                (matrix[i-1][j][0] + d, "n"),
                (matrix[i][j-1][0] + d, "w")
            ], key = lambda tup: tup[0])

    #F matrix has been generated, now walk back from the bottom right to the origin
    i = height - 1
    j = width - 1

    #path of cell transitions we will build up
    path = []

    while (i, j) != (0, 0):
        path.append((i, j))

        #the direction we need to go in
        dr = matrix[i][j][1]

        if dr == "nw":
            i -= 1
            j -= 1
        elif dr == "n":
            i -= 1
        else:
            j -= 1

    path.append((0, 0))

    print("\nIndices of path found starting from bottom right:\n\n" + str(path) + "\n")

    print("Solution visualization on F matrix:\n")

    #set membership is constant time, the list would be rescanned for every cell
    on_path = set(path)

    #pad every score to a common width and separate the columns with a space,
    #otherwise neighbouring numbers run together and cannot be told apart
    cell_width = max(len(str(cell[0])) for row in matrix for cell in row)

    for i in range(height):
        cells = []

        for j in range(width):
            #pad before colouring so the invisible escape codes do not disturb the alignment
            text = str(matrix[i][j][0]).rjust(cell_width)
            cells.append(colored(text, "red") if (i, j) in on_path else text)

        print(" ".join(cells))

In [223]:
#global alignment of a short pair: the first argument is str2 (the columns of the F matrix),
#the second argument is str1 (the rows)
needleman_wunsch("HEAGAWGHEE", "PAWHEAE")


Indices of path found starting from bottom right:

[(7, 10), (6, 9), (5, 9), (4, 8), (3, 7), (3, 6), (2, 5), (1, 4), (1, 3), (0, 2), (0, 1), (0, 0)]

Solution visualization on F matrix:

[31m0[0m[31m-8[0m[31m-16[0m-24-32-40-48-56-64-72-80
-8-2-9[31m-17[0m[31m-25[0m-33-41-49-57-65-73
-16-10-3-4-12[31m-20[0m-28-36-44-52-60
-24-18-11-6-7-15[31m-5[0m[31m-13[0m-21-29-37
-32-14-18-13-8-9-13-7[31m-3[0m-11-19
-40-22-8-16-16-9-12-15-7[31m3[0m-5
-48-30-16-3-11-11-12-12-15[31m-5[0m2
-56-38-24-11-6-12-14-15-12-9[31m1[0m


In [224]:
#global alignment of a longer pair: the first argument is str2 (the columns of the F matrix),
#the second argument is str1 (the rows)
needleman_wunsch("SALPQPTTPVSSFTSGSMLGRTDTALTNTYSAL", "PSPTMEAVTSVEASTASHPHSTSSYFATTYYHLY")


Indices of path found starting from bottom right:

[(34, 33), (33, 33), (32, 32), (31, 31), (30, 30), (29, 29), (28, 28), (27, 27), (26, 26), (25, 25), (24, 24), (23, 23), (22, 22), (21, 21), (20, 20), (19, 19), (18, 18), (17, 17), (16, 16), (15, 15), (14, 14), (13, 13), (13, 12), (12, 11), (11, 10), (10, 9), (9, 8), (8, 7), (7, 6), (6, 5), (5, 4), (4, 3), (3, 2), (2, 1), (1, 0), (0, 0)]

Solution visualization on F matrix:

[31m0[0m-8-16-24-32-40-48-56-64-72-80-88-96-104-112-120-128-136-144-152-160-168-176-184-192-200-208-216-224-232-240-248-256-264
[31m-8[0m-1-9-17-14-22-30-38-46-54-62-70-78-86-94-102-110-118-126-134-142-150-158-166-174-182-190-198-206-214-222-230-238-246
-16[31m-3[0m0-8-16-14-22-28-36-44-52-57-65-73-81-89-97-105-113-121-129-137-145-153-161-169-177-185-193-201-209-217-225-233
-24-11[31m-4[0m-42-6-4-12-20-26-34-42-50-58-66-74-82-90-98-106-114-122-130-138-146-154-162-170-178-186-194-202-210-218
-32-19-11[31m-5[0m-51-71-7-15-23-31-39-47-53-61-69-77-85-93-101-

In [None]:
#assumes that the blosum50 field is accessible in its environment
def smith_waterman(str2, str1, gap_penalty = -8):
    """Locally align two amino-acid strings (Smith-Waterman) and print the result.

    Fills a score matrix F that has one row per character of ``str1`` and one
    column per character of ``str2`` (plus a leading row and column of zeros),
    using the module-level ``blosum50`` for substitution scores and a linear
    gap penalty. No cell is allowed to drop below zero. The traceback starts at
    the highest-scoring cell and stops at the first cell whose score is zero;
    the visited cells are printed, followed by F with those cells coloured red.

    Args:
        str2: sequence laid out along the columns of F.
        str1: sequence laid out along the rows of F.
        gap_penalty: score added for every gap; defaults to -8.

    Returns:
        None. The path and the matrix are printed.

    Raises:
        KeyError: if a character outside the 20 upper-case amino-acid letters
            A R N D C Q E G H I L K M F P S T W Y V is looked up.
    """

    #our penalty score
    d = gap_penalty

    height = len(str1) + 1
    width = len(str2) + 1

    #map the characters with their indices in the blosum50 matrix
    blosum_index_map = {aa: idx for idx, aa in enumerate("ARNDCQEGHILKMFPSTWYV")}

    #the lookup has to live here, the helper of the same name inside
    #needleman_wunsch is local to that function and not visible from this one
    def get_blosum_score(char1, char2):
        #int() keeps the cell scores as plain Python integers
        return int(blosum50[blosum_index_map[char1]][blosum_index_map[char2]])

    #initialization step, a local alignment may start anywhere so the borders
    #(and every cell until it is filled) hold a zero score with a "stop" marker
    matrix = [[(0, "stop")] * width for _ in range(height)]

    #highest score seen so far and where it is, the traceback starts there
    best_score = 0
    best_cell = (0, 0)

    for i in range(1, height):
        char1 = str1[i - 1]

        for j in range(1, width):
            char2 = str2[j - 1]
            score = get_blosum_score(char2, char1)

            #dynamic programming step with the extra option of starting afresh at zero,
            #"stop" is listed first so that a tie at zero ends the alignment
            matrix[i][j] = max([
                (0, "stop"),
                (matrix[i-1][j-1][0] + score, "nw"),
                (matrix[i-1][j][0] + d, "n"),
                (matrix[i][j-1][0] + d, "w")
            ], key = lambda tup: tup[0])

            #strict comparison keeps the first best cell in row-major order
            if matrix[i][j][0] > best_score:
                best_score = matrix[i][j][0]
                best_cell = (i, j)

    #F matrix has been generated, now walk back from the best cell
    i, j = best_cell

    #path of cell transitions we will build up
    path = []

    #every step decreases i or j and all border cells are "stop", so this terminates
    while matrix[i][j][1] != "stop":
        path.append((i, j))

        #the direction we need to go in
        dr = matrix[i][j][1]

        if dr == "nw":
            i -= 1
            j -= 1
        elif dr == "n":
            i -= 1
        else:
            j -= 1

    #include the zero cell where the local alignment begins
    path.append((i, j))

    print("\nIndices of path found starting from the highest-scoring cell:\n\n" + str(path) + "\n")

    print("Solution visualization on F matrix:\n")

    #set membership is constant time, the list would be rescanned for every cell
    on_path = set(path)

    #pad every score to a common width and separate the columns with a space,
    #otherwise neighbouring numbers run together and cannot be told apart
    cell_width = max(len(str(cell[0])) for row in matrix for cell in row)

    for i in range(height):
        cells = []

        for j in range(width):
            #pad before colouring so the invisible escape codes do not disturb the alignment
            text = str(matrix[i][j][0]).rjust(cell_width)
            cells.append(colored(text, "red") if (i, j) in on_path else text)

        print(" ".join(cells))