In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
# The two sequences to be aligned against each other.
seq1, seq2 = "HGSAQVKGHG", "KTEAEMKASEDLKKHGT"
# Gap penalties; both are deliberately given the same value here.
gap_open = gap_extend = -8

In [3]:
def load_blosum45(filepath):
    """Read a whitespace-delimited substitution matrix from a text file.

    The matrix is located by its header row: the first line that, once
    stripped, starts with ``"A "``. Each following line, up to the first
    blank line or the end of the file, is read as one matrix row made of a
    row label followed by one integer score per header column.

    Parameters
    ----------
    filepath : str or path-like
        Path of the text file containing the matrix.

    Returns
    -------
    pandas.DataFrame
        Integer scores, indexed by the row labels (index name ``"AA"``) with
        the header symbols as columns, so ``df.loc[a, b]`` gives the score
        for the pair ``(a, b)``.

    Raises
    ------
    ValueError
        If the file contains no header line starting with ``"A "``.
    """
    with open(filepath, "r") as f:
        lines = f.readlines()

    # Locate the header row; anything before it (comments, other data) is ignored.
    start = next(
        (i for i, line in enumerate(lines) if line.strip().startswith("A ")),
        None,
    )
    if start is None:
        # Without this guard the code would fail later with an opaque
        # "list indices must be integers" TypeError.
        raise ValueError(
            f"No matrix header line starting with 'A ' found in {filepath!r}"
        )

    header = lines[start].split()

    rows = []
    for line in lines[start + 1:]:
        if not line.strip():
            # A blank line marks the end of the matrix.
            break
        rows.append(line.split())

    # The first field of every row is its label; the rest are the scores.
    table = pd.DataFrame(rows, columns=["AA"] + header)
    return table.set_index("AA").astype(int)

In [6]:
# Location of the substitution-matrix file. Set the BLOSUM45_PATH environment
# variable to run the notebook on another machine; the fallback is the
# original author's local download path.
blosum45_path = os.environ.get(
    "BLOSUM45_PATH",
    "C:\\Users\\biana\\Downloads\\dataset_assignment_module_9.txt",
)
blosum45 = load_blosum45(blosum45_path)

In [7]:
# Lift pandas' row/column display limits so the whole matrix is printed.
for option_name in ("display.max_rows", "display.max_columns"):
    pd.set_option(option_name, None)
print(blosum45)

    A  R  N  D   C  Q  E  G   H  I  L  K  M  F  P  S  T   W  Y  V  B  J  Z  X  \
AA                                                                              
A   5 -2 -1 -2  -1 -1 -1  0  -2 -1 -1 -1 -1 -2 -1  1  0  -2 -2  0 -1 -1 -1 -1   
R  -2  7  0 -1  -3  1  0 -2   0 -3 -2  3 -1 -2 -2 -1 -1  -2 -1 -2 -1 -3  1 -1   
N  -1  0  6  2  -2  0  0  0   1 -2 -3  0 -2 -2 -2  1  0  -4 -2 -3  5 -3  0 -1   
D  -2 -1  2  7  -3  0  2 -1   0 -4 -3  0 -3 -4 -1  0 -1  -4 -2 -3  6 -3  1 -1   
C  -1 -3 -2 -3  12 -3 -3 -3  -3 -3 -2 -3 -2 -2 -4 -1 -1  -5 -3 -1 -2 -2 -3 -1   
Q  -1  1  0  0  -3  6  2 -2   1 -2 -2  1  0 -4 -1  0 -1  -2 -1 -3  0 -2  4 -1   
E  -1  0  0  2  -3  2  6 -2   0 -3 -2  1 -2 -3  0  0 -1  -3 -2 -3  1 -3  5 -1   
G   0 -2  0 -1  -3 -2 -2  7  -2 -4 -3 -2 -2 -3 -2  0 -2  -2 -3 -3 -1 -4 -2 -1   
H  -2  0  1  0  -3  1  0 -2  10 -3 -2 -1  0 -2 -2 -1 -2  -3  2 -3  0 -2  0 -1   
I  -1 -3 -2 -4  -3 -2 -3 -4  -3  5  2 -3  2  0 -2 -2 -1  -2  0  3 -3  4 -3 -1   
L  -1 -2 -3 -3  -2 -2 -2 -3 

In [8]:
# Grid dimensions: one extra row and column represent the empty prefix.
n, m = len(seq1), len(seq2)
# Scores are kept as floats (NumPy's default for zeros).
score_matrix = np.zeros((n + 1, m + 1))
# Traceback labels are 'Diagonal', 'Up' or 'Left'. dtype=str would allocate
# one-character cells ('<U1') and silently truncate every label on
# assignment, so reserve room for the longest label (8 characters).
trace_matrix = np.zeros((n + 1, m + 1), dtype="<U8")

In [9]:
# First column: a prefix of seq1 aligned against nothing, i.e. i gaps in
# seq2. The pointer must be spelled 'Up' (not 'UP') to agree with the label
# written by the fill loop and tested by the traceback.
score_matrix[1:, 0] = gap_open * np.arange(1, n + 1)
trace_matrix[1:, 0] = 'Up'  # delete
# First row: a prefix of seq2 aligned against nothing, i.e. j gaps in seq1.
score_matrix[0, 1:] = gap_open * np.arange(1, m + 1)
trace_matrix[0, 1:] = 'Left'  # insert

In [16]:
# Fill the score and traceback grids row by row. Each cell takes the best of
# three moves; on ties the preference order is Diagonal, then Up, then Left
# (max() returns the first maximal candidate).
for row in range(1, n + 1):
    residue1 = seq1[row - 1]
    for col in range(1, m + 1):
        candidates = (
            # Pair the two residues and add their substitution score.
            (score_matrix[row - 1, col - 1] + blosum45.loc[residue1, seq2[col - 1]], 'Diagonal'),
            # Consume a residue of seq1 against a gap.
            (score_matrix[row - 1, col] + gap_extend, 'Up'),
            # Consume a residue of seq2 against a gap.
            (score_matrix[row, col - 1] + gap_extend, 'Left'),
        )
        best_score, best_move = max(candidates, key=lambda candidate: candidate[0])
        score_matrix[row, col] = best_score
        trace_matrix[row, col] = best_move

print(score_matrix)
print(trace_matrix)

[[   0.   -8.  -16.  -24.  -32.  -40.  -48.  -56.  -64.  -72.  -80.  -88.
   -96. -104. -112. -120. -128. -136.]
 [  -8.   -1.   -9.  -16.  -24.  -32.  -40.  -48.  -56.  -64.  -72.  -80.
   -88.  -96. -104. -102. -110. -118.]
 [ -16.   -9.   -3.  -11.  -16.  -24.  -32.  -40.  -48.  -56.  -64.  -72.
   -80.  -88.  -96. -104.  -95. -103.]
 [ -24.  -17.   -7.   -3.  -10.  -16.  -24.  -32.  -39.  -44.  -52.  -60.
   -68.  -76.  -84.  -92. -100.  -93.]
 [ -32.  -25.  -15.   -8.    2.   -6.  -14.  -22.  -27.  -35.  -43.  -51.
   -59.  -67.  -75.  -83.  -91.  -99.]
 [ -40.  -31.  -23.  -13.   -6.    4.   -4.  -12.  -20.  -27.  -33.  -41.
   -49.  -57.  -65.  -73.  -81.  -89.]
 [ -48.  -39.  -31.  -21.  -13.   -4.    5.   -3.  -11.  -19.  -27.  -35.
   -40.  -48.  -56.  -64.  -72.  -80.]
 [ -56.  -43.  -39.  -29.  -21.  -12.   -3.   10.    2.   -6.  -14.  -22.
   -30.  -35.  -43.  -51.  -59.  -67.]
 [ -64.  -51.  -45.  -37.  -29.  -20.  -11.    2.   10.    2.   -6.  -14.
   -22.  -30.  -37.  -

In [14]:
# store alignment
align1 = ""
align2 = ""
i = n
j = m

while i > 0 or j > 0:
    if trace_matrix[i, j] == 'Delete':
        align1 += seq1[i-1]
        align2 += seq2[j-1]
        i -= 1
        j -= 1
    elif trace_matrix[i, j] == 'Up':
        align1 += seq1[i-1]
        align2 += '-'
        i -= 1
    elif trace_matrix[i, j] == 'Left':
        align1 += '-'
        align2 += seq2[j-1]
        j -= 1
    else:
        break

align1 = align1[::-1]
align2 = align2[::-1]

In [15]:
# Echo the inputs, then the two aligned strings, then the score stored in the
# bottom-right cell of the score grid.
for label, sequence in (("Sequence 1:", seq1), ("Sequence 2:", seq2)):
    print(label, sequence)
print("\nOptimal Global Alignment:\n")
print(align1, align2, sep="\n")
print("\nFinal Alignment Score:", score_matrix[n, m])

Sequence 1: HGSAQVKGHG
Sequence 2: KTEAEMKASEDLKKHGT

Optimal Global Alignment:




Final Alignment Score: -28.0
