In [1]:
import numpy as np
import pandas as pd

In [2]:
# The two sequences to align; their characters are used as row/column labels
# when looking up substitution scores.
seq1, seq2 = "HGSAQVKGHG", "KTEAEMKASEDLKKHGT"
# Gap penalties (negative = cost). Both carry the same value here.
gap_open = gap_extend = -8

In [3]:
def load_blosum45(filepath):
    """Load a substitution matrix from a whitespace-delimited text file.

    The column header is the first line whose stripped text starts with
    ``"A "``. Every following line, up to the first blank line or the end of
    the file, is read as one matrix row: a row label followed by the scores.

    Parameters
    ----------
    filepath : str or path-like
        Path of the matrix text file.

    Returns
    -------
    pandas.DataFrame
        Scores cast to ``int``, indexed by the row labels (index name
        ``"AA"``) with the header symbols as column names.

    Raises
    ------
    ValueError
        If the file contains no header line starting with ``"A "``.
    """
    with open(filepath, "r") as f:
        lines = f.readlines()

    # Locate the header; lines before it (e.g. comments) are skipped.
    start = next(
        (i for i, line in enumerate(lines) if line.strip().startswith("A ")),
        None,
    )
    if start is None:
        # Without this guard lines[None] would fail with an opaque TypeError.
        raise ValueError(
            f"No matrix header line (starting with 'A ') found in {filepath!r}"
        )

    header = lines[start].split()
    matrix = []
    for l in lines[start + 1:]:
        if not l.strip():
            # A blank line marks the end of the matrix body.
            break
        matrix.append(l.split())

    # The first field of each row is its label, hence the extra "AA" column.
    df = pd.DataFrame(matrix, columns=["AA"] + header)
    return df.set_index("AA").astype(int)

In [None]:
# Read the substitution matrix from the data file in the working directory.
blosum45 = load_blosum45(filepath="dataset_assignment_module_9.txt")

In [None]:
# Lift pandas' row/column display limits so the whole matrix is printed
# without truncation (set_option accepts several pattern/value pairs at once).
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(blosum45)

In [8]:
# Dynamic-programming tables with one extra row/column for the empty prefix.
n, m = len(seq1), len(seq2)
score_matrix = np.zeros((n+1, m+1))
# dtype=str would allocate 1-character cells ('<U1') and silently truncate the
# direction labels to 'U' / 'L' / 'D', so comparisons against the full words
# 'Up' / 'Left' / 'Diagonal' would never match. '<U8' is wide enough for the
# longest label, 'Diagonal'; cells still start out as empty strings.
trace_matrix = np.zeros((n+1, m+1), dtype='<U8')

In [9]:
# Borders of the tables: aligning a prefix against the empty sequence costs
# one gap penalty per character.
# First column: i characters of seq1 against nothing -> i gaps, move 'Up'.
score_matrix[1:, 0] = gap_open * np.arange(1, n + 1)
trace_matrix[1:, 0] = 'Up'    # delete
# First row: j characters of seq2 against nothing -> j gaps, move 'Left'.
score_matrix[0, 1:] = gap_open * np.arange(1, m + 1)
trace_matrix[0, 1:] = 'Left'  # insert

In [None]:
# Fill the interior of both tables. Each cell takes the best of three moves:
# align the two characters (diagonal), or put a gap in seq2 (up) or in seq1 (left).
for i in range(1, n+1):
    res1 = seq1[i-1]
    for j in range(1, m+1):
        res2 = seq2[j-1]
        candidates = (
            (score_matrix[i-1, j-1] + blosum45.loc[res1, res2], 'Diagonal'),
            (score_matrix[i-1, j] + gap_extend, 'Up'),
            (score_matrix[i, j-1] + gap_extend, 'Left'),
        )
        # max() returns the first of several equal maxima, so ties resolve
        # in the listed order: Diagonal, then Up, then Left.
        best_score, best_move = max(candidates, key=lambda cand: cand[0])
        score_matrix[i, j] = best_score
        trace_matrix[i, j] = best_move

print(score_matrix)
print(trace_matrix)

In [14]:
# Trace back from the bottom-right cell (n, m) towards (0, 0), emitting one
# alignment column per step. Columns are collected in reverse order and
# flipped at the end.
rev1 = []
rev2 = []
i = n
j = m

while i > 0 or j > 0:
    step = trace_matrix[i, j]
    if step == 'Diagonal':
        # Both characters are aligned with each other.
        rev1.append(seq1[i-1])
        rev2.append(seq2[j-1])
        i -= 1
        j -= 1
    elif step == 'Up':
        # Character of seq1 aligned against a gap in seq2.
        rev1.append(seq1[i-1])
        rev2.append('-')
        i -= 1
    elif step == 'Left':
        # Character of seq2 aligned against a gap in seq1.
        rev1.append('-')
        rev2.append(seq2[j-1])
        j -= 1
    else:
        # Unknown label: report where the walk got stuck and stop, keeping
        # whatever partial alignment has been collected so far.
        print('Traceback stopped at:', i, j, repr(trace_matrix[i, j]))
        break

# store alignment
align1 = "".join(reversed(rev1))
align2 = "".join(reversed(rev2))

In [None]:
# Report the inputs, the aligned pair (one sequence per line), and the score
# held in the bottom-right cell of the score table.
print("Sequence 1:", seq1)
print("Sequence 2:", seq2)
print("\nOptimal Global Alignment:\n")
print(align1, align2, sep="\n")
final_score = score_matrix[n, m]
print("\nFinal Alignment Score:", final_score)