<a href="https://colab.research.google.com/github/abinayaa-148/Bio---Silicon-Labs/blob/main/Local_Alignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

# Scoring scheme read by smith_waterman (linear gap penalty).
MATCH_SCORE = 1      # added when the two aligned characters are equal
MISMATCH_SCORE = -1  # added when the two aligned characters differ
GAP_SCORE = -2       # added for each position aligned against a gap

def smith_waterman(seq1, seq2, match_score=None, mismatch_score=None,
                   gap_score=None):
    """Find one best-scoring local alignment of two sequences (Smith-Waterman).

    A linear gap penalty is used: every position aligned against a gap
    costs ``gap_score``.

    Args:
        seq1: First sequence; its characters label the rows of the matrix.
        seq2: Second sequence; its characters label the columns.
        match_score: Score for a pair of equal characters. Defaults to the
            module-level ``MATCH_SCORE``.
        mismatch_score: Score for a pair of differing characters. Defaults
            to the module-level ``MISMATCH_SCORE``.
        gap_score: Score for one gap position. Defaults to the module-level
            ``GAP_SCORE``.

    Returns:
        A tuple ``(aligned_seq1, aligned_seq2, max_score, score_matrix)``:
        the two aligned strings (``'-'`` marks a gap), the highest cell
        value, and the ``(len(seq1) + 1, len(seq2) + 1)`` score matrix whose
        first row and first column are the zero border.
    """
    # Resolve defaults at call time so later changes to the module-level
    # constants are picked up, exactly as before.
    if match_score is None:
        match_score = MATCH_SCORE
    if mismatch_score is None:
        mismatch_score = MISMATCH_SCORE
    if gap_score is None:
        gap_score = GAP_SCORE

    m, n = len(seq1), len(seq2)

    # An integer matrix silently truncates fractional scores, which makes the
    # traceback comparisons below fail; switch to floats when needed.
    all_integral = all(
        isinstance(s, (int, np.integer))
        for s in (match_score, mismatch_score, gap_score)
    )
    score_matrix = np.zeros((m + 1, n + 1), dtype=int if all_integral else float)

    # Matrix filling: each cell is the best of "start fresh" (0), extending
    # the diagonal with a match/mismatch, or opening a gap in either sequence.
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            pair_score = match_score if seq1[i - 1] == seq2[j - 1] else mismatch_score
            match = score_matrix[i - 1, j - 1] + pair_score
            delete = score_matrix[i - 1, j] + gap_score
            insert = score_matrix[i, j - 1] + gap_score
            score_matrix[i, j] = max(0, match, delete, insert)

    # Traceback: start at the highest-scoring cell (np.argmax returns the
    # first one in row-major order on ties) and walk back until a zero cell.
    align1, align2 = [], []
    i, j = np.unravel_index(np.argmax(score_matrix), score_matrix.shape)
    max_score = score_matrix[i, j]
    while i > 0 and j > 0 and score_matrix[i, j] > 0:
        current_score = score_matrix[i, j]
        pair_score = match_score if seq1[i - 1] == seq2[j - 1] else mismatch_score
        # Preference order on ties: diagonal, then up, then left.
        if current_score == score_matrix[i - 1, j - 1] + pair_score:
            align1.append(seq1[i - 1])
            align2.append(seq2[j - 1])
            i -= 1
            j -= 1
        elif current_score == score_matrix[i - 1, j] + gap_score:
            align1.append(seq1[i - 1])
            align2.append('-')
            i -= 1
        elif current_score == score_matrix[i, j - 1] + gap_score:
            align1.append('-')
            align2.append(seq2[j - 1])
            j -= 1
        else:
            # No predecessor reproduces this cell (e.g. rounding with exotic
            # score types); stop here instead of looping forever.
            break

    # The path was collected end-to-start.
    align1.reverse()
    align2.reverse()
    aligned_seq1 = ''.join(align1)
    aligned_seq2 = ''.join(align2)
    return aligned_seq1, aligned_seq2, max_score, score_matrix



# Demo input: two short DNA sequences
seq1, seq2 = "ATGCATAGGC", "ATGCATGC"

# Run the aligner; it hands back both gapped strings, the best local score
# and the full scoring matrix (zero border included).
aligned_seq1, aligned_seq2, score, score_matrix = smith_waterman(seq1, seq2)

print("\nAlignment Score:", score)
print("\nAligned Sequences:")
print(aligned_seq1, aligned_seq2, sep="\n")

# Tabulate the matrix without its zero border row/column, labelling rows
# with the characters of seq1 and columns with the characters of seq2.
score_df = pd.DataFrame(
    data=score_matrix[1:, 1:],
    index=list(seq1),
    columns=list(seq2),
)

print("\nScore Matrix:")
print(score_df)


Alignment Score: 6

Aligned Sequences:
ATGCAT
ATGCAT

Score Matrix:
   A  T  G  C  A  T  G  C
A  1  0  0  0  1  0  0  0
T  0  2  0  0  0  2  0  0
G  0  0  3  1  0  0  3  1
C  0  0  1  4  2  0  1  4
A  1  0  0  2  5  3  1  2
T  0  2  0  0  3  6  4  2
A  1  0  1  0  1  4  5  3
G  0  0  1  0  0  2  5  4
G  0  0  1  0  0  0  3  4
C  0  0  0  2  0  0  1  4
