In [11]:
from Bio import pairwise2
from Bio.pairwise2 import format_alignment
import random




In [10]:
!pip install Bio

Collecting Bio
  Downloading bio-1.7.1-py3-none-any.whl.metadata (5.7 kB)
Collecting biopython>=1.80 (from Bio)
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting gprofiler-official (from Bio)
  Downloading gprofiler_official-1.0.0-py3-none-any.whl.metadata (11 kB)
Collecting mygene (from Bio)
  Downloading mygene-3.2.2-py2.py3-none-any.whl.metadata (10 kB)
Collecting biothings-client>=0.2.6 (from mygene->Bio)
  Downloading biothings_client-0.4.1-py3-none-any.whl.metadata (10 kB)
Downloading bio-1.7.1-py3-none-any.whl (280 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m281.0/281.0 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m30.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gprofiler_official-1.0.0-py3-none-any.whl (9.3

In [13]:
def generate_dna(length):
    """Build a random DNA string.

    Args:
        length: Number of bases to generate.

    Returns:
        A string of ``length`` characters, each picked with ``random.choice``
        from A, T, G and C.
    """
    alphabet = ['A', 'T', 'G', 'C']
    bases = []
    for _ in range(length):
        bases.append(random.choice(alphabet))
    return ''.join(bases)

In [5]:
def needleman_wunsch(seq1, seq2, match, mismatch, indel):
    """Globally align two sequences with the Needleman-Wunsch algorithm.

    A linear gap model is used: every gap position adds ``indel``.

    Args:
        seq1: First sequence (e.g. a string); laid out along the matrix rows.
        seq2: Second sequence; laid out along the matrix columns.
        match: Score added when the two aligned symbols are equal.
        mismatch: Score added when the two aligned symbols differ.
        indel: Score added for each gap position.

    Returns:
        A tuple ``(align1, align2, score)``: the two gapped sequences
        (``"-"`` marks a gap) and the value of the bottom-right matrix cell.
        When several alignments are optimal, the traceback prefers a
        diagonal move, then a gap in ``seq2``, then a gap in ``seq1``.
    """
    n, m = len(seq1), len(seq2)
    score_matrix = [[0] * (m + 1) for _ in range(n + 1)]

    # First column / first row: a prefix aligned entirely against gaps.
    for i in range(1, n + 1):
        score_matrix[i][0] = i * indel
    for j in range(1, m + 1):
        score_matrix[0][j] = j * indel

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            substitution = match if seq1[i - 1] == seq2[j - 1] else mismatch
            score_matrix[i][j] = max(
                score_matrix[i - 1][j - 1] + substitution,
                score_matrix[i - 1][j] + indel,
                score_matrix[i][j - 1] + indel,
            )

    # Collect the alignment back-to-front in lists and reverse once at the
    # end; prepending to a string on every step copies the whole string.
    rev1, rev2 = [], []
    i, j = n, m
    while i > 0 or j > 0:
        current = score_matrix[i][j]
        if (
            i > 0
            and j > 0
            and current
            == score_matrix[i - 1][j - 1]
            + (match if seq1[i - 1] == seq2[j - 1] else mismatch)
        ):
            rev1.append(seq1[i - 1])
            rev2.append(seq2[j - 1])
            i -= 1
            j -= 1
        elif i > 0 and (j == 0 or current == score_matrix[i - 1][j] + indel):
            # In column 0 the only legal move is up. Do not rely on the
            # equality there: with float penalties i * indel can differ from
            # (i - 1) * indel + indel by a rounding error.
            rev1.append(seq1[i - 1])
            rev2.append("-")
            i -= 1
        else:
            # Reached only with j > 0: either i == 0 or the cell came from
            # the left neighbour.
            rev1.append("-")
            rev2.append(seq2[j - 1])
            j -= 1

    align1 = "".join(reversed(rev1))
    align2 = "".join(reversed(rev2))
    return align1, align2, score_matrix[n][m]

In [6]:
def smith_waterman(seq1, seq2, match, mismatch, indel):
    """Locally align two sequences with the Smith-Waterman algorithm.

    Args:
        seq1: First sequence; laid out along the matrix rows.
        seq2: Second sequence; laid out along the matrix columns.
        match: Score added when the two aligned symbols are equal.
        mismatch: Score added when the two aligned symbols differ.
        indel: Score added for each gap position.

    Returns:
        A tuple ``(align1, align2, max_score)``: the gapped local alignment
        (``"-"`` marks a gap) ending at the first cell, in row-major order,
        that holds the highest score, together with that score.
    """
    rows, cols = len(seq1), len(seq2)
    table = [[0] * (cols + 1) for _ in range(rows + 1)]
    best, best_cell = 0, (0, 0)

    def pair_score(r, c):
        # Substitution score for the symbols that meet in cell (r, c).
        return match if seq1[r - 1] == seq2[c - 1] else mismatch

    for r in range(1, rows + 1):
        above, row = table[r - 1], table[r]
        for c in range(1, cols + 1):
            cell = max(
                0,
                above[c - 1] + pair_score(r, c),
                above[c] + indel,
                row[c - 1] + indel,
            )
            row[c] = cell
            # Strict comparison: the earliest best-scoring cell wins.
            if cell > best:
                best, best_cell = cell, (r, c)

    # Walk back from the best cell until a zero cell or a matrix edge.
    top, bottom = [], []
    r, c = best_cell
    while r > 0 and c > 0 and table[r][c] != 0:
        here = table[r][c]
        if here == table[r - 1][c - 1] + pair_score(r, c):
            top.append(seq1[r - 1])
            bottom.append(seq2[c - 1])
            r, c = r - 1, c - 1
        elif here == table[r - 1][c] + indel:
            top.append(seq1[r - 1])
            bottom.append("-")
            r -= 1
        else:
            top.append("-")
            bottom.append(seq2[c - 1])
            c -= 1

    return "".join(reversed(top)), "".join(reversed(bottom)), best

In [3]:
# Demo inputs shared by the alignment cells.
seq1, seq2 = "AAAGTGA", "CAATGC"

In [18]:
# Global alignment of the demo sequences; prints both gapped strings and the score.
align1, align2, score = needleman_wunsch(seq1, seq2, match=2, mismatch=-1, indel=-3)
print(align1, align2, score, sep="\n")

AAAGTGA
CAA-TGC
3


In [22]:
# Local alignment of the demo sequences; prints both gapped strings and the score.
align1, align2, score = smith_waterman(seq1, seq2, match=2, mismatch=-1, indel=-3)
print(align1, align2, score, sep="\n")

AAAG
AATG
5


In [24]:
# Cross-check the hand-written global alignment against Biopython on the same
# inputs (seq1 / seq2) rather than on duplicated literals that could drift.
# globalms takes (seqA, seqB, match, mismatch, gap_open, gap_extend); using
# gap_open == gap_extend == -3 mirrors the per-position gap score of -3 passed
# to needleman_wunsch.
alignments = pairwise2.align.globalms(seq1, seq2, 2, -1, -3, -3)
for alignment in alignments:
    print(format_alignment(*alignment))

AAAGTGA
.|| ||.
CAA-TGC
  Score=3

