### Import Library yang Dibutuhkan

In [None]:
!pip install BioPython
from Bio import SeqIO
from enum import IntEnum
import numpy as np
from google.colab import files



## 1. Global Alignment (Needleman-Wunsch)

In [None]:
# Read a FASTA file
def read_fasta(file):
    """Return the sequence of every record in a FASTA file.

    Args:
        file: Path or handle passed straight to ``SeqIO.parse``.

    Returns:
        A list with one plain ``str`` per FASTA record, in file order.
    """
    sequences = []
    for record in SeqIO.parse(file, "fasta"):
        sequences.append(str(record.seq))
    return sequences

# Needleman-Wunsch global alignment
def needleman_wunsch(seq1, seq2, *, match=1, mismatch=-1, gap=-1):
    """Globally align two sequences with the Needleman-Wunsch algorithm.

    Two (len(seq1) + 1) x (len(seq2) + 1) integer matrices are allocated,
    so memory and time grow with the product of the sequence lengths.

    Args:
        seq1: First sequence (rows of the matrices).
        seq2: Second sequence (columns of the matrices).
        match: Integer score added when two symbols are equal.
        mismatch: Integer score added when two symbols differ.
        gap: Integer score added for every gap column.

    Returns:
        A tuple ``(aligned_seq1, aligned_seq2, alignment_score)`` where the
        two aligned strings have equal length, use ``'-'`` for gaps, and
        ``alignment_score`` is the optimal global score as a Python ``int``.
    """
    m = len(seq1)
    n = len(seq2)

    # Traceback codes stored in traceback_matrix
    DIAGONAL, UP, LEFT = 1, 2, 3

    # Empty matrices for scoring and tracing
    score_matrix = np.zeros((m + 1, n + 1), dtype=int)
    traceback_matrix = np.zeros((m + 1, n + 1), dtype=int)

    # Boundary cells can only be reached through gaps. The traceback
    # directions must be set here as well; otherwise the traceback would
    # read seq2[-1] once it reaches column 0 with rows still remaining.
    score_matrix[:, 0] = gap * np.arange(m + 1)
    score_matrix[0, :] = gap * np.arange(n + 1)
    traceback_matrix[1:, 0] = UP
    traceback_matrix[0, 1:] = LEFT

    # Fill the scoring matrix
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            substitution = match if seq1[i - 1] == seq2[j - 1] else mismatch
            match_score = score_matrix[i - 1, j - 1] + substitution
            delete_score = score_matrix[i - 1, j] + gap
            insert_score = score_matrix[i, j - 1] + gap

            max_score = max(match_score, delete_score, insert_score)
            score_matrix[i, j] = max_score

            # Ties are resolved in the order diagonal, up, left
            if max_score == match_score:
                traceback_matrix[i, j] = DIAGONAL
            elif max_score == delete_score:
                traceback_matrix[i, j] = UP
            else:
                traceback_matrix[i, j] = LEFT

    # Trace back from the bottom-right corner; columns are collected in
    # reverse order and joined once at the end.
    columns1 = []
    columns2 = []
    i, j = m, n
    while i > 0 or j > 0:
        direction = traceback_matrix[i, j]
        if direction == DIAGONAL:
            columns1.append(seq1[i - 1])
            columns2.append(seq2[j - 1])
            i -= 1
            j -= 1
        elif direction == UP:
            columns1.append(seq1[i - 1])
            columns2.append('-')
            i -= 1
        else:
            columns1.append('-')
            columns2.append(seq2[j - 1])
            j -= 1

    aligned_seq1 = "".join(reversed(columns1))
    aligned_seq2 = "".join(reversed(columns2))

    # The bottom-right cell already holds the optimal alignment score
    alignment_score = int(score_matrix[m, n])

    return aligned_seq1, aligned_seq2, alignment_score

if __name__ == "__main__":
    # FASTA files to upload; each upload is saved under the name listed here
    nama_file = ['african-swine-fever-virus_BA71V_genomic.fna',
                 'african-swine-fever-virus_E75_genomic.fna'
                 ]

    # One upload prompt per expected file. The loop variable has its own
    # name so that the list above is not overwritten while iterating.
    for target_name in nama_file:
        uploaded_file = files.upload()
        if not uploaded_file:
            raise ValueError(f"No file was uploaded for {target_name}")
        with open(target_name, 'wb') as f:
            # Only the first uploaded entry is written to disk
            f.write(next(iter(uploaded_file.values())))

    # Read the FASTA files
    genome1 = read_fasta('african-swine-fever-virus_BA71V_genomic.fna')
    genome2 = read_fasta('african-swine-fever-virus_E75_genomic.fna')

    # Run the Needleman-Wunsch global alignment on the first record of each file
    output_1, output_2, alignment_score = needleman_wunsch(genome1[0], genome2[0])

    # Show the alignment
    print("Hasil Global Sequence Alignment:")
    print(output_1)
    print(output_2)

    # Show the alignment score
    print(f"Skor Alignment: {alignment_score}")


Saving african-swine-fever-virus_BA71V_genomic.fna to african-swine-fever-virus_BA71V_genomic (2).fna


Saving african-swine-fever-virus_E75_genomic.fna to african-swine-fever-virus_E75_genomic (2).fna
Hasil Global Sequence Alignment:
-TATA-AG-TATA---TTA--TATATTTTATAATTTTATATATATAAAGCAGGTACAATTTTATTATATAGTGCAGATCTTTTTTGTGAAAAAAAAATAATTATTTTTTTGC-GCGCCGGAATAAT-TATTTTTTTTGCGCGGAAATATTT-T-TGCAGTGGCGATT-G-ATT--TTTTTTTCAGTTGTCTGCCCGTGGC-GGA--ATAATTATTTTTTCGCCGGAAAGCATTTTGTAGTTTCACAAGATGCAGAATTTGGC--G-GGCCGGAATAATTATTTTTTCGCCGGAAAGTA-TTTTG-TGTA-CCGTAGTTTCACTAG-ATTTCGCGGTGCATACCGGTGGGCGATAGGTATTAAAA--TAATTTGTA-C-TGCTGTAGGCGTTAAAGATTAA-AATTATTATTACTGCTGTAGGCGTTAAAC-ATTAAACTTATTACTACTGCTATAGGCGTTAAACATTAAAATTATTACTACTGTTGTAGGCGTTAAACATTAAAATTATTACTACTGCTGTAGGCGTCAAAGATTAAAATTATTACTACTGCTGTAGGCGTTAAACA-TTAAACTTATTACTAC-TGCTATAGG-CGTTAAACATTAAAATTATTACTAC-TGT-T-GTA--GGCGTCAAAGATTAAAATTATTACTACTGCTGTAG-GCGTTA--AACATTA-AAATTATTAC-TACTG-CTGTAGGCGT-TA-AACATTA--AAATTATTACTACTGT--TGTAGGCGTTAAACATTAA--A-AT-TATTA-CT-ACTGTT-GTAGGCGTCAAAGATTAAAATTATTACTACTGCT-GTAGGCGTTAAACATTAAAATTATTACTACTGCTGTAGGCGTTAAACAT

## 2. Local Alignment (Smith-Waterman)

In [None]:
# Read a FASTA file
def read_fasta(file):
    """Parse a FASTA file and return its sequences as plain strings.

    Args:
        file: Path or handle passed straight to ``SeqIO.parse``.

    Returns:
        A list containing ``str(record.seq)`` for each record, in file order.
    """
    records = SeqIO.parse(file, "fasta")
    return list(map(lambda rec: str(rec.seq), records))

# Smith-Waterman local alignment
def smith_waterman(seq1, seq2):
    """Find a best-scoring local alignment of two sequences (Smith-Waterman).

    Scoring is fixed at +1 for a match and -1 for a mismatch or a gap.
    When several moves explain a cell's score, a horizontal gap is preferred
    over a vertical gap, which is preferred over the diagonal. The traceback
    starts from the last cell (row by row) that holds the highest score and
    ends at the first cell whose score is zero.

    Args:
        seq1: First sequence (rows of the matrices).
        seq2: Second sequence (columns of the matrices).

    Returns:
        A tuple ``(aligned_seq1, aligned_seq2)`` of equal-length strings in
        which ``'-'`` marks a gap. Both are empty when no cell scores above
        zero.
    """
    # Scoring scheme
    match_reward, mismatch_penalty, gap_penalty = 1, -1, -1

    # Traceback codes
    STOP, LEFT, UP, DIAGONAL = 0, 1, 2, 3

    # Zero-filled matrices for the scores and the traceback moves
    n_rows = len(seq1) + 1
    n_cols = len(seq2) + 1
    scores = np.zeros(shape=(n_rows, n_cols), dtype=int)
    moves = np.zeros(shape=(n_rows, n_cols), dtype=int)

    best = 0
    best_r, best_c = -1, -1

    # Score every cell
    for r in range(1, n_rows):
        for c in range(1, n_cols):
            if seq1[r - 1] == seq2[c - 1]:
                step = match_reward
            else:
                step = mismatch_penalty

            from_diagonal = scores[r - 1, c - 1] + step
            from_above = scores[r - 1, c] + gap_penalty
            from_left = scores[r, c - 1] + gap_penalty

            # Local alignment: a cell never drops below zero
            cell = max(0, from_diagonal, from_above, from_left)
            scores[r, c] = cell

            # Record which move produced the score
            if cell == 0:
                moves[r, c] = STOP
            elif cell == from_left:
                moves[r, c] = LEFT
            elif cell == from_above:
                moves[r, c] = UP
            elif cell == from_diagonal:
                moves[r, c] = DIAGONAL

            # ">=" keeps the last cell that reaches the highest score
            if cell >= best:
                best = cell
                best_r, best_c = r, c

    # Walk back from the best cell, collecting columns in reverse order
    top = []
    bottom = []
    r, c = best_r, best_c
    while moves[r, c] != STOP:
        move = moves[r, c]
        if move == DIAGONAL:
            top.append(seq1[r - 1])
            bottom.append(seq2[c - 1])
            r -= 1
            c -= 1
        elif move == UP:
            top.append(seq1[r - 1])
            bottom.append('-')
            r -= 1
        else:
            # LEFT is the only remaining code
            top.append('-')
            bottom.append(seq2[c - 1])
            c -= 1

    aligned_seq1 = "".join(top)[::-1]
    aligned_seq2 = "".join(bottom)[::-1]

    return aligned_seq1, aligned_seq2

# Compute the score of an alignment
def calculate_alignment_score(seq1, seq2, *, match=1, mismatch=-1, gap=-1):
    """Score two already-aligned sequences column by column.

    Args:
        seq1: First aligned sequence; ``'-'`` marks a gap.
        seq2: Second aligned sequence; ``'-'`` marks a gap.
        match: Score for a column with two equal symbols.
        mismatch: Score for a column with two different symbols.
        gap: Score for a column where either side is ``'-'``.

    Returns:
        The sum of the per-column scores (``0`` for empty input).

    Raises:
        ValueError: If the two sequences differ in length, since they then
            cannot be the two rows of one alignment.
    """
    if len(seq1) != len(seq2):
        raise ValueError(
            "Aligned sequences must have the same length: "
            f"{len(seq1)} != {len(seq2)}"
        )

    score = 0
    for symbol1, symbol2 in zip(seq1, seq2):
        # A gap on either side takes precedence over match/mismatch
        if symbol1 == '-' or symbol2 == '-':
            score += gap
        elif symbol1 == symbol2:
            score += match
        else:
            score += mismatch
    return score

if __name__ == "__main__":
    # FASTA files to upload; each upload is saved under the name listed here
    nama_file = ['african-swine-fever-virus_BA71V_genomic.fna',
                 'african-swine-fever-virus_dna-polymerase_gene.fna']

    # One upload prompt per expected file. The loop variable has its own
    # name so that the list above is not overwritten while iterating.
    for target_name in nama_file:
        uploaded_file = files.upload()
        if not uploaded_file:
            raise ValueError(f"No file was uploaded for {target_name}")
        with open(target_name, 'wb') as f:
            # Only the first uploaded entry is written to disk
            f.write(next(iter(uploaded_file.values())))

    # Read the FASTA files
    genome = read_fasta('african-swine-fever-virus_BA71V_genomic.fna')
    gene = read_fasta('african-swine-fever-virus_dna-polymerase_gene.fna')

    # Run the Smith-Waterman local alignment on the first record of each file
    output_1, output_2 = smith_waterman(genome[0], gene[0])

    # Show the alignment
    print("Hasil Local Sequence Alignment:")
    print(output_1)
    print(output_2)

    # Compute and show the alignment score
    alignment_score = calculate_alignment_score(output_1, output_2)
    print(f"Skor Alignment: {alignment_score}")


Saving african-swine-fever-virus_BA71V_genomic.fna to african-swine-fever-virus_BA71V_genomic (3).fna


Saving african-swine-fever-virus_dna-polymerase_gene.fna to african-swine-fever-virus_dna-polymerase_gene.fna
Hasil Local Sequence Alignment:
ATGATATCTATCATGGACCGTTCTGAGATTGTTGCACGGGAGAACCCGGTGATTACCCAACGAGTTACAAATCTCCTACAAACCAATGCTCCTCTACTATTCATGCCCATTGATATCCATGAAGTACGATATGGAGCCTACACACTTTTCATGTATGGTTCCCTCGAAAACGGTTACAAAGCAGAAGTAAGGATTGAAAACATCCCAGTTTTCTTTGACGTACAGATTGAGTTCAATGATACAAACCAGCTTTTTTTAAAGTCGCTACTGACGGCTGAAAATATTGCGTATGAACGGCTGGAGACGCTCACCCAGCGTCCTGTAATGGGGTACCGCGAGAAGGAAAAAGAGTTTGCACCATACATTCGAATATTTTTTAAAAGCCTGTATGAGCAACGAAAAGCCATTACTTACTTGAATAATATGGGTTACAACACCGCCGCGGACGACACAACCTGTTACTACCGAATGGTTTCCCGAGAGCTAAAACTGCCTCTTACAAGTTGGATACAGCTTCAGCACTATTCCTACGAGCCTCGCGGCTTGGTACACAGGTTTTCCGTAACCCCCGAGGATCTTGTTTCCTATCAGGATGATGGCCCCACAGACCACAGCATCGTTATGGCCTACGATATAGAGACCTATAGCCCTGTTAAGGGAACCGTTCCGGACCCAAATCAGGCAAACGACGTGGTGTTCATGATATGCATGCGCATTTTTTGGATTCACTCCACAGAGCCTCTAGCGAGCACGTGCATCACTATGGCACCCTGCAAAAAGTCCTCAGAGTGGACCACCATTCTATGCTCCTCTGAAAAAAATTTGCTGTTAAGCTTTGCTGAACAGTTCAGCCGCTGG