# CMPE 549: Bioinformatics, Fall 2023
## Assignment I -  Pairwise Sequence Global Alignment

Please describe your work clearly since this notebook is considered as your report.

In [10]:
import random
import requests
import numpy as np
import pandas as pd

from Bio.Align import substitution_matrices
from Bio import Align

### Needleman Wunsch (Global Sequence Alignment)

In [44]:
import numpy as np
import pandas as pd

def initialize_matrix(seq1, seq2, gap_penalty):
    # Initialize the matrix with zeros
    matrix = np.zeros((len(seq1) + 1, len(seq2) + 1))

    # Fill in the first row and column with incremental gap penalties
    for i in range(1, len(seq1) + 1):
        matrix[i, 0] = gap_penalty * i

    for j in range(1, len(seq2) + 1):
        matrix[0, j] = gap_penalty * j

    return matrix

def parse_scoring_matrix(dataframe):
        # Get the list of amino acids from the index of the input DataFrame
    amino_acids = dataframe.index.tolist()

    # Initialize an empty dictionary to store the scoring matrix
    scoring_matrix = {}

    # Iterate through rows of the DataFrame, where each row represents an amino acid
    for row_amino_acid, row in dataframe.iterrows():
        # Convert the scores in the row to a list of floats
        scores = row.astype(float).tolist()

        # Create a dictionary where keys are amino acids and values are their corresponding scores
        scoring_matrix[row_amino_acid] = dict(zip(amino_acids, scores))

    # Return the final scoring matrix as a dictionary
    return scoring_matrix

def needleman_wunsch_algorithm(seq1, seq2, penalty_type, penalty_params):
    if penalty_type == 'linear':
        return needleman_wunsch_algorithm_linear_penalty(seq1, seq2, penalty_params)
    elif penalty_type == 'affine':
        return needleman_wunsch_algorithm_affine_penalty(seq1, seq2, penalty_params)
    else:
        raise ValueError("Invalid penalty type. Choose 'linear' or 'affine'.")

def needleman_wunsch_algorithm_linear_penalty(seq1, seq2, penalty_params):
    # Extract gap penalty and scoring matrix from the input parameters
    gap_penalty = penalty_params['gap_penalty']
    scoring_matrix = penalty_params['scoring_matrix']

    # Initialize the scoring matrix for dynamic programming
    matrix = initialize_matrix(seq1, seq2, gap_penalty)

    # Fill in the scoring matrix using the Needleman-Wunsch algorithm
    for i in range(1, len(seq1) + 1):
        for j in range(1, len(seq2) + 1):
            # Calculate the score for a match/mismatch
            match_score = matrix[i - 1, j - 1] + scoring_matrix.get(seq1[i-1], {}).get(seq2[j-1], 0)

            # Calculate the score for an insertion (gap in seq2)
            gap_up_score = matrix[i - 1, j] + gap_penalty  # Change: Add gap_penalty instead of subtracting

            # Calculate the score for a deletion (gap in seq1)
            gap_left_score = matrix[i, j - 1] + gap_penalty  # Change: Add gap_penalty instead of subtracting

            # Update the current cell in the scoring matrix with the maximum score
            matrix[i, j] = max(match_score, gap_up_score, gap_left_score)

    # Perform traceback to obtain the alignment and additional information
    alignment_score, alignment, match_mismatch_score = traceback_linear(matrix, seq1, seq2, gap_penalty, scoring_matrix)

    # Return the alignment score, aligned sequences, and match/mismatch score
    return alignment_score, alignment, match_mismatch_score



def traceback_linear(matrix, seq1, seq2, gap_penalty, scoring_matrix):
    # Initialize indices for traceback at the bottom-right corner of the matrix
    i, j = len(seq1), len(seq2)

    # Initialize lists to store aligned sequences
    aligned_seq1, aligned_seq2 = [], []

    # Retrieve the overall match/mismatch score from the bottom-right corner of the matrix
    match_mismatch_score = matrix[-1, -1]

    # Perform traceback until reaching the top-left corner of the matrix
    while i > 0 or j > 0:
        # Check if the current cell corresponds to a match/mismatch
        if i > 0 and j > 0 and matrix[i, j] == matrix[i - 1, j - 1] + scoring_matrix.get(seq1[i-1], {}).get(seq2[j-1], 0):
            # Add the aligned characters to the sequences
            aligned_seq1.append(seq1[i-1])
            aligned_seq2.append(seq2[j-1])
            i -= 1
            j -= 1
        # Check if the current cell corresponds to an insertion (gap in seq2)
        elif i > 0 and matrix[i, j] == matrix[i - 1, j] + gap_penalty:  # Change: Add gap_penalty instead of subtracting
            # Add the character from seq1 to aligned_seq1 and a gap to aligned_seq2
            aligned_seq1.append(seq1[i-1])
            aligned_seq2.append('-')
            i -= 1
        # The current cell corresponds to a deletion (gap in seq1)
        else:
            # Add a gap to aligned_seq1 and the character from seq2 to aligned_seq2
            aligned_seq1.append('-')
            aligned_seq2.append(seq2[j-1])
            j -= 1


    # Reverse the aligned sequences
    aligned_seq1.reverse()
    aligned_seq2.reverse()

    # Return the alignment score, aligned sequences, and match/mismatch score
    return matrix[-1, -1], (''.join(aligned_seq1), ''.join(aligned_seq2)), match_mismatch_score


def initialize_matrix_affine(seq1, seq2, gap_penalty, gap_extension_penalty):
    # Get the lengths of the input sequences
    rows, cols = len(seq1) + 1, len(seq2) + 1

    # Initialize three matrices for Match, Insertion, and Deletion penalties
    M = np.zeros((rows, cols))  # Match matrix
    X = np.zeros((rows, cols))  # Insertion matrix
    Y = np.zeros((rows, cols))  # Deletion matrix

    # Fill in the first column of each matrix with initial gap penalties
    for i in range(1, rows):
        M[i, 0] = -float('inf')  # M matrix: No opening gap penalty in the first column
        X[i, 0] = gap_penalty - (i - 1) * gap_extension_penalty  # X matrix: Incremental gap penalties for opening
        Y[i, 0] = -float('inf')  # Y matrix: No opening gap penalty in the first column

    # Fill in the first row of each matrix with initial gap penalties
    for j in range(1, cols):
        M[0, j] = -float('inf')  # M matrix: No opening gap penalty in the first row
        X[0, j] = -float('inf')  # X matrix: No opening gap penalty in the first row
        Y[0, j] = gap_penalty - (j - 1) * gap_extension_penalty  # Y matrix: Incremental gap penalties for opening

    # Return the initialized matrices
    return M, X, Y

def needleman_wunsch_algorithm_affine_penalty(seq1, seq2, penalty_params):
    # Extract parameters from the input
    gap_penalty = penalty_params['gap_penalty']
    gap_extension_penalty = penalty_params['gap_extension_penalty']
    scoring_matrix = penalty_params['scoring_matrix']

    # Initialize three matrices for Match, Insertion, and Deletion penalties
    M, X, Y = initialize_matrix_affine(seq1, seq2, gap_penalty, gap_extension_penalty)

    # Fill in the matrices using the Needleman-Wunsch algorithm with affine gap penalties
    for i in range(1, len(seq1) + 1):
        for j in range(1, len(seq2) + 1):
            # Calculate the score for a match/mismatch
            match_score = M[i - 1, j - 1] + scoring_matrix[seq1[i-1]][seq2[j-1]]

            # Update the Insertion matrix
            X[i, j] = max(X[i - 1, j] + gap_extension_penalty, M[i - 1, j] + gap_penalty)

            # Update the Deletion matrix
            Y[i, j] = max(Y[i, j - 1] + gap_extension_penalty, M[i, j - 1] + gap_penalty)

            # Update the Match matrix with the maximum score among match, insertion, and deletion
            M[i, j] = max(match_score, X[i, j], Y[i, j])

    # Perform traceback to obtain the alignment
    alignment_score, alignment = traceback_affine(M, X, Y, seq1, seq2, gap_penalty, gap_extension_penalty, scoring_matrix)

    # Return the alignment score and aligned sequences
    return alignment_score, alignment


def traceback_affine(M, X, Y, seq1, seq2, gap_penalty, gap_extension_penalty, scoring_matrix):
    # Initialize indices for traceback starting from the bottom-right corner of matrices
    i, j = len(seq1), len(seq2)

    # Initialize lists to store aligned sequences
    aligned_seq1, aligned_seq2 = [], []

    # Perform traceback until reaching the top-left corner of matrices
    while i > 0 or j > 0:
        # Check if the current cell corresponds to a match/mismatch
        if i > 0 and j > 0 and M[i, j] == M[i - 1, j - 1] + scoring_matrix[seq1[i-1]][seq2[j-1]]:
            # Add the aligned characters to the sequences
            aligned_seq1.append(seq1[i-1])
            aligned_seq2.append(seq2[j-1])
            i -= 1
            j -= 1
        # Check if the current cell corresponds to an insertion (gap in seq2)
        elif i > 0 and M[i, j] == X[i, j]:
            # Add the character from seq1 to aligned_seq1 and a gap to aligned_seq2
            aligned_seq1.append(seq1[i-1])
            aligned_seq2.append('-')
            i -= 1
        # Check if the current cell corresponds to a deletion (gap in seq1)
        elif j > 0 and M[i, j] == Y[i, j]:
            # Add a gap to aligned_seq1 and the character from seq2 to aligned_seq2
            aligned_seq1.append('-')
            aligned_seq2.append(seq2[j-1])
            j -= 1

    # Return the alignment score and the aligned sequences in reverse order
    return M[-1, -1], (''.join(aligned_seq1[::-1])+ "\n"+ ''.join(aligned_seq2[::-1]))




In [28]:
print(scoring_matrix)

   A  R  N  D  C  Q  E  G  H  I  ...  P  S  T   W  Y  V  B  Z  X  *
A  4 -1 -2 -2  0 -1 -1  0 -2 -1  ... -1  1  0  -3 -2  0 -2 -1  0 -4
R -1  5  0 -2 -3  1  0 -2  0 -3  ... -2 -1 -1  -3 -2 -3 -1  0 -1 -4
N -2  0  6  1 -3  0  0  0  1 -3  ... -2  1  0  -4 -2 -3  3  0 -1 -4
D -2 -2  1  6 -3  0  2 -1 -1 -3  ... -1  0 -1  -4 -3 -3  4  1 -1 -4
C  0 -3 -3 -3  9 -3 -4 -3 -3 -1  ... -3 -1 -1  -2 -2 -1 -3 -3 -2 -4
Q -1  1  0  0 -3  5  2 -2  0 -3  ... -1  0 -1  -2 -1 -2  0  3 -1 -4
E -1  0  0  2 -4  2  5 -2  0 -3  ... -1  0 -1  -3 -2 -2  1  4 -1 -4
G  0 -2  0 -1 -3 -2 -2  6 -2 -4  ... -2  0 -2  -2 -3 -3 -1 -2 -1 -4
H -2  0  1 -1 -3  0  0 -2  8 -3  ... -2 -1 -2  -2  2 -3  0  0 -1 -4
I -1 -3 -3 -3 -1 -3 -3 -4 -3  4  ... -3 -2 -1  -3 -1  3 -3 -3 -1 -4
L -1 -2 -3 -4 -1 -2 -3 -4 -3  2  ... -3 -2 -1  -2 -1  1 -4 -3 -1 -4
K -1  2  0 -1 -3  1  1 -2 -1 -3  ... -1  0 -1  -3 -2 -2  0  1 -1 -4
M -1 -1 -2 -3 -1  0 -2 -3 -2  1  ... -2 -1 -1  -1 -1  1 -3 -1 -1 -4
F -2 -3 -3 -3 -2 -3 -3 -3 -1  0  ... -4 -2 -2   

#YOUR OUTPUT

#### Test Case 1 of linear and affine penalty respectively


In [45]:
# Example usage:
seq1 = "PRTEINS"
seq2 = "PRTWPSEIN"
gap_penalty = -1  # Replace with your desired gap penalty
penalty_params = {
    'gap_penalty': gap_penalty,
    'scoring_matrix': scoring_matrix,
}

# Perform sequence alignment using the Needleman-Wunsch algorithm with linear gap penalty
alignment_score, alignment, match_mismatch_score = needleman_wunsch_algorithm(seq1, seq2, 'linear', penalty_params)

# Display the results
print("Alignment Score:", alignment_score)
print("Alignment:", alignment)
print("Match/Mismatch Score:", match_mismatch_score)


Alignment Score: 28.0
Alignment: ('PRT---EINS', 'PRTWPSEIN-')
Match/Mismatch Score: 28.0


In [23]:
seq1 = "PRTEINS"
seq2 = "PRTWPSEIN"

penalty_params = {
    'gap_penalty': -10,
    'gap_extension_penalty': -1,
    'scoring_matrix': scoring_matrix  # Make sure scoring_matrix is defined
}

affine_alignment_score, affine_alignment = needleman_wunsch_algorithm_affine_penalty(seq1, seq2, penalty_params)
print("Alignment Score:", affine_alignment_score)
print("Alignment:")
print(affine_alignment)



Alignment Score: 10.0
Alignment:
PRT---EINS
PRTWPSEIN-


#### Test Case 2 of linear and affine penalty respectively


In [24]:
seq3 = "QMETSTTGAAYFNG"
seq4 = "QFISPTGAAKYFNT"

penalty_params = {
    'gap_penalty': -10,
    'gap_extension_penalty': -1,
    'scoring_matrix': scoring_matrix  # Make sure scoring_matrix is defined
}

affine_alignment_score2, affine_alignment2 = needleman_wunsch_algorithm_affine_penalty(seq3, seq4, penalty_params)
print("Alignment Score:", affine_alignment_score2)
print("Alignment:")
print(affine_alignment2)



Alignment Score: 25.0
Alignment:
QMETSTTGAAYFNG
QFISPTGAAKYFNT


In [21]:
sample1_sequence1 = "QMETSTTGAAYFNG"
sample1_sequence2 = "QFISPTGAAKYFNT"
gap_penalty = -1

# Perform sequence alignment using the Needleman-Wunsch algorithm with linear gap penalty
alignment_score, alignment, match_mismatch_score = needleman_wunsch_algorithm(sample1_sequence1, sample1_sequence2, 'linear', penalty_params)

# Display the results
print("Alignment Score:", alignment_score)
print("Alignment:", alignment)
print("Match/Mismatch Score:", match_mismatch_score)



Alignment Score: 41.0
Alignment: ('QMETSTTGAA-YFNG', 'QF-ISPTGAAKYFNT')
Match/Mismatch Score: 41.0


#### Test Case 3 of linear and affine penalty respectively


In [20]:
# Define the new sequences and gap penalty
seq3 = "YHFDVPDCWAHRYWVENPQAIAQMEQICFNWFPSMMMKQPHVFKVDHHMSCRWLPIRGKKCSSCCTRMRVRTVWE"
seq4 = "YHEDVAHEDAIAQMVNTFGFVWQICLNQFPSMMMKIYWIAVLSAHVADRKTWSKHMSCRWLPIISATCARMRVRTVWE"
gap_penalty = -1

# Perform sequence alignment using the Needleman-Wunsch algorithm with linear gap penalty
alignment_score, alignment, match_mismatch_score = needleman_wunsch_algorithm(seq3, seq4, 'linear', penalty_params)

# Display the results
print("Alignment Score:", alignment_score)
print("Alignment:", alignment)
print("Match/Mismatch Score:", match_mismatch_score)


Alignment Score: 222.0
Alignment: ('YH-FDVPDCWAHRYWVENPQAIAQM-E------QICFNWFPSMMMK-------QPHV---FKV-DHHMSCRWLPIRGKKCSSCCTRMRVRTVWE', 'YHE-DV----AH----ED--AIAQMVNTFGFVWQICLNQFPSMMMKIYWIAVLSAHVADR-KTWSKHMSCRWLPI----ISATCARMRVRTVWE')
Match/Mismatch Score: 222.0


In [25]:
# Define the new sequences and gap penalty
seq5 = "YHFDVPDCWAHRYWVENPQAIAQMEQICFNWFPSMMMKQPHVFKVDHHMSCRWLPIRGKKCSSCCTRMRVRTVWE"
seq6 = "YHEDVAHEDAIAQMVNTFGFVWQICLNQFPSMMMKIYWIAVLSAHVADRKTWSKHMSCRWLPIISATCARMRVRTVWE"
penalty_params = {
    'gap_penalty': -10,
    'gap_extension_penalty': -1,
    'scoring_matrix': scoring_matrix  # Make sure scoring_matrix is defined
}
affine_alignment_score3, affine_alignment3 = needleman_wunsch_algorithm_affine_penalty(seq5, seq6, penalty_params)
print("Alignment Score:", affine_alignment_score3)
print("Alignment:")
print(affine_alignment3)


Alignment Score: 150.0
Alignment:
YHFDVPDCWAHRYWVENPQAIAQM------E-QICFNWFPSMMMK-------QPHVF---KVDHHMSCRWLPIRGKKCSSCCTRMRVRTVWE
YHEDV----AH-----E-DAIAQMVNTFGFVWQICLNQFPSMMMKIYWIAVLSAHVADRKTWSKHMSCRWLPI----ISATCARMRVRTVWE


###Test case 4 of affine penalty

In [26]:
# Define the new sequences and gap penalty
seq7 = "SKGSLWLWMCKDKLGENLVVFNNVLGAGLFDEPHYPCVEWYDFVR"
seq8 = "SHKGSLNWLGWNCKDKLGENLVVFPNYLFAAHFEPHYPCIVEYDFVD"
penalty_params = {
    'gap_penalty': -10,
    'gap_extension_penalty': -1,
    'scoring_matrix': scoring_matrix  # Make sure scoring_matrix is defined
}
affine_alignment_score4, affine_alignment4 = needleman_wunsch_algorithm_affine_penalty(seq7, seq8, penalty_params)
print("Alignment Score:", affine_alignment_score4)
print("Alignment:")
print(affine_alignment4)


Alignment Score: 144.0
Alignment:
S-KGSL-WL-WMCKDKLGENLVVFNNVLGAGLFDEPHYPCVEWYDFVR
SHKGSLNWLGWNCKDKLGENLVVFPNYLFAAHF-EPHYPCIVEYDFVD


###Test case 5 of affine penalty

In [27]:
seq9 = "DAHGDMWNYFRANETVAWEWFPHHAQFLHCELPKDK"
seq10 = "DAHGGCDWNYFANELTVMWEHFNPHAQFLHCELEYMDL"

penalty_params = {
    'gap_penalty': -10,
    'gap_extension_penalty': -1,
    'scoring_matrix': scoring_matrix  # Make sure scoring_matrix is defined
}
affine_alignment_score5, affine_alignment5 = needleman_wunsch_algorithm_affine_penalty(seq9, seq10, penalty_params)
print("Alignment Score:", affine_alignment_score5)
print("Alignment:")
print(affine_alignment5)


Alignment Score: 106.0
Alignment:
DAHGDM-WNYFRANE-TVAWEWFPHHAQFLHCELP-KDK
DAHGGCDWNYF-ANELTVMWEHFNPHAQFLHCELEYMDL


### BLOSUM62 Scoring Matrix

Run the cell below to obtain the scoring matrix.

In [13]:
resp = requests.get(
    "https://raw.githubusercontent.com/rdpstaff/AlignmentTools/master/src/data/blosum62.txt"
)
_scoring_matrix = np.stack(
    [
        np.fromstring(line[2:].strip(), sep=" ")
        for line in resp.text.split("\n")
        if line and (line[0] not in ["#", " "])
    ]
)
_amino_acids = resp.text.split("\n")[6].strip().split("  ")


scoring_matrix = pd.DataFrame(
    _scoring_matrix, index=_amino_acids, columns=_amino_acids, dtype=int
)

### Generic Sequence Alignment Function

In [16]:
def align_sequences(seq1, seq2, penalty, penalty_params):
    scoring_matrix = penalty_params.get("scoring_matrix", None)

    if penalty == "linear":
        return needleman_wunsch_algorithm_linear_penalty(seq1, seq2, penalty_params, scoring_matrix)
    elif penalty == "affine":
        return needleman_wunsch_algorithm_affine_penalty(seq1, seq2, penalty_params, scoring_matrix)
    else:
        raise NotImplementedError


### Inputs

You can use the functions below to generate sample sequences to test your algorithms.

In [24]:
aminoacids = [
    "A",
    "R",
    "N",
    "D",
    "C",
    "Q",
    "E",
    "G",
    "H",
    "I",
    "L",
    "K",
    "M",
    "F",
    "P",
    "S",
    "T",
    "W",
    "Y",
    "V",
]


def generate_sequence(length=50):
    return "".join([random.choice(aminoacids) for i in range(length)])


def mutate_sequence(seq, n_mutations=10):
    seq = list(seq)
    pos = {
        random.randint(1, len(seq)): random.choice(["substitute", "delete"])
        for i in range(n_mutations)
    }
    mutated_sequence = ""
    for ix, aminoacid in enumerate(seq):
        if ix in pos:
            if pos[ix] == "substitute":
                mutated_sequence += random.choice(aminoacids)
        else:
            mutated_sequence += aminoacid
    return mutated_sequence


sample_sequence = generate_sequence()
sequence1 = mutate_sequence(sample_sequence)
sequence2 = mutate_sequence(sample_sequence)
sequence1, sequence2

('HWTYTKAPSSARHMEDWDDEYQAMGAYHRWTMFEQFTQMSGMCV',
 'HWTYTGKAPSSEYLRTHMEDPDDEYQAMGAYHWNFEYAQAYQMCV')

In [30]:
def align_sequences_biopython(seq1, seq2, penalty_params):
    aligner = Align.PairwiseAligner()
    aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
    if penalty_params["penalty_type"] == "linear":
        aligner.open_gap_score = penalty_params["gap_penalty"]  # -1
        aligner.extend_gap_score = penalty_params["gap_penalty"]  # -1
    elif penalty_params["penalty_type"] == "affine":
        aligner.open_gap_score = penalty_params["gap_penalty"]  # -10
        aligner.extend_gap_score = penalty_params["gap_extension_penalty"]  # -1

    aligner.mode = "global"
    for alignment in aligner.align(seq1, seq2):
        print("Score:", alignment.score)
        print(alignment)

#### Sample 1

In [33]:
sample2_sequence1 = "PRTEINS"
sample2_sequence2 = "PRTWPSEIN"

In [34]:
align_sequences_biopython(
    sample2_sequence1, sample2_sequence2, {"penalty_type": "linear", "gap_penalty": -1}
)

Score: 28.0
target            0 PRT---EINS  7
                  0 |||---|||- 10
query             0 PRTWPSEIN-  9



In [35]:
align_sequences_biopython(
    sample2_sequence1,
    sample2_sequence2,
    {"penalty_type": "affine", "gap_penalty": -10, "gap_extension_penalty": -1},
)

Score: 10.0
target            0 PRT---EINS  7
                  0 |||---|||- 10
query             0 PRTWPSEIN-  9



#### Sample 2

In [29]:
sample1_sequence1 = "QMETSTTGAAYFNG"
sample1_sequence2 = "QFISPTGAAKYFNT"




In [31]:
align_sequences_biopython(
    sample1_sequence1, sample1_sequence2, {"penalty_type": "linear", "gap_penalty": -1}
)

Score: 41.0
target            0 Q-METSTTGAA-YFNG- 14
                  0 |-.--|.||||-|||-- 17
query             0 QFI--SPTGAAKYFN-T 14

Score: 41.0
target            0 QMETSTTGAA-YFNG- 14
                  0 |.-.|.||||-|||-- 16
query             0 QF-ISPTGAAKYFN-T 14

Score: 41.0
target            0 Q-METSTTGAA-YFN-G 14
                  0 |-.--|.||||-|||-- 17
query             0 QFI--SPTGAAKYFNT- 14

Score: 41.0
target            0 QMETSTTGAA-YFN-G 14
                  0 |.-.|.||||-|||-- 16
query             0 QF-ISPTGAAKYFNT- 14

Score: 41.0
target            0 Q-METSTTGAA-YFNG 14
                  0 |-.--|.||||-|||. 16
query             0 QFI--SPTGAAKYFNT 14

Score: 41.0
target            0 QMETSTTGAA-YFNG 14
                  0 |.-.|.||||-|||. 15
query             0 QF-ISPTGAAKYFNT 14



In [32]:
align_sequences_biopython(
    sample1_sequence1,
    sample1_sequence2,
    {"penalty_type": "affine", "gap_penalty": -10, "gap_extension_penalty": -1},
)

Score: 25.0
target            0 QMETSTTGAAYFNG 14
                  0 |....|..|.|||. 14
query             0 QFISPTGAAKYFNT 14



#### Sample 3

In [46]:
sample3_sequence1 = (
    "YHFDVPDCWAHRYWVENPQAIAQMEQICFNWFPSMMMKQPHVFKVDHHMSCRWLPIRGKKCSSCCTRMRVRTVWE"
)
sample3_sequence2 = (
    "YHEDVAHEDAIAQMVNTFGFVWQICLNQFPSMMMKIYWIAVLSAHVADRKTWSKHMSCRWLPIISATCARMRVRTVWE"
)

In [47]:
align_sequences_biopython(
    sample3_sequence1, sample3_sequence2, {"penalty_type": "linear", "gap_penalty": -1}
)

Score: 222.0
target            0 YHF-DVPDCWAHRYWVENPQAIAQM-E------QICFNW-FPSMMMK-------QPHVFK
                  0 ||--||----||----|.--|||||-.------|||.|--|||||||-------..||--
query             0 YH-EDV----AH----ED--AIAQMVNTFGFVWQICLN-QFPSMMMKIYWIAVLSAHV--

target           44 VDH-----HMSCRWLPIRGKKCSSCCTRMRVRTVWE 75
                 60 .|.-----|||||||||----.|..|.||||||||| 96
query            46 ADRKTWSKHMSCRWLPI----ISATCARMRVRTVWE 78

Score: 222.0
target            0 YH-FDVPDCWAHRYWVENPQAIAQM-E------QICFNW-FPSMMMK-------QPHVFK
                  0 ||--||----||----|.--|||||-.------|||.|--|||||||-------..||--
query             0 YHE-DV----AH----ED--AIAQMVNTFGFVWQICLN-QFPSMMMKIYWIAVLSAHV--

target           44 VDH-----HMSCRWLPIRGKKCSSCCTRMRVRTVWE 75
                 60 .|.-----|||||||||----.|..|.||||||||| 96
query            46 ADRKTWSKHMSCRWLPI----ISATCARMRVRTVWE 78

Score: 222.0
target            0 YHF-DVPDCWAHRYWVENPQAIAQM-E------QICFN-WFPSMMMK-------QPHVFK
                  0 ||--||----

In [38]:
align_sequences_biopython(
    sample3_sequence1,
    sample3_sequence2,
    {"penalty_type": "affine", "gap_penalty": -10, "gap_extension_penalty": -1},
)


Score: 150.0
target            0 YHFDVPDCWAHRYWVENPQAIAQME-------QICFNWFPSMMMK-------QPHVF---
                  0 ||.||----||.------.|||||.-------|||.|.|||||||-------..||.---
query             0 YHEDV----AHE------DAIAQMVNTFGFVWQICLNQFPSMMMKIYWIAVLSAHVADRK

target           43 KVDHHMSCRWLPIRGKKCSSCCTRMRVRTVWE 75
                 60 ....|||||||||----.|..|.||||||||| 92
query            50 TWSKHMSCRWLPI----ISATCARMRVRTVWE 78

Score: 150.0
target            0 YHFDVPDCWAHRYWVENPQAIAQME-------QICFNWFPSMMMK-------QPHVFK--
                  0 ||.||----||.------.|||||.-------|||.|.|||||||-------..||..--
query             0 YHEDV----AHE------DAIAQMVNTFGFVWQICLNQFPSMMMKIYWIAVLSAHVADRK

target           44 -VDHHMSCRWLPIRGKKCSSCCTRMRVRTVWE 75
                 60 -...|||||||||----.|..|.||||||||| 92
query            50 TWSKHMSCRWLPI----ISATCARMRVRTVWE 78

Score: 150.0
target            0 YHFDVPDCWAHRYWVENPQAIAQME-------QICFNWFPSMMMK-------QPHVF--K
                  0 ||.||----||.------.|||||.-------||

#### Sample 4

In [36]:
seq7 = "SKGSLWLWMCKDKLGENLVVFNNVLGAGLFDEPHYPCVEWYDFVR"
seq8 = "SHKGSLNWLGWNCKDKLGENLVVFPNYLFAAHFEPHYPCIVEYDFVD"


align_sequences_biopython(
    seq7,
    seq8,
    {"penalty_type": "affine", "gap_penalty": -10, "gap_extension_penalty": -1},
)



Score: 144.0
target            0 S-KGSL-WL-WMCKDKLGENLVVFNNVLGAGLFDEPHYPCVEWYDFVR 45
                  0 |-||||-||-|.||||||||||||.|.|.|..|-||||||...||||. 48
query             0 SHKGSLNWLGWNCKDKLGENLVVFPNYLFAAHF-EPHYPCIVEYDFVD 47



####Sample 5






In [39]:
seq9 = "DAHGDMWNYFRANETVAWEWFPHHAQFLHCELPKDK"
seq10 = "DAHGGCDWNYFANELTVMWEHFNPHAQFLHCELEYMDL"

align_sequences_biopython(
    seq9,
    seq10,
    {"penalty_type": "affine", "gap_penalty": -10, "gap_extension_penalty": -1},
)


Score: 106.0
target            0 DAHGDM-WNYFRANE-TVAWEWFPHHAQFLHCELP-KDK 36
                  0 ||||..-||||-|||-||.||.|..|||||||||.-.|. 39
query             0 DAHGGCDWNYF-ANELTVMWEHFNPHAQFLHCELEYMDL 38

