In [1]:
import numpy as np
from typing import Tuple

class NeedlemanWunsch:
    """ Class for NeedlemanWunsch Alignment

    Parameters:
        sub_matrix_file: str
            Path/filename of substitution matrix
        gap_open: float
            Gap opening penalty
        gap_extend: float
            Gap extension penalty

    Attributes:
        seqA_align: str
            seqA alignment
        seqB_align: str
            seqB alignment
        alignment_score: float
            Score of alignment from algorithm
        gap_open: float
            Gap opening penalty
        gap_extend: float
            Gap extension penalty
    """
    def __init__(self, sub_matrix_file: str, gap_open: float, gap_extend: float):
        # Init alignment and gap matrices
        self._align_matrix = None
        self._gapA_matrix = None
        self._gapB_matrix = None

        # Init matrices for backtrace procedure
        self._back = None
        self._back_A = None
        self._back_B = None

        # Init alignment_score
        self.alignment_score = 0

        # Init empty alignment attributes
        self.seqA_align = ""
        self.seqB_align = ""

        # Init empty sequences
        self._seqA = ""
        self._seqB = ""

        # Setting gap open and gap extension penalties
        self.gap_open = gap_open
        assert gap_open < 0, "Gap opening penalty must be negative."
        self.gap_extend = gap_extend
        assert gap_extend < 0, "Gap extension penalty must be negative."

        # Generating substitution matrix
        self.sub_dict = self._read_sub_matrix(sub_matrix_file) # substitution dictionary

    def _read_sub_matrix(self, sub_matrix_file):
        """
        DO NOT MODIFY THIS METHOD! IT IS ALREADY COMPLETE!

        This function reads in a scoring matrix from any matrix like file.
        Where there is a line of the residues followed by substitution matrix.
        This file also saves the alphabet list attribute.

        Parameters:
            sub_matrix_file: str
                Name (and associated path if not in current working directory)
                of the matrix file that contains the scoring matrix.

        Returns:
            dict_sub: dict
                Substitution matrix dictionary with tuple of the two residues as
                the key and score as value e.g. {('A', 'A'): 4} or {('A', 'D'): -8}
        """
        with open(sub_matrix_file, 'r') as f:
            dict_sub = {}  # Dictionary for storing scores from sub matrix
            residue_list = []  # For storing residue list
            start = False  # trigger for reading in score values
            res_2 = 0  # used for generating substitution matrix
            # reading file line by line
            for line_num, line in enumerate(f):
                # Reading in residue list
                if '#' not in line.strip() and start is False:
                    residue_list = [k for k in line.strip().upper().split(' ') if k != '']
                    start = True
                # Generating substitution scoring dictionary
                elif start is True and res_2 < len(residue_list):
                    line = [k for k in line.strip().split(' ') if k != '']
                    # reading in line by line to create substitution dictionary
                    assert len(residue_list) == len(line), "Score line should be same length as residue list"
                    for res_1 in range(len(line)):
                        dict_sub[(residue_list[res_1], residue_list[res_2])] = float(line[res_1])
                    res_2 += 1
                elif start is True and res_2 == len(residue_list):
                    break
        return dict_sub

    def align(self, seqA: str, seqB: str) -> Tuple[float, str, str]:
        """
        TODO

        This function performs global sequence alignment of two strings
        using the Needleman-Wunsch Algorithm

        Parameters:
        	seqA: str
         		the first string to be aligned
         	seqB: str
         		the second string to be aligned with seqA

        Returns:
         	(alignment score, seqA alignment, seqB alignment) : Tuple[float, str, str]
         		the score and corresponding strings for the alignment of seqA and seqB
        """
        # Resetting alignment in case method is called more than once
        self.seqA_align = ""
        self.seqB_align = ""

        # Resetting alignment score in case method is called more than once
        self.alignment_score = 0

        # Initializing sequences for use in backtrace method
        self._seqA = seqA
        self._seqB = seqB

        # TODO: Initialize matrix private attributes for use in alignment

        gap_penalty = self.gap_open

        sub_dict = self.sub_dict
        # create matrices for alignment scores, gaps, and backtracing

        # Determine length of each sequence and store in n or m
        m = len(seqA)
        n = len(seqB)

        # Generate matrix of zeros to store scores
        self._align_matrix = np.zeros((n + 1, m + 1))


        # TODO: Implement global alignment here

        for i in range(0, n + 1):
            self._align_matrix[i][0] = gap_penalty * i

        for j in range(0, m + 1):
            self._align_matrix[0][j] = gap_penalty * j

        for i in range(0, n + 1):
            for j in range(0, m + 1):
                match = self._align_matrix[i][j] + sub_dict[(seqA[i - 1], seqB[j - 1])]
                delete = self._align_matrix[i][j + 1] + gap_penalty
                insert = self._align_matrix[i + 1][j] + gap_penalty

                self._align_matrix[i][j] = max(match, delete, insert)

        return self._backtrace()

    def _backtrace(self) -> Tuple[float, str, str]:
        """
        TODO

        This function traces back through the back matrix created with the
        align function in order to return the final alignment score and strings.

        Parameters:
        	None

        Returns:
         	(alignment score, seqA alignment, seqB alignment) : Tuple[float, str, str]
         		the score and corresponding strings for the alignment of seqA and seqB
        """
        seqA = self._seqA
        seqB = self._seqB
        sub_dict = self.sub_dict
        alignA = self.seqA_align
        alignB = self.seqB_align

        gap_penalty = self.gap_open

        i = len(seqA)
        j = len(seqB)

        while i > 0 and j > 0:  # end touching the top or the left edge
            score_current = self._align_matrix[i][j]
            score_diagonal = self._align_matrix[i - 1][j - 1]
            score_up = self._align_matrix[i][j - 1]
            score_left = self._align_matrix[i - 1][j]

            # Check to figure out which cell the current score was calculated from,
            # then update i and j to correspond to that cell.
            if score_current == score_diagonal + sub_dict[(seqA[j - 1], seqB[i - 1])]:
                alignA += seqA[j - 1]
                alignB += seqB[i - 1]
                i -= 1
                j -= 1
            elif score_current == score_up + gap_penalty:
                alignA += seqA[j - 1]
                alignB += '-'
                j -= 1
            elif score_current == score_left + gap_penalty:
                alignA += '-'
                alignB += seqB[i - 1]
                i -= 1

        # Finish tracing up to the top left cell
        while i > 0:
            alignA += seqA[i - 1]
            alignB += '-'
            j -= 1
        while j > 0:
            alignA += '-'
            alignB += seqB[j - 1]
            i -= 1

        # Traversed the score matrix from the bottom right so the two sequences will be reversed.
        # Reverse the sequences.
        alignA = alignA[::-1]
        alignB = alignB[::-1]

        # Update seq alignment values
        self.seqA_align = alignA
        self.seqB_align = alignB

        self.alignment_score = self._align_matrix.sum()

        return (self.alignment_score, self.seqA_align, self.seqB_align)


def read_fasta(fasta_file: str) -> Tuple[str, str]:
    """
    DO NOT MODIFY THIS FUNCTION! IT IS ALREADY COMPLETE!

    This function reads in a FASTA file and returns the associated
    string of characters (residues or nucleotides) and the header.
    This function assumes a single protein or nucleotide sequence
    per fasta file and will only read in the first sequence in the
    file if multiple are provided.

    Parameters:
        fasta_file: str
            name (and associated path if not in current working directory)
            of the Fasta file.

    Returns:
        seq: str
            String of characters from FASTA file
        header: str
            Fasta header
    """
    assert fasta_file.endswith(".fa"), "Fasta file must be a fasta file with the suffix .fa"
    with open(fasta_file) as f:
        seq = ""  # initializing sequence
        first_header = True
        for line in f:
            is_header = line.strip().startswith(">")
            # Reading in the first header
            if is_header and first_header:
                header = line.strip()  # reading in fasta header
                first_header = False
            # Reading in the sequence line by line
            elif not is_header:
                seq += line.strip().upper()  # generating full sequence
            # Breaking if more than one header is provided in the fasta file
            elif is_header and not first_header:
                break
    return seq, header


In [20]:
alg = NeedlemanWunsch("substitution_matrices/BLOSUM62.mat", -10.0, -1.0)

In [21]:
seq1, _ = read_fasta("./data/Homo_sapiens_BRD2.fa")
seq2, _ = read_fasta("./data/Mus_musculus_BRD2.fa")

In [22]:
n = len(seq1)
m = len(seq2)
gap_open_penalty = -10
gap_ext_penalty = -1

# Initialize empty matrices
score_mat = np.zeros((m + 1, n + 1))
align_mat = np.zeros((m + 1, n + 1))
gapA_mat = np.zeros((m + 1, n + 1))
gapB_mat = np.zeros((m + 1, n + 1))
index_mat = np.zeros((m + 1, n + 1))

# Initialize matrix values

for i in range(0, m + 1):
    score_mat[i][0] = gap_open_penalty * i
    align_mat[i][0] = gap_open_penalty * i

for i in range(1, m + 1):
    gapA_mat[i][0] = np.NINF
    gapB_mat[i][0] = -(gap_open_penalty + gap_ext_penalty * i)
    
for j in range(1, n + 1):
    gapA_mat[0][j] = -(gap_open_penalty + gap_ext_penalty * j)
    gapB_mat[0][j] = np.NINF

for j in range(0, n + 1):
    score_mat[0][j] = gap_open_penalty * j
    align_mat[0][j] = gap_open_penalty * j

"""
for i in range(1, m + 1):
    for j in range(1, n + 1):
        # Calculate the score by checking the top, left, and diagonal cells
        match = align_mat[i - 1][j - 1] + alg.sub_dict[(seq1[j-1], seq2[i-1])]
        delete = align_mat[i - 1][j] + gap_open_penalty
        insert = align_mat[i][j - 1] + gap_open_penalty
        # Record the maximum score from the three possible scores calculated above
        align_mat[i][j] = max(match, delete, insert)
"""
    
for i in range(1, m + 1):
    for j in range(1, n + 1):
        score_mat[i][j] = (align_mat[i - 1][j - 1] + alg.sub_dict[(seq1[j-1], seq2[i-1])]) + max(score_mat[i-1][j-1], gapA_mat[i-1][j-1], gapB_mat[i-1][j-1])
        gapA_mat[i][j] = max((score_mat[i][j - 1] + (gap_open_penalty + gap_ext_penalty)), (gapA_mat[i][j-1] + gap_open_penalty), ((gapB_mat[i][j-1]) + (gap_open_penalty+gap_ext_penalty)))
        gapB_mat[i][j] = max((score_mat[i - 1][j] + (gap_open_penalty + gap_ext_penalty)), (gapA_mat[i - 1][j] + (gap_open_penalty + gap_ext_penalty)), (gapB_mat[i - 1][j] + gap_open_penalty))
print(score_mat)
print(np.max(score_mat))

[[ 0.000e+00 -1.000e+01 -2.000e+01 ... -7.990e+03 -8.000e+03 -8.010e+03]
 [-1.000e+01  5.000e+00  3.000e+00 ... -7.175e+03 -7.182e+03 -7.193e+03]
 [-2.000e+01  0.000e+00  4.000e+00 ...  7.950e+02  7.990e+02  7.990e+02]
 ...
 [-7.500e+03 -6.734e+03  7.440e+02 ...  3.771e+03  3.754e+03  3.743e+03]
 [-7.510e+03 -6.741e+03  7.470e+02 ...  3.754e+03  3.775e+03  3.760e+03]
 [-7.520e+03 -6.752e+03  7.460e+02 ...  3.743e+03  3.760e+03  3.781e+03]]
3781.0


In [23]:
print(gapA_mat)

[[ 0.000e+00  1.100e+01  1.200e+01 ...  8.090e+02  8.100e+02  8.110e+02]
 [      -inf  0.000e+00 -6.000e+00 ...  7.860e+02  7.870e+02  7.880e+02]
 [      -inf  1.000e+00 -9.000e+00 ...  7.860e+02  7.840e+02  7.880e+02]
 ...
 [      -inf  7.490e+02  7.390e+02 ...  3.743e+03  3.760e+03  3.750e+03]
 [      -inf  7.500e+02  7.400e+02 ...  3.733e+03  3.749e+03  3.764e+03]
 [      -inf  7.510e+02  7.410e+02 ...  3.723e+03  3.739e+03  3.753e+03]]


In [24]:
print(gapB_mat)

[[ 0.000e+00       -inf       -inf ...       -inf       -inf       -inf]
 [ 1.100e+01  0.000e+00  1.000e+00 ...  7.980e+02  7.990e+02  8.000e+02]
 [ 1.200e+01 -6.000e+00 -8.000e+00 ...  7.880e+02  7.890e+02  7.900e+02]
 ...
 [ 7.600e+02  7.370e+02  7.340e+02 ...  3.743e+03  3.733e+03  3.723e+03]
 [ 7.610e+02  7.380e+02  7.330e+02 ...  3.760e+03  3.749e+03  3.739e+03]
 [ 7.620e+02  7.390e+02  7.360e+02 ...  3.750e+03  3.764e+03  3.753e+03]]


In [27]:
for i in range(1, m + 1):
    for j in range(1, n + 1):
        print(np.where(score_mat[i][j] == gapA_mat[i, j - 1]))
        

(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], d

(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([0]),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),

(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([0]),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], d

(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], d

(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([0]),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([0]),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([0]),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([0]),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([

(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([0]),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),)
(array([], dtype=int64),

In [19]:
i = len(seq2)
j = len(seq1)

alignA = ""
alignB = ""

while i > 0 and j > 0:  # end touching the top or the left edge
    score_current = score_mat[i][j]
    score_diagonal = score_mat[i - 1][j - 1]
    score_up = score_mat[i][j - 1]
    score_left = score_mat[i - 1][j]

    # Check to figure out which cell the current score was calculated from,
    # then update i and j to correspond to that cell.
    if score_current == (score_diagonal + alg.sub_dict[(seq1[j-1], seq2[i-1])]):
        alignA += seq1[j - 1]
        alignB += seq2[i - 1]
        i -= 1
        j -= 1
    elif score_current == gapA_mat[i, j - 1]:
        alignA += seq1[j - 1]
        alignB += '-'
        j -= 1
    elif score_current == gapB_mat[i - 1, j]:
        alignA += '-'
        alignB += seq2[i - 1]
        i -= 1

# Finish tracing up to the top left cell
while j > 0:
    alignA += seq1[j - 1]
    alignB += '-'
    j -= 1
while i > 0:
    alignA += '-'
    alignB += seq2[i - 1]
    i -= 1

# Traversed the score matrix from the bottom right so the two sequences will be reversed.
# Reverse the sequences.
alignA = alignA[::-1]
alignB = alignB[::-1]
print

# Update seq alignment values
print(alignA)
print(alignB)

KeyboardInterrupt: 

In [84]:
i = m
j = n

alignA = ""
alignB = ""

while i > 0 and j > 0:  # end touching the top or the left edge
    score_current = score_mat[i][j]
    score_diagonal = score_mat[i - 1][j - 1]
    score_up = score_mat[i][j - 1]
    score_left = score_mat[i - 1][j]

    # Check to figure out which cell the current score was calculated from,
    # then update i and j to correspond to that cell.
    if score_current == match:
        alignA += seq1[j - 1]
        alignB += seq2[i - 1]
        i -= 1
        j -= 1
    elif score_current == gapA_mat[i, j]:
        alignA += seq1[j - 1]
        alignB += '-'
        j -= 1
    elif score_current == gapB_mat[i, j]:
        alignA += '-'
        alignB += seq2[i - 1]
        i -= 1

# Finish tracing up to the top left cell
while i > 0:
    alignA += seq1[i - 1]
    alignB += '-'
    j -= 1
while j > 0:
    alignA += '-'
    alignB += seq2[j - 1]
    i -= 1

# Traversed the score matrix from the bottom right so the two sequences will be reversed.
# Reverse the sequences.
alignA = alignA[::-1]
alignB = alignB[::-1]

# Update seq alignment values
print(alignA)
print(alignB)

KeyboardInterrupt: 

In [51]:
i = m
j = n

traceback_matrix = np.zeros((m+1, n+1))

for i in range(1, m + 1):
    for j in range(1, n + 1):
        traceback_matrix[i][j] = np.argmax(score_mat[i][j], gapA_mat[i][j], gapB_mat[i][j])
        #score_mat[i][j] = (align_mat[i - 1][j - 1] + alg.sub_dict[(seq3[j-1], seq4[i-1])]) + max(score_mat[i-1][j-1], gapA_mat[i-1][j-1], gapB_mat[i-1][j-1])
        #gapA_mat[i][j] = max((score_mat[i][j - 1] + (gap_open_penalty + gap_ext_penalty)), (gapA_mat[i][j-1] + gap_open_penalty), ((gapB_mat[i][j-1]) + (gap_open_penalty+gap_ext_penalty)))
        #gapB_mat[i][j] = max((score_mat[i - 1][j] + (gap_open_penalty + gap_ext_penalty)), (gapA_mat[i - 1][j] + (gap_open_penalty + gap_ext_penalty)), (gapB_mat[i - 1][j] + gap_open_penalty))

TypeError: 'numpy.float64' object cannot be interpreted as an integer

In [56]:
for i in range(1, m + 1):
    for j in range(1, n + 1):
        traceback_matrix[i][j] = np.argmax(score_mat[i][j], gapA_mat[i][j])

TypeError: 'numpy.float64' object cannot be interpreted as an integer

In [55]:
print(traceback_matrix)

[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]


In [9]:
self._back = np.ones((len(seqA) + 1, len(seqB) + 1) * -np.inf)

NameError: name 'seqA' is not defined

In [None]:
for i in range(0, len(seqA) + 1):
    self._gapA_matrix[i][0] = self.gap_open + (self.gap_extend * i)

for j in range(0, len(seqB) + 1):
    self._gapA_matrix[0][j] = self.gap_open + (self.gap_extend * j)

In [None]:
for i:
    for j:
        baseA = self._seqA[i-1]
        m_score = self.sub_dict([baseA, baseB])
        m = align_mat[i - 1][j - 1]
        insert = gapA_mat[i - 1][j - 1]
        delete = gapB_mat[i - 1][j - 1]
        m = max(m, insert, delete)

In [None]:
gapA_1 = self.gap_open + self.gap_extend + self._align_matrix[i][j-1]
gapA_2 = self.gap_open +

In [None]:
# Fill in back matrix for backtracing
best_score = max(self._align_matrix[i][j], self._gapA_matrix[i][j], self._gapB_matrix[i][j])

In [None]:
# A match is represented in the back matrix as a 0
if best_score == self._align_matrix[i][j]:
    self._back[i][j] = 0
# A gap in seqA is represented in the back matrix as a -1
elif best_score == self._gapA_matrix[i][j]:
    self._back[i][j] = -1
# A gap in seqB is represented in the back matrix as a 1
else:
    self._back[i][j] = 1

In [None]:
while i > 0 and j >  0:
    back_step = self._back[i][j]
    if back_step = 0:
        self.seqA_align = self._seqA[i-1] + self.seqA_align
        repeat with seqB
        i -= 1
        j -= 1
    elif back_step == -1:
        self.seqA_align = "-" + self.seqA_align
        self.seqB_align = self._seqB[j -1] + self.seqB_align
        j-=1
    elif back_step == 1:
        self.seqA_align = self._seqA[i - 1] + self.seqA_align
        self.seqB_align = "-" + self.seqB_align
        i -= 1

if i == 0 and j > 0:
    while j > 0:
        self.seqA_align = "-" + self.seqA_align
        self.seqB_align = self._seqB[j-1] + self.seqB_align
        j -= 1