In [1]:
# Bind Biopython's BLOSUM62 table (MatrixInfo.blosum62) to a short name for the cells below.
from Bio.SubsMat import MatrixInfo
blosum = MatrixInfo.blosum62

In [2]:
# Inspect the table: it is a dict keyed by (residue, residue) tuples with integer scores.
blosum

{('W', 'F'): 1,
 ('L', 'R'): -2,
 ('S', 'P'): -1,
 ('V', 'T'): 0,
 ('Q', 'Q'): 5,
 ('N', 'A'): -2,
 ('Z', 'Y'): -2,
 ('W', 'R'): -3,
 ('Q', 'A'): -1,
 ('S', 'D'): 0,
 ('H', 'H'): 8,
 ('S', 'H'): -1,
 ('H', 'D'): -1,
 ('L', 'N'): -3,
 ('W', 'A'): -3,
 ('Y', 'M'): -1,
 ('G', 'R'): -2,
 ('Y', 'I'): -1,
 ('Y', 'E'): -2,
 ('B', 'Y'): -3,
 ('Y', 'A'): -2,
 ('V', 'D'): -3,
 ('B', 'S'): 0,
 ('Y', 'Y'): 7,
 ('G', 'N'): 0,
 ('E', 'C'): -4,
 ('Y', 'Q'): -1,
 ('Z', 'Z'): 4,
 ('V', 'A'): 0,
 ('C', 'C'): 9,
 ('M', 'R'): -1,
 ('V', 'E'): -2,
 ('T', 'N'): 0,
 ('P', 'P'): 7,
 ('V', 'I'): 3,
 ('V', 'S'): -2,
 ('Z', 'P'): -1,
 ('V', 'M'): 1,
 ('T', 'F'): -2,
 ('V', 'Q'): -2,
 ('K', 'K'): 5,
 ('P', 'D'): -1,
 ('I', 'H'): -3,
 ('I', 'D'): -3,
 ('T', 'R'): -1,
 ('P', 'L'): -3,
 ('K', 'G'): -2,
 ('M', 'N'): -2,
 ('P', 'H'): -2,
 ('F', 'Q'): -3,
 ('Z', 'G'): -2,
 ('X', 'L'): -1,
 ('T', 'M'): -1,
 ('Z', 'C'): -3,
 ('X', 'H'): -1,
 ('D', 'R'): -2,
 ('B', 'W'): -4,
 ('X', 'D'): -1,
 ('Z', 'K'): 1,
 ('F', 'A'): -

In [14]:
import numpy as np
# NOTE(review): Bio.SubsMat is believed to be deprecated/removed in newer Biopython
# releases in favour of Bio.Align.substitution_matrices - confirm against the
# installed version before upgrading.
from Bio.SubsMat import MatrixInfo

# Module-level substitution table; Aligner.getValue reads this name as a global.
blosum = MatrixInfo.blosum62

class Aligner:
    """Global pairwise sequence aligner (Needleman-Wunsch) with a linear gap penalty.

    The dynamic-programming table has one extra leading row and column, so cell
    (i, j) holds the best score for aligning seq1[:i] with seq2[:j].

    Parameters
    ----------
    seq1, seq2 : str
        Sequences to align. seq1 runs along the rows, seq2 along the columns.
    gapPenalty : int
        Score added for every gap position (normally negative).
    matrix : mapping, optional
        Substitution scores keyed by ``(residue, residue)`` tuples. A pair only
        needs to be present in one orientation. When omitted, the module-level
        ``blosum`` table is used.

    Attributes
    ----------
    alignMatrix : numpy.ndarray of int
        Score table, shape ``(len(seq1) + 1, len(seq2) + 1)``.
    traceBackMatrix : numpy.ndarray of str
        Move that produced each cell: 'd' diagonal, 'l' left, 'u' up,
        'f' for the origin cell where the traceback stops.
    s1, s2 : str
        Gapped alignment rows; empty until align() has run.
    finalScore : int
        Score in the bottom-right cell of alignMatrix.
    identity : float
        Matching aligned positions divided by the longer input length.
    """

    def __init__(self, seq1, seq2, gapPenalty, matrix=None):
        self.seq1 = seq1
        self.seq2 = seq2
        self.gapPenalty = gapPenalty
        # None means "look up the module-level `blosum` table at scoring time".
        self.matrix = matrix

        shape = (len(self.seq1) + 1, len(self.seq2) + 1)
        self.alignMatrix = np.zeros(shape, dtype=int)
        self.traceBackMatrix = np.zeros(shape, dtype='U4')
        # Position in getValue's candidate list -> traceback code.
        self.indexToTrace = {
            0: "d",
            1: "l",
            2: "u"
        }
        self.finalScore = 0
        self.identity = 0
        # Defined up front so printResults() never hits a missing attribute.
        self.s1 = ''
        self.s2 = ''

    def _substitutionScore(self, a, b):
        """Return the substitution score for residues a and b, trying both key orders.

        Raises KeyError if the pair is missing in both orientations.
        """
        matrix = blosum if self.matrix is None else self.matrix
        try:
            return matrix[(a, b)]
        except KeyError:
            # The table stores each unordered pair once; fall through to the
            # reversed key instead of swallowing arbitrary exceptions.
            pass
        try:
            return matrix[(b, a)]
        except KeyError:
            raise KeyError(
                "no substitution score for residue pair (%r, %r)" % (a, b)
            ) from None

    def getValue(self, i, j):
        """Compute the best score for cell (i, j) and which move produced it.

        Returns a tuple ``(score, index)`` where index is 0 for the diagonal
        move, 1 for a move from the left and 2 for a move from above. On ties
        the lowest index wins.
        """
        pairScore = self._substitutionScore(self.seq1[i-1], self.seq2[j-1])

        possibleValues = [
            self.alignMatrix[i-1][j-1] + pairScore,
            self.alignMatrix[i][j-1] + self.gapPenalty,
            self.alignMatrix[i-1][j] + self.gapPenalty
        ]

        best = max(possibleValues)
        return best, possibleValues.index(best)

    def align(self):
        """Fill the score and traceback tables, then build the alignment.

        Sets finalScore and, through makeAlignment(), s1, s2 and identity.
        """
        rows, cols = self.alignMatrix.shape

        # Borders: first column can only be reached by moving up, first row
        # only by moving left; the origin terminates the traceback.
        self.traceBackMatrix[:, 0] = "u"
        self.traceBackMatrix[0, :] = "l"
        self.traceBackMatrix[0, 0] = "f"

        # Border scores are cumulative gap penalties: 0, g, 2g, ...
        self.alignMatrix[:, 0] = np.arange(rows) * self.gapPenalty
        self.alignMatrix[0, :] = np.arange(cols) * self.gapPenalty

        # Row-major fill so every cell's three neighbours are already final.
        for i in range(1, rows):
            for j in range(1, cols):
                self.alignMatrix[i, j], index = self.getValue(i, j)
                self.traceBackMatrix[i, j] = self.indexToTrace[index]

        # Plain int so the score prints and stores cleanly regardless of numpy version.
        self.finalScore = int(self.alignMatrix[rows - 1, cols - 1])
        self.makeAlignment()

    def makeAlignment(self):
        """Walk the traceback table from the bottom-right cell to the origin.

        Stores the gapped rows in s1 and s2 and refreshes identity.

        Raises RuntimeError if a cell holds no valid traceback code, which is
        the case when align() has not filled the table yet.
        """
        # Collected back-to-front, then reversed once (avoids repeated prepending).
        row1 = []
        row2 = []

        i = len(self.seq1)
        j = len(self.seq2)
        alignType = self.traceBackMatrix[i][j]

        while alignType != 'f':

            if alignType == 'd':
                row1.append(self.seq1[i-1])
                row2.append(self.seq2[j-1])
                i -= 1
                j -= 1
            elif alignType == 'l':
                row1.append('-')
                row2.append(self.seq2[j-1])
                j -= 1
            elif alignType == 'u':
                row1.append(self.seq1[i-1])
                row2.append('-')
                i -= 1
            else:
                # Without this guard an unfilled table would loop forever.
                raise RuntimeError(
                    "invalid traceback code %r at cell (%d, %d); call align() first"
                    % (str(alignType), i, j)
                )

            alignType = self.traceBackMatrix[i][j]

        self.s1 = ''.join(reversed(row1))
        self.s2 = ''.join(reversed(row2))

        self.getIdentity()

    def getIdentity(self):
        """Set identity to matching aligned positions / length of the longer input.

        Identity is 0.0 when both input sequences are empty.
        """
        ident = sum(1 for a, b in zip(self.s1, self.s2) if a == b)

        totalPositions = max(len(self.seq1), len(self.seq2))
        # Two empty sequences have no positions to compare; avoid dividing by zero.
        self.identity = ident / totalPositions if totalPositions else 0.0

    def printResults(self):
        """Print the score table, both alignment rows, the final score and identity."""
        print(self.alignMatrix, '\n')
        print(self.s1)
        print(self.s2, '\n')
        print('Final Score:', self.finalScore)
        print('Identity:', self.identity)

In [13]:
def readSequence(path):
    """Return the contents of the text file at `path` with all whitespace removed.

    Newlines or spaces in the file would otherwise be treated as residues and
    fail the substitution-matrix lookup during alignment.
    """
    # Context manager closes the file even if reading raises.
    with open(path, "r") as handle:
        return "".join(handle.read().split())


# Reference sequence that every other species is aligned against.
human = readSequence("hemoglobins/human.txt")

animalList = ["chicken", "cow", "deer", "horse", "pig", "trout", "wolf"]

# species name -> sequence read from hemoglobins/<name>.txt
animals = {
    animal: readSequence("hemoglobins/"+animal+".txt")
    for animal in animalList
}


# Linear gap penalty passed to every alignment.
gap = -4

# "human vs <animal>" -> (final alignment score, identity fraction)
scores = {}


for animal in animalList:

    aligner = Aligner(human, animals[animal], gap)
    aligner.align()
    scores["human vs "+animal] = (aligner.finalScore, aligner.identity)

    print("Results for human vs "+animal+":\n")
    aligner.printResults()
    print("\n")

Results for human vs chicken:

[[   0   -4   -8 ... -556 -560 -564]
 [  -4    1   -3 ... -548 -552 -556]
 [  -8   -2    5 ... -540 -544 -548]
 ...
 [-552 -543 -535 ...  425  421  417]
 [-556 -547 -539 ...  434  430  426]
 [-560 -551 -543 ...  430  441  437]] 

VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKY-
MLTAEDKKLIQQAWEKAASHQEEFGAEALTRMFTTYPQTKTYFPHFDLSPGSDQVRGHGKKVLGALGNAVKNVDNLSQAMAELSNLHAYNLRVDPVNFKLLSQCIQVVLAVHMGKDYTPEVHAAFDKFLSAVSAVLAEKYR 

Final Score: 437
Identity: 0.5886524822695035


Results for human vs cow:

[[   0   -4   -8 ... -556 -560 -564]
 [  -4    4    0 ... -548 -552 -556]
 [  -8    0    8 ... -540 -544 -548]
 ...
 [-552 -544 -536 ...  618  614  610]
 [-556 -548 -540 ...  627  623  619]
 [-560 -552 -544 ...  623  634  630]] 

VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKY-
VLSAAD

In [11]:
# Probe key orientation: the table has no ('L', 'M') entry, so this lookup raises KeyError.
blosum[('L','M')]

KeyError: ('L', 'M')

In [10]:
# The ('M', 'L') orientation of the pair is present in the table, so this lookup succeeds.
blosum[('M','L')]

2