In [1]:
import numpy as np
# Load the scoring table used by the alignment cells below.
# The first non-blank line is taken as the list of column symbols; every
# following line is split on whitespace, its first token (the row label) is
# dropped and the remaining tokens are parsed as floats.
# The `with` block closes the file handle as soon as parsing is done, and blank
# lines (for example a trailing newline at the end of the file) are skipped so
# they cannot produce an empty row.
with open('input/blosum62_affine.txt', 'r') as f:
    score = [line.split() for line in f if line.strip()]
alphabet = score[0]
blosum62 = np.array([row[1:] for row in score[1:]], dtype=np.float64)

In [2]:
def affine_globalAlign(x, y, score_matrix, sigma, epsilon, symbols=None):
    """Fill the three score levels of a global alignment with affine gap penalty.

    A gap of length k costs sigma + epsilon * (k - 1).

    Parameters
    ----------
    x, y : sequences (e.g. strings) to align; x runs along rows, y along columns.
    score_matrix : 2-D array indexed as score_matrix[a, b], where a and b are
        the positions of the two symbols in `symbols`.
    sigma : gap opening penalty.
    epsilon : gap extension penalty.
    symbols : optional ordered sequence of symbols labelling the rows/columns
        of score_matrix. When omitted, the notebook-level `alphabet` is used.

    Returns
    -------
    (lower, middle, upper) : three float64 arrays of shape
        (len(x) + 1, len(y) + 1).
        lower  - best score ending with a vertical move (a symbol of x only).
        middle - best score over all three kinds of final move.
        upper  - best score ending with a horizontal move (a symbol of y only).

    Raises
    ------
    ValueError
        If a symbol of x or y does not occur in `symbols`.
    """
    if symbols is None:
        symbols = alphabet  # module-level symbol list built with the score table

    # Translate each sequence to score-matrix indices once, instead of running
    # a linear `index` search twice per cell inside the O(len(x) * len(y)) loop.
    x_rows = [symbols.index(ch) for ch in x]
    y_cols = [symbols.index(ch) for ch in y]

    n, m = len(x), len(y)
    neg_inf = -float('inf')

    lower = np.zeros((n + 1, m + 1), dtype=np.float64)
    middle = np.zeros((n + 1, m + 1), dtype=np.float64)
    upper = np.zeros((n + 1, m + 1), dtype=np.float64)

    # First column: reachable only through one vertical gap of length i,
    # so the upper (horizontal) level is unreachable there.
    for i in range(1, n + 1):
        gap_cost = -sigma - (i - 1) * epsilon
        lower[i, 0] = gap_cost
        middle[i, 0] = gap_cost
        upper[i, 0] = neg_inf

    # First row: reachable only through one horizontal gap of length j,
    # so the lower (vertical) level is unreachable there.
    for j in range(1, m + 1):
        gap_cost = -sigma - (j - 1) * epsilon
        lower[0, j] = neg_inf
        middle[0, j] = gap_cost
        upper[0, j] = gap_cost

    # lower/upper either extend a gap (-epsilon) or open one from middle (-sigma);
    # middle takes the best of closing either gap level or a diagonal
    # match/mismatch step.
    for i in range(1, n + 1):
        row = x_rows[i - 1]
        for j in range(1, m + 1):
            lower[i, j] = max(lower[i - 1, j] - epsilon, middle[i - 1, j] - sigma)
            upper[i, j] = max(upper[i, j - 1] - epsilon, middle[i, j - 1] - sigma)
            middle[i, j] = max(lower[i, j],
                               middle[i - 1, j - 1] + score_matrix[row, y_cols[j - 1]],
                               upper[i, j])

    return lower, middle, upper

In [4]:
# Worked example: fill the three affine-gap levels for a short pair of
# sequences (gap opening penalty 11, gap extension penalty 1) and show them.
x = 'PRTEINS'
y = 'PRTWPSEIN'
lower, middle, upper = affine_globalAlign(x, y, blosum62, 11, 1)
# A parenthesised single argument prints identically as a Python 2 print
# statement and as a Python 3 print() call.
print(lower)
print(middle)
print(upper)

[[  0. -inf -inf -inf -inf -inf -inf -inf -inf -inf]
 [-11. -22. -23. -24. -25. -26. -27. -28. -29. -30.]
 [-12.  -4. -15. -16. -17. -18. -19. -20. -21. -22.]
 [-13.  -5.   1. -10. -11. -12. -13. -14. -15. -16.]
 [-14.  -6.   0.   6.  -5.  -6.  -7.  -8.  -9. -10.]
 [-15.  -7.  -1.   5.   3.  -6.  -6.  -2. -10.  -9.]
 [-16.  -8.  -2.   4.   2.   0.  -7.  -3.   2.  -9.]
 [-17.  -9.  -3.   3.   1.  -1.   1.  -4.   1.   8.]]
[[  0. -11. -12. -13. -14. -15. -16. -17. -18. -19.]
 [-11.   7.  -4.  -5.  -6.  -7.  -8.  -9. -10. -11.]
 [-12.  -4.  12.   1.   0.  -1.  -2.  -3.  -4.  -5.]
 [-13.  -5.   1.  17.   6.   5.   4.   3.   2.   1.]
 [-14.  -6.   0.   6.  14.   5.   5.   9.   0.   2.]
 [-15.  -7.  -1.   5.   3.  11.   3.   2.  13.   2.]
 [-16.  -8.  -2.   4.   2.   1.  12.   3.   2.  19.]
 [-17.  -9.  -3.   3.   1.   1.   5.  12.   1.   8.]]
[[  0. -11. -12. -13. -14. -15. -16. -17. -18. -19.]
 [-inf -22.  -4.  -5.  -6.  -7.  -8.  -9. -10. -11.]
 [-inf -23. -15.   1.   0.  -1.  -2.  -3.  -

In [22]:
import numpy as np
def traceback(lower, middle, upper, x, y, score_matrix, sigma, epsilon, symbols=None):
    """Trace back through the three score levels to recover a global alignment.

    Parameters
    ----------
    lower, middle, upper : 2-D score arrays of shape (len(x) + 1, len(y) + 1),
        indexed as [i, j].
    x, y : the two aligned sequences (x along rows, y along columns).
    score_matrix : 2-D array indexed by the positions of two symbols in `symbols`.
    sigma : gap opening penalty.
    epsilon : gap extension penalty.
    symbols : optional ordered sequence of symbols labelling score_matrix.
        When omitted, the notebook-level `alphabet` is used.

    Returns
    -------
    list of two str
        [x with gaps, y with gaps]; '-' marks a gap. Both strings have the
        same length and, with the gaps removed, equal x and y in full.

    Raises
    ------
    ValueError
        If a symbol needed for a diagonal step does not occur in `symbols`.
    """
    if symbols is None:
        symbols = alphabet  # module-level symbol list built with the score table

    # Which level the walk is currently on (kept as a plain state value rather
    # than comparing array identities).
    IN_MIDDLE, IN_LOWER, IN_UPPER = 0, 1, 2

    i, j = len(x), len(y)
    alx, aly = [], []
    level = IN_MIDDLE  # the final score lives in the middle level

    while i > 0 and j > 0:
        if level == IN_MIDDLE:
            from_lower = lower[i, j]
            from_upper = upper[i, j]
            diagonal = middle[i - 1, j - 1] + score_matrix[symbols.index(x[i - 1]),
                                                           symbols.index(y[j - 1])]
            best = max(from_lower, from_upper, diagonal)
            # Tie order: diagonal first, then lower, then upper.
            if best == diagonal:
                alx.append(x[i - 1])
                aly.append(y[j - 1])
                i -= 1
                j -= 1
            elif best == from_lower:
                level = IN_LOWER  # zero-cost hop, no symbol consumed
            else:
                level = IN_UPPER  # zero-cost hop, no symbol consumed

        elif level == IN_LOWER:
            extend = lower[i - 1, j] - epsilon
            opened = middle[i - 1, j] - sigma
            # Either way one symbol of x is paired with a gap; only the level
            # we land on differs (ties keep extending the gap).
            alx.append(x[i - 1])
            aly.append('-')
            i -= 1
            level = IN_LOWER if extend >= opened else IN_MIDDLE

        else:  # IN_UPPER
            extend = upper[i, j - 1] - epsilon
            opened = middle[i, j - 1] - sigma
            # Either way one symbol of y is paired with a gap.
            alx.append('-')
            aly.append(y[j - 1])
            j -= 1
            level = IN_UPPER if extend >= opened else IN_MIDDLE

    # Reaching the first row or column does not finish a *global* alignment:
    # whatever prefix is left on the other sequence must be emitted against a
    # single leading gap, otherwise those symbols are silently dropped.
    while i > 0:
        alx.append(x[i - 1])
        aly.append('-')
        i -= 1
    while j > 0:
        alx.append('-')
        aly.append(y[j - 1])
        j -= 1

    # Symbols were collected from the end backwards. Return a real list so the
    # result is indexable on Python 3 as well (map() is lazy there).
    return [''.join(alx[::-1]), ''.join(aly[::-1])]

In [26]:
# Align two short sequences (gap opening 11, gap extension 1) and print the
# two gapped strings, one per line.
x = 'PRTEINS'
y = 'PRTWPSEIN'
lower, middle, upper = affine_globalAlign(x, y, blosum62, 11, 1)
algn = traceback(lower, middle, upper, x, y, blosum62, 11, 1)
# Parenthesised so the line is valid on both Python 2 and Python 3.
print('\n'.join(algn))

PRT---EINS
PRTWPSEIN-


In [27]:
# Second small example with sequences of different length
# (gap opening 11, gap extension 1).
x = 'AHRQPQ'
y = 'AHED'
lower, middle, upper = affine_globalAlign(x, y, blosum62, 11, 1)
algn = traceback(lower, middle, upper, x, y, blosum62, 11, 1)
# Parenthesised so the line is valid on both Python 2 and Python 3.
print('\n'.join(algn))

AHRQPQ
AHE--D


In [28]:
# Read the two sequences (the file must contain exactly two lines), align them
# with gap opening 11 / gap extension 1, then print the alignment score
# followed by the two gapped strings.
# The `with` block closes the input file once both lines have been read.
with open('input/dataset_249_8.txt', 'r') as handle:
    x, y = [line.strip() for line in handle]
lower, middle, upper = affine_globalAlign(x, y, blosum62, 11, 1)
algn = traceback(lower, middle, upper, x, y, blosum62, 11, 1)
# The bottom-right cell of the middle level holds the score of the full alignment.
print(int(middle[len(x), len(y)]))
print('\n'.join(algn))

IVSPGIENID---HLEIKAIRWGWEPRIMTAWKGQQMYRQPFSSFFVPPRRSLQQEW----PH-NWAHHS-RIMEQSINAICG
IVSPGIENINQYGHLEHK---WIWEPRIMTAWKGQQMYRQPFSSFFVPPRRSLQQEWHWWPTHINWAHHSNQLMEQSINAICV


In [32]:
# Read the two sequences (the file must contain exactly two lines), align them
# with gap opening 11 / gap extension 1, then print the alignment score
# followed by the two gapped strings.
# The `with` block closes the input file once both lines have been read.
with open('input/rosalind_ba5j.txt', 'r') as handle:
    x, y = [line.strip() for line in handle]
lower, middle, upper = affine_globalAlign(x, y, blosum62, 11, 1)
algn = traceback(lower, middle, upper, x, y, blosum62, 11, 1)
# The bottom-right cell of the middle level holds the score of the full alignment.
print(int(middle[len(x), len(y)]))
print('\n'.join(algn))

332
WHHKILVADTRLRYVPAIMDDMGVRHFDWMQYNMQMHQNDFNPRHQNKRKKCSWNRIKPNWFGQNGSRKHLKQWDDI---LFNIKTWRSTIAM
WHHKIKVADTRLRYVPAIMDDMGVRHFHWMQYCMQMHQNDFNPR----RIKCSWNRIKPNCTDIN----CFGCWDNMVMDLFNIKTWRSTIAM
