Permalink
Switch branches/tags
Nothing to show
Find file Copy path
4e2707f Apr 15, 2011
1 contributor

Users who have contributed to this file

167 lines (141 sloc) 5.29 KB
#This software is a free software. Thus, it is licensed under GNU General Public License.
#Python implementation to Smith-Waterman Algorithm for Homework 1 of Bioinformatics class.
#Forrest Bao, Sept. 26 <http://fsbao.net> <forrest.bao aT gmail.com>
# zeros() was origianlly from NumPy.
# This version is implemented by alevchuk 2011-04-10
def zeros(shape):
retval = []
for x in range(shape[0]):
retval.append([])
for y in range(shape[1]):
retval[-1].append(0)
return retval
match_award = 10
mismatch_penalty = -5
gap_penalty = -5 # both for opening and extanding
def match_score(alpha, beta):
if alpha == beta:
return match_award
elif alpha == '-' or beta == '-':
return gap_penalty
else:
return mismatch_penalty
def finalize(align1, align2):
align1 = align1[::-1] #reverse sequence 1
align2 = align2[::-1] #reverse sequence 2
i,j = 0,0
#calcuate identity, score and aligned sequeces
symbol = ''
found = 0
score = 0
identity = 0
for i in range(0,len(align1)):
# if two AAs are the same, then output the letter
if align1[i] == align2[i]:
symbol = symbol + align1[i]
identity = identity + 1
score += match_score(align1[i], align2[i])
# if they are not identical and none of them is gap
elif align1[i] != align2[i] and align1[i] != '-' and align2[i] != '-':
score += match_score(align1[i], align2[i])
symbol += ' '
found = 0
#if one of them is a gap, output a space
elif align1[i] == '-' or align2[i] == '-':
symbol += ' '
score += gap_penalty
identity = float(identity) / len(align1) * 100
print 'Identity =', "%3.3f" % identity, 'percent'
print 'Score =', score
print align1
print symbol
print align2
def needle(seq1, seq2):
m, n = len(seq1), len(seq2) # length of two sequences
# Generate DP table and traceback path pointer matrix
score = zeros((m+1, n+1)) # the DP table
# Calculate DP table
for i in range(0, m + 1):
score[i][0] = gap_penalty * i
for j in range(0, n + 1):
score[0][j] = gap_penalty * j
for i in range(1, m + 1):
for j in range(1, n + 1):
match = score[i - 1][j - 1] + match_score(seq1[i-1], seq2[j-1])
delete = score[i - 1][j] + gap_penalty
insert = score[i][j - 1] + gap_penalty
score[i][j] = max(match, delete, insert)
# Traceback and compute the alignment
align1, align2 = '', ''
i,j = m,n # start from the bottom right cell
while i > 0 and j > 0: # end toching the top or the left edge
score_current = score[i][j]
score_diagonal = score[i-1][j-1]
score_up = score[i][j-1]
score_left = score[i-1][j]
if score_current == score_diagonal + match_score(seq1[i-1], seq2[j-1]):
align1 += seq1[i-1]
align2 += seq2[j-1]
i -= 1
j -= 1
elif score_current == score_left + gap_penalty:
align1 += seq1[i-1]
align2 += '-'
i -= 1
elif score_current == score_up + gap_penalty:
align1 += '-'
align2 += seq2[j-1]
j -= 1
# Finish tracing up to the top left cell
while i > 0:
align1 += seq1[i-1]
align2 += '-'
i -= 1
while j > 0:
align1 += '-'
align2 += seq2[j-1]
j -= 1
finalize(align1, align2)
def water(seq1, seq2):
m, n = len(seq1), len(seq2) # length of two sequences
# Generate DP table and traceback path pointer matrix
score = zeros((m+1, n+1)) # the DP table
pointer = zeros((m+1, n+1)) # to store the traceback path
max_score = 0 # initial maximum score in DP table
# Calculate DP table and mark pointers
for i in range(1, m + 1):
for j in range(1, n + 1):
score_diagonal = score[i-1][j-1] + match_score(seq1[i-1], seq2[j-1])
score_up = score[i][j-1] + gap_penalty
score_left = score[i-1][j] + gap_penalty
score[i][j] = max(0,score_left, score_up, score_diagonal)
if score[i][j] == 0:
pointer[i][j] = 0 # 0 means end of the path
if score[i][j] == score_left:
pointer[i][j] = 1 # 1 means trace up
if score[i][j] == score_up:
pointer[i][j] = 2 # 2 means trace left
if score[i][j] == score_diagonal:
pointer[i][j] = 3 # 3 means trace diagonal
if score[i][j] >= max_score:
max_i = i
max_j = j
max_score = score[i][j];
align1, align2 = '', '' # initial sequences
i,j = max_i,max_j # indices of path starting point
#traceback, follow pointers
while pointer[i][j] != 0:
if pointer[i][j] == 3:
align1 += seq1[i-1]
align2 += seq2[j-1]
i -= 1
j -= 1
elif pointer[i][j] == 2:
align1 += '-'
align2 += seq2[j-1]
j -= 1
elif pointer[i][j] == 1:
align1 += seq1[i-1]
align2 += '-'
i -= 1
finalize(align1, align2)