forked from dnase/affine-gap-sequence-alignment
-
Notifications
You must be signed in to change notification settings - Fork 0
/
alignment.py
90 lines (78 loc) · 2.17 KB
/
alignment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from pprint import pprint
global S
global E
global match
global mismatch
global MIN
S = -10.
E = -0.5
match = 1.
mismatch = -4.
MIN = -float("inf")
#return match or mismatch score
def _match(s, t, i, j):
if t[i-1] == s[j-1]:
return match
else:
return mismatch
#initializers for matrices
def _init_x(i, j):
if i > 0 and j == 0:
return MIN
else:
if j > 0:
return -10 + (-0.5 * j)
else:
return 0
def _init_y(i, j):
if j > 0 and i == 0:
return MIN
else:
if i > 0:
return -10 + (-0.5 * i)
else:
return 0
def _init_m(i, j):
if j == 0 and i == 0:
return 0
else:
if j == 0 or i == 0:
return MIN
else:
return 0
def _format_tuple(inlist, i, j):
return 0
def distance_matrix(s, t):
dim_i = len(t) + 1
dim_j = len(s) + 1
#abuse list comprehensions to create matrices
X = [[_init_x(i, j) for j in range(0, dim_j)] for i in range(0, dim_i)]
Y = [[_init_y(i, j) for j in range(0, dim_j)] for i in range(0, dim_i)]
M = [[_init_m(i, j) for j in range(0, dim_j)] for i in range(0, dim_i)]
for j in range(1, dim_j):
for i in range(1, dim_i):
X[i][j] = max((S + E + M[i][j-1]), (E + X[i][j-1]), (S + E + Y[i][j-1]))
Y[i][j] = max((S + E + M[i-1][j]), (S + E + X[i-1][j]), (E + Y[i-1][j]))
M[i][j] = max(_match(s, t, i, j) + M[i-1][j-1], X[i][j], Y[i][j])
return [X, Y, M]
def backtrace(s, t, X, Y, M):
sequ1 = ''
sequ2 = ''
i = len(t)
j = len(s)
while (i>0 or j>0):
if (i>0 and j>0 and M[i][j] == M[i-1][j-1] + _match(s, t, i, j)):
sequ1 += s[j-1]
sequ2 += t[i-1]
i -= 1; j -= 1
elif (i>0 and M[i][j] == Y[i][j]):
sequ1 += '_'
sequ2 += t[i-1]
i -= 1
elif (j>0 and M[i][j] == X[i][j]):
sequ1 += s[j-1]
sequ2 += '_'
j -= 1
sequ1r = ' '.join([sequ1[j] for j in range(-1, -(len(sequ1)+1), -1)])
sequ2r = ' '.join([sequ2[j] for j in range(-1, -(len(sequ2)+1), -1)])
return [sequ1r, sequ2r]