In [1]:
def multiple_alignment_3(v, w, u):
    '''Returns the alignment of three sequences v, w, and u.

    Scoring: a column earns 1 point when all three sequences carry the same
    character in it and 0 points otherwise (indels are free).

    Args:
        v, w, u: the three strings to align (any of them may be empty).

    Returns:
        A 4-tuple (score, v_aligned, w_aligned, u_aligned) where score is the
        best score as a *string* and the three aligned strings have equal
        length, using '-' for indels.
    '''
    # Predecessor offsets (di, dj, dk).  The position of an offset in this
    # list is the code stored in the backtrack matrix, so filling the matrix
    # and walking it back can never disagree about what a code means.
    # Order matters: max() keeps the first of several equal candidates, so
    # the diagonal wins ties, then the single-sequence moves, then the pairs.
    moves = [(1, 1, 1),
             (1, 0, 0), (0, 1, 0), (0, 0, 1),
             (1, 1, 0), (1, 0, 1), (0, 1, 1)]

    # Initialize the matrices; every cell touching an edge keeps score 0.
    S = [[[0] * (len(u) + 1) for _ in range(len(w) + 1)] for _ in range(len(v) + 1)]
    backtrack = [[[0] * (len(u) + 1) for _ in range(len(w) + 1)] for _ in range(len(v) + 1)]

    # Fill in the Score and Backtrack matrices.
    for i in range(1, len(v) + 1):
        for j in range(1, len(w) + 1):
            for k in range(1, len(u) + 1):
                scores = [S[i - di][j - dj][k - dk] for di, dj, dk in moves]
                # Only the full diagonal move can earn the match point.
                scores[0] += int(v[i-1] == w[j-1] == u[k-1])
                backtrack[i][j][k], S[i][j][k] = max(enumerate(scores), key=lambda p: p[1])

    # Start from the far corner, which holds the score of the full alignment.
    i, j, k = len(v), len(w), len(u)
    max_score = S[i][j][k]

    # Backtrack to the edge of the matrix, collecting columns last-to-first.
    columns = []
    while i and j and k:
        di, dj, dk = moves[backtrack[i][j][k]]
        columns.append((v[i-1] if di else '-',
                        w[j-1] if dj else '-',
                        u[k-1] if dk else '-'))
        i -= di
        j -= dj
        k -= dk
    columns.reverse()

    # At least one sequence is exhausted now.  Right-align the unconsumed
    # prefixes by prepending indels so that all three rows get equal length.
    width = max(i, j, k)
    rows = []
    for index, prefix in enumerate((v[:i], w[:j], u[:k])):
        body = ''.join(column[index] for column in columns)
        rows.append('-' * (width - len(prefix)) + prefix + body)

    return str(max_score), rows[0], rows[1], rows[2]

In [14]:
# Load the dataset: the first three lines of the file are the sequences to
# align.  The `with` block closes the file handle as soon as it has been read,
# and the list is not called `input` so the builtin stays usable in later cells.
with open('input/rosalind_ba5m.txt', 'r') as handle:
    sequences = [line.strip() for line in handle]
print(sequences)

# Align the three sequences and print the score followed by the aligned rows.
alignment = multiple_alignment_3(sequences[0], sequences[1], sequences[2])
print('\n'.join(alignment))

['ACGATTTCAGGGGAAGTACCATGTTAG', 'TTTCGGTTACGCGTGTTCGCTATCGAT', 'GTGAGTAATCTTGGACCTCGTGATGG']
14
ACGAT---T--TC--AGG--GGAAGTAC-C-AT-GT--------TAG--
----T---T--TC---GGTT------ACGCG-T-GT--TCGCTATCGAT
---GTGAGTAATCTT-GG--------AC-C--TCGTGA------TGG--


In [16]:
# Small example that can be checked by hand: prints the score on the first
# line, then the three aligned sequences, one per line.
alignment = multiple_alignment_3(v='ATATCCG', w='TCCGA', u='ATGTACTG')
print('\n'.join(alignment))

3
--AT-ATCCG-
---T---CCGA
ATGTA--CTG-


In [29]:
# Stand-alone variant of the three-sequence alignment: one point for every
# column in which a, b and c all carry the same character, indels are free.
a = 'ATATCCG'
b = 'TCCGA'
c = 'ATGTACTG'

# abc_list[i][j][k] is the best score for the prefixes a[:i], b[:j], c[:k].
# Cells with any index equal to 0 keep their initial score of 0.
abc_list = [[[0] * (len(c) + 1) for j in range(len(b) + 1)]
            for i in range(len(a) + 1)]
for i in range(1, len(a) + 1):
    for j in range(1, len(b) + 1):
        for k in range(1, len(c) + 1):
            if a[i-1] == b[j-1] == c[k-1]:
                # All three characters agree: extend the diagonal by a match.
                abc_list[i][j][k] = abc_list[i-1][j-1][k-1] + 1
            else:
                abc_list[i][j][k] = max(abc_list[i-1][j][k], abc_list[i][j-1][k], abc_list[i][j][k-1],
                                        abc_list[i-1][j-1][k], abc_list[i-1][j][k-1], abc_list[i][j-1][k-1])
print(abc_list[-1][-1][-1])

# Trace back from the far corner, collecting the alignment columns in reverse
# order (last column first) until one of the sequences is used up.
i, j, k = len(a), len(b), len(c)
a_list, b_list, c_list = [], [], []
while i != 0 and j != 0 and k != 0:
    m = max(abc_list[i-1][j][k], abc_list[i][j-1][k], abc_list[i][j][k-1],
            abc_list[i-1][j-1][k], abc_list[i-1][j][k-1], abc_list[i][j-1][k-1])
    print(m)  # trace: best score among the six non-diagonal predecessors
    if a[i-1] == b[j-1] == c[k-1]:
        a_list.append(a[i-1])
        b_list.append(b[j-1])
        c_list.append(c[k-1])
        i -= 1
        j -= 1
        k -= 1
    elif abc_list[i-1][j][k] == m:
        a_list.append(a[i-1])
        b_list.append('-')
        c_list.append('-')
        i -= 1
    elif abc_list[i][j-1][k] == m:
        a_list.append('-')
        b_list.append(b[j-1])
        c_list.append('-')
        j -= 1
    elif abc_list[i][j][k-1] == m:
        # This comparison used to be a bare truthiness test (the `== m` was
        # missing), which accepted any non-zero predecessor score.
        a_list.append('-')
        b_list.append('-')
        c_list.append(c[k-1])
        k -= 1
    elif abc_list[i-1][j-1][k] == m:
        a_list.append(a[i-1])
        b_list.append(b[j-1])
        c_list.append('-')
        i -= 1
        j -= 1
    elif abc_list[i-1][j][k-1] == m:
        a_list.append(a[i-1])
        b_list.append('-')
        c_list.append(c[k-1])
        i -= 1
        k -= 1
    else:
        # m is the maximum of the six predecessors, so the only one left is
        # abc_list[i][j-1][k-1].
        a_list.append('-')
        b_list.append(b[j-1])
        c_list.append(c[k-1])
        j -= 1
        k -= 1
print(a_list)
print(b_list)
print(c_list)

# The unconsumed prefixes go in front of the alignment.  The lists are still
# reversed here, so each prefix is appended back to front and then topped up
# with indels until all three rows have the same length.
pad_to = max(i, j, k)
for chars, prefix in ((a_list, a[:i]), (b_list, b[:j]), (c_list, c[:k])):
    chars.extend(reversed(prefix))
    chars.extend('-' * (pad_to - len(prefix)))

print(''.join(a_list[::-1]))
print(''.join(b_list[::-1]))
print(''.join(c_list[::-1]))

3
3
2
2
2
2
1
1
1
1
1
['-', 'G', 'C', '-', '-', 'C', 'T', 'A', '-', 'T']
['A', 'G', '-', 'C', '-', 'C', '-', '-', '-', 'T']
['-', 'G', '-', '-', 'T', 'C', '-', '-', 'A', 'T']
--AT-ATC--CG-
---T---C-C-GA
ATGTA--CT--G-
