In [4]:
def backtrack(D, x, y):
    """Trace back through a filled LCS table and return the LCS as a string.

    D is indexed as D[i, j] and is read at indices up to
    D[len(x), len(y)], i.e. it is expected to be the table built for
    x and y by lcsDp (match scores -1, mismatch +1, gaps 0).
    The walk starts in the lower-right cell and stops as soon as it
    reaches the first row or the first column.
    """
    row, col = len(x), len(y)  # lower-right corner of the table
    picked = []  # LCS characters, collected back to front
    while row > 0 and col > 0:
        # Score of arriving at (row, col) from each of the three neighbours.
        step = -1 if x[row-1] == y[col-1] else 1
        viaDiag = D[row-1, col-1] + step
        viaUp = D[row-1, col]
        viaLeft = D[row, col-1]
        if viaDiag <= viaUp and viaDiag <= viaLeft:
            # Diagonal wins (ties included): x[row-1] belongs to the LCS.
            picked.append(x[row-1])
            row -= 1
            col -= 1
        elif viaUp <= viaLeft:
            # Vertical beats or ties horizontal: drop a character of x.
            row -= 1
        else:
            # Horizontal is strictly best: drop a character of y.
            col -= 1
    # Characters were gathered end-to-start; join first, then flip the string.
    joined = ''.join(picked)
    return joined[::-1]

In [5]:
import numpy
def lcsDp(x, y):
    """Longest common subsequence (LCS) of x and y via dynamic programming.

    Fills an integer table D of shape (len(x) + 1, len(y) + 1). Row 0 and
    column 0 stay 0. Every other cell is the minimum of:
      * the diagonal neighbour plus -1 if x[i-1] == y[j-1], else plus 1,
      * the left neighbour (gap, no cost),
      * the upper neighbour (gap, no cost).
    With this scoring D[i, j] is minus the LCS length of x[:i] and y[:j].

    Returns a tuple (lcs, D): the result of backtrack(D, x, y) and the
    filled table itself.
    """
    D = numpy.zeros((len(x) + 1, len(y) + 1), dtype = int)
    # `range` instead of `xrange`: `xrange` is undefined on Python 3,
    # while `range` iterates the same indices on both Python 2 and 3.
    for i in range(1, len(x) + 1):
        for j in range(1, len(y) + 1):
            delta = -1 if x[i-1] == y[j-1] else 1  # reward a match, penalise a mismatch
            distDiag = D[i-1, j-1] + delta
            distHor = D[i, j-1]
            distVer = D[i-1, j]
            D[i,j] = min(distDiag, distHor, distVer)
    return backtrack(D, x, y), D

In [6]:
# Small worked example: print the LCS and the filled DP table.
lcs, D = lcsDp('GCGATC', 'CTGACG')
# Single-argument print(...) calls give the same output on Python 2 and 3;
# the bare `print x, y` statement form is a SyntaxError on Python 3.
print('LCS is: %s' % lcs)
print(D)

LCS is: CGAC
[[ 0  0  0  0  0  0  0]
 [ 0  0  0 -1 -1 -1 -1]
 [ 0 -1 -1 -1 -1 -2 -2]
 [ 0 -1 -1 -2 -2 -2 -3]
 [ 0 -1 -1 -2 -3 -3 -3]
 [ 0 -1 -2 -2 -3 -3 -3]
 [ 0 -1 -2 -2 -3 -4 -4]]


In [31]:
#Read the input data
# The file must contain exactly two lines (one sequence each); the tuple
# unpacking raises ValueError otherwise. The `with` block closes the file
# handle as soon as both lines have been read.
with open('input/rosalind_ba5c.txt', 'r') as handle:
    x, y = (line.strip() for line in handle)

In [32]:
# Compute the LCS of the two input sequences; D keeps the filled DP table.
lcs, D = lcsDp(x, y)
# Bare expression last so the notebook displays the resulting string.
lcs

'GAATGAACAAGAGTCAAGGTTACCGCAACGGTGAATGTACCACAAGCTTGCAATTCCGATAGGGGGTGGTATACAGGCCGGTTGTTACCCAATCGCTTAAAATTTAAGCGGCAGTTTCCAACTGGATAACGGACCCGGCACCAGCGCGGTCAAGATAATGCGGTGGGGCGAGGAGGATAAATGGGAGTGGCCCGCGGCCTCACGGTACTCAAACTGTGGGGTATAAACGTCCTTAGAACCGCTTTGCGGGCCGGCCCTGCAGGTCAAATCAGATGCCAAAACGGGCCACATTAACGCCCCAACTTCCGGAACCCTTCTTGCGCGCGAGATATTCCAGGCATACTAGATTCCCGACCGCTGAGGTTGACATAGCTGGGCCTCGGACTCAAAAACTGCCGCGATGAACACGTCACCATCATGAATGTGAATTCAAGACAAAAAGGACCGCATCCTACCCATAGATTCACAGTTACCGCGGGGATCCCAGCCACAAGCAGCTTTAATATATATGAATTAAAAGGTCTTTACGTCTTCGTGGAGTGCCGAACTGAAGTGAGGCCCTCAACCACACTTGGGAGTTGTGCCCGTGTAG'