## Text Justification by Dynamic Programming
### Youn-Long Lin 2017/3/16

In [1]:
# Greedy Method

def tj_greedy(text, width):
    """Print a greedy, line-by-line justification of ``text``.

    Whitespace-separated words are packed onto the current line, in order,
    for as long as the next word plus its separating space still fits in
    ``width`` columns. Every finished line is printed together with its
    unused width ("waste").

    Args:
        text: The text to lay out; it is split on whitespace.
        width: Maximum number of characters allowed on a line.

    Returns:
        None. The layout is only printed.

    Raises:
        ValueError: If any word is longer than ``width`` and therefore
            cannot be placed on any line.
    """
    word_list = text.split()

    # Reject unplaceable words before printing anything; without this check
    # such a word would never be consumed and the loop would not terminate.
    for word in word_list:
        if len(word) > width:
            raise ValueError(
                "word {!r} (length {}) does not fit in width {}".format(
                    word, len(word), width))

    i = 0
    while i < len(word_list):
        # The first word of a line needs no leading space, so a word whose
        # length equals ``width`` exactly fills a line on its own.
        line = word_list[i]
        usage = len(line)
        i += 1
        # Every further word costs its own length plus one separating space.
        while (i < len(word_list) and
               usage + 1 + len(word_list[i]) <= width):
            line = line + " " + word_list[i]
            usage += len(word_list[i]) + 1
            i += 1
        print ("waste=", width-usage, line)
    return

In [2]:
# Demo: greedy layout of the sample sentence at width 20; tj_greedy prints
# one "waste= ..." line per output line and returns nothing.
text = "This is the worst of time. This is the best of time."
tj_greedy(text, 20)

waste= 0 This is the worst of
waste= 3 time. This is the
waste= 7 best of time.


In [4]:
# Brute Force Method; Exponentially Recursive Call

def tj_bf(text, width):
    """Return the brute-force minimum layout cost of ``text``.

    The text is split on whitespace and the whole word range (first word to
    last word) is handed to ``tj_bf_engine``, whose result is returned as is.

    Args:
        text: The text to lay out.
        width: Maximum number of characters allowed on a line.
    """
    words = text.split()
    last_index = len(words) - 1
    return tj_bf_engine(words, width, 0, last_index)

def tj_bf_engine(wl, width, start, end):
    """Return the minimum total cost of laying out ``wl[start..end]``.

    Every possible first line (a run of words beginning at ``start`` that
    fits in ``width``) is tried, and the remaining words are solved by a
    recursive call, without any caching. A line costs
    ``(width - line_length) ** 2``, where the line length counts one space
    between adjacent words; every line, including the last, is charged.

    Args:
        wl: List of words.
        width: Maximum number of characters allowed on a line.
        start: Index of the first word still to be placed.
        end: Index of the last word to be placed (inclusive).

    Returns:
        The smallest achievable sum of line costs; 0 when ``start > end``.

    Raises:
        ValueError: If ``wl[start]`` is longer than ``width`` and so cannot
            begin any line.
    """
    if start > end:
        return 0
    candidates = []
    # ``usage`` includes one trailing space after each word taken so far, so
    # ``usage + len(next word)`` is the line length with that word appended.
    usage = 0
    i = start
    while (i <= end and
           usage + len(wl[i]) <= width):
        usage += len(wl[i]) + 1
        # ``usage - 1`` drops the trailing space to get the true line length.
        candidates.append((i, (width-(usage-1))**2))
        i += 1
    if not candidates:
        # Not even the first word fits; say so explicitly instead of letting
        # min() fail on an empty sequence.
        raise ValueError(
            "word {!r} (length {}) does not fit in width {}".format(
                wl[start], len(wl[start]), width))
    min_cost = min(line_cost + tj_bf_engine(wl, width, last+1, end)
                   for last, line_cost in candidates)
    print ("Start={} end={} min_cost={}".format(start, end, min_cost))
    return min_cost
    

In [5]:
# Demo: brute-force minimum cost for the sample sentence at width 20; the
# engine prints one trace line for every subproblem call it finishes.
text = "This is the worst of time. This is the best of time."
tj_bf(text, 20)

Start=11 end=11 min_cost=225
Start=10 end=11 min_cost=144
Start=11 end=11 min_cost=225
Start=9 end=11 min_cost=49
Start=11 end=11 min_cost=225
Start=10 end=11 min_cost=144
Start=11 end=11 min_cost=225
Start=8 end=11 min_cost=9
Start=11 end=11 min_cost=225
Start=10 end=11 min_cost=144
Start=11 end=11 min_cost=225
Start=9 end=11 min_cost=49
Start=11 end=11 min_cost=225
Start=10 end=11 min_cost=144
Start=11 end=11 min_cost=225
Start=7 end=11 min_cost=0
Start=11 end=11 min_cost=225
Start=10 end=11 min_cost=144
Start=11 end=11 min_cost=225
Start=9 end=11 min_cost=49
Start=11 end=11 min_cost=225
Start=10 end=11 min_cost=144
Start=11 end=11 min_cost=225
Start=8 end=11 min_cost=9
Start=11 end=11 min_cost=225
Start=10 end=11 min_cost=144
Start=11 end=11 min_cost=225
Start=9 end=11 min_cost=49
Start=11 end=11 min_cost=225
Start=10 end=11 min_cost=144
Start=11 end=11 min_cost=225
Start=6 end=11 min_cost=130
Start=11 end=11 min_cost=225
Start=10 end=11 min_cost=144
Start=11 end=11 min_cost=225
Sta

34

In [6]:
# Use a dictionary to store found solutions to subproblems

def tj_dict(text, width):
    """Return the minimum layout cost of ``text`` using a memo dictionary.

    The text is split on whitespace, a fresh empty dictionary is created to
    hold subproblem results, and the whole word range is passed to
    ``tj_dict_engine``, whose result is returned as is.

    Args:
        text: The text to lay out.
        width: Maximum number of characters allowed on a line.
    """
    words = text.split()
    memo = {}
    last_index = len(words) - 1
    return tj_dict_engine(words, width, 0, last_index, memo)

def tj_dict_engine(wl, width, start, end, d):
    """Return the minimum total cost of laying out ``wl[start..end]``, memoized.

    Works like the plain recursive search (try every first line that fits,
    recurse on the rest), but each solved subproblem is stored in ``d`` under
    the string key ``"<start>-<end>"`` and looked up before recomputing. A
    line costs ``(width - line_length) ** 2``, where the line length counts
    one space between adjacent words; every line is charged.

    Args:
        wl: List of words.
        width: Maximum number of characters allowed on a line.
        start: Index of the first word still to be placed.
        end: Index of the last word to be placed (inclusive).
        d: Dictionary used as the memo table; it is updated in place.

    Returns:
        The smallest achievable sum of line costs; 0 when ``start > end``.

    Raises:
        ValueError: If ``wl[start]`` is longer than ``width`` and so cannot
            begin any line.
    """
    if start > end:
        return 0
    key = str(start)+'-'+str(end)
    if key in d:
        return d[key]

    candidates = []
    # ``usage`` includes one trailing space after each word taken so far, so
    # ``usage + len(next word)`` is the line length with that word appended.
    usage = 0
    i = start
    while (i <= end and
           usage + len(wl[i]) <= width):
        usage += len(wl[i]) + 1
        # ``usage - 1`` drops the trailing space to get the true line length.
        candidates.append((i, (width-(usage-1))**2))
        i += 1
    if not candidates:
        # Not even the first word fits; say so explicitly instead of letting
        # min() fail on an empty sequence.
        raise ValueError(
            "word {!r} (length {}) does not fit in width {}".format(
                wl[start], len(wl[start]), width))
    min_cost = min(line_cost + tj_dict_engine(wl, width, last+1, end, d)
                   for last, line_cost in candidates)
    d[key] = min_cost
    print ("key {} and min_cost{}".format(key, min_cost))
    return min_cost

In [7]:
# Demo: memoized minimum cost for the sample sentence at width 20; the engine
# prints each subproblem key once, when its result is first stored.
text = "This is the worst of time. This is the best of time."
tj_dict(text, 20)

key 11-11 and min_cost225
key 10-11 and min_cost144
key 9-11 and min_cost49
key 8-11 and min_cost9
key 7-11 and min_cost0
key 6-11 and min_cost130
key 5-11 and min_cost58
key 4-11 and min_cost25
key 3-11 and min_cost1
key 2-11 and min_cost122
key 1-11 and min_cost83
key 0-11 and min_cost34


34

In [9]:
# Looping over a 1d array

def usage(wl, start, end):
    """Return the width taken by words ``wl[start..end]`` placed on one line.

    The result is the total number of characters in the words plus
    ``end - start``, i.e. one separating space between adjacent words.

    Args:
        wl: List of words.
        start: Index of the first word on the line.
        end: Index of the last word on the line (inclusive).
    """
    letters = sum(len(wl[k]) for k in range(start, end + 1))
    gaps = end - start
    return letters + gaps

def tj_loop(text, width):
    """Compute minimum layout costs bottom-up over all word suffixes.

    The table is filled from the last word back to the first. Entry ``i`` is
    the smallest total cost of laying out words ``i..N-1``, where a line
    costs ``(width - line_length) ** 2`` and the line length counts one space
    between adjacent words. Every line, including the last, is charged.

    Args:
        text: The text to lay out; it is split on whitespace.
        width: Maximum number of characters allowed on a line.

    Returns:
        A list of ``N + 1`` integers for ``N`` words. Entry ``i`` is the
        minimum cost for the suffix starting at word ``i``; the final entry
        is 0 (nothing left to place). Entry 0 is the cost for the whole text.

    Raises:
        ValueError: If any word is longer than ``width`` and therefore
            cannot be placed on any line.
    """
    word_list = text.split()
    N = len(word_list)
    # One slot per suffix plus a trailing 0 for the empty suffix.
    sol_list = [None] * N
    sol_list.append(0)
    for i in range(N-1, -1, -1):
        # ``None`` means "no feasible first line found yet". A numeric
        # sentinel would silently cap any legitimate cost at or above it.
        min_cost = None
        # Start at -1 so that adding ``len(word) + 1`` for each word yields
        # the line length without a trailing space.
        line_len = -1
        for j in range(i, N):
            line_len += len(word_list[j]) + 1
            if line_len > width:
                break
            curr_cost = (width - line_len)**2 + sol_list[j+1]
            if min_cost is None or curr_cost < min_cost:
                min_cost = curr_cost
        if min_cost is None:
            raise ValueError(
                "word {!r} (length {}) does not fit in width {}".format(
                    word_list[i], len(word_list[i]), width))
        sol_list[i] = min_cost
    return sol_list
            
        

In [10]:
# Demo: bottom-up cost table for the sample sentence at width 20; entry i is
# the minimum cost for the words from index i onward, and the last entry is 0.
text = "This is the worst of time. This is the best of time."
tj_loop(text, 20)

[34, 83, 122, 1, 25, 58, 130, 0, 9, 49, 144, 225, 0]