In [1]:
import numpy as np
# w is an array containing words
def textJustification(w: np.array, pageWidth: int) -> (np.array, np.array):
    """Choose line breaks for the words in `w` that minimise total squared slack.

    A line holding words i..j (joined by single spaces) costs
    (pageWidth - line length) ** 2, or infinity when the line does not fit.
    A single word wider than `pageWidth` can never fit, so it is placed alone
    on a line and charged its squared overflow; without this every layout
    containing that word would cost infinity and the whole text would end up
    on one line.

    Args:
        w: sequence of words (strings).
        pageWidth: maximum number of characters per line.

    Returns:
        (minCost, cIndex), two float arrays of length len(w).
        minCost[i] is the cheapest cost of laying out words i..end.
        cIndex[i] is the index of the word that starts the line following the
        line that begins at word i (len(w) when that line is the last one).
    """
    n = len(w)

    # badness[i, j] is the cost of one line holding words i..j. Entries with
    # j < i are never read and stay 0. Float dtype so np.inf is representable.
    badness = np.zeros([n, n], dtype=float)
    for i in range(n):
        slack = pageWidth - len(w[i])
        # Squaring keeps a lone over-wide word finite (slack is negative then).
        badness[i, i] = slack ** 2
        for j in range(i + 1, n):
            slack -= len(w[j]) + 1  # the extra 1 is the separating space
            badness[i, j] = slack ** 2 if slack >= 0 else np.inf

    minCost = np.zeros(n, dtype=float)
    # Kept as float, as before; entries are whole numbers that callers cast
    # with int() before using them as indices.
    cIndex = np.zeros(n, dtype=float)

    # Fill from the last word backwards so minCost[j] is final when it is read.
    for i in reversed(range(n)):
        # Baseline: every word from i onwards on a single final line.
        minCost[i] = badness[i, n - 1]
        cIndex[i] = n
        for j in reversed(range(i + 1, n)):
            # Break before word j: line i..j-1 plus the best layout of the rest.
            candidate = badness[i, j - 1] + minCost[j]
            # An infinite candidate can never win this strict comparison.
            if candidate < minCost[i]:
                minCost[i] = candidate
                cIndex[i] = j

    return minCost, cIndex

In [2]:
def printJustifiedText(w: np.array, pageWidth: int) -> None:
    """Print the words in `w` wrapped to `pageWidth`, one output line per print.

    The break positions come from textJustification; words on the same line
    are separated by a single space with no trailing space.

    Args:
        w: sequence of words (strings).
        pageWidth: maximum number of characters per line.
    """
    # Nothing to lay out: the break table would be empty, so reading its first
    # entry would raise IndexError.
    if len(w) == 0:
        return

    (_, index) = textJustification(w, pageWidth)
    i = 0
    while i < len(w):
        # index[i] is the first word of the next line (stored as float).
        j = int(index[i])
        print(" ".join(w[i:j]))
        i = j
        

In [3]:
# Small worked example: five words laid out on a 12-character-wide page.
w = np.array(["diamonds", "are", "girls", "best", "friends"])

# Show the two arrays returned by textJustification (costs and next-line start
# indices), then the wrapped text itself.
(a,b) = textJustification(w,12)
print(a,b)
printJustifiedText(w, 12)


[25.  9. 29.  0. 25.] [1. 3. 4. 5. 5.]
diamonds
are girls
best friends


In [4]:
def fileTokenizer(filePath: str) -> np.array:
    """Return all whitespace-separated tokens of the file, in reading order.

    Args:
        filePath: path of the text file to read.

    Returns:
        A NumPy array holding every token of the file.
    """
    with open(filePath) as source:
        # Flatten the per-line token lists into one list.
        words = [word for row in source for word in row.split()]
    return np.array(words)


In [5]:
import os
def writeJustifiedTextfile(filepath: str, pageWidth: int):
    """Wrap the whole file to `pageWidth` and write it next to the input.

    All words of `filepath` are read as one sequence (the original line breaks
    are discarded), laid out with textJustification, and written to
    `<name>_justified<ext>`.

    Args:
        filepath: path of the text file to wrap.
        pageWidth: maximum number of characters per output line.
    """
    # read, parse and return an array of word tokens
    w = fileTokenizer(filepath)
    # output file: same location and extension, "_justified" appended to the name
    root, ext = os.path.splitext(filepath)
    filepathnew = root + "_justified" + ext

    (_, index) = textJustification(w, pageWidth)
    with open(filepathnew, 'w') as f:
        i = 0
        # Loop on the start of the current line. Testing the end of the first
        # line before entering the loop skipped all output when the text fit
        # on one line, and failed on an empty file.
        while i < len(w):
            # index[i] is the first word of the next line (stored as float).
            j = int(index[i])
            f.write(" ".join(w[i:j]) + "\n")
            i = j

In [6]:

def optWriteJustifiedTextfile(filePath: str, pageWidth: int):
    """Wrap each line of the file independently and write the result.

    Every input line is tokenised and laid out on its own with
    textJustification, so the layout problem stays as small as one line.
    Blank input lines are kept as blank output lines. The result is written to
    `<name>_justified_opt<ext>` next to the input.

    Args:
        filePath: path of the text file to wrap.
        pageWidth: maximum number of characters per output line.
    """
    # Build the output name from the argument itself, not from a notebook-level
    # variable that happens to have a similar name.
    root, ext = os.path.splitext(filePath)
    filepathnew = root + "_justified_opt" + ext
    with open(filePath, 'r') as f, open(filepathnew, 'w') as fnew:
        for line in f:
            w = np.array(line.split())
            if len(w) == 0:
                # Blank (or whitespace-only) line: preserve it as an empty line.
                fnew.write("\n")
                continue
            (_, index) = textJustification(w, pageWidth)
            i = 0
            while i < len(w):
                # index[i] is the first word of the next line (stored as float).
                j = int(index[i])
                fnew.write(" ".join(w[i:j]) + "\n")
                i = j


    

In [None]:
import time

# Defined here too, so this cell no longer depends on a later cell having been
# run first. Hard-coded absolute Windows path: point it at a local copy.
filepath = "C:\\users\\alvis\\Desktop\\mobydick.txt"

# Time the whole-file variant with a 20-character page width.
# NOTE: writeJustifiedTextfile tokenises the entire file and textJustification
# allocates a len(w) x len(w) float matrix, so memory and time grow
# quadratically with the number of words in the file.
start_time = time.time()
writeJustifiedTextfile(filepath, 20)
print("--- basic version %s seconds ---" % (time.time() - start_time))

In [10]:
import time
# Hard-coded absolute Windows path; point it at a local copy of the text before running.
filepath = "C:\\users\\alvis\\Desktop\\mobydick.txt"




# Time the line-by-line variant with a 30-character page width.
start_time = time.time()
optWriteJustifiedTextfile(filepath, 30)
print("--- optimized version: %s seconds ---" % (time.time() - start_time))



--- optimized version: 3.1481986045837402 seconds ---
