In [14]:
import cvxpy as cvx
import numpy as np
import timeit
from ios import *

In [137]:
bigram = get_bigram('data/processed/bigram.npy')
plainseq = get_plaintext('408plaincleaned')
plainseq = tointseq(plainseq)
plainmat = np.reshape(plainseq, (24, 17))

def toCharMat(intMat, tour=None):
    res = np.chararray(intMat.shape, unicode=True)
    if tour is None:
        for i in range(intMat.shape[1]):
            res[:,i] = tocharseq(intMat[:,i])
    else:
        for i,j in enumerate(tour):
            res[:,i] = tocharseq(intMat[:,j])
    return res   

charmat = toCharMat(plainmat)
print(charmat)

[['I' 'L' 'I' 'K' 'E' 'K' 'I' 'L' 'L' 'I' 'N' 'G' 'P' 'E' 'O' 'P' 'L']
 ['E' 'B' 'E' 'C' 'A' 'U' 'S' 'E' 'I' 'T' 'I' 'S' 'S' 'O' 'M' 'U' 'C']
 ['H' 'F' 'U' 'N' 'I' 'T' 'I' 'S' 'M' 'O' 'R' 'E' 'F' 'U' 'N' 'T' 'H']
 ['A' 'N' 'K' 'I' 'L' 'L' 'I' 'N' 'G' 'W' 'I' 'L' 'D' 'G' 'A' 'M' 'E']
 ['I' 'N' 'T' 'H' 'E' 'F' 'O' 'R' 'R' 'E' 'S' 'T' 'B' 'E' 'C' 'A' 'U']
 ['S' 'E' 'M' 'A' 'N' 'I' 'S' 'T' 'H' 'E' 'M' 'O' 'S' 'T' 'D' 'A' 'N']
 ['G' 'E' 'R' 'O' 'U' 'S' 'A' 'N' 'I' 'M' 'A' 'L' 'O' 'F' 'A' 'L' 'L']
 ['T' 'O' 'K' 'I' 'L' 'L' 'S' 'O' 'M' 'E' 'T' 'H' 'I' 'N' 'G' 'G' 'I']
 ['V' 'E' 'S' 'M' 'E' 'T' 'H' 'E' 'M' 'O' 'S' 'T' 'T' 'H' 'R' 'I' 'L']
 ['L' 'I' 'N' 'G' 'E' 'X' 'P' 'E' 'R' 'E' 'N' 'C' 'E' 'I' 'T' 'I' 'S']
 ['E' 'V' 'E' 'N' 'B' 'E' 'T' 'T' 'E' 'R' 'T' 'H' 'A' 'N' 'G' 'E' 'T']
 ['T' 'I' 'N' 'G' 'Y' 'O' 'U' 'R' 'R' 'O' 'C' 'K' 'S' 'O' 'F' 'F' 'W']
 ['I' 'T' 'H' 'A' 'G' 'I' 'R' 'L' 'T' 'H' 'E' 'B' 'E' 'S' 'T' 'P' 'A']
 ['R' 'T' 'O' 'F' 'I' 'T' 'I' 'S' 'T' 'H' 'A' 'T' 'W' 'H' 'E' 'N' 'I']
 ['D' 

In [138]:
# column permutation and revsersible mapping

perm = np.random.permutation(17)
perm_plainmat = plainmat[:,perm]
# print(toCharMat(perm_plainmat, perm=perm))

mapback = dict()
for i,j in enumerate(perm):
    mapback[i] = j

In [139]:
def col2col(a,b):
    logp = 0
    for i,j in zip(a,b):
        logp += np.log(bigram[i,j])
    return logp

def colscore(mat):
    score = 0
    for i in range(mat.shape[1] - 1):
        score += col2col(mat[:,i], mat[:,i+1])
    return score

In [140]:
# gold key score
colscore(plainmat)

-991.3825477818259

In [141]:
# construct score matrix for the permuted character matrix

A = np.zeros((18, 18))
for i in range(18):
    for j in range(18):
        if i == 17 or j == 17:
            A[i,j] = 0
        else:
            A[i,j] = col2col(perm_plainmat[:,i], perm_plainmat[:,j])
        
# AA = np.zeros([18, 18], dtype=float)
# AA[1:,1:] = A
# A = AA
# n = 18

n = 18

In [142]:
def subtour(B):
    """
    helper function: return subtour from a boolean matrix B
    """
    node = 0
    subt = [node]
    while True:
        for j in range(n):
            #print (B[subt[-1], j])
            if B[subt[-1],j] > 0.99:
                if j not in subt:
                    subt.append(j)
                else:
                    return subt

In [148]:
"""
Approach 1: MTZ subtour elimination constraint
"""

# boolean matrix, indicating the trip
B = cvx.Bool(n,n)

# exemplary matrix
C = np.ones((1,n), dtype=int)

# auxiliary var
# u = cvx.Variable(n, integer=True)
u = cvx.Int(n)

# objective
obj = cvx.Maximize(sum([A[i,:]*B[:,i] for i in range(n)]))


# basic condition
constraints = [(cvx.sum_entries(B, axis=0) == C), (cvx.sum_entries(B, axis=1) == C.transpose())]

# subtour elimination
for i in range(1,n):
    for j in range(1,n):
        if i != j:
            constraints.append(u[i] - u[j] + n*B[i,j] <= n - 1)
            
# condition for u
for i in range(1,n):
    constraints.append(u[i] >= 0)
    constraints.append(u[i] <= n - 1)
    
st = timeit.default_timer()
prob = cvx.Problem(obj, constraints)

# Time performance:

opt = prob.solve(solver=cvx.GLPK_MI)

# Print results
print ("Minimal time: ", opt)
print ("Optimal tour: ", subtour(B.value))
print ("Converge time: ", timeit.default_timer() - st)


Exception: The 'maximize' objective must resolve to a scalar.

In [147]:
# re-map and print
tour = subtour(B.value)
dummy_node_idx = tour.index(17)
tour = tour[dummy_node_idx+1:] + tour[:dummy_node_idx]
print(tour)
proposal = perm_plainmat[:,tour]
print(proposal)
colscore(proposal)


[8, 10, 4, 3, 5, 1, 7, 13, 6, 11, 2, 14, 0, 12, 15, 9, 16]
[[11 15 14  4 15  6 13  8 11 11  8 10  4 10  8 11  8]
 [ 2 20 12 14 18 18  8 19  8  4 18 20  0  2  4  1  4]
 [ 7 19 13 20  5  4 17 14 12 18  8 19  8 13 20  5  7]
 [ 4 12  0  6  3 11  8 22  6 13  8 11 11  8 10 13  0]
 [20  0  2  4  1 19 18  4 17 17 14  5  4  7 19 13  8]
 [13  0  3 19 18 14 12  4  7 19 18  8 13  0 12  4 18]
 [11 11  0  5 14 11  0 12  8 13  0 18 20 14 17  4  6]
 [ 8  6  6 13  8  7 19  4 12 14 18 11 11  8 10 14 19]
 [11  8 17  7 19 19 18 14 12  4  7 19  4 12 18  4 21]
 [18  8 19  8  4  2 13  4 17  4 15 23  4  6 13  8 11]
 [19  4  6 13  0  7 19 17  4 19 19  4  1 13  4 21  4]
 [22  5  5 14 18 10  2 14 17 17 20 14 24  6 13  8 19]
 [ 0 15 19 18  4  1  4  7 19 11 17  8  6  0  7 19  8]
 [ 8 13  4  7 22 19  0  7 19 18  8 19  8  5 14 19 17]
 [ 8 13 17 14  1  4 17  4  1 11 11  8 22  8  4  8  3]
 [ 7 19 11 11  0  3 13  0  4  2  8  3  0 17  0 15 13]
 [ 1 11 11  8 22  3  4 11 11  8 10  4 21  0  7  8  4]
 [11  8 22  8 18  4 21 

-1201.1959859040594

In [36]:
"""
Approach 2: Lazy subtour elimination
"""

# boolean matrix, indicating the trip
B = cvx.Bool(n,n)

# exemplary matrix
C = np.ones((1,n), dtype=int)

# objective
obj = cvx.Minimize(sum([A[i,:]*B[:,i] for i in range(n)]))

# basic condition
constraints = [(cvx.sum_entries(B, axis=0) == C), (cvx.sum_entries(B, axis=1) == C.transpose())]

# preliminary solution, which might involve subtours
prob = cvx.Problem(obj, constraints)
st = timeit.default_timer()
opt = prob.solve()

# while True:
#     subt = subtour(B.value)
#     if len(subt) == n:
#         print ("Minimal time: ", opt)
#         print ("Optimal tour: ", subt)
#         print ("Converge time: ", timeit.default_timer() - st)
#         break
#     else:
#         print ("Try: ", subt)
#         nots = [j for j in range(n) if j not in subt]
#         constraints.append(sum(B[i,j] for i in subt for j in nots) >= 1)
#         prob = cvx.Problem(obj, constraints)
#         opt = prob.solve(solver=cvx.GLPK_MI)

In [37]:
B.value

matrix([[9.99999997e-01, 1.08968720e-10, 1.50885147e-10, 1.35379399e-09,
         7.24304750e-11, 9.23005124e-11, 1.66271375e-10, 1.02878158e-10,
         8.74324951e-11, 6.90602468e-11, 9.38697053e-11, 3.02445817e-11,
         6.03897949e-11, 7.24266894e-11, 1.12347878e-10, 8.47974977e-11,
         1.14180070e-10],
        [1.07016605e-10, 9.99999998e-01, 5.30375920e-11, 4.27324159e-11,
         1.08091065e-10, 5.75327717e-11, 6.31866911e-11, 6.63936830e-11,
         1.07180177e-10, 6.68407713e-11, 1.04969196e-09, 6.47728835e-11,
         1.14642196e-10, 6.97352608e-11, 8.01425994e-11, 6.45545222e-11,
         6.30278202e-11],
        [1.38614245e-10, 8.89906980e-11, 9.99999997e-01, 1.70909650e-10,
         1.20425320e-10, 9.52899085e-11, 5.45364363e-11, 1.05685931e-10,
         5.02997406e-11, 9.23422070e-11, 8.01234848e-11, 7.74441529e-11,
         8.80059879e-11, 1.00198824e-10, 3.61054977e-11, 2.01817976e-09,
         9.68426469e-11],
        [8.72545119e-11, 1.23086498e-10, 3.625