In [2]:
import numpy as np
import numpy.linalg as la
import scipy.linalg as sla
import tensors.synthetic_tensors as synthetic_tensors
import backend.numpy_ext as tenpy
from CPD.common_kernels import get_residual, compute_lin_sys

import matplotlib.pyplot as plt

# Naive NLS Implementation

In [3]:
def jacobian3(A):
    """Build the dense Jacobian of the order-3 CP reconstruction.

    The reconstruction is ``einsum("ir,jr,kr->ijk", A[0], A[1], A[2])``
    flattened in row-major order.  Columns are grouped by mode first and by
    rank-one component second: the ``s`` columns starting at ``i*s*R + j*s``
    hold the derivatives with respect to ``A[i][:, j]``.  (The residual
    ``T - reconstruction`` therefore has Jacobian ``-J``.)

    All sizes are taken from ``A[0]`` instead of notebook-level globals, so
    the function does not depend on which cells were executed before it.

    Parameters
    ----------
    A : sequence of three 2-D arrays
        Factor matrices; all three are expected to share the shape ``(s, R)``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(s**3, 3*s*R)``.
    """
    order = 3  # the three Kronecker patterns below are specific to order 3
    s, R = A[0].shape
    J = np.zeros((s**order, order*s*R))
    eye = np.identity(s)
    for j in range(R):
        a0, a1, a2 = A[0][:, j], A[1][:, j], A[2][:, j]
        # One (s, s**3) pattern per mode: the identity sits in the slot of the
        # mode being differentiated, the other two slots carry the columns.
        patterns = (
            np.kron(eye, np.kron(a1, a2)),
            np.kron(a0, np.kron(eye, a2)),
            np.kron(a0, np.kron(a1, eye)),
        )
        for i, pattern in enumerate(patterns):
            start = i*s*R + j*s
            J[:, start:start + s] = pattern.T
    return J

def F(T,A):
    """Return the flattened residual between T and its CP reconstruction.

    The reconstruction is the sum over r of the outer products of the r-th
    columns of A[0], A[1] and A[2]; the result is ``T - reconstruction``
    reshaped to one dimension.
    """
    first, second, third = A[0], A[1], A[2]
    reconstruction = np.einsum("ir,jr,kr->ijk", first, second, third)
    residual = T - reconstruction
    return residual.reshape(-1)


In [4]:
def gradient(T,A):
    """Assemble the flattened gradient with respect to the three factors.

    For each mode the block is ``-M + A[mode] . S`` where ``M`` is the tensor
    contracted with the other two factors and ``S`` comes from
    ``compute_lin_sys`` applied to the other two factors with zero
    regularization.
    NOTE(review): presumably ``compute_lin_sys(tenpy, X, Y, 0)`` returns the
    element-wise product of the Gram matrices of X and Y, which would make
    this the gradient of ``0.5*||T - reconstruction||^2`` - confirm against
    CPD.common_kernels.

    The blocks are packed mode by mode and, inside a mode, column by column,
    i.e. the same layout ``flatten_A`` uses.  The packing reads the sizes from
    the blocks themselves instead of the notebook globals ``order``/``s``/``R``.

    Parameters
    ----------
    T : order-3 tensor
    A : sequence of three factor matrices

    Returns
    -------
    numpy.ndarray
        One-dimensional float64 array holding the three gradient blocks.
    """
    # Contract the last mode once; both the mode-0 and mode-1 terms reuse it.
    TC = tenpy.einsum("ijk,ka->ija",T,A[2])
    M1 = tenpy.einsum("ija,ja->ia",TC,A[1])
    M2 = tenpy.einsum("ija,ia->ja",TC,A[0])
    M3 = tenpy.einsum("ijk,ia,ja->ka",T,A[0],A[1])

    G = [
        -1*M1 + np.dot(A[0],compute_lin_sys(tenpy,A[1],A[2],0)),
        -1*M2 + np.dot(A[1],compute_lin_sys(tenpy,A[0],A[2],0)),
        -1*M3 + np.dot(A[2],compute_lin_sys(tenpy,A[0],A[1],0)),
    ]

    # Column-major flattening puts column j of a block at [j*s, (j+1)*s).
    g = np.concatenate([np.ravel(block, order="F") for block in G])
    return g.astype(np.float64, copy=False)

In [5]:
def flatten_A(A):
    """Pack a list of factor matrices into one flat vector.

    Factors are laid out one after another; inside a factor the columns are
    stored consecutively, so for factors of shape ``(s, R)`` column ``j`` of
    factor ``i`` occupies ``[i*s*R + j*s, i*s*R + (j+1)*s)``.

    Sizes are taken from the matrices themselves rather than from the
    notebook globals ``order``/``s``/``R``.

    Parameters
    ----------
    A : sequence of 2-D arrays

    Returns
    -------
    numpy.ndarray
        One-dimensional float64 array (a fresh copy, never a view of ``A``).
    """
    # order="F" walks each matrix column by column, matching the layout above.
    flat = np.concatenate([np.ravel(factor, order="F") for factor in A])
    return flat.astype(np.float64, copy=False)

def update_A(A,x):
    """Add a flat update vector to the factor matrices, in place.

    ``x`` uses the layout produced by ``flatten_A``: factors follow one
    another and each factor is stored column by column.  Sizes are read from
    the factors themselves rather than from notebook globals.

    Parameters
    ----------
    A : sequence of 2-D arrays
        Modified in place.
    x : one-dimensional array-like
        Must hold at least as many entries as all factors together.

    Returns
    -------
    None
    """
    offset = 0
    for factor in A:
        s, R = factor.shape
        chunk = x[offset:offset + s*R]
        # Row j of the (R, s) view is column j of the factor, hence the transpose.
        factor += np.reshape(chunk, (R, s)).T
        offset += s*R

# Fast NLS Implementation with Block Matrix-Vector Multiplication

In [5]:
# fast Hessian approximation by Gauss-Newton
def compute_coeff(G,n1,r1,n2,r2):
    """Multiply the (r1, r2) entries of every matrix in G except modes n1 and n2."""
    picked = []
    for mode, gram in enumerate(G):
        if mode == n1 or mode == n2:
            continue
        picked.append(gram[r1,r2])
    return np.prod(picked)
        
def compute_block(A,G,n1,r1,n2,r2):
    """Return one dense block of the Gauss-Newton matrix.

    The block couples column r1 of factor n1 (rows) with column r2 of factor
    n2 (columns).

    * Same mode: a scaled identity.  The identity is sized from ``A[n1]``
      (previously always from ``A[0]``), so the block has the right size even
      when the factors do not all have the same number of rows.
    * Different modes: a scaled outer product of ``A[n1][:, r2]`` and
      ``A[n2][:, r1]``.

    The scale is ``compute_coeff(G, n1, r1, n2, r2)``.
    """
    coeff = compute_coeff(G,n1,r1,n2,r2)
    if n1 == n2:
        return coeff*np.identity(A[n1].shape[0])
    return coeff*np.outer(A[n1][:,r2],A[n2][:,r1])

def fast_hessian3(A):
    """Assemble the dense Gauss-Newton matrix block by block.

    Each ``s x s`` block comes from ``compute_block`` and is addressed with
    the same mode-major, column-minor layout as ``flatten_A``.  No damping
    term is added.

    The number of modes and the sizes ``s`` and ``R`` are read from ``A``
    instead of notebook globals, and one Gram matrix is built per factor
    rather than exactly three.

    Parameters
    ----------
    A : sequence of 2-D arrays, all expected to share the shape ``(s, R)``

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(A)*s*R, len(A)*s*R)``.
    """
    order = len(A)
    s, R = A[0].shape
    G = [factor.T.dot(factor) for factor in A]
    N = order*s*R
    hessian = np.zeros((N,N))

    for n1 in range(order):
        for r1 in range(R):
            startv = (n1*R + r1)*s
            endv = startv + s
            for n2 in range(order):
                for r2 in range(R):
                    starth = (n2*R + r2)*s
                    endh = starth + s
                    hessian[startv:endv,starth:endh] = compute_block(A,G,n1,r1,n2,r2)
    return hessian

def compute_result_block(A,G,n1,r1,n2,r2,x):
    """Apply one Gauss-Newton block to the vector slice x without forming it.

    Same mode: the slice is scaled by the coefficient.  Different modes: the
    slice is projected onto ``A[n2][:, r1]``, scaled, and the result is a
    multiple of ``A[n1][:, r2]``.
    """
    coeff = compute_coeff(G,n1,r1,n2,r2)
    if n1 == n2:
        return coeff*x
    weight = coeff*np.inner(A[n2][:,r1],x)
    return weight*A[n1][:,r2]

def fast_hessian3_mult(A,x,regu=1):
    """Multiply the damped Gauss-Newton matrix by x without assembling it.

    Computes ``regu*x`` plus the sum of every block applied to the matching
    slice of ``x`` (via ``compute_result_block``).  ``x`` uses the layout of
    ``flatten_A`` and is not modified.

    The number of modes and the sizes ``s`` and ``R`` are read from ``A``
    instead of notebook globals.

    Parameters
    ----------
    A : sequence of 2-D arrays, all expected to share the shape ``(s, R)``
    x : one-dimensional array of length ``len(A)*s*R``
    regu : scalar, optional
        Weight of the identity (damping) term; defaults to 1.

    Returns
    -------
    numpy.ndarray
        The product, same length as ``x``.
    """
    order = len(A)
    s, R = A[0].shape
    G = [factor.T.dot(factor) for factor in A]
    # regu*x allocates a new array, so the accumulation below never touches x.
    ret = regu*x

    for n1 in range(order):
        for r1 in range(R):
            startv = (n1*R + r1)*s
            endv = startv + s
            for n2 in range(order):
                for r2 in range(R):
                    starth = (n2*R + r2)*s
                    endh = starth + s
                    ret[startv:endv] += compute_result_block(A,G,n1,r1,n2,r2,x[starth:endh])
    return ret


In [6]:
def conjugate_gradient(A,x,b,tol=1e-5,max_iter=None):
    """Solve ``A x = b`` with the conjugate gradient method.

    Parameters
    ----------
    A : matrix-like with a ``dot`` method (expected symmetric positive definite)
    x : numpy.ndarray
        Initial guess.  It is updated in place and is also the array returned.
    b : numpy.ndarray
        Right-hand side.
    tol : float, optional
        Stop once the 2-norm of the residual drops below this value.
    max_iter : int, optional
        Upper bound on the number of direction updates.  Defaults to
        ``10*len(b)``; without a bound a stalled or broken-down iteration
        would loop forever.

    Returns
    -------
    (x, counter) : tuple
        The solution and the number of completed direction updates (the
        iteration that reaches ``tol`` is not counted).  The pair is returned
        on every path, including when the initial guess already satisfies
        ``tol`` (``counter`` is 0 then), so callers can always unpack it.
    """
    r = b - A.dot(x)
    if la.norm(r)<tol:
        return x,0
    if max_iter is None:
        max_iter = 10*len(b)
    p = r
    rr = np.inner(r,r)
    counter = 0
    converged = False
    while counter < max_iter:
        Ap = A.dot(p)  # reused for both the step length and the residual update
        alpha = rr/np.inner(p,Ap)
        x += alpha*p
        r = r - alpha*Ap
        if la.norm(r)<tol:
            converged = True
            break
        rr_new = np.inner(r,r)
        p = r + (rr_new/rr)*p
        rr = rr_new
        counter += 1
    if not converged:
        print("conjugate gradient stopped at the iteration cap of ",max_iter," without reaching tol.")
    print("conjugate gradient took ",counter," iteration(s).")
    return x,counter

def preconditioned_conjugate_gradient(A,x,b,M,tol=1e-5,formula="PR",max_iter=None):
    """Solve ``A x = b`` with preconditioned conjugate gradients.

    Parameters
    ----------
    A : matrix-like with a ``dot`` method (expected symmetric positive definite)
    x : numpy.ndarray
        Initial guess.  It is updated in place and is also the array returned.
    b : numpy.ndarray
        Right-hand side.
    M : matrix-like with a ``dot`` method
        Preconditioner applied as ``M.dot(r)``, i.e. an approximate inverse of A.
    tol : float, optional
        Stop once the 2-norm of the residual drops below this value.
    formula : str, optional
        ``"PR"`` uses ``z_new.(r_new - r) / z.r`` for the direction update
        weight; any other value uses ``z_new.r_new / z.r``.
    max_iter : int, optional
        Upper bound on the number of direction updates.  Defaults to
        ``10*len(b)``; without a bound a stalled or broken-down iteration
        would loop forever.

    Returns
    -------
    (x, counter) : tuple
        The solution and the number of completed direction updates (the
        iteration that reaches ``tol`` is not counted).  The pair is returned
        on every path, including when the initial guess already satisfies
        ``tol`` (``counter`` is 0 then), so callers can always unpack it.
    """
    r = b - A.dot(x)
    if la.norm(r)<tol:
        return x,0
    if max_iter is None:
        max_iter = 10*len(b)
    z = M.dot(r)
    p = z
    counter = 0
    converged = False
    while counter < max_iter:
        Ap = A.dot(p)  # reused for both the step length and the residual update
        rz = np.inner(r,z)
        alpha = rz/np.inner(p,Ap)
        x += alpha*p
        r_new = r - alpha*Ap
        if la.norm(r_new)<tol:
            converged = True
            break
        z_new = M.dot(r_new)
        if formula == "PR":
            beta = np.inner(z_new,r_new-r)/rz
        else:
            beta = np.inner(z_new,r_new)/rz
        p = z_new + beta*p
        r = r_new
        z = z_new
        counter += 1
    if not converged:
        print("conjugate gradient stopped at the iteration cap of ",max_iter," without reaching tol.")
    print("conjugate gradient took ",counter," iteration(s).")
    return x,counter


def fast_conjugate_gradient():
    """Placeholder: no solver is implemented yet; always returns None."""
    return None

In [7]:
def naive_block_preconditioner(H,order,stride):
    """Return the explicit inverse of the block-diagonal part of H.

    The first ``order`` diagonal blocks of size ``stride`` are copied out of
    H, everything else is zero.  That matrix is Cholesky-factorised and
    inverted with two triangular solves against the identity.
    """
    size = H.shape[0]
    block_diag = np.zeros(H.shape)
    for k in range(order):
        window = slice(k*stride, k*stride + stride)
        block_diag[window, window] = H[window, window]
    lower = la.cholesky(block_diag)
    # lower^{-1}, then lower^{-T} . lower^{-1} = block_diag^{-1}
    lower_inv = sla.solve_triangular(lower,np.identity(size),trans=0,lower=True)
    inverse = sla.solve_triangular(lower,lower_inv,trans=1,lower=True)
    return inverse

def naive_block_preconditioner2(G):
    """Unfinished sketch of a Kronecker-structured block preconditioner.

    Only the work arrays are allocated (sized from the notebook globals
    ``order``, ``s`` and ``R``); nothing is computed and None is returned.
    """
    n = s*R
    N = order*n
    ret = np.zeros((N,N))
    # X should be indexed by n1,r1,n2,r2; n: range(R), r: range(s)
    X = np.zeros((n,n))
    I = np.eye(s)

    # Remaining steps:
    #   1. form X
    #   2. do Cholesky on X
    #   3. invert the two factors by triangular solves
    #   4. form the inverse by Kronecker product
    return None

In [10]:
# Problem setup for the naive NLS experiment on an order-3 CP decomposition.
# These names are module-level state: other cells read them as globals, so
# this cell has to run before any of the solver cells.
order = 3        # number of tensor modes
s = 4            # size argument handed to init_rand; presumably the length of every mode - confirm
R = 6            # number of columns of each factor matrix
sp_frac = 1      # forwarded to init_rand; presumably a sparsity fraction - confirm
iteration = 20   # number of outer iterations run by the driver cell

# Synthetic target tensor T; the second return value O is not used in this notebook.
[T,O] = synthetic_tensors.init_rand(tenpy,order,s,R,sp_frac)
# Random starting factors, one (T.shape[i], R) matrix per mode.
# NOTE(review): no random seed is set, so every run starts from different factors.
A = []
for i in range(T.ndim):
    A.append(tenpy.random((T.shape[i],R)))

# Row count used by jacobian3; equals the number of tensor entries if T has shape (s,)*order.
Q = s**order

In [11]:
# Naive NLS driver: every outer iteration builds the damped matrix
# J^T J + regu*I explicitly and solves for the step with block-preconditioned CG.
res = get_residual(tenpy,T,A)
print("Start residual is ",res)
# x is handed to the CG solver as its starting point and the solver updates it
# in place, so the first solve starts from the flattened factors and every
# later solve starts from the previous step.
# NOTE(review): a zero initial guess would be the conventional choice - confirm intent.
x = flatten_A(A)
print("x shape is",x.shape)
a = 0  # running total of inner CG iterations
for i in range(iteration):
    J = jacobian3(A)
    # Alternative right-hand side via the explicit residual (see b below).
    #f = F(T,A)
    JT = np.transpose(J)
    regu = 1/(i+1)  # damping weight, shrinks as 1/(i+1)
    H = np.dot(JT,J) + regu*np.identity(J.shape[1])
    # Debug aid: show which entries of H are (numerically) zero.
    #plt.matshow(np.isclose(H,0))
    #plt.show()
    # Alternative: diagonal preconditioner.
    #M = np.diag(1/np.diag(H))
    # NOTE(review): fast_hessian3 is defined with a single parameter, so this
    # call would need a signature change before it can be re-enabled.
    #H = fast_hessian3(A,regu)
    
    # Right-hand side of the step equation: the negated gradient.
    b = -1*gradient(T,A)
    print("[",i,"] iteration gradient norm is ",la.norm(b))
    # Inverse of the block-diagonal part of H, one block of size s*R per mode.
    M = naive_block_preconditioner(H,order,s*R)
    # Alternative solvers kept for reference: explicit J^T f right-hand side,
    # Cholesky with two triangular solves, a dense solve, and plain CG.
    #b = np.dot(JT,f)
    #L = la.cholesky(H)
    #y = sla.solve_triangular(L,b,trans=0,lower=True)
    #x = sla.solve_triangular(L,y,trans=1,lower=True)
    #x = la.solve(H,b)
    #x,c = conjugate_gradient(H,x,b)
    x,c = preconditioned_conjugate_gradient(H,x,b,M)
    a += c
    # Apply the step to the factor matrices in place.
    update_A(A,x)
    res = get_residual(tenpy,T,A)
    print("[",i,"] iteration residual is ",res)
print("Total number of CG iterations is ",a)

('Residual computation took', 0.00011730194091796875, 'seconds')
Start residual is  4.264598812881373
x shape is (72,)
[ 0 ] iteration gradient norm is  20.276611145438107
conjugate gradient took  24  iteration(s).
('Residual computation took', 8.368492126464844e-05, 'seconds')
[ 0 ] iteration residual is  1.1480816000247636
[ 1 ] iteration gradient norm is  2.4158674719623385
conjugate gradient took  25  iteration(s).
('Residual computation took', 8.559226989746094e-05, 'seconds')
[ 1 ] iteration residual is  0.5340810789357322
[ 2 ] iteration gradient norm is  0.3939618372231854
conjugate gradient took  27  iteration(s).
('Residual computation took', 8.869171142578125e-05, 'seconds')
[ 2 ] iteration residual is  0.2997238056422972
[ 3 ] iteration gradient norm is  0.16141677546777994
conjugate gradient took  30  iteration(s).
('Residual computation took', 7.081031799316406e-05, 'seconds')
[ 3 ] iteration residual is  0.22149628549548708
[ 4 ] iteration gradient norm is  0.06289943695

# Fast NLS Implementation with Tensor Contraction

In [15]:
def compute_coefficient_matrix(G,n1,n2):
    """Element-wise product of every matrix in G except those of modes n1 and n2.

    Starts from a matrix of ones shaped like ``G[0]``, so the result is all
    ones when no matrix is left after the exclusion.
    """
    coeff = np.ones(G[0].shape)
    for mode, gram in enumerate(G):
        if mode == n1 or mode == n2:
            continue
        coeff = np.einsum("ij,ij->ij",coeff,gram)
    return coeff

def fast_hessian_contract(A,X):
    """Apply the Gauss-Newton matrix to X using small contractions only.

    ``A`` and ``X`` are lists with one matrix per mode; the result is a list
    of the same length whose n-th entry is the sum over p of the
    contribution of ``X[p]`` to mode n.  No damping term is added.
    """
    num_modes = len(A)
    ## Preprocessing step: should be moved outside of contraction
    grams = [factor.T.dot(factor) for factor in A]

    result = []
    for n in range(num_modes):
        total = None
        for p in range(num_modes):
            ## Computation of the coefficient matrix should be done outside of contraction
            coeff = compute_coefficient_matrix(grams,n,p)
            if n != p:
                overlap = np.einsum("jr,jz->rz",A[p],X[p])
                term = np.einsum("iz,zr,rz->ir",A[n],coeff,overlap)
            else:
                term = np.einsum("iz,zr->ir",X[p],coeff)
            # The first term becomes the accumulator; later ones are added in place.
            if total is None:
                total = term
            else:
                total += term
        result.append(total)
    return result

In [19]:
# Sanity check: on a random direction, the contraction-based product must
# agree with the explicit J^T J product (no damping term on either side).
# NOTE(review): the direction is drawn without a seed, so the inputs differ per run.
X = [np.random.random((s,R)) for i in range(order)]
x = flatten_A(X)
J = jacobian3(A)
JT = np.transpose(J)
H = np.dot(JT,J)
r1 = H.dot(x)  # reference: dense matrix-vector product
r2 = flatten_A(fast_hessian_contract(A,X))  # same product via contractions
print(np.isclose(r1,r2))  # element-wise agreement mask

[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True]
