In [10]:
import numpy as np
import sys
import time
import os
import argparse
import csv
from pathlib import Path
from os.path import dirname, join
import tensors.synthetic_tensors as synthetic_tensors
import argparse
import arg_defs as arg_defs
import csv
import CPD.NLS
import backend.numpy_ext as tenpy
from CPD.NLS import CP_fastNLS_Optimizer
from CPD.common_kernels import get_residual_sp, get_residual
from CPD.standard_ALS import CP_DTALS_Optimizer, CP_PPALS_Optimizer, CP_partialPPALS_Optimizer
from CPD.lowr_ALS import CP_DTLRALS_Optimizer


import CPD.common_kernels as ck

In [11]:
from CPD.common_kernels import compute_number_of_variables, flatten_Tensor, reshape_into_matrices, solve_sys, get_residual
from scipy.sparse.linalg import LinearOperator
from CPD.standard_ALS import CP_DTALS_Optimizer
 

import scipy.sparse.linalg as spsalg
import numpy as np
import time

try:
    import Queue as queue
except ImportError:
    import queue

In [39]:
def fast_hessian_contract2(tenpy,X,A,gamma,regu=1):
    N = len(A)
    ret = []
    
    gammaA01 = tenpy.einsum("rz,jr->jrz",gamma[0][1],A[0])
    gammaA02 = tenpy.einsum("rz,jr->jrz",gamma[0][2],A[0])
    gammaA12 = tenpy.einsum("rz,jr->jrz",gamma[1][2],A[1])
    
    ret.append(tenpy.zeros(A[0].shape))
    
    tenpy.einsum("iz,zr->ir",X[0],gamma[0][0])
    
    B = tenpy.einsum("jr,jz->rz",A[1],X[1])
    ret[0]+= tenpy.einsum("irz,rz->ir",gammaA01,B) 
    
    B = tenpy.einsum("jr,jz->rz",A[2],X[2])
    ret[0]+=tenpy.einsum("irz,rz->ir",gammaA02,B)
    #-----------------------------------------------------
    
    ret.append(tenpy.zeros(A[1].shape))
    
    ret[1] = tenpy.einsum("iz,zr->ir",X[1],gamma[1][1])
    
    ret[1]+= tenpy.einsum("jrz,jz,iz->ir",gammaA01,X[0],A[1])
    
    B = tenpy.einsum("jr,jz->rz",A[2],X[2])
    ret[1]+= tenpy.einsum("irz,rz->ir",gammaA12,B)
    
    #------------------------------------
    
    ret.append(tenpy.zeros(A[2].shape))
    
    ret[2] = tenpy.einsum("iz,zr->ir",X[2],gamma[2][2])
    
    ret[2]+=tenpy.einsum("jrz,jz,iz->ir",gammaA02,X[0],A[2])
    
    ret[2]+= tenpy.einsum("jrz,jz,iz->ir",gammaA12,X[1],A[2])
    
    for n in range(N):
        ret[n] += regu*X[n]

    return ret

In [92]:
def fast_hessian_contract(tenpy,X,A,gamma,regu=1):
    N = len(A)
    ret = []
    for n in range(N):
        ret.append(tenpy.zeros(A[n].shape))
        for p in range(N):
            M = gamma[n][p]
            if n==p:
                ret[n] += tenpy.einsum("iz,zr->ir",X[p],M)
            else:
                time0 = time.time()
                B = tenpy.einsum("jr,jz->rz",A[p],X[p])
                time1 = time.time()
                ret[n] += tenpy.einsum("iz,zr,rz->ir",A[n],M,B)
                time2 = time.time()
                #tenpy.printf("compute B took ",time1-time0," seconds.")
                #tenpy.printf("compute ret took ",time2-time1,"seconds.")
                #Y = tenpy.einsum("iz,zr,jr,jz->ir",A[n],M,A[p],X[p])

        ret[n] += regu*X[n]

    return ret

def fast_block_diag_precondition(tenpy,X,P):
    N = len(X)
    ret = []
    for i in range(N):
        Y = tenpy.solve_tri(P[i], X[i], True, False, True)
        Y = tenpy.solve_tri(P[i], Y, True, False, False)
        ret.append(Y)
    return ret

class CP_fastNLS_Optimizer():
    """Fast Nonlinear Least Square Method for CP is a novel method of
    computing the CP decomposition of a tensor by utilizing tensor contractions
    and preconditioned conjugate gradient to speed up the process of solving
    damped Gauss-Newton problem of CP decomposition.
    """

    def __init__(self,tenpy,T,A,cg_tol=1e-4,num=1,args=None):
        self.tenpy = tenpy
        self.T = T
        self.A = A
        self.cg_tol = cg_tol
        self.num=num
        self.G = None
        self.gamma = None
        #self.side_length = get_side_length(A)
        #self.last_step = tenpy.zeros((compute_sum_side_length(A),A[0].shape[1]))
        self.atol = 0
        self.total_iters = 0


    def _einstr_builder(self,M,s,ii):
        ci = ""
        nd = M.ndim
        if len(s) != 1:
            ci ="R"
            nd = M.ndim-1

        str1 = "".join([chr(ord('a')+j) for j in range(nd)])+ci
        str2 = (chr(ord('a')+ii))+"R"
        str3 = "".join([chr(ord('a')+j) for j in range(nd) if j != ii])+"R"
        einstr = str1 + "," + str2 + "->" + str3
        return einstr

    def compute_G(self):
        G = []
        for i in range(len(self.A)):
            G.append(self.tenpy.einsum("ij,ik->jk",self.A[i],self.A[i]))
        self.G = G

    def compute_coefficient_matrix(self,n1,n2):
        ret = self.tenpy.ones(self.G[0].shape)
        for i in range(len(self.G)):
            if i!=n1 and i!=n2:
                ret = self.tenpy.einsum("ij,ij->ij",ret,self.G[i])
        return ret

    def compute_gamma(self):
        N = len(self.A)
        result = []
        for i in range(N):
            result.append([])
            for j in range(N):
                if j>=i:
                    M = self.compute_coefficient_matrix(i,j)
                    result[i].append(M)
                else:
                    M = result[j][i]
                    result[i].append(M)
        self.gamma = result

    def compute_block_diag_preconditioner(self,Regu):
        P = []
        for i in range(len(self.A)):
            n = self.gamma[i][i].shape[0]
            P.append(self.tenpy.cholesky(self.gamma[i][i]+Regu*self.tenpy.eye(n)))
        return P


    def gradient(self):
        grad = []
        q = queue.Queue()
        for i in range(len(self.A)):
            q.put(i)
        s = [(list(range(len(self.A))),self.T)]
        while not q.empty():
            i = q.get()
            while i not in s[-1][0]:
                s.pop()
                assert(len(s) >= 1)
            while len(s[-1][0]) != 1:
                M = s[-1][1]
                idx = s[-1][0].index(i)
                ii = len(s[-1][0])-1
                if idx == len(s[-1][0])-1:
                    ii = len(s[-1][0])-2

                einstr = self._einstr_builder(M,s,ii)

                N = self.tenpy.einsum(einstr,M,self.A[ii])

                ss = s[-1][0][:]
                ss.remove(ii)
                s.append((ss,N))
            M = s[-1][1]
            g = -1*M + self.A[i].dot(self.gamma[i][i])
            grad.append(g)
        return grad

    def power_method(self,l,iter=1):
        for i in range(iter):
            l = fast_hessian_contract(self.tenpy,l,self.A,self.gamma,0)
            a = self.tenpy.list_vecnorm(l)
            l = self.tenpy.scalar_mul(1/a,l)
        return l

    def rayleigh_quotient(self,l):
        a = self.tenpy.mult_lists(l,fast_hessian_contract(self.tenpy,l,self.A,self.gamma,0))
        b = self.tenpy.list_vecnormsq(l)
        return a/b


    def create_fast_hessian_contract_LinOp(self,Regu):
        num_var = compute_number_of_variables(self.A)
        A = self.A
        gamma = self.gamma
        tenpy = self.tenpy
        template = self.A

        def mv(delta):
            delta = reshape_into_matrices(tenpy,delta,template)
            result = fast_hessian_contract(tenpy,delta,A,gamma,Regu)
            vec = flatten_Tensor(tenpy,result)
            return vec

        V = LinearOperator(shape = (num_var,num_var), matvec=mv)
        return V

    def matvec(self,Regu,delta):
        #t = ctf.time("total fast hessian multiplication")
        t0 = time.time()
        #t.start()
        A = self.A
        gamma = self.gamma
        tenpy = self.tenpy
        template = self.A
        result = fast_hessian_contract(tenpy,delta,A,gamma,Regu)
        t1 = time.time()
        #t.end()
        #self.tenpy.printf("fast hessian contract took ",t1-t0,"seconds.")
        return result

    def fast_conjugate_gradient(self,g,Regu):

        x = [self.tenpy.zeros(A.shape) for A in g]

        tol = np.max([self.atol,self.cg_tol*self.tenpy.list_vecnorm(g)])


        r = self.tenpy.list_add(self.tenpy.scalar_mul(-1,g), self.tenpy.scalar_mul(-1,self.matvec(Regu,x)))

        if self.tenpy.list_vecnorm(r)<tol:
            return x
        p = r
        counter = 0

        while True:
            mv = self.matvec(Regu,p)

            alpha = self.tenpy.list_vecnormsq(r)/self.tenpy.mult_lists(p,mv)

            x = self.tenpy.list_add(x,self.tenpy.scalar_mul(alpha,p))

            r_new = self.tenpy.list_add(r, self.tenpy.scalar_mul(-1,self.tenpy.scalar_mul(alpha,mv)))

            if self.tenpy.list_vecnorm(r_new)<tol:
                break
            beta = self.tenpy.list_vecnormsq(r_new)/self.tenpy.list_vecnormsq(r)

            p = self.tenpy.list_add(r_new, self.tenpy.scalar_mul(beta,p))
            r = r_new
            counter += 1


        return x,counter

    def fast_precond_conjugate_gradient(self,g,P,Regu):
        x = [self.tenpy.zeros(A.shape) for A in g]

        tol = np.max([self.atol,self.cg_tol*self.tenpy.list_vecnorm(g)])

        r = self.tenpy.list_add(self.tenpy.scalar_mul(-1,g), self.tenpy.scalar_mul(-1,self.matvec(Regu,x)))

        if self.tenpy.list_vecnorm(r)<tol:
            return x

        z = fast_block_diag_precondition(self.tenpy,r,P)

        p = z

        counter = 0
        while True:
            mv = self.matvec(Regu,p)

            mul = self.tenpy.mult_lists(r,z)

            alpha = mul/self.tenpy.mult_lists(p,mv)

            x =self.tenpy.list_add(x,self.tenpy.scalar_mul(alpha,p))

            r_new = self.tenpy.list_add(r, self.tenpy.scalar_mul(-1,self.tenpy.scalar_mul(alpha,mv)))

            if self.tenpy.list_vecnorm(r_new)<tol:
                break

            z_new = fast_block_diag_precondition(self.tenpy,r_new,P)

            #if formula == "PR":
            #    beta = np.inner(z_new,r_new-r)/np.inner(z,r)
            #else:
            beta = self.tenpy.mult_lists(r_new,z_new)/mul

            p = self.tenpy.list_add(z_new, self.tenpy.scalar_mul(beta,p))

            r = r_new
            z = z_new
            counter += 1
            
            if counter>=8:
                break
        return x,counter

    def create_block_precondition_LinOp(self,P):
        num_var = compute_number_of_variables(self.A)
        tenpy = self.tenpy
        template = self.A

        def mv(delta):

            delta = reshape_into_matrices(tenpy,delta,template)
            result = fast_block_diag_precondition(tenpy,delta,P)
            vec = flatten_Tensor(tenpy,result)
            return vec

        V = LinearOperator(shape = (num_var,num_var), matvec=mv)
        return V



    def update_A(self,delta):
        for i in range(len(delta)):
            self.A[i] += delta[i]



    def step2(self,Regu):
        self.compute_G()
        self.compute_gamma()
        g= self.gradient()

        P = self.compute_block_diag_preconditioner(Regu)

        #[delta,counter] = self.fast_conjugate_gradient(g,Regu)

        [delta,counter] = self.fast_precond_conjugate_gradient(g,P,Regu)
        
        self.total_iters+= counter
        self.atol = self.num*self.tenpy.list_vecnorm(delta)
        self.tenpy.printf('cg iterations:',counter)
        self.update_A(delta)
        
        self.tenpy.printf("total cg iterations",self.total_iters)
        
        return [self.A,self.total_iters]



    def step(self,Regu):
        
        def cg_call(v):
            self.total_iters+=1

        self.compute_G()
        self.compute_gamma()
        #l = self.power_method([self.tenpy.random(M.shape) for M in self.A])
        #L2 = self.rayleigh_quotient(l)
        #print("L2 norm of hessian is ",L2)
        g = flatten_Tensor(self.tenpy,self.gradient())
        mult_LinOp = self.create_fast_hessian_contract_LinOp(Regu)
        P = self.compute_block_diag_preconditioner(Regu)
        precondition_LinOp = self.create_block_precondition_LinOp(P)
        [delta,_] = spsalg.cg(mult_LinOp,-1*g,tol=self.cg_tol,M=precondition_LinOp,callback=cg_call,atol=self.atol)
        #[delta,_] = spsalg.cg(mult_LinOp,-1*g,tol=self.cg_tol,callback=cg_call,atol=self.atol)
        self.atol = self.num*self.tenpy.norm(delta)
        delta = reshape_into_matrices(self.tenpy,delta,self.A)
        self.update_A(delta)
        self.tenpy.printf('total cg iterations:',self.total_iters)
        
        return [self.A,self.total_iters]

In [93]:
tenpy.seed(42)
s = 100

R = 100

a = tenpy.random((s,R)) - tenpy.random((s,R))
b = tenpy.random((s,R)) - tenpy.random((s,R))
c = tenpy.random((s,R)) - tenpy.random((s,R))
T = np.einsum('ia,ja,ka->ijk',a,b,c)



In [94]:
print(T)

[[[-1.23782262e-01  1.70018383e-03 -7.60734669e-01 ... -6.27622095e-01
    2.24473876e-01 -1.20189957e-01]
  [ 8.56233481e-01 -9.64833731e-01 -1.08161325e-02 ... -7.35449071e-01
   -8.24752905e-01 -4.65124454e-01]
  [-1.22790037e-01 -6.57875372e-01  2.01479901e-01 ... -1.33336196e+00
    4.83237255e-01 -1.51053821e-01]
  ...
  [-7.94529338e-01  8.78881769e-01 -3.85457797e-01 ... -2.16357405e-01
   -3.45340667e-01 -2.38581965e-01]
  [ 5.73022597e-02 -4.95457507e-01 -3.83596607e-01 ... -2.80684395e-01
   -2.17585214e-01  2.17871129e-01]
  [-2.51641662e-01  4.81965521e-02  1.20703488e+00 ... -6.15998575e-01
   -3.66660657e-01 -1.66160921e-01]]

 [[ 5.44505328e-01  9.91906867e-02  6.16638136e-01 ...  5.20601371e-02
   -6.37745286e-01 -1.40607270e-01]
  [ 1.86402552e-01 -1.23861576e+00 -1.94796452e+00 ...  5.53060252e-01
   -5.04673976e-01  2.19289424e-01]
  [-2.48556453e-01 -8.54962460e-01  1.19687559e+00 ... -1.13932181e-01
    3.61263383e-01 -3.43539223e-01]
  ...
  [-1.46976093e+00  6.1

In [95]:
tenpy.seed(40)
A = tenpy.random((s,R)) 
B = tenpy.random((s,R)) 
C = tenpy.random((s,R)) 

X = [A,B,C]

In [96]:
opt = CP_fastNLS_Optimizer(tenpy,T,X,cg_tol=1e-3,num=0)

Regu = 1

In [97]:
decrease= True
increase=False
total_iters = 0

flag = False

normT = tenpy.vecnorm(T)

for i in range(1):
        start = time.time()
        [delta,iters] = opt.step2(Regu)
        total_iters+= iters
        end = time.time()
        
        print("Comp took :",end-start)

        res = ck.get_residual3(tenpy,T,X[0],X[1],X[2])
        fitness = 1-res/normT

        if fitness > 0.999:
                flag = True
            
        if flag:   
            Regu = 1e-05

        else:
            if Regu < 1e-05:
                increase=True
                decrease=False

            if Regu > 1e-01:
                decrease= True
                increase=False



        if increase:
            Regu = Regu*2

        elif decrease:
            Regu = Regu/2
print(res)

('cg iterations:', 0)
('total cg iterations', 0)
Comp took : 0.8233845233917236
('Residual computation took', 0.28975629806518555, 'seconds')
3795.3217445301902


In [None]:
7.3