In [1]:
import numpy as np
import torch as th

import time
import cProfile

In [2]:
# Notebook-wide precision for every tensor created in the cells below.
dtype = th.float32

# Prefer the selected GPU, but fall back to the CPU when CUDA is not available
# so the notebook still runs from a fresh kernel on a machine without a GPU.
gpuid = 0
device = th.device("cuda:"+ str(gpuid)) if th.cuda.is_available() else th.device("cpu")
#device = th.device("cpu")

print("Execution device: ",device)
print("PyTorch version: ", th.__version__ )
print("CUDA available: ", th.cuda.is_available())
print("CUDA version: ", th.version.cuda)
# Only query the device name when there is a CUDA device to ask about.
if th.cuda.is_available():
    print("CUDA device:", th.cuda.get_device_name(gpuid))

Execution device:  cuda:0
PyTorch version:  0.4.1
CUDA available:  True
CUDA version:  9.0.176
CUDA device: Graphics Device


In [787]:
# Utility functions

# return the lower triangle of A in column order i.e. vech(A)
def vech(A):
    """Stack the lower triangle of a square matrix column by column.

    Parameters
    ----------
    A : torch.Tensor
        Square 2-D tensor; only the entries on or below the diagonal are read.

    Returns
    -------
    torch.Tensor
        1-D tensor of length c*(c+1)/2 (with c = A.shape[0]) containing
        A[0,0], A[1,0], ..., A[c-1,0], A[1,1], ... in that order.  The result
        has the dtype and device of ``A``.
    """
    c = A.shape[0]
    # Allocate from A itself so the buffer shares A's dtype and device.  A
    # default-dtype CPU buffer would truncate float64 input to float32 and
    # force a host/device copy for every element.
    v = A.new_zeros(c * (c + 1) // 2)
    count = 0
    for j in range(c):
        for i in range(j, c):
            v[count] = A[i, j]
            count += 1
    # Return the filled buffer directly: re-wrapping it with th.tensor(...)
    # would make another copy and can drop the autograd history.
    return v

# vech2L   create lower triangular matrix L from vechA
def vech2L(v,n):
    """Build an n x n lower-triangular matrix from its column-stacked entries.

    Parameters
    ----------
    v : torch.Tensor
        1-D tensor with at least n*(n+1)/2 entries, ordered column by column
        (L[0,0], L[1,0], ..., L[n-1,0], L[1,1], ...).
    n : int
        Size of the square matrix to build.

    Returns
    -------
    torch.Tensor
        (n, n) tensor, zero above the diagonal, with the dtype and device
        of ``v``.
    """
    # Allocate from v so dtype and device match the input.  A default-dtype
    # CPU buffer would truncate float64 data and copy element by element
    # between devices.
    L = v.new_zeros((n, n))
    count = 0
    for j in range(n):
        for i in range(j, n):
            L[i, j] = v[count]
            count += 1
    # Return the buffer itself; th.tensor(L, ...) would copy it again and can
    # detach it from the autograd graph.
    return L

# batched vech2L input is "X" as V nb x n(n+1)/2
def bvech2L(V,nb,n):
    """Batched vech2L: build nb lower-triangular n x n matrices at once.

    Parameters
    ----------
    V : torch.Tensor
        Tensor whose last axis holds the column-stacked lower-triangle
        entries (n*(n+1)/2 per matrix); the notebook passes shape
        (nb, n*(n+1)/2).
    nb : int
        Number of matrices in the batch.
    n : int
        Size of each square matrix.

    Returns
    -------
    torch.Tensor
        (nb, n, n) tensor, zero above the diagonal, with the dtype and device
        of ``V``.
    """
    # Allocate from V so the result keeps V's dtype and device.  A
    # default-dtype CPU buffer would truncate float64 data and force a
    # cross-device copy for each of the n*(n+1)/2 column assignments.
    L = V.new_zeros((nb, n, n))
    count = 0
    for j in range(n):
        for i in range(j, n):
            # One assignment fills entry (i, j) of every matrix in the batch.
            L[..., i, j] = V[..., count]
            count += 1
    # Return the buffer itself; th.tensor(L, ...) would copy it again and can
    # detach it from the autograd graph.
    return L

In [893]:
n=4;
nb=2;
nn = int(n*(n+1)/2)

th.manual_seed(42)
x = th.randn(int(nb*n*(n+1)/2) , device=device, dtype=dtype)

In [894]:
# reshape non-linear variables for easier indexing
X = th.reshape(x[:nb*nn], (nb,nn))
#npX = X.detach().numpy()
L = th.zeros((nb,n,n), device=device, dtype=dtype)
#for i in range(nb):
    #L[i][np.tril_indices(n)]=npX[i,:]
#    L[i][:,:] = vech2L(X[i,:],n)
bL = bvech2L(X,nb,n)
#detL = th.abs(th.prod(th.diagonal(L, offset=0, dim1=-1, dim2=-2),1))
bA = th.matmul(bL,th.transpose(bL, 1, 2))

In [897]:
bL

tensor([[[ 0.2996,  0.0000,  0.0000,  0.0000],
         [ 0.2424, -0.0372,  0.0000,  0.0000],
         [ 0.2832,  1.0352,  0.0895,  0.0000],
         [-0.2329, -0.0101,  0.0384, -0.1182]],

        [[-1.7446,  0.0000,  0.0000,  0.0000],
         [-0.4695,  0.8652,  0.0000,  0.0000],
         [ 0.3924,  0.8552,  0.2616,  0.0000],
         [ 1.1085,  0.2494, -1.5265, -0.3311]]], dtype=torch.float64)

In [898]:
th.prod(th.diagonal(bL, offset=0, dim1=-1, dim2=-2),1)

tensor([0.0001, 0.1307], dtype=torch.float64)

In [901]:
th.prod(th.diag(bL[1]))**2

tensor(0.0171, dtype=torch.float64)

In [902]:
th.det(bL[0])

tensor(0.0001, dtype=torch.float64)

In [790]:
A

tensor([[[ 0.0897,  0.0726,  0.0848, -0.0698],
         [ 0.0726,  0.0601,  0.0301, -0.0561],
         [ 0.0848,  0.0301,  1.1600, -0.0730],
         [-0.0698, -0.0561, -0.0730,  0.0698]],

        [[ 3.0435,  0.8190, -0.6845, -1.9338],
         [ 0.8190,  0.9689,  0.5557, -0.3046],
         [-0.6845,  0.5557,  0.9538,  0.2489],
         [-1.9338, -0.3046,  0.2489,  3.7309]]], dtype=torch.float64)

In [796]:
bA

tensor([[[ 0.0897,  0.0726,  0.0848, -0.0698],
         [ 0.0726,  0.0601,  0.0301, -0.0561],
         [ 0.0848,  0.0301,  1.1600, -0.0730],
         [-0.0698, -0.0561, -0.0730,  0.0698]],

        [[ 3.0435,  0.8190, -0.6845, -1.9338],
         [ 0.8190,  0.9689,  0.5557, -0.3046],
         [-0.6845,  0.5557,  0.9538,  0.2489],
         [-1.9338, -0.3046,  0.2489,  3.7309]]], dtype=torch.float64)

In [791]:
L

tensor([[[ 0.2996,  0.0000,  0.0000,  0.0000],
         [ 0.2424, -0.0372,  0.0000,  0.0000],
         [ 0.2832,  1.0352,  0.0895,  0.0000],
         [-0.2329, -0.0101,  0.0384, -0.1182]],

        [[-1.7446,  0.0000,  0.0000,  0.0000],
         [-0.4695,  0.8652,  0.0000,  0.0000],
         [ 0.3924,  0.8552,  0.2616,  0.0000],
         [ 1.1085,  0.2494, -1.5265, -0.3311]]], dtype=torch.float64)

In [797]:
bL

tensor([[[ 0.2996,  0.0000,  0.0000,  0.0000],
         [ 0.2424, -0.0372,  0.0000,  0.0000],
         [ 0.2832,  1.0352,  0.0895,  0.0000],
         [-0.2329, -0.0101,  0.0384, -0.1182]],

        [[-1.7446,  0.0000,  0.0000,  0.0000],
         [-0.4695,  0.8652,  0.0000,  0.0000],
         [ 0.3924,  0.8552,  0.2616,  0.0000],
         [ 1.1085,  0.2494, -1.5265, -0.3311]]], dtype=torch.float64)

In [12]:
th.potrf(A[1], upper=False)

tensor([[ 1.7446,  0.0000,  0.0000,  0.0000],
        [ 0.4695,  0.8652,  0.0000,  0.0000],
        [-0.3924,  0.8552,  0.2616,  0.0000],
        [-1.1085,  0.2494, -1.5265,  0.3311]], dtype=torch.float64)

In [22]:
cholA = th.zeros_like(A)
for i in range(nb):
    cholA[i][:,:] = th.potrf(A[i], upper=False) 

In [23]:
cholA

tensor([[[ 0.2996,  0.0000,  0.0000,  0.0000],
         [ 0.2424,  0.0372,  0.0000,  0.0000],
         [ 0.2832, -1.0352,  0.0895,  0.0000],
         [-0.2329,  0.0101,  0.0384,  0.1182]],

        [[ 1.7446,  0.0000,  0.0000,  0.0000],
         [ 0.4695,  0.8652,  0.0000,  0.0000],
         [-0.3924,  0.8552,  0.2616,  0.0000],
         [-1.1085,  0.2494, -1.5265,  0.3311]]], dtype=torch.float64)

In [923]:
def cholesky(A):
    """Lower-triangular Cholesky factor computed entry by entry.

    All indexing uses ``...`` for the leading axes, so one call factorises
    every matrix stored in the last two axes of ``A``.

    Parameters
    ----------
    A : torch.Tensor
        Tensor whose last two axes hold square matrices (assumed symmetric
        positive definite -- this is not checked).

    Returns
    -------
    torch.Tensor
        Tensor of the same shape as ``A`` holding the lower-triangular
        factors; entries above the diagonal stay zero.
    """
    size = A.shape[-1]
    factor = th.zeros_like(A)

    for row in range(size):
        for col in range(row + 1):
            # Dot product of the already-computed parts of rows `row` and
            # `col`.  Entries are read through .clone() because `factor` is
            # updated in place (presumably to keep autograd usable).
            acc = 0.0
            for k in range(col):
                acc = acc + factor[..., row, k].clone() * factor[..., col, k].clone()

            if row == col:
                factor[..., row, col] = th.sqrt(A[..., row, row] - acc)
            else:
                factor[..., row, col] = 1.0 / factor[..., col, col].clone() * (A[..., row, col] - acc)
    return factor

In [221]:
cholesky(A)

tensor([[[ 0.2996,  0.0000,  0.0000,  0.0000],
         [ 0.2424,  0.0372,  0.0000,  0.0000],
         [ 0.2832, -1.0352,  0.0895,  0.0000],
         [-0.2329,  0.0101,  0.0384,  0.1182]],

        [[ 1.7446,  0.0000,  0.0000,  0.0000],
         [ 0.4695,  0.8652,  0.0000,  0.0000],
         [-0.3924,  0.8552,  0.2616,  0.0000],
         [-1.1085,  0.2494, -1.5265,  0.3311]]], dtype=torch.float64)

In [222]:
cholA = th.zeros_like(A)
for i in range(nb):
    cholA[i][:,:] = cholesky(A[i]) 
cholA

tensor([[[ 0.2996,  0.0000,  0.0000,  0.0000],
         [ 0.2424,  0.0372,  0.0000,  0.0000],
         [ 0.2832, -1.0352,  0.0895,  0.0000],
         [-0.2329,  0.0101,  0.0384,  0.1182]],

        [[ 1.7446,  0.0000,  0.0000,  0.0000],
         [ 0.4695,  0.8652,  0.0000,  0.0000],
         [-0.3924,  0.8552,  0.2616,  0.0000],
         [-1.1085,  0.2494, -1.5265,  0.3311]]], dtype=torch.float64)

In [56]:
A[0]

tensor([[ 0.0897,  0.0726,  0.0848, -0.0698],
        [ 0.0726,  0.0601,  0.0301, -0.0561],
        [ 0.0848,  0.0301,  1.1600, -0.0730],
        [-0.0698, -0.0561, -0.0730,  0.0698]], dtype=torch.float64)

In [156]:
A

tensor([[[ 0.0897,  0.0726,  0.0848, -0.0698],
         [ 0.0726,  0.0601,  0.0301, -0.0561],
         [ 0.0848,  0.0301,  1.1600, -0.0730],
         [-0.0698, -0.0561, -0.0730,  0.0698]],

        [[ 3.0435,  0.8190, -0.6845, -1.9338],
         [ 0.8190,  0.9689,  0.5557, -0.3046],
         [-0.6845,  0.5557,  0.9538,  0.2489],
         [-1.9338, -0.3046,  0.2489,  3.7309]]], dtype=torch.float64)

# Timing tests

In [305]:
n=3;
nb=100000;
nn = int(n*(n+1)/2)

th.manual_seed(42)
x = th.randn(int(nb*n*(n+1)/2) , device=device, dtype=dtype)

In [306]:
# reshape non-linear variables for easier indexing
X = th.reshape(x[:nb*nn], (nb,nn))
# Build all nb lower-triangular factors with one batched call instead of
# nb separate Python-level vech2L calls.
L = bvech2L(X,nb,n)
# A = L L^T for every matrix in the batch.
A = th.matmul(L,th.transpose(L, 1, 2))

In [307]:
# Time nb separate th.potrf calls, one per matrix in the batch.
# CUDA work is queued asynchronously, so synchronise before reading the clock
# on either side; for CPU tensors the guard is skipped.
if A.is_cuda:
    th.cuda.synchronize()
start_time = time.perf_counter()

cholA = th.zeros_like(A)
for i in range(nb):
    cholA[i] = th.potrf(A[i], upper=False)

if A.is_cuda:
    th.cuda.synchronize()
print(" took {} seconds ".format(time.perf_counter() - start_time))

 took 0.5366852283477783 seconds 


In [308]:
# Time one batched cholesky() call over the whole stack of matrices.
# CUDA work is queued asynchronously, so synchronise before reading the clock
# on either side; for CPU tensors the guard is skipped.
if A.is_cuda:
    th.cuda.synchronize()
start_time = time.perf_counter()

cholAout = cholesky(A)

if A.is_cuda:
    th.cuda.synchronize()
print(" took {} seconds ".format(time.perf_counter() - start_time))
cholAout.shape

 took 0.014220476150512695 seconds 


torch.Size([100000, 3, 3])

## GPU CUDA timing

In [309]:
# Switch the notebook-wide dtype to double precision for the cells below.
dtype = th.float64

# Select CUDA device `gpuid`; the commented-out line is the CPU alternative.
gpuid = 0
device = th.device("cuda:"+ str(gpuid))
#device = th.device("cpu")

# Report the execution environment.
print("Execution device: ",device)
print("PyTorch version: ", th.__version__ )
print("CUDA available: ", th.cuda.is_available())
print("CUDA version: ", th.version.cuda)
# NOTE(review): called unconditionally -- presumably raises when no CUDA device is present; confirm.
print("CUDA device:", th.cuda.get_device_name(gpuid))

Execution device:  cuda:0
PyTorch version:  0.4.1
CUDA available:  True
CUDA version:  9.0.176
CUDA device: TITAN V


In [315]:
n=3;
nb=100000;
nn = int(n*(n+1)/2)

th.manual_seed(42)
x = th.randn(int(nb*n*(n+1)/2) , device=device, dtype=dtype)

In [316]:
# reshape non-linear variables for easier indexing
X = th.reshape(x[:nb*nn], (nb,nn))
# Build all nb lower-triangular factors with one batched call instead of
# nb separate Python-level vech2L calls.
L = bvech2L(X,nb,n)
# A = L L^T for every matrix in the batch.
A = th.matmul(L,th.transpose(L, 1, 2))

In [317]:
# Time nb separate th.potrf calls, one per matrix in the batch.
# CUDA work is queued asynchronously, so synchronise before reading the clock
# on either side; for CPU tensors the guard is skipped.
if A.is_cuda:
    th.cuda.synchronize()
start_time = time.perf_counter()

cholA = th.zeros_like(A)
for i in range(nb):
    cholA[i] = th.potrf(A[i], upper=False)

if A.is_cuda:
    th.cuda.synchronize()
print(" took {} seconds ".format(time.perf_counter() - start_time))

 took 156.54940176010132 seconds 


In [318]:
# Time one batched cholesky() call over the whole stack of matrices.
# Without a synchronise, a CUDA run would only measure how long it takes to
# queue the kernels, not how long they take to execute.
if A.is_cuda:
    th.cuda.synchronize()
start_time = time.perf_counter()

cholAout = cholesky(A)

if A.is_cuda:
    th.cuda.synchronize()
print(" took {} seconds ".format(time.perf_counter() - start_time))
cholAout.shape

 took 0.0039408206939697266 seconds 


torch.Size([100000, 3, 3])

In [320]:
cholAout[5500,:,:]

tensor([[ 0.9576,  0.0000,  0.0000],
        [-0.2829,  1.5446,  0.0000],
        [ 1.1650, -0.6542,  0.2660]], device='cuda:0', dtype=torch.float64)

In [321]:
0.014220476150512695/0.0039408206939697266

3.608506261721822

In [322]:
156.54940176010132/0.0039408206939697266

39725.07604815778

In [323]:
# Time the hand-written cholesky() applied to one matrix at a time, for
# comparison with the single batched call.
# CUDA work is queued asynchronously, so synchronise before reading the clock
# on either side; for CPU tensors the guard is skipped.
if A.is_cuda:
    th.cuda.synchronize()
start_time = time.perf_counter()

cholA = th.zeros_like(A)
for i in range(nb):
    cholA[i] = cholesky(A[i])

if A.is_cuda:
    th.cuda.synchronize()
print(" took {} seconds ".format(time.perf_counter() - start_time))

 took 39.27142286300659 seconds 


# Big AKL

In [327]:
# Double precision on the CPU for the cells below.
dtype = th.float64

gpuid = 0
#device = th.device("cuda:"+ str(gpuid))
device = th.device("cpu")

print("Execution device: ",device)
print("PyTorch version: ", th.__version__ )
print("CUDA available: ", th.cuda.is_available())
print("CUDA version: ", th.version.cuda)
# This cell runs on the CPU, so it must not require a GPU just to print
# diagnostics: only query the device name when CUDA is actually present.
if th.cuda.is_available():
    print("CUDA device:", th.cuda.get_device_name(gpuid))

Execution device:  cpu
PyTorch version:  0.4.1
CUDA available:  True
CUDA version:  9.0.176
CUDA device: TITAN V


In [749]:
n=4;
nb=4;
nn = int(n*(n+1)/2)

th.manual_seed(42)
x = th.randn(int(nb*n*(n+1)/2) , device=device, dtype=dtype)

In [750]:
# reshape non-linear variables for easier indexing
X = th.reshape(x[:nb*nn], (nb,nn))
# Build all nb lower-triangular factors with one batched call instead of
# nb separate Python-level vech2L calls.
L = bvech2L(X,nb,n)
# A = L L^T for every matrix in the batch.
A = th.matmul(L,th.transpose(L, 1, 2))

In [751]:
A

tensor([[[ 0.0897,  0.0726,  0.0848, -0.0698],
         [ 0.0726,  0.5092,  0.5933, -1.2580],
         [ 0.0848,  0.5933,  3.7483, -1.7689],
         [-0.0698, -1.2580, -1.7689,  3.3608]],

        [[ 0.1836,  0.0994, -0.4377, -0.3545],
         [ 0.0994,  3.0612, -2.8244,  0.5386],
         [-0.4377, -2.8244,  5.5334,  2.1298],
         [-0.3545,  0.5386,  2.1298,  2.8615]],

        [[ 0.0014, -0.0385,  0.0004, -0.0033],
         [-0.0385,  2.5538,  0.5308,  0.4178],
         [ 0.0004,  0.5308,  0.8569, -1.7871],
         [-0.0033,  0.4178, -1.7871,  7.8861]],

        [[ 0.9699, -0.4504,  1.9606,  0.2477],
         [-0.4504,  1.2540, -1.0578, -1.7193],
         [ 1.9606, -1.0578,  5.4200,  0.7207],
         [ 0.2477, -1.7193,  0.7207,  2.5267]]], dtype=torch.float64)

In [752]:
# All pairwise sums of the nb matrices: AKL[i*nb + k] = A[k] + A[i].
# One broadcast add over a (nb, nb, n, n) grid replaces growing the result
# with th.cat inside a loop, which re-copies the accumulated tensor each time.
AKL = th.reshape(A.unsqueeze(0) + A.unsqueeze(1), (nb*nb, n, n))
  

In [753]:
print(AKL.shape)
AKL

torch.Size([16, 4, 4])


tensor([[[ 1.7947e-01,  1.4521e-01,  1.6967e-01, -1.3956e-01],
         [ 1.4521e-01,  1.0184e+00,  1.1867e+00, -2.5161e+00],
         [ 1.6967e-01,  1.1867e+00,  7.4966e+00, -3.5378e+00],
         [-1.3956e-01, -2.5161e+00, -3.5378e+00,  6.7215e+00]],

        [[ 2.7336e-01,  1.7197e-01, -3.5288e-01, -4.2428e-01],
         [ 1.7197e-01,  3.5704e+00, -2.2311e+00, -7.1940e-01],
         [-3.5288e-01, -2.2311e+00,  9.2817e+00,  3.6087e-01],
         [-4.2428e-01, -7.1940e-01,  3.6087e-01,  6.2222e+00]],

        [[ 9.1118e-02,  3.4091e-02,  8.5212e-02, -7.3108e-02],
         [ 3.4091e-02,  3.0630e+00,  1.1242e+00, -8.4021e-01],
         [ 8.5212e-02,  1.1242e+00,  4.6051e+00, -3.5560e+00],
         [-7.3108e-02, -8.4021e-01, -3.5560e+00,  1.1247e+01]],

        [[ 1.0596e+00, -3.7780e-01,  2.0454e+00,  1.7791e-01],
         [-3.7780e-01,  1.7632e+00, -4.6450e-01, -2.9773e+00],
         [ 2.0454e+00, -4.6450e-01,  9.1682e+00, -1.0482e+00],
         [ 1.7791e-01, -2.9773e+00, -1.0482e+00, 

In [754]:
cholAKL = cholesky(AKL)

In [755]:
cholAKL

tensor([[[ 0.4236,  0.0000,  0.0000,  0.0000],
         [ 0.3428,  0.9492,  0.0000,  0.0000],
         [ 0.4005,  1.1056,  2.4726,  0.0000],
         [-0.3294, -2.5319, -0.2453,  0.3774]],

        [[ 0.5228,  0.0000,  0.0000,  0.0000],
         [ 0.3289,  1.8607,  0.0000,  0.0000],
         [-0.6749, -1.0797,  2.7677,  0.0000],
         [-0.8115, -0.2432, -0.1624,  2.3406]],

        [[ 0.3019,  0.0000,  0.0000,  0.0000],
         [ 0.1129,  1.7465,  0.0000,  0.0000],
         [ 0.2823,  0.6254,  2.0333,  0.0000],
         [-0.2422, -0.4654, -1.5721,  2.9155]],

        [[ 1.0294,  0.0000,  0.0000,  0.0000],
         [-0.3670,  1.2761,  0.0000,  0.0000],
         [ 1.9870,  0.2075,  2.2753,  0.0000],
         [ 0.1728, -2.2834, -0.4034,  0.6934]],

        [[ 0.5228,  0.0000,  0.0000,  0.0000],
         [ 0.3289,  1.8607,  0.0000,  0.0000],
         [-0.6749, -1.0797,  2.7677,  0.0000],
         [-0.8115, -0.2432, -0.1624,  2.3406]],

        [[ 0.6060,  0.0000,  0.0000,  0.0000],
   

In [756]:
AKL[0]

tensor([[ 0.1795,  0.1452,  0.1697, -0.1396],
        [ 0.1452,  1.0184,  1.1867, -2.5161],
        [ 0.1697,  1.1867,  7.4966, -3.5378],
        [-0.1396, -2.5161, -3.5378,  6.7215]], dtype=torch.float64)

In [757]:
cholAKL[0]

tensor([[ 0.4236,  0.0000,  0.0000,  0.0000],
        [ 0.3428,  0.9492,  0.0000,  0.0000],
        [ 0.4005,  1.1056,  2.4726,  0.0000],
        [-0.3294, -2.5319, -0.2453,  0.3774]], dtype=torch.float64)

In [758]:
th.det(AKL[15])

tensor(0.0005, dtype=torch.float64)

In [759]:
detLKL = th.prod(th.diagonal(cholAKL, offset=0, dim1=-1, dim2=-2),1)**2
detLKL

tensor([1.4077e-01, 3.9718e+01, 9.7672e+00, 4.2949e+00, 3.9718e+01, 7.6560e+00,
        4.6182e+01, 1.3097e+02, 9.7672e+00, 4.6182e+01, 4.9771e-02, 7.0969e+01,
        4.2949e+00, 1.3097e+02, 7.0969e+01, 4.7051e-04], dtype=torch.float64)

In [760]:
th.potri(cholAKL[0], upper=False)

tensor([[ 19.6909, -25.6447,  -0.9627,  -9.6974],
        [-25.6447,  47.0616,   1.5875,  17.9196],
        [ -0.9627,   1.5875,   0.2327,   0.6967],
        [ -9.6974,  17.9196,   0.6967,   7.0220]], dtype=torch.float64)

In [761]:
th.inverse(AKL[0])

tensor([[ 19.6909, -25.6447,  -0.9627,  -9.6974],
        [-25.6447,  47.0616,   1.5875,  17.9196],
        [ -0.9627,   1.5875,   0.2327,   0.6967],
        [ -9.6974,  17.9196,   0.6967,   7.0220]], dtype=torch.float64)

In [767]:
def inverseL(L):
    """Invert lower-triangular matrices by forward substitution.

    All indexing uses ``...`` for the leading axes, so every matrix stored in
    the last two axes of ``L`` is inverted in the same call.

    Parameters
    ----------
    L : torch.Tensor
        Tensor whose last two axes hold square lower-triangular matrices with
        non-zero diagonals.  Entries above the diagonal are never read.

    Returns
    -------
    torch.Tensor
        Tensor of the same shape as ``L`` holding the inverses; entries above
        the diagonal are zero.
    """
    n = L.shape[-1]
    invL = th.zeros_like(L)
    for j in range(0,n):
        invL[...,j,j] = 1.0/L[...,j,j]
        for i in range(j+1,n):
            s = 0.0
            # Only k = j .. i-1 can contribute: invL[k,j] is zero for k < j
            # (upper triangle) and invL[i,j] has not been written yet.
            for k in range(j, i):
                # Accumulate out of place and read invL through .clone():
                # invL is modified in place below, and autograd rejects the
                # backward pass if a tensor it saved has changed since.
                s = s - L[...,i,k]*invL[...,k,j].clone()
            invL[...,i,j] = s/L[...,i,i]

    return invL
            

In [784]:
M = cholAKL[0:2]
print(M)

tensor([[[ 0.4236,  0.0000,  0.0000,  0.0000],
         [ 0.3428,  0.9492,  0.0000,  0.0000],
         [ 0.4005,  1.1056,  2.4726,  0.0000],
         [-0.3294, -2.5319, -0.2453,  0.3774]],

        [[ 0.5228,  0.0000,  0.0000,  0.0000],
         [ 0.3289,  1.8607,  0.0000,  0.0000],
         [-0.6749, -1.0797,  2.7677,  0.0000],
         [-0.8115, -0.2432, -0.1624,  2.3406]]], dtype=torch.float64)


In [785]:
invLKL=inverseL(M)
th.matmul(th.transpose(invLKL, 1, 2),invLKL)
#th.matmul(th.t(invLKL),invLKL)

tensor([[[ 19.6909, -25.6447,  -0.9627,  -9.6974],
         [-25.6447,  47.0616,   1.5875,  17.9196],
         [ -0.9627,   1.5875,   0.2327,   0.6967],
         [ -9.6974,  17.9196,   0.6967,   7.0220]],

        [[  4.3084,  -0.0657,   0.1372,   0.2782],
         [ -0.0657,   0.3377,   0.0775,   0.0301],
         [  0.1372,   0.0775,   0.1312,   0.0107],
         [  0.2782,   0.0301,   0.0107,   0.1825]]], dtype=torch.float64)

In [786]:
 th.inverse(AKL[1])

tensor([[ 4.3084, -0.0657,  0.1372,  0.2782],
        [-0.0657,  0.3377,  0.0775,  0.0301],
        [ 0.1372,  0.0775,  0.1312,  0.0107],
        [ 0.2782,  0.0301,  0.0107,  0.1825]], dtype=torch.float64)

Exception in callback BaseAsyncIOLoop._handle_events(15, 1)
handle: <Handle BaseAsyncIOLoop._handle_events(15, 1)>
Traceback (most recent call last):
  File "/home/kinghorn/anaconda3/envs/pytorch/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/kinghorn/anaconda3/envs/pytorch/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/home/kinghorn/anaconda3/envs/pytorch/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/kinghorn/anaconda3/envs/pytorch/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/kinghorn/anaconda3/envs/pytorch/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/kinghorn/anaconda3/envs/pytorch/lib/python3.6/site-packages/

1. for k = 1 to n
2.   X[k,k] = 1/L[k,k]
3.   for i = k+1 to n
4.     X[i,k] = -L[i, k:i-1]*X[k:i-1,k]/L[i,i]
5.   end for i
6. end for k

In [766]:
n=3
for i in range(n):
    print('i',i)
    for j in range(i+1,n):
        print('j',j)
        for k in range(i+1):
            print('k',k)
            print('i,j,k',i,j,k)

i 0
j 1
k 0
i,j,k 0 1 0
j 2
k 0
i,j,k 0 2 0
i 1
j 2
k 0
i,j,k 1 2 0
k 1
i,j,k 1 2 1
i 2


# IJ structure

In [855]:
1000**2

1000000

In [856]:
n=3;
nb=1000000;
nn = int(n*(n+1)/2)

th.manual_seed(42)
x = th.randn(int(nb*n*(n+1)/2) , device=device, dtype=dtype)

In [857]:
# reshape non-linear variables for easier indexing
X = th.reshape(x[:nb*nn], (nb,nn))

#L = th.zeros((nb,n,n), device=device, dtype=dtype)
#for i in range(nb):
    #L[i][np.tril_indices(n)]=npX[i,:]
#    L[i][:,:] = vech2L(X[i,:],n)
L = bvech2L(X,nb,n)
#detL = th.abs(th.prod(th.diagonal(L, offset=0, dim1=-1, dim2=-2),1))
A = th.matmul(L,th.transpose(L, 1, 2))

In [862]:
L.shape

torch.Size([1000000, 3, 3])

In [863]:
A.shape

torch.Size([1000000, 3, 3])

In [858]:
# Time one batched cholesky() call over the flat (nb, n, n) stack.
# CUDA work is queued asynchronously, so synchronise before reading the clock
# on either side; for CPU tensors the guard is skipped.
if A.is_cuda:
    th.cuda.synchronize()
start_time = time.perf_counter()

cholAout = cholesky(A)

if A.is_cuda:
    th.cuda.synchronize()
print(" took {} seconds ".format(time.perf_counter() - start_time))
cholAout.shape

 took 0.08464717864990234 seconds 


torch.Size([1000000, 3, 3])

In [860]:
AIJ = th.reshape(A, (1000,1000,n,n))

In [861]:
# Time the same factorisation with the batch reshaped into a 2-D grid.
# CUDA work is queued asynchronously, so synchronise before reading the clock
# on either side; for CPU tensors the guard is skipped.
if AIJ.is_cuda:
    th.cuda.synchronize()
start_time = time.perf_counter()

cholAout = cholesky(AIJ)

if AIJ.is_cuda:
    th.cuda.synchronize()
print(" took {} seconds ".format(time.perf_counter() - start_time))
cholAout.shape

 took 0.08845806121826172 seconds 


torch.Size([1000, 1000, 3, 3])

In [891]:
cholAout = cholesky(AIJ[0:2,0:2])
print(cholAout.shape)
invLKL=inverseL(cholAout)
print(invLKL)
print(th.transpose(invLKL, dim0=-2, dim1=-1))
th.matmul(th.transpose(invLKL, dim0=-2, dim1=-1),invLKL)

torch.Size([2, 2, 3, 3])
tensor([[[[   3.3383,    0.0000,    0.0000],
          [  -3.4735,    4.2929,    0.0000],
          [  -4.1914,    3.6855,    1.2792]],

         [[   0.5586,    0.0000,    0.0000],
          [  -3.6598,    3.7475,    0.0000],
          [   7.1815,   -6.9258,    4.3128]]],


        [[[  29.7390,    0.0000,    0.0000],
          [   7.3784,    0.8407,    0.0000],
          [ -59.0134,   -1.6254,    1.0450]],

         [[  68.4055,    0.0000,    0.0000],
          [-428.1763,   10.1317,    0.0000],
          [ 273.4188,   -3.8425,    1.1510]]]], dtype=torch.float64)
tensor([[[[   3.3383,   -3.4735,   -4.1914],
          [   0.0000,    4.2929,    3.6855],
          [   0.0000,    0.0000,    1.2792]],

         [[   0.5586,   -3.6598,    7.1815],
          [   0.0000,    3.7475,   -6.9258],
          [   0.0000,    0.0000,    4.3128]]],


        [[[  29.7390,    7.3784,  -59.0134],
          [   0.0000,    0.8407,   -1.6254],
          [   0.0000,    0.0000,    1

tensor([[[[ 4.0777e+01, -3.0359e+01, -5.3614e+00],
          [-3.0359e+01,  3.2012e+01,  4.7143e+00],
          [-5.3614e+00,  4.7143e+00,  1.6362e+00]],

         [[ 6.5281e+01, -6.3453e+01,  3.0972e+01],
          [-6.3453e+01,  6.2010e+01, -2.9869e+01],
          [ 3.0972e+01, -2.9869e+01,  1.8600e+01]]],


        [[[ 4.4214e+03,  1.0213e+02, -6.1671e+01],
          [ 1.0213e+02,  3.3488e+00, -1.6986e+00],
          [-6.1671e+01, -1.6986e+00,  1.0921e+00]],

         [[ 2.6277e+05, -5.3887e+03,  3.1470e+02],
          [-5.3887e+03,  1.1742e+02, -4.4227e+00],
          [ 3.1470e+02, -4.4227e+00,  1.3248e+00]]]], dtype=torch.float64)

In [892]:
th.inverse(AIJ[1,0])

tensor([[4421.4311,  102.1262,  -61.6711],
        [ 102.1262,    3.3488,   -1.6986],
        [ -61.6711,   -1.6986,    1.0921]], dtype=torch.float64)

# Gradient testing

In [1425]:
def cholesky2(A):
    """Row-by-row Cholesky factorisation over the last two axes of ``A``.

    Parameters
    ----------
    A : torch.Tensor
        Tensor whose last two axes hold square matrices (assumed symmetric
        positive definite -- not checked).  Leading axes are treated as
        batch axes through ``...`` indexing.

    Returns
    -------
    torch.Tensor
        Lower-triangular factors with the same shape as ``A``; entries above
        the diagonal are zero.
    """
    order = A.shape[-1]
    chol = th.zeros_like(A)

    for r in range(order):
        for c in range(r + 1):
            # Partial dot product of rows r and c over the columns already
            # filled in.  Starting from 0.0 keeps the empty-sum case a plain
            # float.  Reads go through .clone() since `chol` is written in
            # place (presumably for autograd's benefit).
            partial = sum(
                (chol[..., r, k].clone() * chol[..., c, k].clone() for k in range(c)),
                0.0,
            )

            if r == c:
                entry = th.sqrt(A[..., r, r] - partial)
            else:
                entry = 1.0 / chol[..., c, c].clone() * (A[..., r, c] - partial)
            chol[..., r, c] = entry
    return chol

In [1420]:
def cholesky3(A):
    """Column-by-column Cholesky factorisation over the last two axes of ``A``.

    For each column i the diagonal entry is computed first, then the entries
    below it:

        L[i,i] = sqrt(A[i,i] - sum_{p<i} L[i,p]**2)
        L[j,i] = (A[j,i] - sum_{p<i} L[i,p]*L[j,p]) / L[i,i]    for j > i

    Parameters
    ----------
    A : torch.Tensor
        Tensor whose last two axes hold square matrices (assumed symmetric
        positive definite -- not checked).  Leading axes are treated as
        batch axes through ``...`` indexing.

    Returns
    -------
    torch.Tensor
        Lower-triangular factors with the same shape as ``A``; entries above
        the diagonal are zero.
    """
    L = th.zeros_like(A)
    n = A.shape[-1]
    for i in range(n):
        S = A[...,i,i]
        for ip in range(i):
            # Element-wise product, not th.dot: the slices are per-matrix
            # scalars (0-dim, or one value per batch entry), and th.dot would
            # either fail or sum across the batch.
            S = S - L[...,i,ip].clone() * L[...,i,ip].clone()
        L[...,i,i] = th.sqrt(S)
        # Start below the diagonal; the diagonal entry was set just above.
        for j in range(i+1,n):
            S = A[...,j,i]
            for ip in range(i):
                # The correction uses the factor computed so far (L), not A.
                S = S - L[...,i,ip].clone() * L[...,j,ip].clone()
            # .clone() reads because L is updated in place while autograd may
            # still need the earlier values.
            L[...,j,i] = S/L[...,i,i].clone()
    return L

In [1435]:
n=4;
nb=4;
nn = int(n*(n+1)/2)

th.manual_seed(42)
x = th.randn(int(nb*n*(n+1)/2) , device=device, dtype=dtype, requires_grad=True)

In [1436]:
# reshape non-linear variables for easier indexing
X = th.reshape(x[:nb*nn], (nb,nn))
#npX = X.detach().numpy()
L = th.zeros((nb,n,n), device=device, dtype=dtype)
#for i in range(nb):
    #L[i][np.tril_indices(n)]=npX[i,:]
#    L[i][:,:] = vech2L(X[i,:],n)
bL = bvech2L(X,nb,n)
#detL = th.abs(th.prod(th.diagonal(L, offset=0, dim1=-1, dim2=-2),1))
bA = th.matmul(bL,th.transpose(bL, 1, 2))

In [1437]:
c3 = cholesky2(bA)
c3

tensor([[[ 0.2996,  0.0000,  0.0000,  0.0000],
         [ 0.2424,  0.6712,  0.0000,  0.0000],
         [ 0.2832,  0.7818,  1.7484,  0.0000],
         [-0.2329, -1.7903, -0.1735,  0.2668]],

        [[ 0.4285,  0.0000,  0.0000,  0.0000],
         [ 0.2319,  1.7342,  0.0000,  0.0000],
         [-1.0215, -1.4921,  1.5046,  0.0000],
         [-0.8273,  0.4212,  1.2716,  0.6187]],

        [[ 0.0372,  0.0000,  0.0000,  0.0000],
         [-1.0352,  1.2174,  0.0000,  0.0000],
         [ 0.0101,  0.4446,  0.8118,  0.0000],
         [-0.0895,  0.2671, -2.3465,  1.5169]],

        [[ 0.9848,  0.0000,  0.0000,  0.0000],
         [-0.4573,  1.0222,  0.0000,  0.0000],
         [ 1.9908, -0.1442,  1.1983,  0.0000],
         [ 0.2515, -1.5695, -0.0052,  0.0045]]],
       dtype=torch.float64, grad_fn=<CopySlices>)

In [1271]:
cholesky(bA)

tensor([[[ 0.2996,  0.0000,  0.0000,  0.0000],
         [ 0.2424,  0.6712,  0.0000,  0.0000],
         [ 0.2832,  0.7818,  1.7484,  0.0000],
         [-0.2329, -1.7903, -0.1735,  0.2668]],

        [[ 0.4285,  0.0000,  0.0000,  0.0000],
         [ 0.2319,  1.7342,  0.0000,  0.0000],
         [-1.0215, -1.4921,  1.5046,  0.0000],
         [-0.8273,  0.4212,  1.2716,  0.6187]],

        [[ 0.0372,  0.0000,  0.0000,  0.0000],
         [-1.0352,  1.2174,  0.0000,  0.0000],
         [ 0.0101,  0.4446,  0.8118,  0.0000],
         [-0.0895,  0.2671, -2.3465,  1.5169]],

        [[ 0.9848,  0.0000,  0.0000,  0.0000],
         [-0.4573,  1.0222,  0.0000,  0.0000],
         [ 1.9908, -0.1442,  1.1983,  0.0000],
         [ 0.2515, -1.5695, -0.0052,  0.0045]]],
       dtype=torch.float64, grad_fn=<CopySlices>)

In [1429]:
nc3 = th.norm(c3)
th.autograd.grad(nc3,x)


(tensor([ 0.0469,  0.0379,  0.0443, -0.0365,  0.1051,  0.1224, -0.2802, -0.2737,
          0.0272,  0.0418,  0.0671,  0.0363, -0.1599, -0.1295,  0.2714, -0.2335,
          0.0659,  0.2355,  0.1990,  0.0968, -0.0058,  0.1620, -0.0016,  0.0140,
         -0.1906, -0.0696, -0.0418, -0.1271,  0.3673, -0.2374, -0.1542,  0.0716,
         -0.3116, -0.0394, -0.1600,  0.0226,  0.2457, -0.1876,  0.0008, -0.0007],
        dtype=torch.float64),)

In [1368]:
#fcn = th.norm(testf(bA))
#fcn = th.norm(cholesky2(bA))
cholA = th.zeros_like(bA)
for i in range(nb):
    cholA[i][:,:] = th.potrf(bA[i], upper=False) 
out = th.norm(cholA)

In [1369]:
th.autograd.grad(out,x)

(tensor([ 0.0469,  0.0379,  0.0443, -0.0365,  0.1051,  0.1224, -0.2802, -0.2737,
          0.0272,  0.0418,  0.0671,  0.0363, -0.1599, -0.1295,  0.2714, -0.2335,
          0.0659,  0.2355,  0.1990,  0.0968, -0.0058,  0.1620, -0.0016,  0.0140,
         -0.1906, -0.0696, -0.0418, -0.1271,  0.3673, -0.2374, -0.1542,  0.0716,
         -0.3116, -0.0394, -0.1600,  0.0226,  0.2457, -0.1876,  0.0008, -0.0007],
        dtype=torch.float64),)

In [1442]:
def inverseL(L):
    """Forward-substitution inverse of lower-triangular matrices.

    Parameters
    ----------
    L : torch.Tensor
        Tensor whose last two axes hold square lower-triangular matrices with
        non-zero diagonals.  Leading axes are treated as batch axes through
        ``...`` indexing.

    Returns
    -------
    torch.Tensor
        Tensor shaped like ``L`` containing the inverses, built one column at
        a time.
    """
    size = L.shape[-1]
    inverse = th.zeros_like(L)
    for col in range(size):
        # Diagonal entry of the inverse is the reciprocal of L's diagonal.
        inverse[..., col, col] = 1.0 / L[..., col, col]
        for row in range(col + 1, size):
            acc = 0.0
            for k in range(row + 1):
                # Read the partially built inverse through .clone() because it
                # is written in place on the next statement.
                acc = acc - L[..., row, k] * inverse[..., k, col].clone()
            inverse[..., row, col] = acc / L[..., row, row]

    return inverse
            

In [1443]:
invL = inverseL(c3)
out = th.norm(invL)
th.autograd.grad(out,x)

(tensor([-3.1133e-01,  4.0547e-01,  1.5221e-02,  1.5333e-01, -4.7854e-01,
         -1.6699e-02, -1.8183e-01, -4.3710e-03, -5.4579e-02, -1.1752e+00,
         -4.2422e-02,  3.1394e-03,  6.9645e-04, -6.3647e-03, -4.6774e-03,
         -3.5370e-03,  4.3118e-03, -6.6674e-03,  3.4304e-03, -2.8498e-02,
          1.2148e+02,  2.7574e+00, -3.7150e+00, -9.3669e-01,  2.3479e+00,
         -3.1633e+00, -7.9761e-01,  1.7936e+00,  4.5211e-01,  7.8527e-01,
          2.4009e+01,  8.2088e+01,  2.3072e-01,  5.3439e+01,  3.0724e+02,
          8.7456e-01,  2.0002e+02, -2.7482e-01, -6.4286e+01,  9.2836e+04],
        dtype=torch.float64),)