In [1]:
# original math
# https://docs.google.com/document/d/1SSJD1Fm-h3apVvpAkvrpG9o8OQaXHFUMjBd1AFuiY8Y/edit?usp=sharing

# check the math for full covariance matrix and related lmbda_loss
# conversion to pytorch with batch of data

In [2]:
import numpy as np

In [3]:
# define matrix Q
Q = np.reshape(range(5*5+1)[1:],(5,5))
print(Q)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]
 [21 22 23 24 25]]


In [4]:
# define inverse of Q, but since we want Sigma to be symmetric, will just be transpose
Q_i = np.transpose(Q)
print(Q_i)

[[ 1  6 11 16 21]
 [ 2  7 12 17 22]
 [ 3  8 13 18 23]
 [ 4  9 14 19 24]
 [ 5 10 15 20 25]]


In [5]:
# enforce Q orthogonal
from scipy.stats import ortho_group

In [6]:
Q = ortho_group.rvs(5)
print(Q)

[[-0.75290172 -0.11199583 -0.36059287 -0.42622856  0.32999687]
 [-0.35450321  0.17251317 -0.09293656 -0.04741552 -0.91306147]
 [ 0.5475826  -0.09828842 -0.43018518 -0.69464923 -0.15131374]
 [-0.08674901 -0.45007851  0.7583331  -0.45142952 -0.10510111]
 [-0.0097153   0.86340307  0.31813078 -0.36021498  0.15322769]]


In [7]:
Q_i = np.transpose(Q)
print(Q_i)

[[-0.75290172 -0.35450321  0.5475826  -0.08674901 -0.0097153 ]
 [-0.11199583  0.17251317 -0.09828842 -0.45007851  0.86340307]
 [-0.36059287 -0.09293656 -0.43018518  0.7583331   0.31813078]
 [-0.42622856 -0.04741552 -0.69464923 -0.45142952 -0.36021498]
 [ 0.32999687 -0.91306147 -0.15131374 -0.10510111  0.15322769]]


In [8]:
print(np.round(np.matmul(Q,Q_i)))

[[ 1. -0. -0.  0.  0.]
 [-0.  1.  0.  0. -0.]
 [-0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  0.]
 [ 0. -0.  0.  0.  1.]]


In [9]:
# define Lambda diagonal matrix: ie the list of eigenvalues
L = np.array([100*np.random.random()**2 for _ in range(5+1)[1:]])
print(L)

[12.55789201  5.55761568  7.33460538  0.13019001 20.6505762 ]


In [10]:
# what we will be using in computation is actually the inverse of L
L_i = 1/L
print(L_i)

[0.0796312  0.17993328 0.13633999 7.68108078 0.0484248 ]


In [11]:
# compute Sigma = Q_i * L * Q
# because L is stored as an array, we have to use einsum
Sigma = np.einsum('xy, y, yz -> xz', Q, L, Q_i)
print(Sigma)

[[10.41444384 -2.72934133 -4.97098174 -1.59647692 -0.22276739]
 [-2.72934133 19.02322236  0.6186124   1.42224614 -2.23272825]
 [-4.97098174  0.6186124   5.71210339 -2.37415812 -1.98843537]
 [-1.59647692  1.42224614 -2.37415812  5.69286035 -0.69102889]
 [-0.22276739 -2.23272825 -1.98843537 -0.69102889  5.38824934]]


In [12]:
# verification with actual diagonal matrix
L2 = L * np.eye(np.shape(Q)[0])
print(L2)

[[12.55789201  0.          0.          0.          0.        ]
 [ 0.          5.55761568  0.          0.          0.        ]
 [ 0.          0.          7.33460538  0.          0.        ]
 [ 0.          0.          0.          0.13019001  0.        ]
 [ 0.          0.          0.          0.         20.6505762 ]]


In [13]:
Sigma2 = np.matmul(np.matmul(Q_i, L2), Q)
print(Sigma2)

[[10.01920486  0.15612606  1.79231379  1.41079421 -1.95844305]
 [ 0.15612606 15.81442201  6.35593242 -5.34132023  1.50772967]
 [ 1.79231379  6.35593242  5.203062    1.73532839  0.4509779 ]
 [ 1.41079421 -5.34132023  1.73532839  8.53916261 -1.88840098]
 [-1.95844305  1.50772967  0.4509779  -1.88840098  6.6550278 ]]


In [14]:
# check that both computations are the same
print(np.round(Sigma-Sigma2))

[[ 0. -3. -7. -3.  2.]
 [-3.  3. -6.  7. -4.]
 [-7. -6.  1. -4. -2.]
 [-3.  7. -4. -3.  1.]
 [ 2. -4. -2.  1. -1.]]


In [15]:
# what we really want is the inverse of sigma
Sigma_i = np.einsum('xy, y, yz -> xz', Q, L_i, Q_i)
print(Sigma_i)

[[1.46582598 0.16298938 2.26209128 1.45324254 1.14929798]
 [0.16298938 0.0741797  0.24662514 0.14792798 0.14746023]
 [2.26209128 0.24662514 3.75836501 2.3691429  1.88650869]
 [1.45324254 0.14792798 2.3691429  1.68130504 1.21129063]
 [1.14929798 0.14746023 1.88650869 1.21129063 1.14573431]]


In [16]:
# check that it's inverse
print(np.round(np.matmul(Sigma, Sigma_i)))

[[ 1.  0. -0. -0. -0.]
 [-0.  1.  0.  0. -0.]
 [-0. -0.  1. -0. -0.]
 [ 0.  0.  0.  1.  0.]
 [ 0.  0.  0.  0.  1.]]


In [17]:
np.linalg.inv(Sigma)

array([[1.46582598, 0.16298938, 2.26209128, 1.45324254, 1.14929798],
       [0.16298938, 0.0741797 , 0.24662514, 0.14792798, 0.14746023],
       [2.26209128, 0.24662514, 3.75836501, 2.3691429 , 1.88650869],
       [1.45324254, 0.14792798, 2.3691429 , 1.68130504, 1.21129063],
       [1.14929798, 0.14746023, 1.88650869, 1.21129063, 1.14573431]])

In [18]:
# testing if trace is sum of eigenval for any matrix

In [19]:
# size = 5
# Sigma = np.reshape([np.random.random() for _ in range(size*size)],(size,size))
print(Sigma)
print('Trace = ', np.trace(Sigma))
print('Sum of Eigenvalues = ', np.sum(np.linalg.eigvals(Sigma)))
np.round(np.trace(Sigma)-np.real(np.sum(np.linalg.eigvals(Sigma))))

[[10.41444384 -2.72934133 -4.97098174 -1.59647692 -0.22276739]
 [-2.72934133 19.02322236  0.6186124   1.42224614 -2.23272825]
 [-4.97098174  0.6186124   5.71210339 -2.37415812 -1.98843537]
 [-1.59647692  1.42224614 -2.37415812  5.69286035 -0.69102889]
 [-0.22276739 -2.23272825 -1.98843537 -0.69102889  5.38824934]]
Trace =  46.23087928116179
Sum of Eigenvalues =  46.23087928116179


0.0

In [20]:
Sigma_i = np.linalg.inv(Sigma)
print(Sigma_i)
print('Trace = ', np.trace(Sigma_i))
print('Sum of Eigenvalues = ', np.sum(np.linalg.eigvals(Sigma_i)))
np.round(np.trace(Sigma_i)-np.real(np.sum(np.linalg.eigvals(Sigma_i))))

[[1.46582598 0.16298938 2.26209128 1.45324254 1.14929798]
 [0.16298938 0.0741797  0.24662514 0.14792798 0.14746023]
 [2.26209128 0.24662514 3.75836501 2.3691429  1.88650869]
 [1.45324254 0.14792798 2.3691429  1.68130504 1.21129063]
 [1.14929798 0.14746023 1.88650869 1.21129063 1.14573431]]
Trace =  8.12541004587375
Sum of Eigenvalues =  8.12541004587375


0.0

In [21]:
### idea 2 - just store Sigma_i, trust that it stays positive definite symmetrical

In [87]:
### first: create a inv_covar of the right size that is positive definite symmetrical


# sizes
# self.inv_covars = nn.Parameter(torch.Tensor(g_out_features, g_in_features, g_in_features,), requires_grad=True)

# number of neurons in layer
g_out_features = 5
# number of inputs in prev layer
g_in_features = 4

inv_covars = np.tile(np.eye(g_in_features),(g_out_features,1,1))
# noise to add
noise = np.array([(lambda n: np.matmul(n, np.transpose(n)))(np.random.rand(g_in_features, g_in_features))
         for _ in range(g_out_features)])

inv_covars = 1000*noise+1/g_in_features*inv_covars

print(inv_covars)

[[[ 366.21890891  799.0068077   246.28862003  717.81297679]
  [ 799.0068077  2781.51165711 1097.0102975  1767.17348089]
  [ 246.28862003 1097.0102975   583.91138088  761.95816664]
  [ 717.81297679 1767.17348089  761.95816664 1740.56965409]]

 [[1901.91454352 2015.2029232  1304.83884998 1523.53573015]
  [2015.2029232  2473.33840467 1420.87248129 1722.06584204]
  [1304.83884998 1420.87248129 1091.65203099 1424.53872469]
  [1523.53573015 1722.06584204 1424.53872469 2014.82377172]]

 [[1083.91349412  844.74117607 1370.93115926  961.2485517 ]
  [ 844.74117607  844.32059963  808.78398212  660.32519556]
  [1370.93115926  808.78398212 2137.46915867 1397.53773153]
  [ 961.2485517   660.32519556 1397.53773153 1043.59836479]]

 [[2163.42141312 1689.83797609  838.7887076  1544.09082228]
  [1689.83797609 1457.89002734  673.2419748  1158.72713411]
  [ 838.7887076   673.2419748   417.67207481  608.31435699]
  [1544.09082228 1158.72713411  608.31435699 1223.74497048]]

 [[1969.04168918  817.82211665 1

In [88]:
np.linalg.inv(inv_covars[0])

array([[ 0.12616276, -0.03499707,  0.07944854, -0.05127737],
       [-0.03499707,  0.01146382, -0.02430681,  0.01343444],
       [ 0.07944854, -0.02430681,  0.05695542, -0.03301936],
       [-0.05127737,  0.01343444, -0.03301936,  0.02253627]])

In [89]:
# input data
batch_size = 5
# inputs  = np.random.randn(batch_size, g_out_features, g_in_features)
inputs = np.reshape(range(batch_size*g_out_features*g_in_features), (batch_size, g_out_features, g_in_features))
print(inputs.shape)
print(inputs)

(5, 5, 4)
[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]
  [12 13 14 15]
  [16 17 18 19]]

 [[20 21 22 23]
  [24 25 26 27]
  [28 29 30 31]
  [32 33 34 35]
  [36 37 38 39]]

 [[40 41 42 43]
  [44 45 46 47]
  [48 49 50 51]
  [52 53 54 55]
  [56 57 58 59]]

 [[60 61 62 63]
  [64 65 66 67]
  [68 69 70 71]
  [72 73 74 75]
  [76 77 78 79]]

 [[80 81 82 83]
  [84 85 86 87]
  [88 89 90 91]
  [92 93 94 95]
  [96 97 98 99]]]


In [90]:
# this was figured out in notebook 1.0
g = np.einsum('Lzi,zik,Lzk->Lz', inputs, inv_covars, inputs)

In [91]:
inv_covars[0]

array([[ 366.21890891,  799.0068077 ,  246.28862003,  717.81297679],
       [ 799.0068077 , 2781.51165711, 1097.0102975 , 1767.17348089],
       [ 246.28862003, 1097.0102975 ,  583.91138088,  761.95816664],
       [ 717.81297679, 1767.17348089,  761.95816664, 1740.56965409]])

In [92]:
H = np.linalg.cholesky(inv_covars[0])
H

array([[19.13684689,  0.        ,  0.        ,  0.        ],
       [41.75227049, 32.22203541,  0.        ,  0.        ],
       [12.86986416, 17.36899114, 10.79796851,  0.        ],
       [37.50946961,  6.24007631, 15.82080533,  6.66129945]])

In [93]:
S = np.diag(H)
S

array([19.13684689, 32.22203541, 10.79796851,  6.66129945])

In [94]:
np.matmul(H, np.transpose(H))

array([[ 366.21890891,  799.0068077 ,  246.28862003,  717.81297679],
       [ 799.0068077 , 2781.51165711, 1097.0102975 , 1767.17348089],
       [ 246.28862003, 1097.0102975 ,  583.91138088,  761.95816664],
       [ 717.81297679, 1767.17348089,  761.95816664, 1740.56965409]])

In [96]:
In [1]: def ldl_decomp(A):
    A = np.matrix(A)
    if not (A.H == A).all():
        print("A must be Hermitian!")
        return None, None
    else:
        S = np.diag(np.diag(A))
        Sinv = np.diag(1/np.diag(A))
        D = np.matrix(S.dot(S))
        Lch = np.linalg.cholesky(A)
        L = np.matrix(Lch.dot(Sinv))
    return L, D

In [97]:
Q,V = ldl_decomp(inv_covars[0])
Q,V

(matrix([[0.05225521, 0.        , 0.        , 0.        ],
         [0.11400905, 0.01158436, 0.        , 0.        ],
         [0.03514254, 0.00624444, 0.01849248, 0.        ],
         [0.10242363, 0.00224341, 0.02709453, 0.00382708]]),
 matrix([[ 134116.28924418,       0.        ,       0.        ,
                0.        ],
         [      0.        , 7736807.09864133,       0.        ,
                0.        ],
         [      0.        ,       0.        ,  340952.50072108,
                0.        ],
         [      0.        ,       0.        ,       0.        ,
          3029582.7207556 ]]))

In [98]:
np.linalg.eigvals(inv_covars[0])

array([4.82038389e+03, 4.62227214e+02, 4.77699061e+00, 1.84823507e+02])

In [99]:
np.matmul(Q, np.matmul(V, np.transpose(Q)))

matrix([[ 366.21890891,  799.0068077 ,  246.28862003,  717.81297679],
        [ 799.0068077 , 2781.51165711, 1097.0102975 , 1767.17348089],
        [ 246.28862003, 1097.0102975 ,  583.91138088,  761.95816664],
        [ 717.81297679, 1767.17348089,  761.95816664, 1740.56965409]])

In [100]:
S = np.diag(inv_covars[0])
print(S)
S = np.diag(S)
print(S)
S.dot(S)

[ 366.21890891 2781.51165711  583.91138088 1740.56965409]
[[ 366.21890891    0.            0.            0.        ]
 [   0.         2781.51165711    0.            0.        ]
 [   0.            0.          583.91138088    0.        ]
 [   0.            0.            0.         1740.56965409]]


array([[ 134116.28924418,       0.        ,       0.        ,
              0.        ],
       [      0.        , 7736807.09864133,       0.        ,
              0.        ],
       [      0.        ,       0.        ,  340952.50072108,
              0.        ],
       [      0.        ,       0.        ,       0.        ,
        3029582.7207556 ]])

In [101]:
V,Q = np.linalg.eig(inv_covars[0])
V,Q

(array([4.82038389e+03, 4.62227214e+02, 4.77699061e+00, 1.84823507e+02]),
 array([[-0.23836905, -0.30102188,  0.77138136, -0.50748086],
        [-0.73733046,  0.55439144, -0.22011064, -0.31708868],
        [-0.30441589,  0.23722418,  0.50598688,  0.77138378],
        [-0.5539449 , -0.73875615, -0.31701638,  0.21652947]]))

In [102]:
np.matmul(Q, np.matmul(np.diag(V), np.transpose(Q)))

array([[ 366.21890891,  799.0068077 ,  246.28862003,  717.81297679],
       [ 799.0068077 , 2781.51165711, 1097.0102975 , 1767.17348089],
       [ 246.28862003, 1097.0102975 ,  583.91138088,  761.95816664],
       [ 717.81297679, 1767.17348089,  761.95816664, 1740.56965409]])

In [103]:
np.fill_diagonal(H,0)
H

array([[ 0.        ,  0.        ,  0.        ,  0.        ],
       [41.75227049,  0.        ,  0.        ,  0.        ],
       [12.86986416, 17.36899114,  0.        ,  0.        ],
       [37.50946961,  6.24007631, 15.82080533,  0.        ]])

In [104]:
np.linalg.eigvals(np.matmul(H, np.transpose(H)))

array([3496.10826725,  138.18549928,  272.47032525,    0.        ])

In [108]:
np.tril(inv_covars)

array([[[ 366.21890891,    0.        ,    0.        ,    0.        ],
        [ 799.0068077 , 2781.51165711,    0.        ,    0.        ],
        [ 246.28862003, 1097.0102975 ,  583.91138088,    0.        ],
        [ 717.81297679, 1767.17348089,  761.95816664, 1740.56965409]],

       [[1901.91454352,    0.        ,    0.        ,    0.        ],
        [2015.2029232 , 2473.33840467,    0.        ,    0.        ],
        [1304.83884998, 1420.87248129, 1091.65203099,    0.        ],
        [1523.53573015, 1722.06584204, 1424.53872469, 2014.82377172]],

       [[1083.91349412,    0.        ,    0.        ,    0.        ],
        [ 844.74117607,  844.32059963,    0.        ,    0.        ],
        [1370.93115926,  808.78398212, 2137.46915867,    0.        ],
        [ 961.2485517 ,  660.32519556, 1397.53773153, 1043.59836479]],

       [[2163.42141312,    0.        ,    0.        ,    0.        ],
        [1689.83797609, 1457.89002734,    0.        ,    0.        ],
        [ 838.