# Testing Covariance

A more precise test of our covariance unit is to generate data having a specific distribution and then see whether our covarinace matrix matches our specified matrix.

There are two approaches - one using a Cholesky decomposition, the other using an eigenvalue decomposition. See https://stats.stackexchange.com/questions/32169/how-can-i-generate-data-with-a-prespecified-correlation-matrix and https://stats.stackexchange.com/questions/120179/generating-data-with-a-given-sample-covariance-matrix.

In [2]:
from src.var_processor.covariance import CovarianceUnit
import numpy as np

We need a positive definite covariance matrix. For this we can:
* generate random matrix
* multiply it by it's own transposition
* you have obtained a positive semi-definite matrix.

In [8]:
cov = np.random.randn(3,3)
cov = np.dot(cov,cov.T)

In [9]:
cov

array([[ 1.12848561, -0.79697223,  1.6590042 ],
       [-0.79697223,  0.82794797, -1.04018027],
       [ 1.6590042 , -1.04018027,  4.65298715]])

In [10]:
mean = 0.5

In [11]:
L = np.linalg.cholesky(cov)

In [12]:
L

array([[ 1.06230203,  0.        ,  0.        ],
       [-0.7502313 ,  0.51487956,  0.        ],
       [ 1.5617067 ,  0.25532373,  1.46590216]])

Now we loop 100 times, generating samples and seeing what our mean and covariance equal...

In [13]:
cov_unit = CovarianceUnit(3)

In [29]:
for _ in range(0, 100):
    sample = np.dot(L, np.random.randn(3, 1)) + mean
    cov_unit.update(sample)
print(cov_unit.mean, cov_unit.covariance, sep="\n")

[[0.54730931]
 [0.47355449]
 [0.5773568 ]]
[[ 1.14993377 -0.78521168  1.68587113]
 [-0.78521168  0.7942798  -1.05060454]
 [ 1.68587113 -1.05060454  4.59816267]]


In [31]:
for _ in range(0, 10000):
    sample = np.dot(L, np.random.randn(3, 1)) + mean
    cov_unit.update(sample)
print(cov_unit.mean, cov_unit.covariance, sep="\n")

[[0.51125101]
 [0.49221394]
 [0.50523693]]
[[ 1.1386086  -0.80263796  1.70602671]
 [-0.80263796  0.82986436 -1.07135361]
 [ 1.70602671 -1.07135361  4.74586408]]


In [32]:
mean = -0.5
cov_unit2 = CovarianceUnit(3)
for _ in range(0, 10000):
    sample = np.dot(L, np.random.randn(3, 1)) + mean
    cov_unit2.update(sample)
print(cov_unit2.mean, cov_unit2.covariance, sep="\n")

[[-0.48874286]
 [-0.51869883]
 [-0.49200419]]
[[ 1.14307288 -0.81324798  1.66545525]
 [-0.81324798  0.84053001 -1.04764234]
 [ 1.66545525 -1.04764234  4.6189823 ]]


We can use numpy's allclose/isclose to determine if close. https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html#numpy.allclose

In [39]:
mean_array = np.ones(shape=(3,1))*0.5
# Within 5%
np.allclose(mean_array, cov_unit.mean, rtol=0.05)

True

In [41]:
np.allclose(cov, cov_unit.covariance, rtol=0.05)

True

In [None]:
def test_covariance_computation():
    """Statistical test that cov unit is determining the covariance."""
    # Generate random positive definite matrix
    cov = np.random.randn(3,3)
    cov = np.dot(cov,cov.T)
    # Generate desired mean
    mean = np.random.randn(3,1)
    # Use Cholesky decomposition to get L
    L = np.linalg.cholesky(cov)
    cov_unit = CovarianceUnit(3)
    for _ in range(0, 1000):
        sample = np.dot(L, np.random.randn(3, 1)) + mean
        cov_unit.update(sample)
    # Check within 5%
    assert np.allclose(mean, cov_unit.mean, rtol=0.05)
    assert np.allclose(cov, cov_unit.covariance, rtol=0.05)

In [46]:
np.random.uniform(size=(3,3))

array([[0.45475868, 0.0813685 , 0.64410836],
       [0.48388041, 0.81378173, 0.6283129 ],
       [0.06086568, 0.22582442, 0.82828134]])

In [53]:
cov = np.random.randn(3, 3)
cov = np.dot(cov, cov.T) 
cov = cov / cov.max()
cov

array([[ 0.65029906, -0.24070865,  0.63803126],
       [-0.24070865,  0.09457014, -0.23644969],
       [ 0.63803126, -0.23644969,  1.        ]])

In [54]:
cov.max()

1.0

In [55]:
cov / cov.max()

array([[ 0.65029906, -0.24070865,  0.63803126],
       [-0.24070865,  0.09457014, -0.23644969],
       [ 0.63803126, -0.23644969,  1.        ]])

# Testing Eigenvalue Estimation

We can use a specified covariance matrix - determine the eigenvectors using numpy - https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.eig.html - and check against those from the power iterator.

In [56]:
from src.var_processor.power_iterator import PowerIterator

In [58]:
# Generate random covariance matrix
cov = np.random.randn(3,3)
cov = np.dot(cov,cov.T)
cov = cov / cov.max()
# Generate test power iterator
power = PowerIterator(3)
power.load_covariance(cov)
for _ in range(0, 1000):
    power.iterate()
evec = power.eigenvector
evalue = power.eigenvalue

# Use numpy linear algebra to determine eigenvectors and values
w, v = np.linalg.eig(cov)

In [59]:
print(evec, evalue, w, v, sep="\n")

[[ 0.29804309]
 [ 0.94252654]
 [-0.15104319]]
[[1.07981685]]
[0.00491879 1.07981685 0.93711051]
[[ 0.93960362 -0.29804309 -0.16827166]
 [-0.26178539 -0.94252654  0.20763464]
 [ 0.22048457  0.15104319  0.96362467]]


In [61]:
v[:, np.argmax(w)]

array([-0.29804309, -0.94252654,  0.15104319])

Pretty good - we then check with abs() and allclose.

In [64]:
assert np.allclose(abs(evec.T), abs(v[:, np.argmax(w)]), rtol=0.05)

In [65]:
print(abs(evec.T), abs(v[:, np.argmax(w)]))

[[0.29804309 0.94252654 0.15104319]] [0.29804309 0.94252654 0.15104319]


# VPU

Let's revise our VPU. We have the folded two layer structure. Is our project the same as our reconstruct?

In [66]:
r = 1
print(np.dot(r, evec), r*evec)

[[ 0.29804309]
 [ 0.94252654]
 [-0.15104319]] [[ 0.29804309]
 [ 0.94252654]
 [-0.15104319]]


In [69]:
assert np.array_equal(np.dot(r, evec), r*evec)

In [70]:
r.T

AttributeError: 'int' object has no attribute 'T'

In [71]:
rand_ints = np.random.randint(2, size=3); print(rand_ints)

[0 1 0]


In [72]:
np.dot(evec.T, rand_ints)

array([0.94252654])

In [75]:
one_val_array = np.asarray([[1.25]])
one_val_array.shape

(1, 1)

In [76]:
one_val_array.item()

1.25

In [78]:
np.arange(0, 10)[:10]

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# Test Time Stage

In [90]:
from src.var_processor.stage import Stage, pad_array

In [80]:
stages = Stage(3, 10)
assert len(stages.vpus) == 10
assert not stages.causes.any()
assert not stages.pred_inputs.any()
assert "10" in stages.__repr__()
# Check data in
for _ in range(0, 100):
    data_in = np.random.randint(2, size=(stages.size, 1))
    r_backwards = np.random.randint(2, size=(stages.stage_len, 1))
    causes1, pred_inputs1 = stages.iterate(data_in, r_backwards)

In [83]:
print(causes1.T, pred_inputs1.T, sep="\n")

[[0. 0. 1. 0. 0. 0. 0. 1. 0. 1.]]
[[1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  0. 1. 0. 0. 0. 0.]]


In [86]:
# assert stages.causes.any()
# assert stages.pred_inputs.any()
for _ in range(0, 1000):
    data_in = np.random.randint(2, size=(stages.size, 1))
    r_backwards = np.random.randint(2, size=(stages.stage_len, 1))
    causes2, pred_inputs2 = stages.iterate(data_in, r_backwards)

In [87]:
print(causes1.T, pred_inputs1.T, sep="\n")
print(causes2.T, pred_inputs2.T, sep="\n")

[[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]]
[[1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 0. 0. 0. 0.]]
[[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]]
[[1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 0. 0. 0. 0.]]


Why are the causes and predicted inputs the same?

In [89]:
print(data_in.T, r_backwards.T)

[[1 0 1 0 0 0 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 0 1 0 0 1 1 0 1 1]] [[1 1 0 0 0 0 0 0 1 0]]


In [92]:
self = stages

input_array = pad_array(data_in, self.size); print(input_array.T)

[[1 0 1 0 0 0 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 0 1 0 0 1 1 0 1 1]]


In [95]:
# Iterate through VPUs, passing data in
for i, vpu in enumerate(self.vpus):
    start = i*self.vec_len
    end = (i+1)*self.vec_len
    input_segment = input_array[start:end]
    feedback_segment = r_backwards[i]
    print(start, end, input_segment, feedback_segment, vpu.pi.ev, sep="\n", end="\n")
    vpu.update_cov(input_segment)
    cause, pred_input = vpu.iterate(
                input_segment,
                feedback_segment
    )
    print(i, cause, pred_input)
    self.causes[i] = cause
    self.pred_inputs[start:end] = pred_input

0
3
[[1]
 [0]
 [1]]
[1]
[[ 0.79037307]
 [-0.55754513]
 [-0.25387761]]
0 [[1]] [[0]
 [0]
 [0]]
3
6
[[0]
 [0]
 [0]]
[1]
[[ 0.54705236]
 [-0.67502972]
 [ 0.49504403]]
1 [[0]] [[1]
 [0]
 [0]]
6
9
[[0]
 [0]
 [1]]
[0]
[[-0.46236631]
 [ 0.63620798]
 [ 0.61762189]]
2 [[1]] [[0]
 [0]
 [0]]
9
12
[[1]
 [1]
 [0]]
[0]
[[-0.67991935]
 [ 0.44155816]
 [-0.58543664]]
3 [[0]] [[0]
 [0]
 [0]]
12
15
[[0]
 [0]
 [0]]
[0]
[[-0.69689261]
 [-0.29170901]
 [-0.65516909]]
4 [[0]] [[0]
 [0]
 [0]]
15
18
[[1]
 [1]
 [0]]
[0]
[[ 0.46582648]
 [-0.61812438]
 [ 0.63318871]]
5 [[0]] [[0]
 [0]
 [0]]
18
21
[[1]
 [0]
 [0]]
[0]
[[-0.48473432]
 [ 0.5667841 ]
 [-0.66617447]]
6 [[0]] [[0]
 [0]
 [0]]
21
24
[[0]
 [1]
 [0]]
[0]
[[0.27251545]
 [0.6452655 ]
 [0.71370005]]
7 [[1]] [[0]
 [0]
 [0]]
24
27
[[0]
 [1]
 [1]]
[1]
[[ 0.48845576]
 [-0.61086087]
 [-0.6231051 ]]
8 [[0]] [[0]
 [0]
 [0]]
27
30
[[0]
 [1]
 [1]]
[0]
[[-0.66623466]
 [-0.17333122]
 [-0.72531901]]
9 [[0]] [[0]
 [0]
 [0]]


In [96]:
self.causes

array([[1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.]])

In [97]:
data_in = np.random.randint(2, size=(stages.size, 1))
r_backwards = np.random.randint(2, size=(stages.stage_len, 1))
for i, vpu in enumerate(self.vpus):
    start = i*self.vec_len
    end = (i+1)*self.vec_len
    input_segment = input_array[start:end]
    feedback_segment = r_backwards[i]
    print(start, end, input_segment, feedback_segment, vpu.pi.ev, sep="\n", end="\n")
    vpu.update_cov(input_segment)
    cause, pred_input = vpu.iterate(
                input_segment,
                feedback_segment
    )
    print(i, cause, pred_input)
    self.causes[i] = cause
    self.pred_inputs[start:end] = pred_input

0
3
[[1]
 [0]
 [1]]
[1]
[[ 0.7905105 ]
 [-0.55820589]
 [-0.25199073]]
0 [[1]] [[0]
 [0]
 [0]]
3
6
[[0]
 [0]
 [0]]
[1]
[[ 0.54751368]
 [-0.6743897 ]
 [ 0.4954062 ]]
1 [[0]] [[0]
 [0]
 [1]]
6
9
[[0]
 [0]
 [1]]
[0]
[[-0.46555767]
 [ 0.6349305 ]
 [ 0.61653817]]
2 [[1]] [[0]
 [0]
 [0]]
9
12
[[1]
 [1]
 [0]]
[0]
[[-0.67968216]
 [ 0.44081427]
 [-0.58627208]]
3 [[0]] [[0]
 [0]
 [0]]
12
15
[[0]
 [0]
 [0]]
[0]
[[-0.69693826]
 [-0.29126824]
 [-0.65531662]]
4 [[0]] [[0]
 [0]
 [0]]
15
18
[[1]
 [1]
 [0]]
[0]
[[ 0.46531548]
 [-0.61835289]
 [ 0.63334131]]
5 [[0]] [[0]
 [0]
 [0]]
18
21
[[1]
 [0]
 [0]]
[1]
[[-0.4866324 ]
 [ 0.5671122 ]
 [-0.66450933]]
6 [[0]] [[0]
 [1]
 [0]]
21
24
[[0]
 [1]
 [0]]
[1]
[[0.27251055]
 [0.64475252]
 [0.71416538]]
7 [[1]] [[1]
 [0]
 [1]]
24
27
[[0]
 [1]
 [1]]
[1]
[[ 0.48832464]
 [-0.6108484 ]
 [-0.62322009]]
8 [[0]] [[0]
 [0]
 [0]]
27
30
[[0]
 [1]
 [1]]
[1]
[[-0.66579186]
 [-0.17365174]
 [-0.72564886]]
9 [[0]] [[0]
 [0]
 [0]]


In [99]:
self.pred_inputs.T

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0.]])

In [100]:
self.causes.T

array([[1., 0., 1., 0., 0., 0., 0., 1., 0., 0.]])

In [101]:
np.arange(0,10)[:-1]

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [None]:
assert not np.array_equal(causes1, causes2)
assert not np.array_equal(pred_inputs1, pred_inputs2)