In [10]:
import numpy as np

from src.var_processor.pb_threshold import ternary_pbt

In [69]:
class CovarianceUnit:
    """Staged 8-bit covariance estimator; the input mean is assumed to be 0.

    Every stage keeps a (size, size) int8 running sum. Once a stage has
    received ``max_value`` contributions its sum is stored as that stage's
    completed estimate, the stage is reset, and (for all but the last stage)
    a ternary PBT sample of the estimate is added to the next stage.
    """

    def __init__(self, size, stages=8):
        """Initialise.

        Args:
            size: integer setting the 1D size of an input.
            stages: integer setting the number of stages.
        """
        self.size = size
        # Set max value for signed int
        self.max_value = 127
        self.stages = stages
        # Initialise Square Sums
        self.square_sum = np.zeros(
            shape=(size, size, self.stages), dtype=np.int8
        )
        # Initialise Store for last full values
        self.complete = np.zeros(
            shape=(size, size, self.stages), dtype=np.int8
        )
        # Define counter for each stage
        self.stage_counter = np.zeros(self.stages, dtype=np.uint8)
        # Define index for current cov
        self.cov_index = 0

    def update_cov(self, data_array):
        """Add a data array to the covariance data.

        The outer product of the input with itself is added to stage 0 and
        any stage that becomes full is rolled over into the next one.

        Args:
            data_array: numpy array holding 'size' values, each with an
                absolute value of at most 1. A flat (size,) array and a
                (size, 1) column are both accepted.

        Raises:
            ValueError: if the input does not hold exactly 'size' values or
                if any value has an absolute value greater than 1.
        """
        data_array = np.asarray(data_array)
        if data_array.size != self.size:
            raise ValueError(
                f"expected {self.size} values, got {data_array.size}"
            )
        # Larger magnitudes would overflow the int8 running sums before the
        # stage counter reaches max_value.
        if np.any(np.abs(data_array) > 1):
            raise ValueError("data_array entries must lie in [-1, 1]")
        # Cast to 8 bit and force a column so that np.dot below is always an
        # outer product (for flat inputs np.dot would return a scalar).
        column = data_array.astype(np.int8).reshape(-1, 1)
        # Increment current stage counter
        self.stage_counter[0] += 1
        # Add square of input array
        self.square_sum[:, :, 0] += np.dot(column, column.T)
        self.recursive_update(0)

    def recursive_update(self, i):
        """Roll stage i over into stage i+1 if stage i is full.

        Args:
            i - stage to update - integer.
        """
        # Nothing to do past the last stage or while the stage is filling
        if i > (self.stages - 1):
            return
        if self.stage_counter[i] < self.max_value:
            return
        # Add to completed estimate
        self.complete[:, :, i] = self.square_sum[:, :, i]
        # Reset the counter and running sum of this stage
        self.stage_counter[i] = 0
        self.square_sum[:, :, i] = 0
        # Set cov index as highest available
        if self.cov_index < i:
            self.cov_index = i
        # If there is a higher stage, PBT the estimate and feed it upwards
        if i < (self.stages - 1):
            # Apply ternary PBT to square sum
            thresholded = ternary_pbt(self.complete[:, :, i], self.max_value)
            # Add to next square sum
            self.square_sum[:, :, i + 1] += thresholded
            # Increment next stage counter
            self.stage_counter[i + 1] += 1
            self.recursive_update(i + 1)

    @property
    def covariance(self):
        """Return the completed estimate of the highest stage reached so far."""
        return self.complete[:, :, self.cov_index]

    def __repr__(self):
        """String representation of covariance unit state."""
        # Stages that currently hold at least one contribution
        active_stages = np.nonzero(self.stage_counter)[0]
        string = (
            f"There are {self.stages} stages to process "
            f"1D arrays of length {self.size}.\nData is assumed to "
            f"have a maximum absolute value of {self.max_value}.\n"
            f"-------\nCounter: {self.stage_counter}\nRunning sum of squares:\n"
        )
        for i in active_stages:
            string += f"{self.square_sum[:, :, i]}\n"
        string += "Complete covariance estimates:\n"
        # A unit whose counters are all zero has no active stage; max() of
        # an empty sequence would raise, so fall back to an empty range.
        complete_range = int(active_stages.max()) if active_stages.size else 0
        for i in range(0, complete_range):
            string += f"{self.complete[:, :, i]}\n"
        string += (
            f"\n---------\nCurrent covariance estimate "
            f"(index: {self.cov_index}):\n{self.covariance}\n"
        )
        return string

## Testing 8bit Covariance Unit

### Test with Same Sign Values

The functions below don't have negative values - rand_same outputs 0 and 1

In [70]:
# Feed 1000 all-equal binary vectors of length 2 through a fresh unit
from src.tests.test_vpu import rand_same, rand_diff

cov_unit = CovarianceUnit(2)
for _ in range(1000):
    cov_unit.update_cov(rand_same())

print(cov_unit.covariance)
# Every entry should sit near half of the 127 maximum
assert np.allclose(cov_unit.covariance, np.full((2, 2), 127 // 2), atol=15)

[[58 58]
 [58 58]]


In [71]:
np.nonzero(cov_unit.stage_counter)[0][:-1]

array([0])

In [72]:
cov_unit

There are 8 stages to process 1D arrays of length 2.
Data is assumed to have a maximum absolute value of 127.
-------
Counter: [111   7   0   0   0   0   0   0]
Running sum of squares:
[[50 50]
 [50 50]]
[[6 1]
 [5 4]]
Complete covariance estimates:
[[58 58]
 [58 58]]

---------
Current covariance estimate (index: 0):
[[58 58]
 [58 58]]

In [73]:
# 127**2 updates complete stage 1 once; the extra 10 stay in stage 0
cov_unit = CovarianceUnit(2)
for _ in range(127 ** 2 + 10):
    cov_unit.update_cov(rand_same())

print(cov_unit.covariance)
assert np.allclose(cov_unit.covariance, np.full((2, 2), 127 // 2), atol=15)

[[65 76]
 [70 65]]


In [74]:
cov_unit

There are 8 stages to process 1D arrays of length 2.
Data is assumed to have a maximum absolute value of 127.
-------
Counter: [10  0  1  0  0  0  0  0]
Running sum of squares:
[[4 4]
 [4 4]]
[[1 0]
 [1 0]]
Complete covariance estimates:
[[73 73]
 [73 73]]
[[65 76]
 [70 65]]

---------
Current covariance estimate (index: 1):
[[65 76]
 [70 65]]

In [77]:
# Same experiment with length-3 inputs
cov_unit = CovarianceUnit(3)
for _ in range(1000):
    cov_unit.update_cov(rand_same(size=3))

print(cov_unit)

There are 8 stages to process 1D arrays of length 3.
Data is assumed to have a maximum absolute value of 127.
-------
Counter: [111   7   0   0   0   0   0   0]
Running sum of squares:
[[58 58 58]
 [58 58 58]
 [58 58 58]]
[[6 3 3]
 [3 3 3]
 [2 7 5]]
Complete covariance estimates:
[[62 62 62]
 [62 62 62]
 [62 62 62]]

---------
Current covariance estimate (index: 0):
[[62 62 62]
 [62 62 62]
 [62 62 62]]



In [78]:
assert np.allclose(cov_unit.covariance, np.ones(shape=(3,3))*(127//2), atol=15)

Ah - these estimates should be divided by 254 - the scaled versions we had for size = 2 were 0.25. For size = 3 the entries were 0.67591868. 

In [81]:
def test_covariance_same_positive():
    """Check estimates and stage bookkeeping for all-equal binary input."""
    half_scale = 127 // 2

    def run(size, n_updates, make_sample):
        """Feed n_updates samples into a fresh unit and return it."""
        unit = CovarianceUnit(size)
        for _ in range(n_updates):
            unit.update_cov(make_sample())
        return unit

    def near_half_scale(unit, size):
        """True when every entry of the estimate is within 15 of 127//2."""
        target = np.ones(shape=(size, size)) * half_scale
        return np.allclose(unit.covariance, target, atol=15)

    # 1000 = 7 * 127 + 111: stage 0 completed 7 times, stage 1 never did
    unit = run(2, 1000, rand_same)
    assert near_half_scale(unit, 2)
    assert unit.stage_counter[0] == 111
    assert unit.stage_counter[1] == 7
    assert unit.cov_index == 0
    assert not unit.complete[:, :, 1].any()

    # 127**2 + 10: stage 1 completed exactly once and fed stage 2
    unit = run(2, 127 ** 2 + 10, rand_same)
    assert near_half_scale(unit, 2)
    assert unit.stage_counter[0] == 10
    assert unit.stage_counter[1] == 0
    assert unit.stage_counter[2] == 1
    assert unit.cov_index == 1
    assert unit.complete[:, :, 1].any()
    assert not unit.complete[:, :, 2].any()

    # Length-3 inputs follow the same counter pattern as the first case
    unit = run(3, 1000, lambda: rand_same(size=3))
    assert unit.stage_counter[0] == 111
    assert unit.stage_counter[1] == 7
    assert unit.cov_index == 0
    assert near_half_scale(unit, 3)

In [82]:
# The test defined above is named test_covariance_same_positive
test_covariance_same_positive()

### Test With Negative Values

In [113]:
# We need to be careful that our random values are selected from a uniform distribution
def rand_same(size=2, negative=False):
    """Return a (size, 1) float column whose entries all share one random value.

    Args:
        size: number of rows in the returned column.
        negative: when False the shared value is drawn from {0, 1}; when
            True it is drawn uniformly from {0, 1, -1}.
    """
    # Draw an index into the value table: 0 -> 0, 1 -> 1, 2 -> -1
    n_choices = 3 if negative else 2
    shared_value = (0, 1, -1)[np.random.randint(n_choices)]
    return np.full([size, 1], float(shared_value))

def rand_diff(size=2, negative=False):
    """Return a (size, 1) float column of zeros with one random non-zero entry.

    Args:
        size: number of rows in the returned column.
        negative: when False the non-zero entry is 1; when True it is
            1 or -1, chosen by a second random draw.
    """
    position = np.random.randint(size)
    # The sign draw only happens when negative values are allowed
    flip_sign = negative and np.random.randint(2)
    column = np.zeros([size, 1])
    column[position] = -1 if flip_sign else 1
    return column

In [114]:
# Compare element-wise first, then reduce; `.all() >= 0` compared a bool with 0
assert (rand_same() >= 0).all()

In [163]:
rand_same(negative=True).ravel().shape

(2,)

In [181]:
# Binary mode must never produce a negative entry
for i in range(100):
    sample = rand_same()
    assert (sample >= 0).all()
# Ternary mode should produce an all-negative vector at least once
neg_sum = 0
for i in range(100):
    neg_sum += int((rand_same(negative=True) < 0).all())
assert neg_sum > 0
# Check zero mean
rolling_sum = np.zeros(shape=(2,1))
for i in range(1000):
    rolling_sum = rand_same(negative=True) + rolling_sum
sample_mean = rolling_sum/1000
assert np.allclose(sample_mean, np.zeros(shape=(2,1)), atol=0.1)

In [151]:
# Record every ternary all-equal sample so np.cov can be compared later
size = 2
buf_length = 1000
cov_unit = CovarianceUnit(size)
data_buffer = np.zeros(shape=(size, buf_length))
for i in range(buf_length):
    data = rand_same(negative=True)
    data_buffer[:, i] = data.ravel()
    cov_unit.update_cov(data)

print(cov_unit)

There are 8 stages to process 1D arrays of length 2.
Data is assumed to have a maximum absolute value of 127.
-------
Counter: [111   7   0   0   0   0   0   0]
Running sum of squares:
[[70 70]
 [70 70]]
[[4 6]
 [5 2]]
Complete covariance estimates:
[[97 97]
 [97 97]]

---------
Current covariance estimate (index: 0):
[[97 97]
 [97 97]]



The covariance value is higher with 3 different values.

In [152]:
data_buffer

array([[-1.,  1.,  1., ..., -1.,  0., -1.],
       [-1.,  1.,  1., ..., -1.,  0., -1.]])

In [153]:
np.cov(data_buffer)

array([[0.69752853, 0.69752853],
       [0.69752853, 0.69752853]])

In [155]:
cov_unit.covariance/127

array([[0.76377953, 0.76377953],
       [0.76377953, 0.76377953]])

In [158]:
data_buffer.mean(axis=1)

array([-0.013, -0.013])

In [197]:
def test_different_sign():
    """Compare the unit's estimate with np.cov for all-equal ternary input."""
    size, n_samples = 2, 1000
    samples = np.zeros(shape=(size, n_samples))
    unit = CovarianceUnit(size)
    for column in range(n_samples):
        vector = rand_same(negative=True)
        unit.update_cov(vector)
        samples[:, column] = vector.ravel()
    # Scale the 8-bit estimate by 127 before comparing with numpy
    estimate = unit.covariance/127
    reference = np.cov(samples)
    print(estimate, reference)
    assert np.allclose(estimate, reference, atol=0.1)

def test_non_neg():
    """Compare the unit's estimate with np.cov for all-equal binary input."""
    size, n_samples = 2, 1000
    samples = np.zeros(shape=(size, n_samples))
    unit = CovarianceUnit(size)
    for column in range(n_samples):
        vector = rand_same(negative=False)
        unit.update_cov(vector)
        samples[:, column] = vector.ravel()
    # Binary (0/1) input is scaled by 254 rather than 127
    estimate = unit.covariance/254
    reference = np.cov(samples)
    print(estimate, reference)
    assert np.allclose(estimate, reference, atol=0.1)

In [198]:
test_different_sign()

[[0.69291339 0.69291339]
 [0.69291339 0.69291339]] [[0.67362462 0.67362462]
 [0.67362462 0.67362462]]


In [204]:
test_non_neg()

[[0.26377953 0.26377953]
 [0.26377953 0.26377953]] [[0.25010611 0.25010611]
 [0.25010611 0.25010611]]


**Cool - these work. We'll need to add these tests once we update our covariance unit code.**

Note the need to divide by 254 for 0, 1 and 127 for -1, 0, 1.

In [259]:
# This test will not work - as the mean is not zero
def test_diff_non_neg():
    """Test applying with binary input with different elements.

    One-hot binary input does not have a zero mean, so the zero-mean
    assertion below is expected to fail for this input.
    """
    size = 2
    buf_length = 1000
    data_buffer = np.zeros(shape=(size, buf_length))
    cov_unit = CovarianceUnit(size)
    for i in range(buf_length):
        data = rand_diff(negative=False)
        cov_unit.update_cov(data)
        data_buffer[:, i] = data.ravel()
    sample_mean = data_buffer.mean(axis=1)
    print(sample_mean)
    print(cov_unit.covariance/254, np.cov(data_buffer))
    # The mean has shape (size,), so compare against zeros of that shape
    # rather than relying on broadcasting against a fixed (2, 1) array.
    assert np.allclose(sample_mean, np.zeros(size), atol=0.1)
    # Check covariance estimate is within range of actual estimate
    assert np.allclose(cov_unit.covariance/254, np.cov(data_buffer), atol=0.1)

# This test will work
def test_diff_neg():
    """Test applying with ternary input with different elements."""
    size = 2
    buf_length = 1000
    data_buffer = np.zeros(shape=(size, buf_length))
    cov_unit = CovarianceUnit(size)
    for i in range(buf_length):
        data = rand_diff(negative=True)
        cov_unit.update_cov(data)
        data_buffer[:, i] = data.ravel()
    sample_mean = data_buffer.mean(axis=1)
    print(sample_mean, )
    print(cov_unit.covariance/127, np.cov(data_buffer))
    # The mean has shape (size,), so compare against zeros of that shape
    # rather than relying on broadcasting against a fixed (2, 1) array.
    assert np.allclose(sample_mean, np.zeros(size), atol=0.1)
    # Check covariance estimate is within range of actual estimate
    assert np.allclose(cov_unit.covariance/127, np.cov(data_buffer), atol=0.1)

In [260]:
test_diff_non_neg()

[0.459 0.541]
[[0.19685039 0.        ]
 [0.         0.30314961]] [[ 0.24856757 -0.24856757]
 [-0.24856757  0.24856757]]


AssertionError: 

In [264]:
test_diff_neg()

[0.056 0.016]
[[0.46456693 0.        ]
 [0.         0.53543307]] [[ 0.50336737 -0.0008969 ]
 [-0.0008969   0.49423824]]


So this is zero mean and matches the covariance matrix.

We'll always have one zero - so our dot product will only be positive. Ah it works when we have negative values.

In [269]:
np.random.randint(2, size=(4, 1))

array([[0],
       [1],
       [1],
       [0]])

In [272]:
def rand_opposite(size=2, negative=False):
    """Return a (size, 1) integer column of independently drawn random bits.

    Args:
        size: number of rows in the returned column.
        negative: when False the entries are 0 or 1; when True every 0 is
            mapped to -1 so the entries are -1 or 1.
    """
    bits = np.random.randint(2, size=(size, 1))
    return np.where(bits == 0, -1, 1) if negative else bits

In [291]:
rand_opposite(size=3, negative=True)

array([[-1],
       [-1],
       [ 1]])

In [208]:
# Feed 1000 one-hot binary vectors and show the state next to a fresh sample
cov_unit = CovarianceUnit(2)
for _ in range(1000):
    cov_unit.update_cov(rand_diff())

print(cov_unit, rand_diff())
# The all-equal check against 127//2 is deliberately left disabled here:
# assert np.allclose(cov_unit.covariance, np.ones(shape=(2,2))*(127//2), atol=15)

There are 8 stages to process 1D arrays of length 2.
Data is assumed to have a maximum absolute value of 127.
-------
Counter: [111   7   0   0   0   0   0   0]
Running sum of squares:
[[51  0]
 [ 0 60]]
[[6 0]
 [0 5]]
Complete covariance estimates:
[[55  0]
 [ 0 72]]

---------
Current covariance estimate (index: 0):
[[55  0]
 [ 0 72]]
 [[1.]
 [0.]]


The running sum doesn't seem to feature negative values.

In [238]:
# Trace a single update step by hand
square_sum = np.zeros(shape=(2, 2), dtype=np.int8)
data_array = rand_diff()
print("Before conversion:", data_array, data_array.dtype, sep="\n")
# Every entry must be -1, 0 or 1 (`max(...) == 0 or 1` was always true)
assert np.all(np.abs(data_array) <= 1)
data_array = data_array.astype(np.int8)
print("After conversion:", data_array, data_array.dtype, sep="\n")
dot_product = np.dot(data_array, data_array.T)
square_sum += dot_product
print(f"Dot product: {dot_product}")
print(f"Square sum:\n{square_sum}")

Before conversion:
[[1.]
 [0.]]
float64
After conversion:
[[1]
 [0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[1 0]
 [0 0]]


Ah - it won't work for rand_diff with binary input, as the values have a non-zero mean. By contrast, if one element is 0 and the other is -1 or 1 (i.e. `negative=True`), the mean is zero.

In [251]:
# Trace 1000 update steps by hand, without the per-stage reset
square_sum = np.zeros(shape=(2, 2), dtype=np.int8)
data_buffer = np.zeros(shape=(2, 1000))
for i in range(0, 1000):
    data_array = rand_diff(negative=True)
    # Every entry must be -1, 0 or 1 (`max(...) == 0 or 1` was always true)
    assert np.all(np.abs(data_array) <= 1)
    data_array = data_array.astype(np.int8)
    print("After conversion:", data_array, data_array.dtype, sep="\n")
    dot_product = np.dot(data_array, data_array.T)
    # NOTE(review): int8 + int8 stays int8, so this running sum wraps to
    # negative values once an entry passes 127 (visible in the output).
    square_sum = square_sum + dot_product
    print(f"Dot product: {dot_product}")
    print(f"Square sum:\n{square_sum}")
    data_buffer[:, i] = data_array.ravel()

After conversion:
[[0]
 [1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[0 0]
 [0 1]]
After conversion:
[[1]
 [0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[1 0]
 [0 1]]
After conversion:
[[ 0]
 [-1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[1 0]
 [0 2]]
After conversion:
[[-1]
 [ 0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[2 0]
 [0 2]]
After conversion:
[[ 0]
 [-1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[2 0]
 [0 3]]
After conversion:
[[ 0]
 [-1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[2 0]
 [0 4]]
After conversion:
[[ 0]
 [-1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[2 0]
 [0 5]]
After conversion:
[[0]
 [1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[2 0]
 [0 6]]
After conversion:
[[0]
 [1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[2 0]
 [0 7]]
After conversion:
[[0]
 [1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[2 0]
 [0 8]]
After conversion:
[[-1]
 [ 0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[3 0]
 [0 8]]
After con

 [0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[-112    0]
 [   0 -114]]
After conversion:
[[0]
 [1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[-112    0]
 [   0 -113]]
After conversion:
[[-1]
 [ 0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[-111    0]
 [   0 -113]]
After conversion:
[[0]
 [1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[-111    0]
 [   0 -112]]
After conversion:
[[1]
 [0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[-110    0]
 [   0 -112]]
After conversion:
[[1]
 [0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[-109    0]
 [   0 -112]]
After conversion:
[[-1]
 [ 0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[-108    0]
 [   0 -112]]
After conversion:
[[0]
 [1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[-108    0]
 [   0 -111]]
After conversion:
[[1]
 [0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[-107    0]
 [   0 -111]]
After conversion:
[[-1]
 [ 0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[-106    0]
 [   0 -111]]
After c

After conversion:
[[ 0]
 [-1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[21  0]
 [ 0 22]]
After conversion:
[[ 0]
 [-1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[21  0]
 [ 0 23]]
After conversion:
[[ 0]
 [-1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[21  0]
 [ 0 24]]
After conversion:
[[1]
 [0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[22  0]
 [ 0 24]]
After conversion:
[[1]
 [0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[23  0]
 [ 0 24]]
After conversion:
[[ 0]
 [-1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[23  0]
 [ 0 25]]
After conversion:
[[0]
 [1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[23  0]
 [ 0 26]]
After conversion:
[[0]
 [1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[23  0]
 [ 0 27]]
After conversion:
[[0]
 [1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[23  0]
 [ 0 28]]
After conversion:
[[0]
 [1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[23  0]
 [ 0 29]]
After conversion:
[[ 0]
 [-1]]
int8
Dot product: [[0 0]
 [0 1]

After conversion:
[[1]
 [0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[-120    0]
 [   0 -118]]
After conversion:
[[-1]
 [ 0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[-119    0]
 [   0 -118]]
After conversion:
[[-1]
 [ 0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[-118    0]
 [   0 -118]]
After conversion:
[[0]
 [1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[-118    0]
 [   0 -117]]
After conversion:
[[1]
 [0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[-117    0]
 [   0 -117]]
After conversion:
[[0]
 [1]]
int8
Dot product: [[0 0]
 [0 1]]
Square sum:
[[-117    0]
 [   0 -116]]
After conversion:
[[-1]
 [ 0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[-116    0]
 [   0 -116]]
After conversion:
[[-1]
 [ 0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[-115    0]
 [   0 -116]]
After conversion:
[[1]
 [0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[-114    0]
 [   0 -116]]
After conversion:
[[1]
 [0]]
int8
Dot product: [[1 0]
 [0 0]]
Square sum:
[[-113   

In [253]:
data_buffer.mean(axis=1)

array([-0.015,  0.013])

In [252]:
data_buffer

array([[ 0.,  1.,  0., ..., -1., -1.,  0.],
       [ 1.,  0., -1., ...,  0.,  0.,  1.]])

In [307]:
size = 3
buf_length = 1000
data_buffer = np.zeros(shape=(size, buf_length))
cov_unit = CovarianceUnit(size)
for i in range(0, buf_length):
    data = rand_opposite(size=size, negative=True)
    cov_unit.update_cov(data)
    data_buffer[:, i] = data.ravel()
print(data_buffer.mean(axis=1), "\n" )
print(cov_unit.covariance/127, "\n\n", np.cov(data_buffer))
# The mean has shape (size,), so compare against zeros of that shape rather
# than relying on broadcasting against a fixed (2, 1) array.
assert np.allclose(data_buffer.mean(axis=1), np.zeros(size), atol=0.1)
# Check covariance estimate is within range of actual estimate
assert np.allclose(cov_unit.covariance/127, np.cov(data_buffer), atol=0.2)

[-0.03   0.02   0.044] 

[[ 1.         -0.03937008 -0.1496063 ]
 [-0.03937008  1.         -0.18110236]
 [-0.1496063  -0.18110236  1.        ]] 

 [[ 1.0001001   0.03463463 -0.0247047 ]
 [ 0.03463463  1.0006006  -0.04492492]
 [-0.0247047  -0.04492492  0.99906306]]


In [308]:
def test_opposite():
    """Test with independently drawn -1/1 entries."""
    size = 3
    buf_length = 1000
    data_buffer = np.zeros(shape=(size, buf_length))
    cov_unit = CovarianceUnit(size)
    for i in range(buf_length):
        data = rand_opposite(size=size, negative=True)
        cov_unit.update_cov(data)
        data_buffer[:, i] = data.ravel()
    sample_mean = data_buffer.mean(axis=1)
    print(sample_mean, "\n" )
    print(cov_unit.covariance/127, "\n\n", np.cov(data_buffer))
    # The mean has shape (size,), so compare against zeros of that shape
    # rather than relying on broadcasting against a fixed (2, 1) array.
    assert np.allclose(sample_mean, np.zeros(size), atol=0.1)
    # Check covariance estimate is within range of actual estimate
    assert np.allclose(cov_unit.covariance/127, np.cov(data_buffer), atol=0.2)

In [298]:
data_buffer

array([[ 1., -1., -1., ..., -1.,  1., -1.],
       [-1.,  1.,  1., ..., -1., -1., -1.],
       [-1.,  1., -1., ...,  1.,  1.,  1.]])

In [299]:
cov_unit

There are 8 stages to process 1D arrays of length 3.
Data is assumed to have a maximum absolute value of 127.
-------
Counter: [111   7   0   0   0   0   0   0]
Running sum of squares:
[[111  -7   7]
 [ -7 111  -3]
 [  7  -3 111]]
[[ 7 -1  0]
 [ 0  7  0]
 [-1  0  7]]
Complete covariance estimates:
[[127 -23  -9]
 [-23 127  -3]
 [ -9  -3 127]]

---------
Current covariance estimate (index: 0):
[[127 -23  -9]
 [-23 127  -3]
 [ -9  -3 127]]

## Random Data

Let's try just with random data.

In [330]:
def test_random():
    """Test applying with random ternary input."""
    size = 4
    buf_length = 1000
    data_buffer = np.zeros(shape=(size, buf_length))
    cov_unit = CovarianceUnit(size)
    for i in range(buf_length):
        # Each entry is drawn independently from {-1, 0, 1}
        data = np.random.randint(low=-1, high=2, size=(size, 1))
        cov_unit.update_cov(data)
        data_buffer[:, i] = data.ravel()
    sample_mean = data_buffer.mean(axis=1)
    print(sample_mean, )
    print(cov_unit.covariance/127, "\n", np.cov(data_buffer))
    # The mean has shape (size,), so compare against zeros of that shape
    # rather than relying on broadcasting against a fixed (2, 1) array.
    assert np.allclose(sample_mean, np.zeros(size), atol=0.1)
    # Check covariance estimate is within range of actual estimate
    assert np.allclose(cov_unit.covariance/127, np.cov(data_buffer), atol=0.15)

In [331]:
np.random.randint(low=-1, high=2, size=(size, 1))

array([[1],
       [0],
       [0]])

In [332]:
test_random()

[-0.002  0.038 -0.03  -0.048]
[[ 0.63779528 -0.03937008  0.00787402 -0.00787402]
 [-0.03937008  0.69291339 -0.03149606 -0.03937008]
 [ 0.00787402 -0.03149606  0.66929134  0.1023622 ]
 [-0.00787402 -0.03937008  0.1023622   0.5984252 ]] 
 [[ 0.65665265 -0.00392793  0.01095095 -0.02311912]
 [-0.00392793  0.66121722 -0.02488488 -0.00918519]
 [ 0.01095095 -0.02488488  0.66376376  0.02058058]
 [-0.02311912 -0.00918519  0.02058058  0.66636236]]


**If we have zero mean data - we don't generally have negative covariance matrix entries.**

## Half Random Half Same

In [335]:
def test_half_half():
    """Test with a 50/50 mix of all-equal and fully random ternary input."""
    size = 4
    buf_length = 1000
    data_buffer = np.zeros(shape=(size, buf_length))
    cov_unit = CovarianceUnit(size)
    for i in range(buf_length):
        coin_flip = np.random.randint(2)
        if coin_flip:
            # Make all entries the same (length follows `size`, not a literal)
            data = rand_same(size=size, negative=True)
        else:
            # Make entries random
            data = np.random.randint(low=-1, high=2, size=(size, 1))
        cov_unit.update_cov(data)
        data_buffer[:, i] = data.ravel()
    sample_mean = data_buffer.mean(axis=1)
    print(sample_mean, )
    print(cov_unit.covariance/127, "\n", np.cov(data_buffer))
    print(cov_unit)
    # The mean has shape (size,), so compare against zeros of that shape
    # rather than relying on broadcasting against a fixed (2, 1) array.
    assert np.allclose(sample_mean, np.zeros(size), atol=0.1)
    # Check covariance estimate is within range of actual estimate
    assert np.allclose(cov_unit.covariance/127, np.cov(data_buffer), atol=0.15)

In [336]:
test_half_half()

[0.011 0.064 0.073 0.022]
[[0.5984252  0.22834646 0.2992126  0.2519685 ]
 [0.22834646 0.60629921 0.23622047 0.2992126 ]
 [0.2992126  0.23622047 0.61417323 0.27559055]
 [0.2519685  0.2992126  0.27559055 0.66929134]] 
 [[0.64952853 0.3035996  0.31451151 0.31407207]
 [0.3035996  0.63453854 0.27960761 0.29989189]
 [0.31451151 0.27960761 0.6323033  0.3097037 ]
 [0.31407207 0.29989189 0.3097037  0.64816416]]
There are 8 stages to process 1D arrays of length 4.
Data is assumed to have a maximum absolute value of 127.
-------
Counter: [111   7   0   0   0   0   0   0]
Running sum of squares:
[[69 38 30 32]
 [38 68 37 26]
 [30 37 62 47]
 [32 26 47 71]]
[[6 3 2 1]
 [2 5 2 3]
 [1 2 3 2]
 [3 4 2 4]]
Complete covariance estimates:
[[76 29 38 32]
 [29 77 30 38]
 [38 30 78 35]
 [32 38 35 85]]

---------
Current covariance estimate (index: 0):
[[76 29 38 32]
 [29 77 30 38]
 [38 30 78 35]
 [32 38 35 85]]



# Timing Profile

Let's also have a look at where the time goes.

In [338]:
from pyinstrument import Profiler

profiler = Profiler()
profiler.start()
# ----- #
size = 4
# Enough updates to complete stage 2 once (127**3), plus a remainder
buf_length = 127**3+100
cov_unit = CovarianceUnit(size)
for i in range(0, buf_length):
    coin_flip = np.random.randint(2)
    if coin_flip:
        # Make all entries the same (length follows `size`, not a literal)
        data = rand_same(size=size, negative=True)
    else:
        # Make entries random
        data = np.random.randint(low=-1, high=2, size=(size, 1))
    cov_unit.update_cov(data)

# ----- #

profiler.stop()

print(profiler.output_text(unicode=True, color=True))


  _     ._   __/__   _ _  _  _ _/_   Recorded: 13:37:13  Samples:  85388
 /_//_/// /_\ / //_// / //_'/ //     Duration: 85.690    CPU time: 86.720
/   _/                      v3.1.3

Program: /home/ben/anaconda3/envs/var_processor/lib/python3.6/site-packages/ipykernel_launcher.py -f /run/user/1000/jupyter/kernel-3cfca925-d0e1-4bf5-86b6-4b965dc6fa83.json

[31m85.689[0m run_code[0m  [2mIPython/core/interactiveshell.py:3254[0m
└─ [31m85.689[0m [48;5;24m[38;5;15m<module>[0m  [2m<ipython-input-338-c9f50eb2ed96>:9[0m
   ├─ [31m66.184[0m [48;5;24m[38;5;15mupdate_cov[0m  [2m<ipython-input-69-ffb4d8176a3d>:28[0m
   │  ├─ [31m56.095[0m [self][0m  [2m[0m
   │  └─ [32m10.089[0m [48;5;24m[38;5;15mrecursive_update[0m  [2m<ipython-input-69-ffb4d8176a3d>:45[0m
   │     └─ [32m9.661[0m [self][0m  [2m[0m
   ├─ [32m14.142[0m [self][0m  [2m[0m
   └─ [32m5.364[0m [48;5;24m[38;5;15mrand_same[0m  [2m<ipython-input-113-ad8eb84f9640>:2[0m




In [339]:
%%timeit
cov_unit.update_cov(data)

28.6 µs ± 1.27 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


# Tests to Add to Covariance

In [337]:
def test_different_sign():
    """Compare the unit's estimate with np.cov for all-equal ternary input."""
    size, n_samples = 2, 1000
    samples = np.zeros(shape=(size, n_samples))
    unit = CovarianceUnit(size)
    for column in range(n_samples):
        vector = rand_same(negative=True)
        unit.update_cov(vector)
        samples[:, column] = vector.ravel()
    # Scale the 8-bit estimate by 127 before comparing with numpy
    estimate = unit.covariance/127
    reference = np.cov(samples)
    assert np.allclose(estimate, reference, atol=0.1)

def test_non_neg():
    """Compare the unit's estimate with np.cov for all-equal binary input."""
    size, n_samples = 2, 1000
    samples = np.zeros(shape=(size, n_samples))
    unit = CovarianceUnit(size)
    for column in range(n_samples):
        vector = rand_same(negative=False)
        unit.update_cov(vector)
        samples[:, column] = vector.ravel()
    # Binary (0/1) input is scaled by 254 rather than 127
    estimate = unit.covariance/254
    reference = np.cov(samples)
    assert np.allclose(estimate, reference, atol=0.1)
    
def test_opposite():
    """Test with independently drawn -1/1 entries."""
    size = 3
    buf_length = 1000
    data_buffer = np.zeros(shape=(size, buf_length))
    cov_unit = CovarianceUnit(size)
    for i in range(buf_length):
        data = rand_opposite(size=size, negative=True)
        cov_unit.update_cov(data)
        data_buffer[:, i] = data.ravel()
    # The mean has shape (size,), so compare against zeros of that shape
    # rather than relying on broadcasting against a fixed (2, 1) array.
    assert np.allclose(data_buffer.mean(axis=1), np.zeros(size), atol=0.1)
    # Check covariance estimate is within range of actual estimate
    assert np.allclose(cov_unit.covariance/127, np.cov(data_buffer), atol=0.2)
    
def test_diff_neg():
    """Test applying with ternary input with different elements."""
    size = 2
    buf_length = 1000
    data_buffer = np.zeros(shape=(size, buf_length))
    cov_unit = CovarianceUnit(size)
    for i in range(buf_length):
        data = rand_diff(negative=True)
        cov_unit.update_cov(data)
        data_buffer[:, i] = data.ravel()
    # The mean has shape (size,), so compare against zeros of that shape
    # rather than relying on broadcasting against a fixed (2, 1) array.
    assert np.allclose(data_buffer.mean(axis=1), np.zeros(size), atol=0.1)
    # Check covariance estimate is within range of actual estimate
    assert np.allclose(cov_unit.covariance/127, np.cov(data_buffer), atol=0.1)

def test_random():
    """Test applying with random ternary input."""
    size = 4
    buf_length = 1000
    data_buffer = np.zeros(shape=(size, buf_length))
    cov_unit = CovarianceUnit(size)
    for i in range(buf_length):
        # Each entry is drawn independently from {-1, 0, 1}
        data = np.random.randint(low=-1, high=2, size=(size, 1))
        cov_unit.update_cov(data)
        data_buffer[:, i] = data.ravel()
    # The mean has shape (size,), so compare against zeros of that shape
    # rather than relying on broadcasting against a fixed (2, 1) array.
    assert np.allclose(data_buffer.mean(axis=1), np.zeros(size), atol=0.1)
    # Check covariance estimate is within range of actual estimate
    assert np.allclose(cov_unit.covariance/127, np.cov(data_buffer), atol=0.15)
    
def test_half_half():
    """Test with a 50/50 mix of all-equal and fully random ternary input."""
    size = 4
    buf_length = 1000
    data_buffer = np.zeros(shape=(size, buf_length))
    cov_unit = CovarianceUnit(size)
    for i in range(buf_length):
        coin_flip = np.random.randint(2)
        if coin_flip:
            # Make all entries the same (length follows `size`, not a literal)
            data = rand_same(size=size, negative=True)
        else:
            # Make entries random
            data = np.random.randint(low=-1, high=2, size=(size, 1))
        cov_unit.update_cov(data)
        data_buffer[:, i] = data.ravel()
    # The mean has shape (size,), so compare against zeros of that shape
    # rather than relying on broadcasting against a fixed (2, 1) array.
    assert np.allclose(data_buffer.mean(axis=1), np.zeros(size), atol=0.1)
    # Check covariance estimate is within range of actual estimate
    assert np.allclose(cov_unit.covariance/127, np.cov(data_buffer), atol=0.15)