# Speeding Up with Cython

Let's start a new notebook to keep things clear. 

In [56]:
%load_ext Cython

To "reconstruct", we can just take the last estimate from the highest stage.

But what happens at the last stage? When we reach 127 we just reset?

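If we have 8 stages, the stage index runs from 0 to 7. A stage can feed the one above it only while index < stages - 1, so we are okay up to index = 6; the last stage (index = 7) has no higher stage to feed.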

In [3]:
import numpy as np

In [54]:
from src.var_processor.pb_threshold import ternary_pbt

In [55]:
class CovarianceUnit:
    """Multi-stage covariance estimate where the mean is assumed to be 0.

    Each stage accumulates a (size, size) sum of products in int8 storage.
    Once a stage has received ``max_value`` contributions its sum is stored
    as that stage's completed estimate, the stage is reset, and a
    thresholded copy of the estimate is passed on to the next stage.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, size, stages=8):
        """Initialise.

        Args:
            size: integer setting the 1D size of an input.
            stages: integer setting the number of stages.
        """
        self.size = size
        # Number of contributions a stage takes before it completes; this is
        # also the largest value a signed 8-bit integer can hold.
        self.max_value = 127
        self.stages = stages
        # Running sum of products, one (size, size) slice per stage.
        self.square_sum = np.zeros(
            shape=(size, size, self.stages), dtype=np.int8
        )
        # Last completed sum for each stage.
        self.complete = np.zeros(
            shape=(size, size, self.stages), dtype=np.int8
        )
        # Number of contributions received by each stage since its reset.
        self.stage_counter = np.zeros(self.stages, dtype=np.uint8)
        # Index of the highest stage that has completed at least once.
        self.cov_index = 0

    def update(self, data_array):
        """Add a data array to the covariance data.

        The outer product of the input with itself is added to stage 0, then
        completed stages are cascaded upwards.

        Args:
            data_array: 1D numpy array of length 'size'. A (size, 1) column
                vector is also accepted and gives the same result.
                NOTE(review): sums are stored as int8, so entries are
                presumably expected to be in {-1, 0, 1} - confirm with
                callers.
        """
        self.stage_counter[0] += 1
        # np.outer flattens its arguments, so 1D inputs and column vectors
        # both yield the (size, size) matrix of pairwise products. np.dot on
        # a 1D input would collapse to a single scalar instead.
        self.square_sum[:, :, 0] += np.outer(data_array, data_array)
        self.recursive_update(0)

    def recursive_update(self, i):
        """Complete stage i if it is full and cascade to the next stage.

        Args:
            i: stage to update - integer.
        """
        # Nothing to do beyond the last stage.
        if i >= self.stages:
            return
        # Nothing to do until the stage has collected enough contributions.
        if self.stage_counter[i] < self.max_value:
            return

        # Store the finished sum as this stage's completed estimate.
        self.complete[:, :, i] = self.square_sum[:, :, i]
        # Reset the stage so it can start accumulating again.
        self.stage_counter[i] = 0
        self.square_sum[:, :, i] = 0
        # Remember the highest stage that has produced an estimate.
        if self.cov_index < i:
            self.cov_index = i

        # The last stage has no higher stage to feed.
        if i < self.stages - 1:
            # ternary_pbt is the project's thresholding helper (imported
            # from src.var_processor.pb_threshold; not shown here).
            thresholded = ternary_pbt(self.complete[:, :, i], self.max_value)
            self.square_sum[:, :, i + 1] += thresholded
            self.stage_counter[i + 1] += 1
            self.recursive_update(i + 1)

    @property
    def covariance(self):
        """Return the completed estimate of the highest completed stage.

        Before any stage has completed this is the all-zero stage 0 slice.
        The returned array is a view into ``self.complete``.
        """
        return self.complete[:, :, self.cov_index]

## Cython Definition

Start with this tutorial: https://cython.readthedocs.io/en/latest/src/userguide/numpy_tutorial.html#numpy-tutorial

Here for Numpy interactions - https://cython.readthedocs.io/en/latest/src/userguide/numpy_tutorial.html#efficient-indexing-with-memoryviews

Is it worth writing in Cython rather than C or C++? The default ints are 32-bit so our 8-bit restrictions here are no use.

For an actual implementation:
* We could fix the size at 4.
* We could fix the stages at 8.
* We could fix the max_value at 127.

In [62]:
%%cython

# Probe: store -1 in a C `unsigned int` and print what comes back.
cdef unsigned int size
size = -1
print(size)

4294967295


In [63]:
%%cython

# Probe: the same assignment with a signed C `int`, for comparison.
cdef int size
size = -1
print(size)

-1


So a Cython `unsigned int` is a 32-bit C integer here: assigning -1 wraps around to 2**32 - 1 = 4294967295.

In [60]:
size

NameError: name 'size' is not defined

In [55]:
%%cython 

# A %%cython cell is compiled as its own module, so names imported in other
# notebook cells are not visible here and must be imported again.
import numpy as np


cdef class CovarianceUnit:
    """Multi-stage covariance estimate where the mean is assumed to be 0.

    Each stage accumulates a (size, size) sum of products in int8 storage.
    Once a stage has received ``max_value`` contributions its sum is stored
    as that stage's completed estimate, the stage is reset, and a
    thresholded copy of the estimate is passed on to the next stage.
    """

    # Every attribute of a cdef class must be declared at class level.
    # `unsigned char` is the unsigned 8-bit C type; `public` keeps the
    # attributes readable from Python like the pure-Python version.
    cdef public unsigned char size, max_value, stages, cov_index
    # Numpy arrays are held as generic Python objects.
    cdef public object square_sum, complete, stage_counter

    # Special methods of an extension type must be declared with `def`.
    def __init__(self, size, stages=8):
        """Initialise.

        Args:
            size: integer setting the 1D size of an input.
            stages: integer setting the number of stages.
        """
        self.size = size
        # Number of contributions a stage takes before it completes; this is
        # also the largest value a signed 8-bit integer can hold.
        self.max_value = 127
        self.stages = stages
        # Running sum of products, one (size, size) slice per stage.
        self.square_sum = np.zeros(
            shape=(size, size, self.stages), dtype=np.int8
        )
        # Last completed sum for each stage.
        self.complete = np.zeros(
            shape=(size, size, self.stages), dtype=np.int8
        )
        # Number of contributions received by each stage since its reset.
        self.stage_counter = np.zeros(self.stages, dtype=np.uint8)
        # Index of the highest stage that has completed at least once.
        self.cov_index = 0

    # cpdef so the method stays callable from Python as well as from C.
    cpdef update(self, data_array):
        """Add a data array to the covariance data.

        The outer product of the input with itself is added to stage 0, then
        completed stages are cascaded upwards.

        Args:
            data_array: 1D numpy array of length 'size'. A (size, 1) column
                vector is also accepted and gives the same result.
                NOTE(review): sums are stored as int8, so entries are
                presumably expected to be in {-1, 0, 1} - confirm with
                callers.
        """
        self.stage_counter[0] += 1
        # np.outer flattens its arguments, so 1D inputs and column vectors
        # both yield the (size, size) matrix of pairwise products. np.dot on
        # a 1D input would collapse to a single scalar instead.
        self.square_sum[:, :, 0] += np.outer(data_array, data_array)
        self.recursive_update(0)

    cpdef recursive_update(self, int i):
        """Complete stage i if it is full and cascade to the next stage.

        Args:
            i: stage to update - integer.
        """
        # Nothing to do beyond the last stage.
        if i >= self.stages:
            return
        # Nothing to do until the stage has collected enough contributions.
        if self.stage_counter[i] < self.max_value:
            return

        # Store the finished sum as this stage's completed estimate.
        self.complete[:, :, i] = self.square_sum[:, :, i]
        # Reset the stage so it can start accumulating again.
        self.stage_counter[i] = 0
        self.square_sum[:, :, i] = 0
        # Remember the highest stage that has produced an estimate.
        if self.cov_index < i:
            self.cov_index = i

        # The last stage has no higher stage to feed.
        if i < self.stages - 1:
            # Imported where it is used: the notebook's own import of this
            # helper is not visible inside the compiled cell.
            from src.var_processor.pb_threshold import ternary_pbt
            thresholded = ternary_pbt(self.complete[:, :, i], self.max_value)
            self.square_sum[:, :, i + 1] += thresholded
            self.stage_counter[i + 1] += 1
            self.recursive_update(i + 1)

    # Properties of an extension type must be plain `def` methods.
    @property
    def covariance(self):
        """Return the completed estimate of the highest completed stage.

        Before any stage has completed this is the all-zero stage 0 slice.
        The returned array is a view into ``self.complete``.
        """
        return self.complete[:, :, self.cov_index]