In [None]:
import polars as pl
import numpy as np

In [None]:
# Toy single-column frame (a = 0..11) for prototyping the windowing expression in the next cell.
df = pl.DataFrame({"a": range(12)})
df

In [None]:
# Prototype: build every length-`m` sliding window over column "a" using polars expressions only.
x = pl.col("a")
m = 2
max_shift = -m + 1  # the furthest look-ahead, written as a negative shift: -(m - 1)
n = x.len() - m + 1  # window-count expression; the query below does not use it

# One look-ahead column per extra window position: shift(-k) lines row t up with a[t + k].
lookahead_columns = [x.shift(-offset) for offset in range(1, m)]

# Keep only rows whose furthest look-ahead value exists, i.e. rows that start a complete window.
starts_full_window = x.shift(max_shift).is_not_null()

df.select(
    pl.concat_list(x, *lookahead_columns).filter(starts_full_window)
)




In [1]:
from tsfresh.examples.robot_execution_failures import download_robot_execution_failures, load_robot_execution_failures
import numpy as np 
import polars as pl


# Fetch the tsfresh robot-execution-failures example data before loading it.
# NOTE(review): presumably this downloads to tsfresh's local data location and needs network access — confirm.
download_robot_execution_failures()
# The loader returns two objects. Later cells select rows of `timeseries` by its "time" column
# and read its "F_x" column; `y` is not used by any cell shown here.
timeseries, y = load_robot_execution_failures()

In [2]:
def _into_subchunks(x, subchunk_length, every_n=1):
    """
    Split the time series x into subwindows of length "subchunk_length", starting every "every_n".

    For example, the input data if [0, 1, 2, 3, 4, 5, 6] will be turned into a matrix

        0  2  4
        1  3  5
        2  4  6

    with the settings subchunk_length = 3 and every_n = 2
    """
    len_x = len(x)

    assert subchunk_length > 1
    assert every_n > 0

    # how often can we shift a window of size subchunk_length over the input?
    num_shifts = (len_x - subchunk_length) // every_n + 1
    shift_starts = every_n * np.arange(num_shifts)
    indices = np.arange(subchunk_length)

    indexer = np.expand_dims(indices, axis=0) + np.expand_dims(shift_starts, axis=1)
    return np.asarray(x)[indexer]

def tsfresh_sample_entropy(x):
    """
    Compute the sample entropy (SampEn) of the time series x.

    The template length is fixed at m = 2 and the tolerance at
    r = 0.2 * std(x). The distance between two templates is the largest
    absolute element-wise difference.

    .. rubric:: References

    |  [1] http://en.wikipedia.org/wiki/Sample_Entropy
    |  [2] https://www.ncbi.nlm.nih.gov/pubmed/10843903?dopt=Abstract

    :param x: the time series to calculate the feature of
    :type x: array-like (converted with numpy.array)

    :return: -log(A / B), or NaN when x contains a NaN
    :return type: float
    """
    series = np.array(x)

    # A single NaN makes every distance comparison meaningless, so bail out early.
    if np.isnan(series).any():
        return np.nan

    m = 2  # template length
    tolerance = 0.2 * np.std(series)  # matching radius r

    def _count_matches(templates):
        # For each template, count the templates (rows) lying within `tolerance`
        # of it. The "- 1" removes the template's match with itself, whose
        # distance is 0 and therefore always within tolerance.
        # The comparison against all rows is vectorised; only the outer pass
        # over templates is a Python-level loop.
        matches_per_template = [
            np.sum(np.abs(template - templates).max(axis=1) <= tolerance) - 1
            for template in templates
        ]
        return np.sum(matches_per_template)

    # B: matches between templates of length m,
    # e.g. [1, 2, 3, 4] -> [1, 2], [2, 3], [3, 4].
    B = _count_matches(_into_subchunks(series, m))

    # A: the same count for templates of length m + 1.
    A = _count_matches(_into_subchunks(series, m + 1))

    # SampEn = -ln(A / B)
    return -np.log(A / B)

In [3]:
%%timeit
# Baseline timing: NumPy windowing (window length 2) on the "F_x" values of the rows where time == 0.
# The pandas row/column selection is inside the timed statement, so its cost is part of the measurement.
_into_subchunks(timeseries[timeseries["time"] == 0]["F_x"], 2)

195 µs ± 2.1 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [None]:
%%timeit
# Baseline timing: the NumPy sample-entropy reference on the "F_x" values of the rows where time == 0.
# The pandas row/column selection is inside the timed statement, so its cost is part of the measurement.
tsfresh_sample_entropy(timeseries[timeseries["time"] == 0]["F_x"])

In [4]:
from functime.feature_extraction.tsfresh import sample_entropy, _into_sequential_chunks
import polars as pl

In [6]:
# Convert the tsfresh data to polars for the functime timings below.
# Note: this rebinds `df`, which an earlier cell used for the 12-row toy frame.
df = pl.from_pandas(timeseries)

In [7]:
%%timeit
# Timing of functime's chunking helper (second argument 2) on the "F_x" column of the rows where time == 0.
# The polars filter and column extraction are inside the timed statement, so their cost is part of the measurement.
_into_sequential_chunks(df.filter(pl.col("time") == 0)["F_x"], 2)

256 µs ± 6.86 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [None]:
%%timeit
# Timing of functime's sample_entropy on the "F_x" column of the rows where time == 0.
# The polars filter and column extraction are inside the timed statement, so their cost is part of the measurement.
sample_entropy(df.filter(pl.col("time") == 0)["F_x"])