In [None]:
import polars as pl
import numpy as np

In [None]:
# Load the commodities dataset with polars and keep a pandas copy as well:
# the reference implementation below is called on the pandas "price" column,
# while the functime / polars experiments use the polars frame.
df = pl.read_parquet("./data/commodities.parquet")
df_pd = df.to_pandas()

In [None]:
def _into_subchunks(x, subchunk_length, every_n=1):
    """
    Split the time series x into subwindows of length "subchunk_length", starting every "every_n".

    For example, the input data if [0, 1, 2, 3, 4, 5, 6] will be turned into a matrix

        0  2  4
        1  3  5
        2  4  6

    with the settings subchunk_length = 3 and every_n = 2
    """
    len_x = len(x)

    assert subchunk_length > 1
    assert every_n > 0

    # how often can we shift a window of size subchunk_length over the input?
    num_shifts = (len_x - subchunk_length) // every_n + 1
    shift_starts = every_n * np.arange(num_shifts)
    indices = np.arange(subchunk_length)

    indexer = np.expand_dims(indices, axis=0) + np.expand_dims(shift_starts, axis=1)
    return np.asarray(x)[indexer]

def tsfresh_sample_entropy(x):
    """
    Compute the sample entropy (SampEn) of the time series x.

    The embedding dimension is fixed at m = 2 and the tolerance at
    r = 0.2 * std(x), the values commonly quoted for this measure.

    .. rubric:: References

    |  [1] http://en.wikipedia.org/wiki/Sample_Entropy
    |  [2] https://www.ncbi.nlm.nih.gov/pubmed/10843903?dopt=Abstract

    :param x: the time series to calculate the feature of
    :type x: numpy.ndarray

    :return: the value of this feature; NaN if x contains any NaN
    :return type: float
    """
    series = np.array(x)

    # A single NaN poisons every distance comparison, so give up early.
    if np.isnan(series).any():
        return np.nan

    m = 2  # template length
    tolerance = 0.2 * np.std(series)  # matching radius r

    def _count_matches(templates):
        # For each template, measure its distance to every template as the
        # largest absolute element-wise difference and count how many fall
        # within the tolerance. A template always matches itself (distance 0),
        # so that self-match is removed by subtracting 1.
        # Example: templates [[1, 2], [2, 3]] give distances [0, 1] for the
        # first template and [1, 0] for the second.
        per_template = []
        for template in templates:
            distances = np.abs(template - templates).max(axis=1)
            per_template.append(np.sum(distances <= tolerance) - 1)
        return np.sum(per_template)

    # B: matches among templates of length m,
    # e.g. [1, 2, 3, 4] -> [1, 2], [2, 3], [3, 4]
    B = _count_matches(_into_subchunks(series, m))

    # A: the same count for templates of length m + 1
    A = _count_matches(_into_subchunks(series, m + 1))

    # SampEn = -log(A / B)
    return -np.log(A / B)

In [None]:
# Sanity check: run the reference implementation on a 500-element slice of the
# pandas price column. The commented-out fragments are alternative inputs.
# timeseries[timeseries["time"] == 0]["F_x"]
tsfresh_sample_entropy(df_pd["price"][:500]) # [:200])

In [None]:
%%timeit
# Time the NumPy reference implementation on a 200-element slice of the prices.
tsfresh_sample_entropy(df_pd["price"][:200])

In [None]:
from functime.feature_extraction.tsfresh import sample_entropy, _into_sequential_chunks
import polars as pl

In [None]:
%%timeit
# Time functime's sample_entropy on the whole polars price column.
# NOTE(review): the reference implementation is timed on a [:200] slice, so the
# two timings are not directly comparable -- confirm whether that is intended.
sample_entropy(df["price"])

In [None]:
# Build the length-2 templates of the price series; they feed the KDTree below.
# NOTE(review): _into_sequential_chunks is imported from functime earlier and is
# also redefined in a later cell of this notebook, so which version runs here
# depends on execution order -- on a fresh top-to-bottom run it is functime's.
data = _into_sequential_chunks(df["price"], m = 2)
data

In [None]:
from scipy.spatial import KDTree
# Index the templates in a k-d tree so that neighbours within a given radius
# can be counted without comparing every pair explicitly.
tree = KDTree(data)
tree 

In [None]:
# For every template, count the templates lying within r = 0.2 * std(price).
# Sample entropy measures template distance as the largest absolute
# element-wise difference (Chebyshev norm), which is what the NumPy reference
# does with .max(axis=1); that corresponds to Minkowski order p = inf.
# The previous p = 1 measured Manhattan distance and therefore under-counted
# matches. Each count includes the template itself, hence the "- 1" reminder.
test = tree.query_ball_point(
    data,
    r=0.2 * df["price"].std(ddof=0),
    p=np.inf,
    workers=-1,
    return_length=True,
)  # - 1

In [None]:
# Display the result of the query_ball_point call.
test 

In [None]:
import tsfresh.feature_extraction.feature_calculators

In [None]:
def _into_sequential_chunks(x:pl.Series, m:int) -> pl.DataFrame:
    """
    Arrange a series into rows of m consecutive values.

    The first column is the series itself; column k (k = 1 .. m-1) is the
    series shifted up by k positions and is named "<series name><k>". Only the
    first len(x) - m + 1 rows are kept, i.e. the trailing rows that the shifts
    would fill with nulls are dropped.

    :param x: the series to split
    :param m: number of consecutive values per row
    :return: frame with m columns and len(x) - m + 1 rows
    """
    base = x.name
    shifted_cols = [
        pl.col(base).shift(-offset).suffix(str(offset))
        for offset in range(1, m)
    ]
    windows = x.to_frame().select(pl.col(base), *shifted_cols)
    n_complete = x.len() - m + 1
    return windows.slice(0, n_complete)

In [None]:
# Build length-2 templates and split them into the first row (test1) and all
# remaining rows (test2) for the comparison experiment that follows.
# NOTE(review): the "time" and "F_x" columns are not established anywhere in
# this notebook (only "price" is used on df elsewhere) -- presumably left over
# from a different dataset; confirm before running.
test = _into_sequential_chunks(df.filter(pl.col("time") == 0)["F_x"], 2)
test1 = test.slice(0, 1)
test2 = test.slice(1, None)
test1 

In [None]:
# Count the rows of test2 whose largest column-wise difference to the single
# row held in test1 is below 10.0 (column i of test2 is compared with the
# value in row 0, column i of test1).
# NOTE(review): the difference is signed (no .abs()) and the threshold is a
# hard-coded 10.0 with a strict "<", whereas the NumPy reference uses
# max(|diff|) <= 0.2 * std -- confirm whether this is only a scratch test.
test2.select(
    pl.when(
        pl.max_horizontal(
            *((pl.col(c) - pl.lit(test1.item(0,i))) for i, c in enumerate(test2.columns))
        ).lt(10.0)
    ).then(1).otherwise(0).sum()
)

In [None]:
# Select the "F_x" values of the rows where "time" equals 0 and display them.
# NOTE(review): presumes df has "time" and "F_x" columns, which this notebook
# does not otherwise show -- confirm which dataset df is expected to hold here.
test = df.filter(pl.col("time") == 0)["F_x"]
test

In [None]:
# Small synthetic frame (1000 rows, two integer columns) used to try out
# DataFrame.slice.
df_test = pl.DataFrame({
    "a":range(1000),
    "b":range(1000,2000)
})

# With only an offset given, slice returns everything from that row onward.
df_test.slice(100)

In [None]:
%%timeit
# Time the slice on the synthetic frame.
df_test.slice(100)

In [None]:
# Preview the first rows of the synthetic frame.
df_test.head()

In [None]:
# Scratch check of a row-wise reduction: for each row of a, is every entry
# strictly below 3?
a = np.array([[1, 2], [3, 4]])
(a < 3).all(axis=1)