## Usage test of the wrapper


In [17]:
# imports
import pandas as pd
import numpy as np

import logging

# from wrapper import RustDataModule
from rust_time_series import (
    ForecastingDataSet,
    ClassificationDataSet,
    SplittingStrategy,
    ImputeStrategy,
)

In [18]:
# set up logging
logging.basicConfig(level=logging.INFO)

In [19]:
# create dummy 3D array
dummy_data = np.random.randint(0, 100, (3, 100, 100)).astype(float)
dummy_copy = dummy_data.copy()

# Create a ForecastingDataSet instance
forecasting_data_set = ForecastingDataSet(dummy_data, 0.7, 0.2, 0.1)

# call the pipeline methods
forecasting_data_set.impute(ImputeStrategy.Median)

# check whether the dummy data was changed
assert np.array_equal(
    dummy_data, dummy_copy
), "Dummy data should not be modified after imputation."

forecasting_data_set.downsample(2)
forecasting_data_set.split()
forecasting_data_set.normalize()
forecasting_data_set.standardize()
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)

forecasting_data_set_res

((array([[[-1.42095679,  0.63466976,  1.66383405, ..., -0.96151245,
           -0.19334004, -1.66638484],
          [-0.07680848,  1.19852231,  0.64218156, ..., -0.73086243,
            1.40171528,  1.03794913],
          [ 0.05760636,  0.45846583,  0.84651206, ..., -0.59906243,
            0.82169516,  1.40979505]],
  
         [[-0.07680848,  1.19852231,  0.64218156, ..., -0.73086243,
            1.40171528,  1.03794913],
          [ 0.05760636,  0.45846583,  0.84651206, ..., -0.59906243,
            0.82169516,  1.40979505],
          [-0.74888263,  0.63466976,  0.13135532, ..., -1.02741246,
            1.43796653,  1.10555748]],
  
         [[ 0.05760636,  0.45846583,  0.84651206, ..., -0.59906243,
            0.82169516,  1.40979505],
          [-0.74888263,  0.63466976,  0.13135532, ..., -1.02741246,
            1.43796653,  1.10555748],
          [-0.98410859,  1.58617095,  0.98273239, ..., -0.63201243,
           -1.42588278,  0.49708234]],
  
         ...,
  
         [[ 0.091

### Dummy classification dataset temporal split


In [20]:
dummy_labels = np.ones(3, dtype=np.float64)

# create a ClassificationDataSet instance
dummy_classification = ClassificationDataSet(dummy_data, dummy_labels, 0.7, 0.2, 0.1)
print("shape of dummy_classification:", dummy_data.shape)

# call the pipeline methods
dummy_classification.impute(ImputeStrategy.BackwardFill)
dummy_classification.downsample(2)
dummy_classification.split(SplittingStrategy.Random)
dummy_classification.normalize()
dummy_classification.standardize()
classification_data_set_res = dummy_classification.collect()

print(
    "Shape of the resulting classification dataset:",
    classification_data_set_res[0][0].shape,
)

classification_data_set_res

shape of dummy_classification: (3, 100, 100)
Shape of the resulting classification dataset: (2, 50, 100)


((array([[[-1.48567664,  0.63353138,  1.65741643, ..., -0.87173254,
           -0.27385818, -1.57382145],
          [-0.10941296,  1.20076709,  0.61414129, ..., -0.62215676,
            1.32637555,  1.12801795],
          [ 0.02821341,  0.45627022,  0.82279632, ..., -0.47954203,
            0.74447237,  1.49952087],
          ...,
          [-1.07279754, -0.25277441, -0.18570297, ..., -0.72911781,
            0.27167604,  0.14860117],
          [ 0.16583977,  1.48438494, -0.32480632, ..., -1.44219147,
            0.0898313 , -0.52685868],
          [-1.34805028, -0.78455789,  1.03145135, ...,  0.05526321,
           -0.60117872, -1.70891342]],
  
         [[-1.03839095, -0.64274896,  0.09250373, ...,  1.08922001,
           -0.23748923,  1.3644289 ],
          [ 0.88837821,  0.1017479 , -0.32480632, ...,  0.41180004,
           -0.20112028, -0.05403679],
          [-1.6577096 ,  0.34991353,  1.51831308, ...,  1.55271788,
            0.12620025,  1.46574787],
          ...,
          [-

### Dummy Classification Data Set Random Split


In [21]:
# create a ClassificationDataSet instance
dummy_classification = ClassificationDataSet(dummy_data, dummy_labels, 0.7, 0.2, 0.1)

# call the pipeline methods
dummy_classification.impute(ImputeStrategy.ForwardFill)
dummy_classification.downsample(2)
dummy_classification.split(SplittingStrategy.Random)
dummy_classification.normalize()
dummy_classification.standardize()
classification_data_set_res = dummy_classification.collect()

classification_data_set_res

((array([[[ 1.10566312,  1.68705189,  0.42706832, ...,  1.01149555,
           -1.18619948, -0.07194213],
          [ 0.34858507,  0.53939754,  1.18294144, ..., -0.91609609,
           -1.25526495, -1.09481595],
          [-0.67182448, -1.23425008, -0.63802564, ...,  0.61944301,
           -0.25381561,  0.57587796],
          ...,
          [-1.26432034, -0.43436978, -0.98160433, ..., -1.34081968,
           -1.39339589, -1.33348651],
          [-0.07932861, -1.3385823 , -1.53133024, ..., -1.14479341,
           -0.35741382, -1.19710333],
          [-1.52765184,  1.44361006,  0.77064701, ..., -1.47150386,
           -1.42792863,  0.50768638]],
  
         [[-1.06682172, -0.5734794 ,  0.04913175, ...,  0.78279823,
            0.02244628,  1.42827282],
          [ 0.77649875,  0.15684609, -0.36316268, ...,  0.16204838,
            0.05697901, -0.00375054],
          [-1.65931759,  0.40028793,  1.4578044 , ...,  1.20752181,
            0.36777364,  1.5305602 ],
          ...,
          [-

### Forecasting Dataset


In [22]:
# Load the dataset
file_path = "../../data/LD2011_2014.txt"
df = pd.read_csv(file_path, sep=";", decimal=",")
# drop first column (date)
df = df.drop(columns=["date"])
# turn pandas DataFrame into numPy array
data = df.to_numpy(dtype=np.float64)

# add a third dimension (with only one entry)
data = np.expand_dims(data, axis=0)

data

array([[[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        ...,
        [2.53807107e+00, 2.06258890e+01, 1.73761946e+00, ...,
         1.53589316e+02, 6.70087977e+02, 6.86486486e+03],
        [1.26903553e+00, 2.13371266e+01, 1.73761946e+00, ...,
         1.46911519e+02, 6.46627566e+02, 6.54054054e+03],
        [2.53807107e+00, 1.99146515e+01, 1.73761946e+00, ...,
         1.31886477e+02, 6.73020528e+02, 7.13513514e+03]]],
      shape=(1, 140256, 370))

In [23]:
# Create a ForecastingDataSet instance
forecasting_data_set = ForecastingDataSet(data, 0.7, 0.2, 0.1)

# call the pipeline methods
forecasting_data_set.impute(ImputeStrategy.LeaveNaN)
forecasting_data_set.downsample(2)
forecasting_data_set.split()
forecasting_data_set.normalize()
forecasting_data_set.standardize()
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)

forecasting_data_set_res

((array([[[-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632]],
  
         [[-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632]],
  
         [[-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632]],
  
         ...,
  
         [[-0.399

### Classification Dataset


In [24]:
from aeon.datasets import load_classification


X, y = load_classification("GunPoint")

# convert y value to float64
y = y.astype(np.float64)

print(type(X))
print(type(y))


print(X.shape)
print(y.shape)
y

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(200, 1, 150)
(200,)


array([2., 2., 1., 1., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 2., 1., 2.,
       2., 1., 2., 1., 1., 1., 2., 1., 2., 1., 1., 2., 1., 1., 2., 2., 1.,
       2., 1., 2., 2., 2., 2., 2., 1., 1., 1., 2., 2., 1., 2., 1., 2., 1.,
       2., 2., 1., 1., 2., 1., 2., 2., 1., 1., 1., 2., 1., 1., 1., 1., 2.,
       2., 2., 1., 2., 1., 1., 1., 2., 1., 1., 2., 1., 1., 2., 2., 1., 1.,
       1., 1., 1., 1., 2., 1., 2., 1., 1., 2., 1., 2., 2., 1., 2., 2., 2.,
       1., 2., 1., 1., 2., 2., 1., 2., 1., 2., 1., 1., 2., 1., 1., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 2., 2., 2., 2.,
       1., 2., 1., 1., 1., 2., 1., 2., 1., 1., 2., 2., 1., 1., 2., 1., 1.,
       2., 2., 1., 1., 2., 1., 2., 2., 2., 1., 2., 1., 1., 2., 1., 1., 2.,
       1., 2., 2., 2., 1., 2., 1., 1., 2., 2., 2., 2., 1., 1., 1., 2., 1.,
       2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 2., 2., 1.])

In [25]:
classification_data_set = ClassificationDataSet(X, y, 0.7, 0.2, 0.1)

classification_data_set.impute(ImputeStrategy.Median)
classification_data_set.downsample(2)
classification_data_set.split(SplittingStrategy.Random)
classification_data_set.normalize()
classification_data_set.standardize()
classification_data_set_res = classification_data_set.collect()

classification_data_set_res

((array([[[ 0.67374004,  0.65732693,  0.64427143, ...,  0.70209976,
            0.70730792,  0.7037743 ]],
  
         [[-0.35723327, -0.35045691, -0.35095462, ..., -0.3793326 ,
           -0.35819962, -0.34366399]],
  
         [[ 0.78641614,  0.81357108,  0.81044308, ...,  0.35455497,
            0.35855005,  0.37435543]],
  
         ...,
  
         [[ 0.62504498,  0.63453206,  0.63087104, ...,  0.64147779,
            0.63454247,  0.62279542]],
  
         [[ 0.72483646,  0.71110738,  0.70763967, ...,  0.58269432,
            0.59514201,  0.60190158]],
  
         [[-0.7008857 , -0.67309694, -0.687581  , ..., -1.0840553 ,
           -1.00790783, -0.96220454]]], shape=(140, 1, 150)),
  array([1., 2., 1., 1., 2., 2., 1., 1., 2., 2., 2., 1., 1., 2., 1., 2., 2.,
         2., 2., 2., 1., 1., 1., 2., 1., 2., 2., 2., 1., 2., 1., 1., 1., 1.,
         1., 1., 2., 2., 1., 2., 1., 2., 2., 1., 2., 2., 1., 2., 1., 2., 2.,
         2., 1., 2., 1., 2., 2., 1., 2., 1., 1., 1., 2., 1., 1., 1., 1.,

Usage of RustDataModule


In [27]:
from wrapper import RustDataModule, DatasetType

Rust Time Series Wrapper Loaded


In [28]:
rust_dm = RustDataModule(data, DatasetType.Forecasting, 3, 3, 3)

In [29]:
rust_dm.setup("")

In [30]:
test_dl = rust_dm.test_dataloader()

test_dl

<torch.utils.data.dataloader.DataLoader at 0x227cc8efbf0>

In [31]:
# test wether the dataloader works
for batch in test_dl:
    print(batch)
    break

[tensor([[[1.6497e+01, 3.4851e+01, 1.7376e+00,  ..., 1.9199e+02,
          1.0843e+03, 1.6865e+04],
         [1.6497e+01, 3.2717e+01, 1.7376e+00,  ..., 1.8698e+02,
          1.0902e+03, 1.8324e+04],
         [1.6497e+01, 3.4851e+01, 1.7376e+00,  ..., 1.8865e+02,
          1.0894e+03, 1.7784e+04]],

        [[1.6497e+01, 3.3428e+01, 1.7376e+00,  ..., 1.9032e+02,
          1.0748e+03, 1.7892e+04],
         [1.5228e+01, 3.2006e+01, 1.7376e+00,  ..., 1.8197e+02,
          1.0506e+03, 1.5568e+04],
         [1.6497e+01, 3.2006e+01, 1.7376e+00,  ..., 1.7529e+02,
          1.0139e+03, 1.4703e+04]],

        [[1.6497e+01, 3.4851e+01, 1.7376e+00,  ..., 1.7529e+02,
          1.0103e+03, 1.4595e+04],
         [1.7766e+01, 3.3428e+01, 1.7376e+00,  ..., 1.6861e+02,
          9.8534e+02, 1.4270e+04],
         [1.6497e+01, 3.2717e+01, 1.7376e+00,  ..., 1.6361e+02,
          9.8387e+02, 1.5135e+04]],

        ...,

        [[1.6497e+01, 3.9118e+01, 1.7376e+00,  ..., 2.5376e+02,
          1.1518e+03, 2.