## Usage test of the wrapper


In [1]:
# imports
import pandas as pd
import numpy as np

import logging

# from wrapper import RustDataModule
from rust_time_series.rust_time_series import (
    ForecastingDataSet,
    ClassificationDataSet,
    SplittingStrategy,
    ImputeStrategy,
)

In [2]:
# Basic logging configuration with the root logger level set to INFO.
LOG_LEVEL = logging.INFO
logging.basicConfig(level=LOG_LEVEL)

In [3]:
# create dummy 3D array from a seeded generator so the input values are
# identical on every Restart & Run All (the unseeded global RNG made each
# run produce different data)
DUMMY_SEED = 42
dummy = (
    np.random.default_rng(DUMMY_SEED)
    .integers(0, 100, size=(3, 100, 100))
    .astype(float)
)
# untouched copy, used below to verify that the pipeline does not write
# into the caller's array
dummy_copy = dummy.copy()

# Create a ForecastingDataSet instance
# NOTE(review): 0.7 / 0.2 / 0.1 are presumably the train/validation/test
# proportions - confirm against the ForecastingDataSet signature
forecasting_data_set = ForecastingDataSet(dummy, 0.7, 0.2, 0.1)

# call the pipeline methods
forecasting_data_set.impute(ImputeStrategy.Median)

# check whether the dummy data was changed
assert np.array_equal(
    dummy, dummy_copy
), "Dummy data should not be modified after imputation."

forecasting_data_set.downsample(2)
forecasting_data_set.split()
forecasting_data_set.normalize()
forecasting_data_set.standardize()
# NOTE(review): the meaning of the positional arguments (3, 1, 1) is not
# visible in this notebook - consider passing them by keyword
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)

# display the collected result as the cell output
forecasting_data_set_res

((array([[[-1.4523315 , -0.48785481, -1.06159872, ..., -1.17429725,
            0.10861427, -1.63015341],
          [ 1.60095719, -0.12053258,  0.15357836, ...,  0.96426035,
           -0.94988711,  0.99732068],
          [-0.50241946, -0.22071137, -0.07632   , ...,  1.43206982,
           -0.94988711,  0.06387594]],
  
         [[ 1.60095719, -0.12053258,  0.15357836, ...,  0.96426035,
           -0.94988711,  0.99732068],
          [-0.50241946, -0.22071137, -0.07632   , ...,  1.43206982,
           -0.94988711,  0.06387594],
          [ 0.27786543, -0.88856997, -0.63464461, ...,  0.89743042,
           -1.15475834, -1.73386949]],
  
         [[-0.50241946, -0.22071137, -0.07632   , ...,  1.43206982,
           -0.94988711,  0.06387594],
          [ 0.27786543, -0.88856997, -0.63464461, ...,  0.89743042,
           -1.15475834, -1.73386949],
          [ 1.4991809 ,  1.38214928,  0.97464396, ...,  0.52986583,
           -0.64258025,  0.8936046 ]],
  
         ...,
  
         [[ 0.176

### Dummy classification dataset temporal split


In [4]:
# one dummy label per entry along axis 1 of the input array
# (presumably the instance axis - the GunPoint cell further down pairs
# X of shape (1, 200, 150) with y of shape (200,))
labels = np.ones(dummy.shape[1], dtype=np.float64)

# create a ClassificationDataSet instance
# NOTE(review): 0.7 / 0.2 / 0.1 are presumably the train/validation/test
# proportions - confirm against the ClassificationDataSet signature
dummy_classification = ClassificationDataSet(dummy, labels, 0.7, 0.2, 0.1)
# note: this prints the shape of the input array `dummy`
print("shape of dummy_classification:", dummy.shape)

# call the pipeline methods
dummy_classification.impute(ImputeStrategy.BackwardFill)
dummy_classification.downsample(2)
# This section is titled "temporal split"; the random strategy is exercised
# in the following section, so split temporally here instead of repeating it.
dummy_classification.split(SplittingStrategy.Temporal)
dummy_classification.normalize()
dummy_classification.standardize()
classification_data_set_res = dummy_classification.collect()

print(
    "Shape of the resulting classification dataset:",
    classification_data_set_res[0][0].shape,
)

# display the collected result as the cell output
classification_data_set_res

shape of dummy_classification: (3, 100, 100)
Shape of the resulting classification dataset: (3, 35, 100)


((array([[[-1.064346  ,  0.46755086,  1.43446371, ...,  0.89220941,
            1.28663596,  0.75668038],
          [-1.16811232,  0.8647957 , -1.40680271, ..., -1.51847791,
           -0.6921897 ,  1.15311168],
          [-1.41023374, -0.39314628, -0.99639756, ..., -1.217142  ,
            0.07921691, -1.58887148],
          ...,
          [ 1.59898962,  1.46066295,  0.96091931, ...,  0.49042819,
           -0.65865029,  0.82275226],
          [-0.99516845,  1.6923891 , -1.09110644, ..., -1.51847791,
            0.14629575, -1.5227996 ],
          [-1.34105619, -1.38625836, -1.43837233, ...,  1.12658179,
            1.65556956, -1.09333236]],
  
         [[-0.09586032,  1.16272933,  0.26638752, ..., -1.41803261,
            1.48787247,  0.02988966],
          [ 0.38838252,  0.66617328, -1.46994196, ...,  0.52390996,
           -0.1220196 , -1.62190743],
          [-1.09893477, -1.02211726, -0.68070129, ..., -1.58544145,
            0.9847812 ,  0.88882415],
          ...,
          [ 

### Dummy classification dataset random split


In [5]:
# create a ClassificationDataSet instance from the `dummy` array and `labels`
# defined in the earlier cells
# NOTE(review): 0.7 / 0.2 / 0.1 are presumably the train/validation/test
# proportions - confirm against the ClassificationDataSet signature
dummy_classification = ClassificationDataSet(dummy, labels, 0.7, 0.2, 0.1)

# call the pipeline methods in order on the same dataset object;
# this run uses forward-fill imputation and the random splitting strategy
dummy_classification.impute(ImputeStrategy.ForwardFill)
dummy_classification.downsample(2)
dummy_classification.split(SplittingStrategy.Random)
dummy_classification.normalize()
dummy_classification.standardize()
classification_data_set_res = dummy_classification.collect()

# display the collected result as the cell output
classification_data_set_res

((array([[[-0.40600296, -1.40955788,  0.87626731, ...,  0.57092945,
            1.59631778,  1.34380448],
          [-0.9517223 , -1.31139058,  1.54299244, ..., -1.34266171,
            0.5940453 , -0.46432014],
          [-0.14919386,  1.33912673,  1.47631992, ...,  1.51032875,
            1.69654503,  1.2123045 ],
          ...,
          [ 1.42376189, -0.46060724,  0.70958603, ..., -1.16869888,
           -1.17663607, -0.1684452 ],
          [ 1.29535734, -1.1477784 ,  1.77634623, ..., -1.34266171,
           -1.3102724 , -1.22044498],
          [-1.08012685,  1.47001648,  0.0428609 , ..., -0.61201781,
           -1.21004515, -0.43144515]],
  
         [[ 1.45586303, -1.50772519,  1.30963864, ..., -0.89035835,
            0.05949999,  1.50817944],
          [-0.40600296,  0.61923314, -1.49060689, ...,  1.19719565,
           -1.51072689,  1.04792954],
          [ 0.68543572,  0.91373507, -1.32392561, ..., -1.06432118,
           -1.14322698, -0.00407024],
          ...,
          [ 

### Forecasting Dataset


In [6]:
# Read the semicolon-separated file (decimal commas) and discard the
# "date" column in one chained step.
file_path = "../../data/LD2011_2014.txt"
df = pd.read_csv(file_path, sep=";", decimal=",").drop(columns=["date"])

# Convert the frame to a float64 NumPy array and prepend a leading axis of
# length one, giving a 3D array.
data = df.to_numpy(dtype=np.float64)[np.newaxis, ...]

data

array([[[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        ...,
        [2.53807107e+00, 2.06258890e+01, 1.73761946e+00, ...,
         1.53589316e+02, 6.70087977e+02, 6.86486486e+03],
        [1.26903553e+00, 2.13371266e+01, 1.73761946e+00, ...,
         1.46911519e+02, 6.46627566e+02, 6.54054054e+03],
        [2.53807107e+00, 1.99146515e+01, 1.73761946e+00, ...,
         1.31886477e+02, 6.73020528e+02, 7.13513514e+03]]],
      shape=(1, 140256, 370))

In [7]:
# Create a ForecastingDataSet instance from the `data` array built from
# LD2011_2014.txt in the previous cell
# NOTE(review): 0.7 / 0.2 / 0.1 are presumably the train/validation/test
# proportions - confirm against the ForecastingDataSet signature
forecasting_data_set = ForecastingDataSet(data, 0.7, 0.2, 0.1)

# call the pipeline methods in order on the same dataset object
# (LeaveNaN presumably leaves missing values untouched - confirm)
forecasting_data_set.impute(ImputeStrategy.LeaveNaN)
forecasting_data_set.downsample(2)
forecasting_data_set.split()
forecasting_data_set.normalize()
forecasting_data_set.standardize()
# NOTE(review): the meaning of the positional arguments (3, 1, 1) is not
# visible in this notebook - consider passing them by keyword
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)

# display the collected result as the cell output
forecasting_data_set_res

((array([[[-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632]],
  
         [[-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632]],
  
         [[-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632]],
  
         ...,
  
         [[-0.399

### Classification Dataset


In [8]:
from aeon.datasets import load_classification


# Load the "GunPoint" classification data set as a (samples, labels) pair.
X, y = load_classification("GunPoint")

# Exchange the first two axes so the array has the layout expected as input.
X = X.swapaxes(0, 1)

# Labels are converted to float64.
y = y.astype(np.float64)

# Report the container types first, then the shapes, one value per line.
print(type(X), type(y), sep="\n")
print(X.shape, y.shape, sep="\n")

y

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(1, 200, 150)
(200,)


array([2., 2., 1., 1., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 2., 1., 2.,
       2., 1., 2., 1., 1., 1., 2., 1., 2., 1., 1., 2., 1., 1., 2., 2., 1.,
       2., 1., 2., 2., 2., 2., 2., 1., 1., 1., 2., 2., 1., 2., 1., 2., 1.,
       2., 2., 1., 1., 2., 1., 2., 2., 1., 1., 1., 2., 1., 1., 1., 1., 2.,
       2., 2., 1., 2., 1., 1., 1., 2., 1., 1., 2., 1., 1., 2., 2., 1., 1.,
       1., 1., 1., 1., 2., 1., 2., 1., 1., 2., 1., 2., 2., 1., 2., 2., 2.,
       1., 2., 1., 1., 2., 2., 1., 2., 1., 2., 1., 1., 2., 1., 1., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 2., 2., 2., 2.,
       1., 2., 1., 1., 1., 2., 1., 2., 1., 1., 2., 2., 1., 1., 2., 1., 1.,
       2., 2., 1., 1., 2., 1., 2., 2., 2., 1., 2., 1., 1., 2., 1., 1., 2.,
       1., 2., 2., 2., 1., 2., 1., 1., 2., 2., 2., 2., 1., 1., 1., 2., 1.,
       2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 2., 2., 1.])

In [9]:
# Create a ClassificationDataSet from the GunPoint arrays `X` and `y`
# prepared in the previous cell
# NOTE(review): 0.7 / 0.2 / 0.1 are presumably the train/validation/test
# proportions - confirm against the ClassificationDataSet signature
classification_data_set = ClassificationDataSet(X, y, 0.7, 0.2, 0.1)

# call the pipeline methods in order on the same dataset object;
# this run uses median imputation and the random splitting strategy
classification_data_set.impute(ImputeStrategy.Median)
classification_data_set.downsample(2)
classification_data_set.split(SplittingStrategy.Random)
classification_data_set.normalize()
classification_data_set.standardize()
classification_data_set_res = classification_data_set.collect()

# display the collected result as the cell output
classification_data_set_res

((array([[[-0.37562665, -0.36344894, -0.34950459, ..., -0.26803564,
           -0.27313944, -0.27633988],
          [ 0.93836203,  0.9395622 ,  0.94596408, ...,  0.84132322,
            0.84038188,  0.84330978],
          [-0.33876405, -0.49262668, -0.62923811, ..., -0.94745464,
           -0.93152078, -0.91671194],
          ...,
          [ 0.6609238 ,  0.67543707,  0.68813149, ...,  1.19416772,
            1.19789569,  1.20672109],
          [ 1.12096884,  1.12701428,  1.11796314, ...,  0.92690822,
            0.94944254,  0.96700729],
          [-1.01855122, -1.0006333 , -0.97900501, ..., -1.10639521,
           -1.11586584, -1.10409107]]], shape=(1, 70, 150)),
  array([2., 1., 1., 2., 1., 1., 1., 1., 2., 2., 2., 2., 1., 1., 1., 2., 2.,
         1., 2., 1., 1., 1., 2., 1., 1., 2., 1., 2., 1., 1., 1., 2., 1., 1.,
         1., 2., 2., 2., 1., 2., 1., 2., 2., 2., 1., 1., 1., 1., 2., 2., 1.,
         1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 2., 1., 2., 2., 2., 2., 1.,
         2., 1.]))

### Usage of RustDataModule


In [10]:
from wrapper import RustDataModule, DatasetType

Rust Time Series Wrapper Loaded


In [11]:
# Build a RustDataModule for forecasting from the `data` array loaded earlier.
# NOTE(review): the meaning of the three positional 3s is not visible in this
# notebook - confirm against the wrapper and consider passing them by keyword
rust_dm = RustDataModule(data, DatasetType.Forecasting, 3, 3, 3)

In [12]:
# Call setup on the data module with an empty string as its argument.
# NOTE(review): "" is presumably the Lightning-style `stage` argument - confirm
rust_dm.setup("")

In [15]:
# Request the test dataloader from the data module and display the object
# as the cell output
test_dl = rust_dm.test_dataloader()

test_dl

<torch.utils.data.dataloader.DataLoader at 0x1c9a0ef6360>

In [16]:
# test whether the dataloader works: print the first batch only, then stop
for batch in test_dl:
    print(batch)
    break

[tensor([[[1.6497e+01, 3.4851e+01, 1.7376e+00,  ..., 1.9199e+02,
          1.0843e+03, 1.6865e+04],
         [1.6497e+01, 3.2717e+01, 1.7376e+00,  ..., 1.8698e+02,
          1.0902e+03, 1.8324e+04],
         [1.6497e+01, 3.4851e+01, 1.7376e+00,  ..., 1.8865e+02,
          1.0894e+03, 1.7784e+04]],

        [[1.6497e+01, 3.3428e+01, 1.7376e+00,  ..., 1.9032e+02,
          1.0748e+03, 1.7892e+04],
         [1.5228e+01, 3.2006e+01, 1.7376e+00,  ..., 1.8197e+02,
          1.0506e+03, 1.5568e+04],
         [1.6497e+01, 3.2006e+01, 1.7376e+00,  ..., 1.7529e+02,
          1.0139e+03, 1.4703e+04]],

        [[1.6497e+01, 3.4851e+01, 1.7376e+00,  ..., 1.7529e+02,
          1.0103e+03, 1.4595e+04],
         [1.7766e+01, 3.3428e+01, 1.7376e+00,  ..., 1.6861e+02,
          9.8534e+02, 1.4270e+04],
         [1.6497e+01, 3.2717e+01, 1.7376e+00,  ..., 1.6361e+02,
          9.8387e+02, 1.5135e+04]],

        ...,

        [[1.6497e+01, 3.9118e+01, 1.7376e+00,  ..., 2.5376e+02,
          1.1518e+03, 2.