## Usage test of the wrapper


In [1]:
# imports
import pandas as pd
import numpy as np

import logging

# from wrapper import RustDataModule
from rust_time_series.rust_time_series import (
    ForecastingDataSet,
    ClassificationDataSet,
    SplittingStrategy,
)

In [2]:
# Set up logging: configure the root logger so that messages at INFO level
# and above are emitted while the cells below run.
logging.basicConfig(level=logging.INFO)

In [3]:
# Create a dummy 3D array of whole-number floats in [0, 100).
# The generator is seeded so that "Restart Kernel -> Run All" feeds the
# pipeline the same input every time; the previous unseeded
# np.random.randint call produced different data on every run.
dummy = (
    np.random.default_rng(42)
    .integers(0, 100, size=(3, 100, 100))
    .astype(float)
)

# Create a ForecastingDataSet instance
forecasting_data_set = ForecastingDataSet(dummy)

# Call the pipeline methods. Their return values are not used here; only
# the result of collect() is kept.
forecasting_data_set.impute()
forecasting_data_set.downsample(2)
# NOTE(review): presumably the train/validation/test proportions - confirm
# against the binding's signature.
forecasting_data_set.split(0.7, 0.2, 0.1)
forecasting_data_set.normalize()
forecasting_data_set.standardize()
# NOTE(review): the three positional arguments are presumably window
# settings (the displayed result is consistent with 3-row windows that
# advance one row at a time) - confirm against the binding's signature.
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)

forecasting_data_set_res

((array([[[ 1.57351344,  0.94821821,  0.20258419, ..., -1.15853155,
            1.78929532,  0.02330889],
          [-0.89404173,  1.41220072,  0.40138176, ...,  0.35807325,
            0.25463923,  0.76089168],
          [ 0.42914003, -1.17836827,  1.69356595, ...,  0.25226361,
           -1.04129258,  1.46494797]],
  
         [[-0.89404173,  1.41220072,  0.40138176, ...,  0.35807325,
            0.25463923,  0.76089168],
          [ 0.42914003, -1.17836827,  1.69356595, ...,  0.25226361,
           -1.04129258,  1.46494797],
          [-0.32185502,  1.37353551, -0.29440973, ...,  0.21699373,
           -0.3592232 , -1.55243614]],
  
         [[ 0.42914003, -1.17836827,  1.69356595, ...,  0.25226361,
           -1.04129258,  1.46494797],
          [-0.32185502,  1.37353551, -0.29440973, ...,  0.21699373,
           -0.3592232 , -1.55243614],
          [ 1.07285008, -0.83038139, -1.15586586, ..., -1.33488095,
            0.08412189,  1.43142148]],
  
         ...,
  
         [[ 1.609

### Dummy classification dataset temporal split


In [4]:
# 100 float64 labels, all equal to 1.0, to pair with the dummy array.
labels = np.ones(100, dtype=np.float64)

# create a ClassificationDataSet instance
dummy_classification = ClassificationDataSet(dummy, labels)
# NOTE: this prints the shape of the raw input array `dummy`, not a property
# of the `dummy_classification` object itself.
print("shape of dummy_classification:", dummy.shape)

# call the pipeline methods (split uses the Temporal strategy here)
dummy_classification.impute()
dummy_classification.downsample(2)
# NOTE(review): the three floats are presumably the train/validation/test
# proportions - confirm against the binding's signature.
dummy_classification.split(SplittingStrategy.Temporal, 0.7, 0.2, 0.1)
dummy_classification.normalize()
dummy_classification.standardize()
classification_data_set_res = dummy_classification.collect()

# Report the shape of the first array of the first element of the result
# (presumably the training split's data - TODO confirm).
print(
    "Shape of the resulting classification dataset:",
    classification_data_set_res[0][0].shape,
)

classification_data_set_res

shape of dummy_classification: (3, 100, 100)
Shape of the resulting classification dataset: (3, 35, 100)


((array([[[ 1.57351344,  0.94821821,  0.20258419, ..., -1.15853155,
            1.78929532,  0.02330889],
          [-0.89404173,  1.41220072,  0.40138176, ...,  0.35807325,
            0.25463923,  0.76089168],
          [ 0.42914003, -1.17836827,  1.69356595, ...,  0.25226361,
           -1.04129258,  1.46494797],
          ...,
          [ 0.67947171, -1.29436389,  1.69356595, ...,  1.23982023,
           -1.07539605,  1.43142148],
          [ 0.39337836, -1.7583464 ,  0.46764761, ...,  1.27509011,
           -0.18870586, -1.55243614],
          [ 0.64371005,  0.48423571,  0.69957811, ..., -1.68757974,
            0.90260514,  1.0291036 ]],
  
         [[-1.10861174, -0.98504222,  0.63331225, ..., -0.0651653 ,
            0.62977739,  1.12968307],
          [ 0.35761669,  0.60023134,  0.50078054, ..., -0.10043518,
           -0.9048787 , -0.54664144],
          [ 1.3947051 , -0.21173805,  0.9315086 , ...,  1.23982023,
            1.48236411, -0.64722091],
          ...,
          [-

### Dummy classification dataset random split


In [5]:
# create a ClassificationDataSet instance
# Reuses `dummy` and `labels` from the earlier cells and rebinds
# `dummy_classification`, so this cell starts from a fresh dataset object.
dummy_classification = ClassificationDataSet(dummy, labels)

# call the pipeline methods (same sequence as the temporal-split cell, but
# with SplittingStrategy.Random)
dummy_classification.impute()
dummy_classification.downsample(2)
dummy_classification.split(SplittingStrategy.Random, 0.7, 0.2, 0.1)
dummy_classification.normalize()
dummy_classification.standardize()
# Overwrites the `classification_data_set_res` produced by the previous cell.
classification_data_set_res = dummy_classification.collect()

classification_data_set_res

((array([[[ 1.26998623, -1.49222466, -0.94243882, ...,  0.2929923 ,
            1.22530516, -0.05488762],
          [ 1.48131516, -0.05644929, -1.57790416, ...,  0.82471908,
           -1.26114589,  0.89434537],
          [ 1.51653665,  0.86391954,  0.15197372, ..., -1.30218802,
            1.66211413,  0.08071709],
          ...,
          [-0.24453776, -0.64548534, -0.02454443, ...,  1.31846537,
           -0.68993417, -1.41093475],
          [ 1.02343581,  1.37932608,  0.3637955 , ..., -1.64401238,
           -1.36194796,  0.04681591],
          [ 0.95299284,  1.12162281,  0.61092092, ..., -1.1122856 ,
           -0.99234037,  0.82654301]],
  
         [[-0.91374604,  0.31169824,  0.08136646, ...,  0.86269956,
           -0.11872244, -0.8685159 ],
          [ 0.53033498,  1.30569658,  1.5288153 , ...,  1.69827021,
            0.18368377,  0.58923477],
          [-1.12507497, -0.97681811,  0.61092092, ..., -0.12479302,
            0.51969067,  1.19945598],
          ...,
          [-

### Forecasting Dataset


In [6]:
# Read the semicolon-separated file (comma as the decimal mark) and discard
# the "date" column in a single chain.
file_path = "../../data/LD2011_2014.txt"
df = pd.read_csv(file_path, sep=";", decimal=",").drop(columns=["date"])

# Convert the frame to a float64 NumPy array and prepend a length-1 axis so
# the result is three-dimensional.
data = df.to_numpy(dtype=np.float64)[np.newaxis, ...]

data

array([[[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        ...,
        [2.53807107e+00, 2.06258890e+01, 1.73761946e+00, ...,
         1.53589316e+02, 6.70087977e+02, 6.86486486e+03],
        [1.26903553e+00, 2.13371266e+01, 1.73761946e+00, ...,
         1.46911519e+02, 6.46627566e+02, 6.54054054e+03],
        [2.53807107e+00, 1.99146515e+01, 1.73761946e+00, ...,
         1.31886477e+02, 6.73020528e+02, 7.13513514e+03]]],
      shape=(1, 140256, 370))

In [7]:
# Create a ForecastingDataSet instance
# Built from `data`, the 3D array loaded from LD2011_2014.txt; this rebinds
# `forecasting_data_set`, which an earlier cell bound to the dummy dataset.
forecasting_data_set = ForecastingDataSet(data)

# call the pipeline methods (same sequence and arguments as for the dummy
# forecasting dataset)
forecasting_data_set.impute()
forecasting_data_set.downsample(2)
# NOTE(review): presumably the train/validation/test proportions - confirm
# against the binding's signature.
forecasting_data_set.split(0.7, 0.2, 0.1)
forecasting_data_set.normalize()
forecasting_data_set.standardize()
# NOTE(review): the meaning of the three positional arguments is not visible
# here; presumably window-related settings - confirm.
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)

forecasting_data_set_res

((array([[[-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632]],
  
         [[-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632]],
  
         [[-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632]],
  
         ...,
  
         [[-0.399

### Classification Dataset


In [8]:
from aeon.datasets import load_classification


X, y = load_classification("GunPoint")

# Exchange the first two axes so the array has the layout the dataset
# class expects.
X = X.swapaxes(0, 1)

# The labels must be float64.
y = y.astype(np.float64)

# Show the container types, then the shapes, one per line.
print(type(X), type(y), sep="\n")
print(X.shape, y.shape, sep="\n")
y

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(1, 200, 150)
(200,)


array([2., 2., 1., 1., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 2., 1., 2.,
       2., 1., 2., 1., 1., 1., 2., 1., 2., 1., 1., 2., 1., 1., 2., 2., 1.,
       2., 1., 2., 2., 2., 2., 2., 1., 1., 1., 2., 2., 1., 2., 1., 2., 1.,
       2., 2., 1., 1., 2., 1., 2., 2., 1., 1., 1., 2., 1., 1., 1., 1., 2.,
       2., 2., 1., 2., 1., 1., 1., 2., 1., 1., 2., 1., 1., 2., 2., 1., 1.,
       1., 1., 1., 1., 2., 1., 2., 1., 1., 2., 1., 2., 2., 1., 2., 2., 2.,
       1., 2., 1., 1., 2., 2., 1., 2., 1., 2., 1., 1., 2., 1., 1., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 2., 2., 2., 2.,
       1., 2., 1., 1., 1., 2., 1., 2., 1., 1., 2., 2., 1., 1., 2., 1., 1.,
       2., 2., 1., 1., 2., 1., 2., 2., 2., 1., 2., 1., 1., 2., 1., 1., 2.,
       1., 2., 2., 2., 1., 2., 1., 1., 2., 2., 2., 2., 1., 1., 1., 2., 1.,
       2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 2., 2., 1.])

In [9]:
# Build a ClassificationDataSet from the GunPoint arrays `X` and `y`
# prepared in the previous cell.
classification_data_set = ClassificationDataSet(X, y)

# Run the same pipeline as for the dummy data, with a random split.
classification_data_set.impute()
classification_data_set.downsample(2)
# NOTE(review): the three floats are presumably the train/validation/test
# proportions - confirm against the binding's signature.
classification_data_set.split(SplittingStrategy.Random, 0.7, 0.2, 0.1)
classification_data_set.normalize()
classification_data_set.standardize()
# Overwrites the `classification_data_set_res` left by the dummy-data cells.
classification_data_set_res = classification_data_set.collect()

classification_data_set_res

((array([[[-0.67692851, -0.62881733, -0.60167478, ..., -0.28914779,
           -0.24950315, -0.20476391],
          [-0.35485776, -0.34357399, -0.33054608, ..., -0.83324206,
           -0.80195658, -0.78130998],
          [-2.36685766, -2.33543736, -2.2985474 , ..., -3.17899974,
           -3.09639542, -2.95744405],
          ...,
          [-0.16317358, -0.14515038, -0.1298327 , ..., -0.18243891,
           -0.16955337, -0.15236597],
          [ 0.84197106,  0.82937852,  0.82069292, ...,  0.58811754,
            0.60898699,  0.6389576 ],
          [-0.38813549, -0.36833063, -0.35294604, ..., -0.82210416,
           -0.76147209, -0.723506  ]]], shape=(1, 70, 150)),
  array([1., 2., 2., 1., 1., 1., 1., 2., 1., 1., 1., 2., 1., 1., 1., 1., 2.,
         1., 1., 1., 1., 2., 1., 1., 1., 2., 2., 2., 1., 1., 2., 2., 2., 1.,
         1., 2., 1., 1., 2., 2., 1., 1., 2., 2., 1., 1., 1., 2., 2., 1., 1.,
         2., 2., 2., 1., 1., 1., 2., 1., 2., 1., 1., 2., 2., 2., 2., 1., 2.,
         1., 1.]))

### Usage of RustDataModule


In [10]:
from wrapper import RustDataModule, DatasetType

Rust Time Series Wrapper Loaded


In [11]:
# Wrap the LD2011_2014 array `data` in a RustDataModule configured for the
# forecasting dataset type.
# NOTE(review): the meaning of the three trailing `3` arguments is not
# visible in this notebook - confirm against wrapper.RustDataModule.
rust_dm = RustDataModule(data, DatasetType.Forecasting, 3, 3, 3)

In [12]:
# Prepare the data module before requesting dataloaders.
# NOTE(review): an empty string is passed as the only argument; presumably
# this is a stage name that the wrapper ignores - confirm.
rust_dm.setup("")

In [13]:
# Ask the data module for its training dataloader and display the object.
train_dl = rust_dm.train_dataloader()

train_dl

<torch.utils.data.dataloader.DataLoader at 0x2030a25a060>

In [14]:
# Test whether the dataloader works: pull a single batch, print it, and stop
# after the first iteration.
for batch in train_dl:
    print(batch)
    break

[tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        ...,

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]]), tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ...,