## Usage test of the wrapper


In [7]:
# imports
import pandas as pd
import numpy as np

import logging

# from wrapper import RustDataModule
from rust_time_series.rust_time_series import (
    ForecastingDataSet,
    ClassificationDataSet,
    SplittingStrategy,
)

In [2]:
# set up logging
logging.basicConfig(level=logging.INFO)

In [3]:
# create dummy 3D array
dummy = np.random.randint(0, 100, (3, 50, 50)).astype(float)

# Create a ForecastingDataSet instance
forecasting_data_set = ForecastingDataSet(dummy)

# call the pipeline methods
forecasting_data_set.impute()
forecasting_data_set.downsample(2)
forecasting_data_set.split(0.7, 0.2, 0.1)
forecasting_data_set.normalize()
forecasting_data_set.standardize()
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)

forecasting_data_set_res

((array([[[-1.78763453, -0.99421297, -0.81886173, ..., -1.5147945 ,
           -0.84258205,  0.22277603],
          [-0.63722915, -1.24464016, -0.71694887, ...,  0.46543369,
           -1.26673442, -1.63591756],
          [-1.04543751, -1.17308954, -0.71694887, ..., -0.24445944,
            1.56094805, -1.39042972]],
  
         [[-0.63722915, -1.24464016, -0.71694887, ...,  0.46543369,
           -1.26673442, -1.63591756],
          [-1.04543751, -1.17308954, -0.71694887, ..., -0.24445944,
            1.56094805, -1.39042972],
          [-1.00832766, -0.99421297,  1.45719211, ...,  1.62368036,
           -1.40811855, -0.47861777]],
  
         [[-1.04543751, -1.17308954, -0.71694887, ..., -0.24445944,
            1.56094805, -1.39042972],
          [-1.00832766, -0.99421297,  1.45719211, ...,  1.62368036,
           -1.40811855, -0.47861777],
          [ 1.14404369,  0.32947359, -1.39636792, ...,  0.72697326,
           -1.69088679, -0.12792087]],
  
         ...,
  
         [[ 0.810

### Dummy classification dataset temporal split


In [4]:
labels = np.ones(50, dtype=np.float64)

# create a ClassificationDataSet instance
dummy_classification = ClassificationDataSet(dummy, labels)

# call the pipeline methods
dummy_classification.impute()
dummy_classification.downsample(2)
dummy_classification.split(SplittingStrategy.Temporal, 0.7, 0.2, 0.1)
dummy_classification.normalize()
dummy_classification.standardize()
res_classification = dummy_classification.collect()

res_classification

((array([[[-1.78763453, -0.99421297, -0.81886173, ..., -1.5147945 ,
           -0.84258205,  0.22277603],
          [-0.63722915, -1.24464016, -0.71694887, ...,  0.46543369,
           -1.26673442, -1.63591756],
          [-1.04543751, -1.17308954, -0.71694887, ..., -0.24445944,
            1.56094805, -1.39042972],
          ...,
          [-1.52786558,  1.18808109,  0.20026685, ...,  1.99730832,
           -0.80723602,  1.55542427],
          [-0.78566856,  0.43679953,  0.87968591, ...,  1.62368036,
            0.28849094,  0.74882139],
          [ 0.69872548, -1.42351672,  0.20026685, ..., -1.21589213,
            0.42987506, -0.7591753 ]],
  
         [[-1.67630498, -1.38774141, -0.88680363, ...,  1.39950358,
           -1.76157885, -0.44354808],
          [-0.19191094,  0.47257484, -0.58106506, ...,  1.47422918,
            0.39452903, -0.68903592],
          [-1.19387692,  0.97342922,  0.03041209, ..., -0.84226417,
            1.24283377,  1.38007582],
          ...,
          [-

### Dummy Classification Data Set Random Split


In [5]:
# create a ClassificationDataSet instance
dummy_classification = ClassificationDataSet(dummy, labels)

# call the pipeline methods
dummy_classification.impute()
dummy_classification.downsample(2)
dummy_classification.split(SplittingStrategy.Random, 0.7, 0.2, 0.1)
dummy_classification.normalize()
dummy_classification.standardize()
res_classification = dummy_classification.collect()

res_classification

((array([[[ 0.96742214,  1.59611517,  0.79187599, ..., -1.41104492,
           -0.61213818,  0.58955321],
          [ 0.61100345, -1.27775646,  0.14309053, ...,  1.43391722,
           -0.64699421, -1.10352268],
          [-1.38494116,  0.72639086, -0.09593569, ...,  1.02749406,
           -0.75156228,  1.33027391],
          ...,
          [-1.4562249 , -1.61808337, -1.01789398, ..., -1.5218876 ,
           -1.37897072, -1.31515717],
          [-0.74338754, -0.02989115, -1.39350662, ...,  0.58412333,
           -1.27440264,  1.18918426],
          [-0.95723875, -1.23994236, -0.53984154, ...,  0.62107089,
           -1.44868277, -0.85661578]],
  
         [[-0.06619204,  0.87764726,  1.71383428, ..., -0.93072664,
           -0.71670625, -0.75079853],
          [-0.52953633, -0.59710265, -0.67642795, ..., -0.19177543,
           -0.47271408, -0.29225715],
          [ 0.2189429 , -0.89961546,  0.14309053, ...,  0.06685749,
            1.61864738, -0.99770544],
          ...,
          [-

### Forecasting Dataset


In [6]:
# Load the dataset
file_path = "../../data/LD2011_2014.txt"
df = pd.read_csv(file_path, sep=";", decimal=",")
# drop first column (date)
df = df.drop(columns=["date"])
# turn pandas DataFrame into numPy array
data = df.to_numpy(dtype=np.float64)

# add a third dimension (with only one entry)
data = np.expand_dims(data, axis=0)

data

array([[[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        ...,
        [2.53807107e+00, 2.06258890e+01, 1.73761946e+00, ...,
         1.53589316e+02, 6.70087977e+02, 6.86486486e+03],
        [1.26903553e+00, 2.13371266e+01, 1.73761946e+00, ...,
         1.46911519e+02, 6.46627566e+02, 6.54054054e+03],
        [2.53807107e+00, 1.99146515e+01, 1.73761946e+00, ...,
         1.31886477e+02, 6.73020528e+02, 7.13513514e+03]]],
      shape=(1, 140256, 370))

In [7]:
# Create a ForecastingDataSet instance
forecasting_data_set = ForecastingDataSet(data)

# call the pipeline methods
forecasting_data_set.impute()
forecasting_data_set.downsample(2)
forecasting_data_set.split(0.7, 0.2, 0.1)
forecasting_data_set.normalize()
forecasting_data_set.standardize()
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)

forecasting_data_set_res

((array([[[-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711],
          [-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711],
          [-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711]],
  
         [[-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711],
          [-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711],
          [-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711]],
  
         [[-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711],
          [-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711],
          [-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711]],
  
         ...,
  
         [[-0.617

### Classification Dataset


In [8]:
from aeon.datasets import load_classification


X, y = load_classification("GunPoint")

# switch axis zero and one to match the expected input shape
X = np.swapaxes(X, 0, 1)

#convert y value to float64
y = y.astype(np.float64)

print(type(X))
print(type(y))


print(X.shape)
print(y.shape)
y

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(1, 200, 150)
(200,)


array([2., 2., 1., 1., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 2., 1., 2.,
       2., 1., 2., 1., 1., 1., 2., 1., 2., 1., 1., 2., 1., 1., 2., 2., 1.,
       2., 1., 2., 2., 2., 2., 2., 1., 1., 1., 2., 2., 1., 2., 1., 2., 1.,
       2., 2., 1., 1., 2., 1., 2., 2., 1., 1., 1., 2., 1., 1., 1., 1., 2.,
       2., 2., 1., 2., 1., 1., 1., 2., 1., 1., 2., 1., 1., 2., 2., 1., 1.,
       1., 1., 1., 1., 2., 1., 2., 1., 1., 2., 1., 2., 2., 1., 2., 2., 2.,
       1., 2., 1., 1., 2., 2., 1., 2., 1., 2., 1., 1., 2., 1., 1., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 2., 2., 2., 2.,
       1., 2., 1., 1., 1., 2., 1., 2., 1., 1., 2., 2., 1., 1., 2., 1., 1.,
       2., 2., 1., 1., 2., 1., 2., 2., 2., 1., 2., 1., 1., 2., 1., 1., 2.,
       1., 2., 2., 2., 1., 2., 1., 1., 2., 2., 2., 2., 1., 1., 1., 2., 1.,
       2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 2., 2., 1.])

In [9]:
classification_data_set = ClassificationDataSet(X, y)

Usage of RustDataModule


In [10]:
from wrapper import RustDataModule, DatasetType

Rust Time Series Wrapper Loaded


In [11]:
rust_dm = RustDataModule(data, DatasetType.Forecasting, 3, 3, 3)

In [12]:
rust_dm.setup("")

In [13]:
train_dl = rust_dm.train_dataloader()

train_dl

<torch.utils.data.dataloader.DataLoader at 0x20020ba6b10>

In [None]:
# test wether the dataloader works
for batch in train_dl:
    print(batch)
    break

[tensor([[[-0.6171, -1.2364, -0.2617,  ..., -1.0082, -1.2931, -0.6068],
         [-0.6171, -1.2364, -0.2617,  ..., -1.0082, -1.2931, -0.6068],
         [-0.6171, -1.2364, -0.2617,  ..., -1.0082, -1.2931, -0.6068]],

        [[-0.6171, -1.2364, -0.2617,  ..., -1.0082, -1.2931, -0.6068],
         [-0.6171, -1.2364, -0.2617,  ..., -1.0082, -1.2931, -0.6068],
         [-0.6171, -1.2364, -0.2617,  ..., -1.0082, -1.2931, -0.6068]],

        [[-0.6171, -1.2364, -0.2617,  ..., -1.0082, -1.2931, -0.6068],
         [-0.6171, -1.2364, -0.2617,  ..., -1.0082, -1.2931, -0.6068],
         [-0.6171, -1.2364, -0.2617,  ..., -1.0082, -1.2931, -0.6068]],

        ...,

        [[-0.6171, -1.2364, -0.2617,  ..., -1.0082, -1.2931, -0.6068],
         [-0.6171, -1.2364, -0.2617,  ..., -1.0082, -1.2931, -0.6068],
         [-0.6171, -1.2364, -0.2617,  ..., -1.0082, -1.2931, -0.6068]],

        [[-0.6171, -1.2364, -0.2617,  ..., -1.0082, -1.2931, -0.6068],
         [-0.6171, -1.2364, -0.2617,  ..., -1.0082, -1

AttributeError: 'list' object has no attribute 'shape'