## Usage test of the wrapper


In [1]:
# imports
import pandas as pd
import numpy as np

import logging

# from wrapper import RustDataModule
from rust_time_series.rust_time_series import (
    ForecastingDataSet,
    ClassificationDataSet,
    SplittingStrategy,
)

In [2]:
# Configure the root logger so that messages at INFO level and above are shown.
logging.basicConfig(level=logging.INFO)

In [3]:
# Create a dummy 3D array of shape (3, 50, 50) holding integers in [0, 100),
# stored as floats. A seeded generator is used so that Restart & Run All
# reproduces the same data (and therefore the same outputs in the cells that
# reuse `dummy`); the previous unseeded np.random.randint call did not.
rng = np.random.default_rng(42)
dummy = rng.integers(0, 100, size=(3, 50, 50)).astype(float)

# Create a ForecastingDataSet instance
forecasting_data_set = ForecastingDataSet(dummy)

# Call the pipeline methods in order: impute, downsample, split, normalize,
# standardize, collect.
forecasting_data_set.impute()
forecasting_data_set.downsample(2)
# NOTE(review): 0.7 / 0.2 / 0.1 are presumably the train/validation/test
# proportions — TODO confirm against the ForecastingDataSet API.
forecasting_data_set.split(0.7, 0.2, 0.1)
forecasting_data_set.normalize()
forecasting_data_set.standardize()
# NOTE(review): the meaning of the three positional arguments is not visible
# here — TODO confirm against the collect() signature.
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)

forecasting_data_set_res

((array([[[ 0.02365287, -0.27481705, -1.34070565, ...,  0.83686791,
            0.49362353,  0.75813548],
          [ 0.40308434,  0.69542746, -0.95892187, ...,  0.30041412,
           -0.39562474, -0.37075985],
          [-0.04533467,  1.01884229,  0.8458742 , ..., -1.20165648,
           -0.68145454, -1.49965518]],
  
         [[ 0.40308434,  0.69542746, -0.95892187, ...,  0.30041412,
           -0.39562474, -0.37075985],
          [-0.04533467,  1.01884229,  0.8458742 , ..., -1.20165648,
           -0.68145454, -1.49965518],
          [-1.18362909, -0.81384178,  0.18642948, ...,  0.97992225,
           -0.332107  ,  1.40810552]],
  
         [[-0.04533467,  1.01884229,  0.8458742 , ..., -1.20165648,
           -0.68145454, -1.49965518],
          [-1.18362909, -0.81384178,  0.18642948, ...,  0.97992225,
           -0.332107  ,  1.40810552],
          [-0.63172876, -0.13107712,  1.22765798, ..., -0.9155478 ,
            0.39834693, -0.6102225 ]],
  
         ...,
  
         [[-1.011

### Dummy classification dataset temporal split


In [4]:
# 50 float64 labels, all set to 1.0 (placeholder values for the dummy data).
labels = np.ones(50, dtype=np.float64)

# create a ClassificationDataSet instance from the dummy array and the labels
dummy_classification = ClassificationDataSet(dummy, labels)

# call the pipeline methods in order: impute, downsample, split, normalize,
# standardize, collect
dummy_classification.impute()
dummy_classification.downsample(2)
# Temporal splitting strategy; 0.7 / 0.2 / 0.1 are presumably the
# train/validation/test proportions — TODO confirm.
dummy_classification.split(SplittingStrategy.Temporal, 0.7, 0.2, 0.1)
dummy_classification.normalize()
dummy_classification.standardize()
res_classification = dummy_classification.collect()

# display the collected result
res_classification

((array([[[ 0.02365287, -0.27481705, -1.34070565, ...,  0.83686791,
            0.49362353,  0.75813548],
          [ 0.40308434,  0.69542746, -0.95892187, ...,  0.30041412,
           -0.39562474, -0.37075985],
          [-0.04533467,  1.01884229,  0.8458742 , ..., -1.20165648,
           -0.68145454, -1.49965518],
          ...,
          [ 0.16162795,  0.73136244, -0.95892187, ...,  0.05006902,
            1.09704199, -0.02867036],
          [ 0.85150336,  0.22827269,  0.11701425, ...,  0.83686791,
           -1.25311413, -1.60228203],
          [-0.56274122, -1.06538665, -1.13245995, ..., -0.98707497,
           -1.284873  ,  0.34762809]],
  
         [[ 1.09295975,  0.44388259,  1.43590368, ..., -0.59367553,
           -0.7132134 , -0.81547619],
          [ 0.9204909 , -0.7779068 ,  1.12353513, ..., -0.77249345,
           -0.4273836 ,  0.45025493],
          [-1.39059171, -0.05920716,  0.0128914 , ..., -0.77249345,
           -1.3801496 , -0.30234195],
          ...,
          [ 

### Dummy classification dataset random split


In [5]:
# create a ClassificationDataSet instance
# (relies on `dummy` and `labels` defined in earlier cells; the names
# `dummy_classification` and `res_classification` are re-bound here and
# replace the objects from the temporal-split cell)
dummy_classification = ClassificationDataSet(dummy, labels)

# call the pipeline methods in order: impute, downsample, split, normalize,
# standardize, collect
dummy_classification.impute()
dummy_classification.downsample(2)
# Random splitting strategy; 0.7 / 0.2 / 0.1 are presumably the
# train/validation/test proportions — TODO confirm.
# NOTE(review): no seed is passed here, so the split is presumably not
# reproducible between runs — TODO check whether the API accepts a seed.
dummy_classification.split(SplittingStrategy.Random, 0.7, 0.2, 0.1)
dummy_classification.normalize()
dummy_classification.standardize()
res_classification = dummy_classification.collect()

# display the collected result
res_classification

((array([[[ 1.2938327 , -0.30959241, -1.00946149, ..., -1.74189595,
           -1.08688602, -1.2960409 ],
          [-0.63435174,  0.99210294, -1.44413497, ...,  0.64544829,
           -1.54376977, -1.02250463],
          [ 0.0422042 , -0.90766865,  0.72923243, ...,  0.53855228,
            1.16489815, -0.33866395],
          ...,
          [-1.14176869,  0.78101721, -0.4076059 , ...,  0.85924032,
            1.62178189, -1.05669666],
          [-0.12693478,  0.21812192, -0.14011453, ...,  0.28912826,
           -0.85844415, -1.12508073],
          [-0.97262971,  0.0773981 , -1.6447535 , ..., -1.63499994,
            0.96909083, -1.53538513]],
  
         [[-0.70200733, -1.43538298,  1.33108802, ..., -0.67293584,
           -0.66263683, -0.81735242],
          [-1.58153006,  1.55499823, -0.50791516, ..., -1.74189595,
            1.1975327 , -0.64639226],
          [-1.71684124,  0.99210294, -0.00636884, ..., -1.2786799 ,
           -0.10784943,  0.51613689],
          ...,
          [-

### Forecasting Dataset


In [None]:
# Read the semicolon-separated file (decimal commas) and discard the "date"
# column so that only the numeric series remain in the frame.
file_path = "../../data/LD2011_2014.txt"
df = pd.read_csv(file_path, sep=";", decimal=",").drop(columns=["date"])

# Convert the frame to a float64 array and prepend a singleton axis, giving a
# 3D array whose first dimension has exactly one entry.
data = df.to_numpy(dtype=np.float64)[np.newaxis, ...]

data

array([[[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        ...,
        [2.53807107e+00, 2.06258890e+01, 1.73761946e+00, ...,
         1.53589316e+02, 6.70087977e+02, 6.86486486e+03],
        [1.26903553e+00, 2.13371266e+01, 1.73761946e+00, ...,
         1.46911519e+02, 6.46627566e+02, 6.54054054e+03],
        [2.53807107e+00, 1.99146515e+01, 1.73761946e+00, ...,
         1.31886477e+02, 6.73020528e+02, 7.13513514e+03]]],
      shape=(1, 140256, 370))

In [7]:
# Create a ForecastingDataSet instance from the loaded 3D `data` array.
# (The names `forecasting_data_set` and `forecasting_data_set_res` are
# re-bound here and replace the objects built from the dummy array.)
forecasting_data_set = ForecastingDataSet(data)

# call the pipeline methods in order: impute, downsample, split, normalize,
# standardize, collect
forecasting_data_set.impute()
forecasting_data_set.downsample(2)
# 0.7 / 0.2 / 0.1 are presumably the train/validation/test proportions —
# TODO confirm.
forecasting_data_set.split(0.7, 0.2, 0.1)
forecasting_data_set.normalize()
forecasting_data_set.standardize()
# NOTE(review): the meaning of the three positional arguments is not visible
# here — TODO confirm against the collect() signature.
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)

# display the collected result
forecasting_data_set_res

((array([[[-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711],
          [-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711],
          [-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711]],
  
         [[-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711],
          [-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711],
          [-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711]],
  
         [[-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711],
          [-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711],
          [-0.6170722 , -1.23641006, -0.26170146, ..., -1.00818535,
           -1.29309618, -0.60680711]],
  
         ...,
  
         [[-0.617

### Classification Dataset


In [8]:
from aeon.datasets import load_classification

# Fetch the GunPoint classification problem as (samples, labels).
X, y = load_classification("GunPoint")

# Collapse every axis after the first into one, leaving a 2D array with one
# row per case.
X = X.reshape(len(X), -1)

# Report the container types and the flattened shape, one per line.
print(type(X), type(y), X.shape, sep="\n")

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(200, 150)


In [None]:
# NOTE(review): the call below is disabled because it raised the TypeError
# kept in this cell's output (argument 'data' could not be converted to
# 'PyArray<T, D>'). Presumably the 2D `X` produced by the reshape in the
# previous cell is the cause, since the other datasets in this notebook are
# built from 3D arrays — TODO confirm (and check whether `y` has to be a
# float64 array like `labels`).
# classification_data_set = ClassificationDataSet(X, y)

TypeError: argument 'data': 'ndarray' object cannot be converted to 'PyArray<T, D>'

### Usage of RustDataModule


In [10]:
from wrapper import RustDataModule, DatasetType

Rust Time Series Wrapper Loaded


In [11]:
# Wrap the `data` array in a RustDataModule configured for forecasting.
# NOTE(review): the three positional 3s are unexplained magic numbers —
# TODO confirm their meaning against the RustDataModule signature and pass
# them as named constants or keyword arguments.
rust_dm = RustDataModule(data, DatasetType.Forecasting, 3, 3, 3)

In [13]:
# Run the data module's setup step; an empty string is passed as its argument.
# NOTE(review): the stored output of this cell is
# "TypeError: 'int' object is not callable", and the execution count jumps
# from 11 to 13 — the cause lies outside this notebook (presumably in the
# wrapper module); TODO re-run from a fresh kernel and fix or document it.
rust_dm.setup("")

TypeError: 'int' object is not callable

In [None]:
# Ask the data module for its training dataloader and display the object.
train_dl = rust_dm.train_dataloader()

train_dl

<torch.utils.data.dataloader.DataLoader at 0x140e4c620>

In [None]:
# Test whether the dataloader works: print the first batch and its shape,
# then stop after that single iteration.
for batch in train_dl:
    print(batch)
    print(batch.shape)
    break

tensor([[100.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        ...,
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.]], dtype=torch.float64)
torch.Size([32, 370])
