## Usage test of the wrapper


In [1]:
# imports
import pandas as pd
import numpy as np

import logging

# from wrapper import RustDataModule
from rust_time_series.rust_time_series import (
    ForecastingDataSet,
    ClassificationDataSet,
    SplittingStrategy,
)

In [2]:
# Configure the root logger so that records at INFO level and above are emitted
# (basicConfig does nothing if the root logger already has handlers).
logging.basicConfig(level=logging.INFO)

In [3]:
# Create a dummy 3D float array from a seeded generator, so the dummy input is
# identical after every Restart Kernel -> Run All (the unseeded global RNG
# produced different values on each run).
SEED = 42
rng = np.random.default_rng(SEED)
dummy = rng.integers(0, 100, size=(3, 50, 50)).astype(float)

# Create a ForecastingDataSet instance from the dummy array
forecasting_data_set = ForecastingDataSet(dummy)

# Call the pipeline methods in order on the same instance.
forecasting_data_set.impute()
forecasting_data_set.downsample(2)
# NOTE(review): the three fractions sum to 1.0 - presumably train/validation/test
# proportions; confirm against the binding's signature.
forecasting_data_set.split(0.7, 0.2, 0.1)
# NOTE(review): normalize() and standardize() are both applied back to back -
# confirm that applying both is intended.
forecasting_data_set.normalize()
forecasting_data_set.standardize()
# NOTE(review): the meaning of the three collect() arguments is not visible in
# this notebook; the displayed result contains windows of 3 rows shifted by one
# row each - confirm the parameter names against the binding.
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)

forecasting_data_set_res

((array([[[ 0.93715899,  1.24001362, -0.91770022, ...,  0.83345681,
            0.10349014,  0.08164891],
          [-0.76885664,  0.58780503, -1.66491735, ..., -1.16704738,
            0.66945187, -0.2345733 ],
          [-0.80104562, -0.47632477,  0.28434474, ...,  0.6879656 ,
           -1.36801034,  1.48708094]],
  
         [[-0.76885664,  0.58780503, -1.66491735, ..., -1.16704738,
            0.66945187, -0.2345733 ],
          [-0.80104562, -0.47632477,  0.28434474, ...,  0.6879656 ,
           -1.36801034,  1.48708094],
          [ 0.90497002,  1.65193484,  0.05693083, ..., -0.83969215,
           -1.78304894,  0.50327852]],
  
         [[-0.80104562, -0.47632477,  0.28434474, ...,  0.6879656 ,
           -1.36801034,  1.48708094],
          [ 0.90497002,  1.65193484,  0.05693083, ..., -0.83969215,
           -1.78304894,  0.50327852],
          [ 0.61526925,  1.03405302, -1.50247884, ...,  1.30630326,
           -0.61339471, -1.14810412]],
  
         ...,
  
         [[ 1.548

### Dummy classification dataset temporal split


In [None]:
# 50 float64 labels, all equal to 1.0
labels = np.full(50, 1.0, dtype=np.float64)

# Fractions handed to split(); they sum to 1.0.
# NOTE(review): presumably train/validation/test proportions - confirm.
split_fractions = (0.7, 0.2, 0.1)

# Build the ClassificationDataSet from the dummy array and its labels
dummy_classification = ClassificationDataSet(dummy, labels)

# Run the pipeline steps in order, splitting with the Temporal strategy
dummy_classification.impute()
dummy_classification.downsample(2)
dummy_classification.split(SplittingStrategy.Temporal, *split_fractions)
dummy_classification.normalize()
dummy_classification.standardize()

# Collect the processed result and display it as the cell output
res_classification = dummy_classification.collect()

res_classification

((array([[[ 0.93715899,  1.24001362, -0.91770022, ...,  0.83345681,
            0.10349014,  0.08164891],
          [-0.76885664,  0.58780503, -1.66491735, ..., -1.16704738,
            0.66945187, -0.2345733 ],
          [-0.80104562, -0.47632477,  0.28434474, ...,  0.6879656 ,
           -1.36801034,  1.48708094],
          ...,
          [ 0.74402515,  0.38184443, -0.6253109 , ..., -1.2761658 ,
            0.48079796,  1.27626613],
          [-0.83323459, -0.06440355, -0.72277401, ...,  0.46972878,
            1.68818298,  1.17085873],
          [ 1.00153694, -1.67776164, -1.34004034, ...,  0.43335597,
           -1.70758738, -1.60486953]],
  
         [[-0.41477793, -0.03007679, -1.50247884, ..., -1.38528421,
            1.31087516, -1.00756091],
          [ 0.32556848,  0.34751766, -0.46287239, ..., -1.49440262,
           -1.6698566 ,  1.48708094],
          [-1.4448251 ,  1.06837978,  0.08941853, ..., -0.40321851,
           -0.65112549,  0.50327852],
          ...,
          [-

### Dummy classification dataset random split


In [13]:
# Fractions handed to split(); they sum to 1.0.
# NOTE(review): presumably train/validation/test proportions - confirm.
split_fractions = (0.7, 0.2, 0.1)

# Build a fresh ClassificationDataSet; `dummy` and `labels` come from earlier cells
dummy_classification = ClassificationDataSet(dummy, labels)

# Run the same pipeline steps, this time splitting with the Random strategy.
# NOTE(review): no seed is passed anywhere in this cell, so the split is
# presumably different on every run - confirm whether the binding can be seeded.
dummy_classification.impute()
dummy_classification.downsample(2)
dummy_classification.split(SplittingStrategy.Random, *split_fractions)
dummy_classification.normalize()
dummy_classification.standardize()

# Collect the processed result and display it as the cell output
res_classification = dummy_classification.collect()

res_classification

((array([[[ 1.09176254, -0.28011459,  1.31795316, ...,  1.46047183,
            0.41969127, -0.93157709],
          [ 1.12343844,  1.13525313, -0.19170228, ...,  0.67369713,
            1.79228812, -1.00268985],
          [-1.12555017,  0.686478  ,  0.46877198, ...,  0.60217215,
           -0.26660716,  0.24178337],
          ...,
          [-0.30197687,  0.37578753,  1.00344161, ...,  1.13860945,
            1.17823164,  1.55736934],
          [-1.03052248,  0.72099916,  0.9405393 , ..., -0.18460255,
            0.88926388,  0.0284451 ],
          [ 1.4085215 ,  0.34126636,  1.03489277, ..., -0.36341498,
            1.75616715,  1.37958745]],
  
         [[-0.68208762,  0.23770287,  1.25505085, ...,  0.28030977,
           -1.49472013, -0.22044954],
          [-0.36532867,  1.51498593, -0.94653   , ..., -0.07731509,
            1.32271552, -0.86046434],
          [ 1.12343844,  0.37578753, -0.16025112, ..., -0.79256482,
           -1.38635722,  0.56179077],
          ...,
          [ 

### Forecasting Dataset


In [5]:
# Location of the dataset, relative to this notebook
file_path = "../../data/LD2011_2014.txt"

# Read the file (";" separates fields, "," is the decimal mark) and
# discard the "date" column in the same chain
df = pd.read_csv(file_path, sep=";", decimal=",").drop(columns=["date"])

# Convert the remaining columns into a float64 NumPy array
data = df.to_numpy(dtype=np.float64)

data

array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [2.53807107e+00, 2.06258890e+01, 1.73761946e+00, ...,
        1.53589316e+02, 6.70087977e+02, 6.86486486e+03],
       [1.26903553e+00, 2.13371266e+01, 1.73761946e+00, ...,
        1.46911519e+02, 6.46627566e+02, 6.54054054e+03],
       [2.53807107e+00, 1.99146515e+01, 1.73761946e+00, ...,
        1.31886477e+02, 6.73020528e+02, 7.13513514e+03]],
      shape=(140256, 370))

In [6]:
# NOTE(review): this section is titled "Forecasting Dataset" and the preceding
# cell loads `data` from LD2011_2014.txt, yet this cell still passes `dummy`
# (its recorded output matches the dummy run). Presumably `data` was intended -
# confirm, and check which array shape the binding accepts before switching.
forecasting_data_set = ForecastingDataSet(dummy)

# Fractions handed to split(); they sum to 1.0.
# NOTE(review): presumably train/validation/test proportions - confirm.
split_fractions = (0.7, 0.2, 0.1)

# Run the pipeline steps in order on the instance
forecasting_data_set.impute()
forecasting_data_set.downsample(2)
forecasting_data_set.split(*split_fractions)
forecasting_data_set.normalize()
forecasting_data_set.standardize()

# Collect the processed result and display it as the cell output
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)

forecasting_data_set_res

((array([[[ 0.93715899,  1.24001362, -0.91770022, ...,  0.83345681,
            0.10349014,  0.08164891],
          [-0.76885664,  0.58780503, -1.66491735, ..., -1.16704738,
            0.66945187, -0.2345733 ],
          [-0.80104562, -0.47632477,  0.28434474, ...,  0.6879656 ,
           -1.36801034,  1.48708094]],
  
         [[-0.76885664,  0.58780503, -1.66491735, ..., -1.16704738,
            0.66945187, -0.2345733 ],
          [-0.80104562, -0.47632477,  0.28434474, ...,  0.6879656 ,
           -1.36801034,  1.48708094],
          [ 0.90497002,  1.65193484,  0.05693083, ..., -0.83969215,
           -1.78304894,  0.50327852]],
  
         [[-0.80104562, -0.47632477,  0.28434474, ...,  0.6879656 ,
           -1.36801034,  1.48708094],
          [ 0.90497002,  1.65193484,  0.05693083, ..., -0.83969215,
           -1.78304894,  0.50327852],
          [ 0.61526925,  1.03405302, -1.50247884, ...,  1.30630326,
           -0.61339471, -1.14810412]],
  
         ...,
  
         [[ 1.548

### Classification Dataset


In [None]:
from aeon.datasets import load_classification

# Load the GunPoint dataset as an (X, y) pair
X, y = load_classification("GunPoint")

# Keep the first axis and flatten all remaining axes into one, making X 2-D.
# NOTE(review): the ClassificationDataSet call in the next cell failed with a
# PyArray conversion TypeError in the recorded run, while the earlier successful
# calls received a 3-D array - this flattening may be the cause; confirm.
n_cases = X.shape[0]
X = X.reshape(n_cases, -1)

# Show both container types and the flattened shape, one item per line
print(type(X), type(y), X.shape, sep="\n")

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(200, 150)


In [None]:
# NOTE(review): in the recorded run this constructor call raised
#   TypeError: argument 'data': 'ndarray' object cannot be converted to 'PyArray<T, D>'
# X was flattened to 2-D (200, 150) in the previous cell, whereas the constructor
# calls that succeeded earlier in this notebook received a 3-D float array and
# float64 labels - presumably a dimensionality/dtype mismatch; confirm against
# the binding's expected input.
classification_data_set = ClassificationDataSet(X, y)


# NOTE(review): part1, part2 and part3 are not defined in any cell visible in
# this notebook, so this line would raise NameError once the call above succeeds.
len(part1), len(part2), len(part3)

TypeError: argument 'data': 'ndarray' object cannot be converted to 'PyArray<T, D>'

### Usage of RustDataModule


In [None]:
from wrapper import RustDataModule

Rust Time Series Wrapper Loaded


In [None]:
# Build the data module from the `data` array loaded in the "Forecasting Dataset" section.
# NOTE(review): `DatasetType` is not imported in any cell visible in this
# notebook, so this line raises NameError on a fresh kernel - add the import
# from whichever module defines it.
# NOTE(review): the meaning of the three positional 3s is not visible here;
# confirm against RustDataModule's signature and consider naming them.
rust_dm = RustDataModule(data, DatasetType.Forecasting, 3, 3, 3)

In [None]:
# Run the data module's setup step; the next cell requests the training dataloader.
# NOTE(review): what setup() does internally is not visible in this notebook.
rust_dm.setup()

In [None]:
# Request the training dataloader from the data module
train_dl = rust_dm.train_dataloader()

# Display the dataloader object as the cell output
train_dl

<torch.utils.data.dataloader.DataLoader at 0x140e4c620>

In [None]:
# Test whether the dataloader works: fetch only the first batch and show it
for batch in train_dl:
    # a single print call; sep="\n" keeps the batch and its shape on separate lines
    print(batch, batch.shape, sep="\n")
    break

tensor([[100.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        ...,
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.]], dtype=torch.float64)
torch.Size([32, 370])
