## Usage test of the wrapper


In [1]:
# imports
import pandas as pd
import numpy as np

import logging

# from wrapper import RustDataModule
from rust_time_series import (
    ForecastingDataSet,
    ClassificationDataSet,
    SplittingStrategy,
    ImputeStrategy,
)

In [7]:
# set up logging: configure the root logger so that INFO-level (and more severe)
# messages emitted while running the cells below are displayed
logging.basicConfig(level=logging.INFO)

In [8]:
# create a reproducible dummy 3D array
# A seeded generator keeps the values identical across "Restart & Run All",
# so the results of this cell can be compared between runs.
rng = np.random.default_rng(seed=42)
dummy_data = rng.integers(0, 100, size=(3, 100, 100)).astype(np.float64)
# untouched reference copy, used below to detect in-place modification of the input
dummy_copy = dummy_data.copy()

# Create a ForecastingDataSet instance from the array and three split ratios
# NOTE(review): 0.7 / 0.2 / 0.1 are presumably train / validation / test fractions -- confirm
forecasting_data_set = ForecastingDataSet(dummy_data, 0.7, 0.2, 0.1)

# call the pipeline methods
forecasting_data_set.impute(ImputeStrategy.Median)

# check that imputation did not modify the caller's array in place
assert np.array_equal(
    dummy_data, dummy_copy
), "Dummy data should not be modified after imputation."

forecasting_data_set.downsample(2)
forecasting_data_set.split()
forecasting_data_set.normalize()
forecasting_data_set.standardize()
# NOTE(review): the meaning of the collect(3, 1, 1) arguments is not visible in
# this notebook -- confirm against the binding's signature
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)

forecasting_data_set_res

((array([[[-1.21809114, -1.15232148,  1.50467603, ...,  0.47357761,
            0.02990147,  0.48110325],
          [-1.64872942,  0.10492837,  0.03362405, ..., -0.13239804,
            0.47354825,  1.58937719],
          [-0.32368856, -0.85455178, -1.03288864, ..., -0.8809562 ,
            1.46322184, -1.41368768]],
  
         [[-1.64872942,  0.10492837,  0.03362405, ..., -0.13239804,
            0.47354825,  1.58937719],
          [-0.32368856, -0.85455178, -1.03288864, ..., -0.8809562 ,
            1.46322184, -1.41368768],
          [ 0.93510027,  1.06440852, -1.25354644, ..., -0.09675241,
           -1.64230563,  0.01634321]],
  
         [[-0.32368856, -0.85455178, -1.03288864, ..., -0.8809562 ,
            1.46322184, -1.41368768],
          [ 0.93510027,  1.06440852, -1.25354644, ..., -0.09675241,
           -1.64230563,  0.01634321],
          [ 0.50446199,  0.56812568, -1.95229613, ..., -0.56014556,
            1.32671514, -0.37691529]],
  
         ...,
  
         [[ 0.935

### Dummy classification dataset temporal split


In [4]:
# three dummy labels, matching the size of the first dimension of dummy_data
# (dummy_data is created in the forecasting cell earlier in this notebook)
dummy_labels = np.ones(3, dtype=np.float64)

# create a ClassificationDataSet instance
dummy_classification = ClassificationDataSet(dummy_data, dummy_labels, 0.7, 0.2, 0.1)
# the value printed is the shape of the input array, so label it as such
print("shape of dummy_data:", dummy_data.shape)

# call the pipeline methods
# NOTE(review): the section heading says "temporal split", but the split below
# uses SplittingStrategy.Random -- confirm which strategy is intended here.
dummy_classification.impute(ImputeStrategy.BackwardFill)
dummy_classification.downsample(2)
dummy_classification.split(SplittingStrategy.Random)
dummy_classification.normalize()
dummy_classification.standardize()
classification_data_set_res = dummy_classification.collect()

# shape of the first array of the first element returned by collect()
print(
    "Shape of the resulting classification dataset:",
    classification_data_set_res[0][0].shape,
)

classification_data_set_res

shape of dummy_classification: (3, 100, 100)
Shape of the resulting classification dataset: (2, 50, 100)


((array([[[-0.02901875,  0.0789744 , -0.50602246, ..., -1.55541072,
           -1.11633409,  0.60237172],
          [-1.33323244, -0.47580447,  1.2894229 , ..., -1.13284915,
            1.12473811, -0.51313146],
          [-1.23541642, -1.22638766, -0.13960504, ...,  0.28849431,
            0.91463759, -0.1892757 ],
          ...,
          [-1.13760039,  0.14424251, -0.94572336, ..., -0.05723789,
           -1.7116189 , -0.29722762],
          [-0.4528882 ,  1.25380026, -0.72587291, ..., -0.90236102,
            1.47490565,  1.35803516],
          [ 1.79688041, -0.70424283, -1.71519995, ...,  1.67142308,
           -1.60656864, -1.19682696]],
  
         [[ 1.34040562, -0.24736611,  0.22681238, ..., -0.44138477,
            0.77457058,  0.96221145],
          [ 0.75350946,  0.66638733, -0.90908162, ..., -0.86394633,
            0.70453707,  0.56638774],
          [ 0.42745604,  0.20951061, -0.46938072, ..., -0.0188232 ,
            0.03921876,  0.78229159],
          ...,
          [ 

### Dummy Classification Data Set Random Split


In [5]:
# Build a fresh ClassificationDataSet from the dummy array and labels
# defined in the earlier cells, using the same three split ratios
split_ratios = (0.7, 0.2, 0.1)
dummy_classification = ClassificationDataSet(dummy_data, dummy_labels, *split_ratios)

# Run the pipeline in order: forward-fill imputation, downsampling,
# a random split, then normalize and standardize
dummy_classification.impute(ImputeStrategy.ForwardFill)
dummy_classification.downsample(2)
dummy_classification.split(SplittingStrategy.Random)
dummy_classification.normalize()
dummy_classification.standardize()

# Gather the processed result and display it as the cell output
classification_data_set_res = dummy_classification.collect()
classification_data_set_res

((array([[[-0.02901875,  0.0789744 , -0.50602246, ..., -1.55541072,
           -1.11633409,  0.60237172],
          [-1.33323244, -0.47580447,  1.2894229 , ..., -1.13284915,
            1.12473811, -0.51313146],
          [-1.23541642, -1.22638766, -0.13960504, ...,  0.28849431,
            0.91463759, -0.1892757 ],
          ...,
          [-1.13760039,  0.14424251, -0.94572336, ..., -0.05723789,
           -1.7116189 , -0.29722762],
          [-0.4528882 ,  1.25380026, -0.72587291, ..., -0.90236102,
            1.47490565,  1.35803516],
          [ 1.79688041, -0.70424283, -1.71519995, ...,  1.67142308,
           -1.60656864, -1.19682696]],
  
         [[ 1.34040562, -0.24736611,  0.22681238, ..., -0.44138477,
            0.77457058,  0.96221145],
          [ 0.75350946,  0.66638733, -0.90908162, ..., -0.86394633,
            0.70453707,  0.56638774],
          [ 0.42745604,  0.20951061, -0.46938072, ..., -0.0188232 ,
            0.03921876,  0.78229159],
          ...,
          [ 

### Forecasting Dataset


In [6]:
# Read the semicolon-separated file (comma as decimal mark) and
# discard the "date" column in the same step
file_path = "../../data/LD2011_2014.txt"
df = pd.read_csv(file_path, sep=";", decimal=",").drop(columns=["date"])

# Convert the frame to a float64 NumPy array and prepend a length-1 axis,
# turning the 2D table into a 3D array
data = df.to_numpy(dtype=np.float64)[np.newaxis, ...]

data

FileNotFoundError: [Errno 2] No such file or directory: '../../data/LD2011_2014.txt'

In [None]:
# Wrap the array loaded in the previous cell in a ForecastingDataSet,
# passing the three split ratios
split_ratios = (0.7, 0.2, 0.1)
forecasting_data_set = ForecastingDataSet(data, *split_ratios)

# Run the pipeline in order; the LeaveNaN strategy is passed to the impute step
forecasting_data_set.impute(ImputeStrategy.LeaveNaN)
forecasting_data_set.downsample(2)
forecasting_data_set.split()
forecasting_data_set.normalize()
forecasting_data_set.standardize()

# Gather the processed result and display it as the cell output
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)
forecasting_data_set_res

((array([[[-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632]],
  
         [[-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632]],
  
         [[-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632]],
  
         ...,
  
         [[-0.399

### Classification Dataset


In [None]:
from aeon.datasets import load_classification


# Load the "GunPoint" dataset as a (data, labels) pair
X, y = load_classification("GunPoint")

# cast the labels to float64
y = y.astype(np.float64)

# Report the container types and the shapes of X and y, one item per line
print(type(X), type(y), X.shape, y.shape, sep="\n")

# display the converted labels as the cell output
y

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(200, 1, 150)
(200,)


array([2., 2., 1., 1., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 2., 1., 2.,
       2., 1., 2., 1., 1., 1., 2., 1., 2., 1., 1., 2., 1., 1., 2., 2., 1.,
       2., 1., 2., 2., 2., 2., 2., 1., 1., 1., 2., 2., 1., 2., 1., 2., 1.,
       2., 2., 1., 1., 2., 1., 2., 2., 1., 1., 1., 2., 1., 1., 1., 1., 2.,
       2., 2., 1., 2., 1., 1., 1., 2., 1., 1., 2., 1., 1., 2., 2., 1., 1.,
       1., 1., 1., 1., 2., 1., 2., 1., 1., 2., 1., 2., 2., 1., 2., 2., 2.,
       1., 2., 1., 1., 2., 2., 1., 2., 1., 2., 1., 1., 2., 1., 1., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 2., 2., 2., 2.,
       1., 2., 1., 1., 1., 2., 1., 2., 1., 1., 2., 2., 1., 1., 2., 1., 1.,
       2., 2., 1., 1., 2., 1., 2., 2., 2., 1., 2., 1., 1., 2., 1., 1., 2.,
       1., 2., 2., 2., 1., 2., 1., 1., 2., 2., 2., 2., 1., 1., 1., 2., 1.,
       2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 2., 2., 1.])

In [None]:
# Wrap the GunPoint arrays from the previous cell in a ClassificationDataSet,
# passing the three split ratios
split_ratios = (0.7, 0.2, 0.1)
classification_data_set = ClassificationDataSet(X, y, *split_ratios)

# Run the pipeline in order: median imputation, downsampling,
# a random split, then normalize and standardize
classification_data_set.impute(ImputeStrategy.Median)
classification_data_set.downsample(2)
classification_data_set.split(SplittingStrategy.Random)
classification_data_set.normalize()
classification_data_set.standardize()

# Gather the processed result and display it as the cell output
classification_data_set_res = classification_data_set.collect()
classification_data_set_res

((array([[[ 0.67374004,  0.65732693,  0.64427143, ...,  0.70209976,
            0.70730792,  0.7037743 ]],
  
         [[-0.35723327, -0.35045691, -0.35095462, ..., -0.3793326 ,
           -0.35819962, -0.34366399]],
  
         [[ 0.78641614,  0.81357108,  0.81044308, ...,  0.35455497,
            0.35855005,  0.37435543]],
  
         ...,
  
         [[ 0.62504498,  0.63453206,  0.63087104, ...,  0.64147779,
            0.63454247,  0.62279542]],
  
         [[ 0.72483646,  0.71110738,  0.70763967, ...,  0.58269432,
            0.59514201,  0.60190158]],
  
         [[-0.7008857 , -0.67309694, -0.687581  , ..., -1.0840553 ,
           -1.00790783, -0.96220454]]], shape=(140, 1, 150)),
  array([1., 2., 1., 1., 2., 2., 1., 1., 2., 2., 2., 1., 1., 2., 1., 2., 2.,
         2., 2., 2., 1., 1., 1., 2., 1., 2., 2., 2., 1., 2., 1., 1., 1., 1.,
         1., 1., 2., 2., 1., 2., 1., 2., 2., 1., 2., 2., 1., 2., 1., 2., 2.,
         2., 1., 2., 1., 2., 2., 1., 2., 1., 1., 1., 2., 1., 1., 1., 1.,

## Usage of Wrapper


In [2]:
from wrapper import RustClassificationDataSet, RustForecastingDataSet
import torch

In [3]:
# create reproducible dummy torch tensors
# A dedicated, seeded generator makes the random values identical on every run
# without touching torch's global RNG state.
torch_rng = torch.Generator().manual_seed(42)
tensor_data = torch.randn((3, 100, 100), dtype=torch.float64, generator=torch_rng)
# three random float64 values used as dummy labels (one per entry along dim 0)
tensor_labels = torch.randn((3,), dtype=torch.float64, generator=torch_rng)

In [4]:
# wrap the dummy tensor in the wrapper's forecasting data set, passing three split ratios
# NOTE(review): 0.7 / 0.2 / 0.1 are presumably train / validation / test fractions -- confirm
rust_fore = RustForecastingDataSet(tensor_data, 0.7, 0.2, 0.1)

In [5]:
# show the concrete class of the object created in the previous cell
type(rust_fore)

wrapper.RustForecastingDataSet

In [6]:
# run the pipeline steps in order on the wrapped forecasting data set;
# unlike the rust_time_series cells earlier in this notebook, standardize() is not called here
rust_fore.impute(ImputeStrategy.Median)
rust_fore.downsample(2)
rust_fore.split()
rust_fore.normalize()

In [7]:
# gather the processed data from the wrapper
# NOTE(review): the meaning of the collect(3, 1, 1) arguments is not visible in
# this notebook -- confirm against the wrapper's signature
rust_fore_res = rust_fore.collect(3, 1, 1)

# display the collected result as the cell output
rust_fore_res

((tensor([[[0.5815, 0.2524, 0.2538,  ..., 0.3473, 0.4392, 0.7835],
           [0.7636, 0.3716, 0.4083,  ..., 0.5176, 0.2485, 0.9809],
           [0.6080, 0.7373, 0.4484,  ..., 0.5704, 0.3782, 0.3013]],
  
          [[0.7636, 0.3716, 0.4083,  ..., 0.5176, 0.2485, 0.9809],
           [0.6080, 0.7373, 0.4484,  ..., 0.5704, 0.3782, 0.3013],
           [0.4601, 0.3735, 0.4187,  ..., 0.7289, 0.1683, 0.5308]],
  
          [[0.6080, 0.7373, 0.4484,  ..., 0.5704, 0.3782, 0.3013],
           [0.4601, 0.3735, 0.4187,  ..., 0.7289, 0.1683, 0.5308],
           [0.4438, 0.7759, 0.3962,  ..., 0.8514, 0.3747, 0.6854]],
  
          ...,
  
          [[0.7412, 0.5721, 0.2523,  ..., 0.2948, 0.5334, 0.1132],
           [0.5734, 0.9257, 0.1954,  ..., 0.2758, 0.3304, 0.5983],
           [0.7338, 0.5066, 0.3112,  ..., 0.6415, 0.4491, 0.6060]],
  
          [[0.5734, 0.9257, 0.1954,  ..., 0.2758, 0.3304, 0.5983],
           [0.7338, 0.5066, 0.3112,  ..., 0.6415, 0.4491, 0.6060],
           [0.5961, 0.6157, 

In [8]:
# wrap the dummy tensor and dummy labels in the wrapper's classification data set
# NOTE(review): 0.7 / 0.2 / 0.1 are presumably train / validation / test fractions -- confirm
rust_clas = RustClassificationDataSet(tensor_data, tensor_labels, 0.7, 0.2, 0.1)

In [9]:
# run the pipeline steps in order on the wrapped classification data set,
# using the Random splitting strategy; standardize() is not called in this cell
rust_clas.impute(ImputeStrategy.Median)
rust_clas.downsample(2)
rust_clas.split(SplittingStrategy.Random)
rust_clas.normalize()

In [10]:
# gather the processed data from the wrapper (collect() takes no arguments here,
# unlike the forecasting variant above)
rust_clas_res = rust_clas.collect()

# display the collected result as the cell output
rust_clas_res

((tensor([[[0.6835, 0.2116, 0.4904,  ..., 0.6165, 0.1860, 0.3928],
           [0.2630, 0.4720, 0.3909,  ..., 0.6090, 0.5507, 0.0767],
           [0.5961, 0.5935, 0.5257,  ..., 0.6855, 0.4557, 0.3544],
           ...,
           [0.6205, 0.7592, 0.3010,  ..., 0.6035, 0.2413, 0.3138],
           [0.5528, 0.3077, 0.5709,  ..., 0.7275, 0.7158, 0.3717],
           [0.2076, 0.2047, 0.2312,  ..., 0.7774, 0.6284, 0.0493]],
  
          [[0.7344, 0.3073, 0.4169,  ..., 0.4964, 0.2487, 0.4125],
           [0.3664, 0.3789, 0.4888,  ..., 0.8642, 0.4647, 0.3503],
           [0.4143, 0.4158, 0.6197,  ..., 0.8521, 0.4962, 0.4042],
           ...,
           [0.2225, 0.5628, 0.8225,  ..., 0.7646, 0.4445, 0.0929],
           [0.0000, 0.6327, 0.6488,  ..., 0.3704, 0.2811, 0.5654],
           [0.3407, 0.4726, 0.0000,  ..., 0.8676, 0.2192, 0.2108]]],
         dtype=torch.float64),
  tensor([-0.3383, -0.6479], dtype=torch.float64)),
 (tensor([[[0.5621, 0.2524, 0.3472,  ..., 0.3936, 0.4596, 0.7835],
        

## Usage of RustDataModule


In [None]:
from lightning_integration import RustDataModule, DatasetType

Rust Time Series Wrapper Loaded


In [None]:
# build a RustDataModule from `data`, the array loaded in the "Forecasting Dataset" section
# NOTE(review): the meaning of the trailing 3, 3, 3 arguments is not visible in
# this notebook -- confirm against RustDataModule's signature
rust_dm = RustDataModule(data, DatasetType.Forecasting, 3, 3, 3)

In [None]:
# set up the data module before requesting a dataloader from it
# NOTE(review): an empty string is passed as the only argument; presumably
# RustDataModule does not depend on its value -- confirm
rust_dm.setup("")

In [None]:
# request the test dataloader from the data module
test_dl = rust_dm.test_dataloader()

# display the dataloader object as the cell output
test_dl

<torch.utils.data.dataloader.DataLoader at 0x227cc8efbf0>

In [None]:
# test whether the dataloader works: print the first batch only, then stop iterating
for batch in test_dl:
    print(batch)
    break

[tensor([[[1.6497e+01, 3.4851e+01, 1.7376e+00,  ..., 1.9199e+02,
          1.0843e+03, 1.6865e+04],
         [1.6497e+01, 3.2717e+01, 1.7376e+00,  ..., 1.8698e+02,
          1.0902e+03, 1.8324e+04],
         [1.6497e+01, 3.4851e+01, 1.7376e+00,  ..., 1.8865e+02,
          1.0894e+03, 1.7784e+04]],

        [[1.6497e+01, 3.3428e+01, 1.7376e+00,  ..., 1.9032e+02,
          1.0748e+03, 1.7892e+04],
         [1.5228e+01, 3.2006e+01, 1.7376e+00,  ..., 1.8197e+02,
          1.0506e+03, 1.5568e+04],
         [1.6497e+01, 3.2006e+01, 1.7376e+00,  ..., 1.7529e+02,
          1.0139e+03, 1.4703e+04]],

        [[1.6497e+01, 3.4851e+01, 1.7376e+00,  ..., 1.7529e+02,
          1.0103e+03, 1.4595e+04],
         [1.7766e+01, 3.3428e+01, 1.7376e+00,  ..., 1.6861e+02,
          9.8534e+02, 1.4270e+04],
         [1.6497e+01, 3.2717e+01, 1.7376e+00,  ..., 1.6361e+02,
          9.8387e+02, 1.5135e+04]],

        ...,

        [[1.6497e+01, 3.9118e+01, 1.7376e+00,  ..., 2.5376e+02,
          1.1518e+03, 2.