## Usage test of the wrapper


In [1]:
# imports
import pandas as pd
import numpy as np

import logging

# from wrapper import RustDataModule
from rust_time_series import (
    ForecastingDataSet,
    ClassificationDataSet,
    SplittingStrategy,
    ImputeStrategy,
)

In [2]:
# set up logging: configure the root logger at INFO level so that records of
# level INFO and above are emitted (basicConfig has no effect if the root
# logger already has handlers configured)
logging.basicConfig(level=logging.INFO)

In [2]:
# create dummy 3D array
# A locally seeded generator yields the same input values on every
# "Restart Kernel -> Run All", so the dummy input is reproducible.
rng = np.random.default_rng(42)
dummy_data = rng.integers(0, 100, (3, 100, 100)).astype(float)
# keep an untouched copy to detect in-place modification of the input later
dummy_copy = dummy_data.copy()

# Create a ForecastingDataSet instance
# (0.7, 0.2, 0.1 are presumably the train/validation/test proportions — TODO confirm)
forecasting_data_set = ForecastingDataSet(dummy_data, 0.7, 0.2, 0.1)

# call the pipeline methods
forecasting_data_set.impute(ImputeStrategy.Median)

# check whether the dummy data was changed
assert np.array_equal(
    dummy_data, dummy_copy
), "Dummy data should not be modified after imputation."

forecasting_data_set.downsample(2)
forecasting_data_set.split()
forecasting_data_set.normalize()
forecasting_data_set.standardize()
# NOTE(review): the meaning of the three collect() arguments is not visible here
# (presumably window lengths and stride) — confirm against the rust_time_series API
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)

forecasting_data_set_res

((array([[[ 0.62326374, -1.00657077,  1.61214395, ...,  0.06910641,
            0.16091969, -0.24703792],
          [-0.70516698,  1.3885729 , -0.74322904, ..., -0.33812778,
           -0.89968734, -0.44182377],
          [ 1.3573965 ,  0.00546176, -1.50526148, ..., -1.37472388,
            1.36781734, -0.89632407]],
  
         [[-0.70516698,  1.3885729 , -0.74322904, ..., -0.33812778,
           -0.89968734, -0.44182377],
          [ 1.3573965 ,  0.00546176, -1.50526148, ..., -1.37472388,
            1.36781734, -0.89632407],
          [ 0.86797466, -1.44511821, -1.22815877, ...,  0.99463864,
            1.22152671, -1.0261813 ]],
  
         [[ 1.3573965 ,  0.00546176, -1.50526148, ..., -1.37472388,
            1.36781734, -0.89632407],
          [ 0.86797466, -1.44511821, -1.22815877, ...,  0.99463864,
            1.22152671, -1.0261813 ],
          [ 1.63706613, -0.06200707, -0.67395336, ..., -1.70791548,
           -0.57053343,  1.47357038]],
  
         ...,
  
         [[-0.705

### Dummy classification dataset temporal split


In [3]:
# three labels, matching the length-3 first axis of dummy_data
dummy_labels = np.ones(3, dtype=np.float64)

# create a ClassificationDataSet instance
# (0.7, 0.2, 0.1 are presumably the train/validation/test proportions — TODO confirm)
dummy_classification = ClassificationDataSet(dummy_data, dummy_labels, 0.7, 0.2, 0.1)
# NOTE: this prints the shape of the raw input array `dummy_data`,
# not a shape obtained from the dataset object itself
print("shape of dummy_classification:", dummy_data.shape)

# call the pipeline methods
dummy_classification.impute(ImputeStrategy.BackwardFill)
dummy_classification.downsample(2)
# NOTE(review): the section heading announces a temporal split, but this cell
# uses SplittingStrategy.Random exactly like the following section — confirm
# which strategy is intended here.
dummy_classification.split(SplittingStrategy.Random)
dummy_classification.normalize()
dummy_classification.standardize()
classification_data_set_res = dummy_classification.collect()

# shape of the first array of the first tuple in the result
# (presumably the training features — TODO confirm)
print(
    "Shape of the resulting classification dataset:",
    classification_data_set_res[0][0].shape,
)

classification_data_set_res

shape of dummy_classification: (3, 100, 100)
Shape of the resulting classification dataset: (2, 50, 100)


((array([[[-1.31511571,  0.61219657, -0.50493236, ...,  1.24519065,
            1.28818822,  0.58891602],
          [-1.51687214, -0.70473791,  0.46858761, ..., -0.239831  ,
            0.10800468,  0.26336819],
          [-0.10457709, -1.38100156,  1.05269959, ..., -0.49970978,
            1.68158273, -0.61561094],
          ...,
          [-1.18061142,  0.54101092,  1.53945958, ..., -1.98473142,
            0.50139919, -1.23415182],
          [ 1.13958761, -0.52677379, -1.08904434, ...,  0.72543307,
           -0.21386356, -0.84349442],
          [-1.04610712,  0.54101092,  1.44210758, ..., -0.01707775,
            0.0364784 , -1.46203529]],
  
         [[ 0.70244866, -1.13185179,  1.44210758, ..., -0.12845437,
            0.17953095, -0.22495355],
          [-0.57534211,  1.39523869, -0.76453768, ..., -0.53683532,
           -0.85760004, -0.42028225],
          [ 1.40859619, -0.06406708, -1.47845233, ..., -1.57635047,
            1.35971449, -0.8760492 ],
          ...,
          [ 

### Dummy Classification Data Set Random Split


In [5]:
# create a ClassificationDataSet instance
# (reuses dummy_data and dummy_labels defined in earlier cells;
# 0.7, 0.2, 0.1 are presumably the train/validation/test proportions — TODO confirm)
dummy_classification = ClassificationDataSet(dummy_data, dummy_labels, 0.7, 0.2, 0.1)

# call the pipeline methods
dummy_classification.impute(ImputeStrategy.ForwardFill)
dummy_classification.downsample(2)
dummy_classification.split(SplittingStrategy.Random)
dummy_classification.normalize()
dummy_classification.standardize()
classification_data_set_res = dummy_classification.collect()

# display the collected result as the cell output
classification_data_set_res

((array([[[-1.06115728, -0.95444716,  0.17217131, ...,  1.20259622,
            0.84831281, -1.0111749 ],
          [ 0.27615498, -1.34372854, -0.03652119, ...,  1.30184762,
            0.24405359,  0.07535619],
          [ 1.34600479,  0.95657051, -0.45390618, ..., -1.57644318,
            0.61332312, -0.02979198],
          ...,
          [ 0.84451269,  1.77052248,  0.72868463, ..., -0.61701291,
           -0.66233525,  1.0567391 ],
          [ 1.51316882, -0.49438735,  1.28519796, ..., -1.44410798,
            1.486142  , -0.62563161],
          [ 1.37943759,  0.53189992,  0.55477422, ..., -1.37794037,
           -1.40087431, -1.43176757]],
  
         [[-0.89399324, -1.44989618, -1.18432992, ..., -1.64261079,
           -0.19235585,  0.21555375],
          [ 1.04510953,  0.39034305, -1.67127907, ..., -1.64261079,
            0.04263385, -0.62563161],
          [ 0.81107988,  1.16890581, -0.87129117, ...,  0.9379258 ,
            0.74760294,  0.21555375],
          ...,
          [-

### Forecasting Dataset


In [12]:
# Load the dataset
# NOTE(review): the path is relative to the notebook's working directory; the
# cell raises FileNotFoundError when the file is not present at that location.
file_path = "../../data/LD2011_2014.txt"
# the file is parsed as semicolon-separated with a comma as decimal mark
df = pd.read_csv(file_path, sep=";", decimal=",")
# drop first column (date)
df = df.drop(columns=["date"])
# turn pandas DataFrame into numPy array
data = df.to_numpy(dtype=np.float64)

# add a leading axis of length 1, turning the 2D array into a 3D array
data = np.expand_dims(data, axis=0)

data

FileNotFoundError: [Errno 2] No such file or directory: '../../data/LD2011_2014.txt'

In [None]:
# Create a ForecastingDataSet instance
# (uses `data` from the loading cell above, so that cell must have succeeded;
# 0.7, 0.2, 0.1 are presumably the train/validation/test proportions — TODO confirm)
forecasting_data_set = ForecastingDataSet(data, 0.7, 0.2, 0.1)

# call the pipeline methods
# NOTE(review): LeaveNaN presumably leaves missing values untouched — confirm
# that the later normalize/standardize steps handle NaN as intended
forecasting_data_set.impute(ImputeStrategy.LeaveNaN)
forecasting_data_set.downsample(2)
forecasting_data_set.split()
forecasting_data_set.normalize()
forecasting_data_set.standardize()
forecasting_data_set_res = forecasting_data_set.collect(3, 1, 1)

forecasting_data_set_res

((array([[[-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632]],
  
         [[-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632]],
  
         [[-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632],
          [-0.6175737 , -1.23521919, -0.26182084, ..., -1.00726487,
           -1.29334291, -0.60677632]],
  
         ...,
  
         [[-0.399

### Classification Dataset


In [None]:
from aeon.datasets import load_classification


# load the "GunPoint" dataset as a feature array X and a label array y
X, y = load_classification("GunPoint")

# convert y value to float64
y = y.astype(np.float64)

# inspect the container types of features and labels
print(type(X))
print(type(y))


# inspect the shapes, then display the label array as the cell output
print(X.shape)
print(y.shape)
y

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(200, 1, 150)
(200,)


array([2., 2., 1., 1., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 2., 1., 2.,
       2., 1., 2., 1., 1., 1., 2., 1., 2., 1., 1., 2., 1., 1., 2., 2., 1.,
       2., 1., 2., 2., 2., 2., 2., 1., 1., 1., 2., 2., 1., 2., 1., 2., 1.,
       2., 2., 1., 1., 2., 1., 2., 2., 1., 1., 1., 2., 1., 1., 1., 1., 2.,
       2., 2., 1., 2., 1., 1., 1., 2., 1., 1., 2., 1., 1., 2., 2., 1., 1.,
       1., 1., 1., 1., 2., 1., 2., 1., 1., 2., 1., 2., 2., 1., 2., 2., 2.,
       1., 2., 1., 1., 2., 2., 1., 2., 1., 2., 1., 1., 2., 1., 1., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 2., 2., 2., 2.,
       1., 2., 1., 1., 1., 2., 1., 2., 1., 1., 2., 2., 1., 1., 2., 1., 1.,
       2., 2., 1., 1., 2., 1., 2., 2., 2., 1., 2., 1., 1., 2., 1., 1., 2.,
       1., 2., 2., 2., 1., 2., 1., 1., 2., 2., 2., 2., 1., 1., 1., 2., 1.,
       2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 2., 2., 1.])

In [None]:
# build a classification dataset from the GunPoint arrays loaded above
# (0.7, 0.2, 0.1 are presumably the train/validation/test proportions — TODO confirm)
classification_data_set = ClassificationDataSet(X, y, 0.7, 0.2, 0.1)

# run the pipeline steps in the same order as in the dummy examples
classification_data_set.impute(ImputeStrategy.Median)
classification_data_set.downsample(2)
classification_data_set.split(SplittingStrategy.Random)
classification_data_set.normalize()
classification_data_set.standardize()
classification_data_set_res = classification_data_set.collect()

# display the collected result as the cell output
classification_data_set_res

((array([[[ 0.67374004,  0.65732693,  0.64427143, ...,  0.70209976,
            0.70730792,  0.7037743 ]],
  
         [[-0.35723327, -0.35045691, -0.35095462, ..., -0.3793326 ,
           -0.35819962, -0.34366399]],
  
         [[ 0.78641614,  0.81357108,  0.81044308, ...,  0.35455497,
            0.35855005,  0.37435543]],
  
         ...,
  
         [[ 0.62504498,  0.63453206,  0.63087104, ...,  0.64147779,
            0.63454247,  0.62279542]],
  
         [[ 0.72483646,  0.71110738,  0.70763967, ...,  0.58269432,
            0.59514201,  0.60190158]],
  
         [[-0.7008857 , -0.67309694, -0.687581  , ..., -1.0840553 ,
           -1.00790783, -0.96220454]]], shape=(140, 1, 150)),
  array([1., 2., 1., 1., 2., 2., 1., 1., 2., 2., 2., 1., 1., 2., 1., 2., 2.,
         2., 2., 2., 1., 1., 1., 2., 1., 2., 2., 2., 1., 2., 1., 1., 1., 1.,
         1., 1., 2., 2., 1., 2., 1., 2., 2., 1., 2., 2., 1., 2., 1., 2., 2.,
         2., 1., 2., 1., 2., 2., 1., 2., 1., 1., 1., 2., 1., 1., 1., 1.,

## Usage of Wrapper


In [4]:
import sys
from pathlib import Path

# Put the parent of the current working directory on the module search path
# before importing `wrapper` (presumably that is where the module lives — confirm).
# Note that every re-run of this cell appends the same entry again.
parent_dir = Path.cwd().parent
sys.path.append(str(parent_dir))

from wrapper import RustClassificationDataSet, RustForecastingDataSet
import torch

In [10]:
# create dummy torch tensor
tensor_data = torch.randn((3, 100, 100), dtype=torch.float64)
tensor_labels = torch.randn((3,), dtype=torch.float64)

In [11]:
# wrap the dummy tensor in the forecasting wrapper
# (0.7, 0.2, 0.1 are presumably the train/validation/test proportions — TODO confirm)
rust_fore = RustForecastingDataSet(tensor_data, 0.7, 0.2, 0.1)

In [12]:
# show the concrete class of the wrapper object
type(rust_fore)

wrapper.RustForecastingDataSet

In [13]:
# run the pipeline steps on the wrapper; unlike the earlier pipelines in this
# notebook, standardize() is not called here
rust_fore.impute(ImputeStrategy.Median)
rust_fore.downsample(2)
rust_fore.split()
rust_fore.normalize()

In [14]:
# collect the processed result (the recorded output shows torch tensors)
# NOTE(review): the meaning of the three arguments is not visible here
# (presumably window lengths and stride) — confirm against the wrapper API
rust_fore_res = rust_fore.collect(3, 1, 1)

rust_fore_res

((tensor([[[0.4534, 0.7416, 0.6058,  ..., 0.3893, 0.4502, 0.0598],
           [0.5160, 0.6266, 0.7421,  ..., 0.5722, 0.1830, 0.9197],
           [0.5731, 0.2444, 0.4230,  ..., 0.6948, 0.4355, 0.3198]],
  
          [[0.5160, 0.6266, 0.7421,  ..., 0.5722, 0.1830, 0.9197],
           [0.5731, 0.2444, 0.4230,  ..., 0.6948, 0.4355, 0.3198],
           [0.4057, 0.4317, 0.4028,  ..., 0.6071, 0.1018, 0.5767]],
  
          [[0.5731, 0.2444, 0.4230,  ..., 0.6948, 0.4355, 0.3198],
           [0.4057, 0.4317, 0.4028,  ..., 0.6071, 0.1018, 0.5767],
           [0.7422, 1.0000, 0.4436,  ..., 0.0000, 0.0828, 0.7082]],
  
          ...,
  
          [[0.5345, 0.6620, 0.1798,  ..., 0.7291, 0.4729, 0.4240],
           [1.0000, 0.2465, 0.3322,  ..., 0.8028, 0.5810, 0.4230],
           [0.4946, 0.7491, 0.1843,  ..., 0.4950, 0.7287, 0.8798]],
  
          [[1.0000, 0.2465, 0.3322,  ..., 0.8028, 0.5810, 0.4230],
           [0.4946, 0.7491, 0.1843,  ..., 0.4950, 0.7287, 0.8798],
           [0.5692, 0.2307, 

In [15]:
# wrap the dummy tensor and its labels in the classification wrapper
# (0.7, 0.2, 0.1 are presumably the train/validation/test proportions — TODO confirm)
rust_clas = RustClassificationDataSet(tensor_data, tensor_labels, 0.7, 0.2, 0.1)

In [16]:
# run the pipeline steps on the wrapper; unlike the earlier pipelines in this
# notebook, standardize() is not called here
rust_clas.impute(ImputeStrategy.Median)
rust_clas.downsample(2)
rust_clas.split(SplittingStrategy.Random)
rust_clas.normalize()

In [17]:
# collect the processed result (the recorded output shows tuples of torch tensors)
rust_clas_res = rust_clas.collect()

rust_clas_res

((tensor([[[0.5196, 0.7701, 0.6449,  ..., 0.4913, 0.4318, 0.3817],
           [0.5563, 0.3735, 0.5780,  ..., 0.7313, 0.3797, 0.3889],
           [0.8091, 0.3472, 0.5268,  ..., 1.0000, 0.7256, 0.4853],
           ...,
           [0.3981, 0.7257, 0.5767,  ..., 0.5456, 0.6594, 0.4433],
           [0.4779, 0.8146, 0.3556,  ..., 0.5295, 0.4765, 0.3012],
           [0.5751, 0.8742, 0.5578,  ..., 0.4975, 0.2308, 0.1325]],
  
          [[0.4534, 0.7416, 0.5438,  ..., 0.3893, 0.4475, 0.0490],
           [0.5160, 0.6266, 0.6676,  ..., 0.5722, 0.1819, 0.7525],
           [0.5731, 0.2444, 0.3778,  ..., 0.6948, 0.4329, 0.2617],
           ...,
           [0.3727, 0.6848, 0.3332,  ..., 0.3547, 0.4446, 0.3155],
           [0.6174, 0.6931, 0.4909,  ..., 0.2205, 0.6840, 0.2157],
           [0.6985, 0.4870, 0.0658,  ..., 0.2083, 0.6422, 0.4177]]],
         dtype=torch.float64),
  tensor([-0.1442, -0.3140], dtype=torch.float64)),
 (tensor([[[0.7441, 0.3607, 0.6395,  ..., 0.3772, 0.8611, 0.4436],
        

## Usage of RustDataModule


In [5]:
from lightning_integration import RustDataModule, DatasetType

Rust Time Series Wrapper Loaded


In [7]:
# build a data module for forecasting from the NumPy dummy array created earlier
# NOTE(review): the meaning of the three trailing 3s is not visible here — confirm
# against the RustDataModule signature
rust_dm = RustDataModule(dummy_data, DatasetType.Forecasting, 3, 3, 3)

In [8]:
# prepare the data module; an empty string is passed as the argument
# (presumably the stage name, which this module may ignore — TODO confirm)
rust_dm.setup("")

In [9]:
# obtain the test dataloader (the recorded output shows a torch DataLoader)
test_dl = rust_dm.test_dataloader()

test_dl

<torch.utils.data.dataloader.DataLoader at 0x28a6acc4590>

In [10]:
# test whether the dataloader works: print the first batch only, then stop
for batch in test_dl:
    print(batch)
    break

[tensor([[[10., 52., 46.,  ..., 27., 91., 11.],
         [67., 91., 54.,  ..., 37., 33.,  2.],
         [16., 46., 32.,  ..., 28., 26., 94.]],

        [[79., 21., 98.,  ..., 11., 86., 26.],
         [96., 48.,  5.,  ..., 18., 65., 65.],
         [73.,  2., 47.,  ...,  2., 84., 93.]],

        [[82., 66.,  9.,  ...,  2., 41., 76.],
         [64., 21., 27.,  ..., 14., 14., 81.],
         [76., 22., 94.,  ..., 75., 14., 30.]],

        [[47., 49., 74.,  ..., 32., 81.,  9.],
         [ 4., 26., 55.,  ..., 90.,  2., 63.],
         [ 5., 34., 39.,  ..., 36., 83., 67.]],

        [[41., 57., 94.,  ..., 55., 93., 85.],
         [86., 61., 26.,  ..., 39., 42., 87.],
         [ 7., 29., 32.,  ..., 88., 21., 73.]],

        [[24., 47., 92.,  ..., 20., 40., 17.],
         [12., 67., 96.,  ...,  1., 65., 12.],
         [82., 29.,  6.,  ..., 71., 51., 51.]]]), tensor([[[79., 21., 98.,  ..., 11., 86., 26.],
         [96., 48.,  5.,  ..., 18., 65., 65.],
         [73.,  2., 47.,  ...,  2., 84., 93.]]