## Usage test of the wrapper


In [1]:
# imports
import pandas as pd
import numpy as np

import logging

# from wrapper import RustDataModule
from rust_time_series.rust_time_series import (
    BaseDataSet,
    DatasetType,
    ImputeStrategy,
    SplittingStrategy,
)

In [2]:
# set up logging
logging.basicConfig(level=logging.INFO)

In [3]:
# create dummy 2D array
dummy = np.random.randint(0, 100, (100, 10)).astype(float)

# Create a RustTimeSeries instance
dummy_ts = BaseDataSet(dummy, 3, 3, 1)
dummy_ts.set_to_100()


dummy

array([[100.,   3.,   2.,  34.,  53.,  95.,  88.,  59.,  63.,  36.],
       [ 70.,  24.,  50.,  27.,  84.,  66.,  26.,   0.,  86.,  17.],
       [ 63.,  49.,  21.,   9.,  13.,  86.,  13.,  80.,  85.,  75.],
       [ 77.,  10.,  99.,  37.,  21.,  97.,   4.,  74.,  20.,  95.],
       [ 29.,  58.,  89.,  54.,  12.,  96.,  31.,  39.,  98.,  73.],
       [ 98.,  54.,  45.,  31.,  74.,   8.,  67.,  56.,  45.,  76.],
       [ 70.,  26.,  53.,  44.,  69.,  89.,  91.,  25.,   8.,  53.],
       [ 70.,  16.,  77.,  88.,  35.,  76.,  69.,  75.,  53.,  89.],
       [ 36.,  19.,   7.,   1.,  55.,  19.,  84.,  94.,  37.,  73.],
       [ 34.,  79.,  41.,  21.,  78.,  38.,  61.,  87.,  71.,  48.],
       [ 69.,  49.,  71.,  56.,  67.,  55.,  83.,  56.,  59.,   3.],
       [ 46.,  52.,  77.,  18.,  79.,  38.,  44.,  33.,  16.,  90.],
       [ 20.,  64.,   1.,  58.,  80.,  63.,  16.,  92.,  78.,   9.],
       [ 43.,  65.,  80.,  43.,  62.,  82.,  23.,  41.,  57.,  50.],
       [ 54.,  15.,  81.,  68.,  3

In [4]:
dummy_ts.get(1).sequence()

array([[70., 24., 50., 27., 84., 66., 26.,  0., 86., 17.],
       [63., 49., 21.,  9., 13., 86., 13., 80., 85., 75.],
       [77., 10., 99., 37., 21., 97.,  4., 74., 20., 95.]])

In [5]:
split = (0.7, 0.2, 0.1)
# confirm that split adds up to 1
# assert sum(split) == 1.0, "Split proportions must sum to 1."

(part1, part2, part3) = dummy_ts.split(SplittingStrategy.Temporal, *split)

print(len(part1), len(part2), len(part3))

70 20 10


### Dummy classification dataset

In [6]:
labels = np.ones((100, 1), dtype=np.float64)
labels = labels.astype(str)
labels_list = labels.flatten().tolist()

dummy_class = BaseDataSet.new_classification(
    dummy, labels_list
)

dummy_class.len()

100

In [7]:
split = (0.7, 0.2, 0.1)
# confirm that split adds up to 1
# assert sum(split) == 1.0, "Split proportions must sum to 1."

(part1, part2, part3) = dummy_class.split(SplittingStrategy.Random, *split)

print(len(part1), len(part2), len(part3))

70 20 10


### Forecasting Dataset

In [8]:
# Load the dataset
file_path = "LD2011_2014.txt"
df = pd.read_csv(file_path, sep=";", decimal=",")
# drop first column (date)
df = df.drop(columns=["date"])
# turn pandas DataFrame into numPy array
data = df.to_numpy(dtype=np.float64)

data

array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [2.53807107e+00, 2.06258890e+01, 1.73761946e+00, ...,
        1.53589316e+02, 6.70087977e+02, 6.86486486e+03],
       [1.26903553e+00, 2.13371266e+01, 1.73761946e+00, ...,
        1.46911519e+02, 6.46627566e+02, 6.54054054e+03],
       [2.53807107e+00, 1.99146515e+01, 1.73761946e+00, ...,
        1.31886477e+02, 6.73020528e+02, 7.13513514e+03]])

In [9]:
ts = BaseDataSet(data, 3, 3, 3)
ts.set_to_100()

print(type(data))
print(data.shape)

<class 'numpy.ndarray'>
(140256, 370)


In [10]:
(part1, part2, part3) = ts.split(SplittingStrategy.Temporal, 0.7, 0.2, 0.1)

len(part1), len(part2), len(part3)

(98179, 28051, 14026)

### Classification Dataset

In [11]:
from aeon.datasets import load_classification
X, y = load_classification("GunPoint")
X = X.reshape(X.shape[0], -1)

print(type(X))
print(X.shape)

<class 'numpy.ndarray'>
(200, 150)


In [12]:
ts = BaseDataSet.new_classification(X, y)

(part1, part2, part3) = ts.split(SplittingStrategy.Random, 0.7, 0.2, 0.1)

len(part1), len(part2), len(part3)

(140, 40, 20)

Usage of RustDataModule


In [13]:
from wrapper import RustDataModule

Rust Time Series Wrapper Loaded


In [14]:
rust_dm = RustDataModule(data, DatasetType.Forecasting, 3, 3, 3)

In [15]:
rust_dm.setup()

In [16]:
train_dl = rust_dm.train_dataloader()

train_dl

<torch.utils.data.dataloader.DataLoader at 0x1343506b0>

In [17]:
# test wether the dataloader works
for batch in train_dl:
    print(batch)
    print(batch.shape)
    break

tensor([[100.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        ...,
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.]], dtype=torch.float64)
torch.Size([32, 370])
