## Usage test of the wrapper


In [1]:
# imports
import pandas as pd
import numpy as np

import logging

# from wrapper import RustDataModule
from rust_time_series.rust_time_series import (
    RustTimeSeries,
    DatasetType,
    ImputeStrategy,
    SplittingStrategy,
)

In [2]:
# Set up logging: configure the root logger via basicConfig with level INFO,
# so INFO-and-above records are emitted while the notebook runs.
logging.basicConfig(level=logging.INFO)

In [3]:
# Create a dummy 2D array: 100 rows x 10 columns of float64 ones.
dummy = np.ones((100, 10), dtype=np.float64)

# Create a RustTimeSeries instance over the dummy array (forecasting dataset type).
dummy_ts = RustTimeSeries(dummy, DatasetType.Forecasting)
# set_to_100() writes 100 into element [0, 0]; the change is visible through
# `dummy` itself (see the displayed array), i.e. the NumPy array is modified
# in place rather than a private copy.
dummy_ts.set_to_100()

# Check the in-place write explicitly instead of relying on reading the output.
assert dummy[0, 0] == 100.0, "set_to_100() did not modify the NumPy array in place"


dummy

array([[100.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.],
       [  1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.],
       [  1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.],
       [  1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.],
       [  1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.],
       [  1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.],
       [  1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.],
       [  1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.],
       [  1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.],
       [  1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.],
       [  1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.],
       [  1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.],
       [  1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.],
       [  1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.],
       [  1.,   1.,   1.,   1.,   

In [4]:
# Train / validation / test proportions passed to the temporal split.
split = (0.7, 0.2, 0.1)
# Confirm that the proportions add up to 1. Compare with a tolerance rather
# than `== 1.0`: in binary floating point 0.7 + 0.2 + 0.1 can evaluate to
# 0.9999999999999999, which would make an exact comparison fail spuriously.
assert np.isclose(sum(split), 1.0), "Split proportions must sum to 1."

(part1, part2, part3) = dummy_ts.split(SplittingStrategy.Temporal, *split)

# The three parts together must account for every row of the dummy array.
assert len(part1) + len(part2) + len(part3) == len(dummy), "Split lost or duplicated rows."

print(len(part1), len(part2), len(part3))

70 20 10


In [5]:
# Location of the raw dataset file, relative to this notebook.
file_path = "../../data/LD2011_2014.txt"
# Parse the file (";" as field separator, "," as decimal mark) and discard
# the "date" column in a single chained expression.
df = pd.read_csv(file_path, sep=";", decimal=",").drop(columns=["date"])
# Convert the remaining columns of the DataFrame into a float64 NumPy array.
data = df.to_numpy(dtype=np.float64)

data

array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [2.53807107e+00, 2.06258890e+01, 1.73761946e+00, ...,
        1.53589316e+02, 6.70087977e+02, 6.86486486e+03],
       [1.26903553e+00, 2.13371266e+01, 1.73761946e+00, ...,
        1.46911519e+02, 6.46627566e+02, 6.54054054e+03],
       [2.53807107e+00, 1.99146515e+01, 1.73761946e+00, ...,
        1.31886477e+02, 6.73020528e+02, 7.13513514e+03]],
      shape=(140256, 370))

In [6]:
# Wrap the full dataset array in a RustTimeSeries (forecasting dataset type).
ts = RustTimeSeries(data, DatasetType.Forecasting)
# As with the dummy array, set_to_100() writes 100 into element [0, 0] and the
# change shows up in `data` itself: the first entry was 0 when the file was
# loaded and is 100 in the array displayed by this cell.
ts.set_to_100()

# Make the in-place write an explicit check rather than a visual one.
assert data[0, 0] == 100.0, "set_to_100() did not modify the NumPy array in place"

data

array([[1.00000000e+02, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [2.53807107e+00, 2.06258890e+01, 1.73761946e+00, ...,
        1.53589316e+02, 6.70087977e+02, 6.86486486e+03],
       [1.26903553e+00, 2.13371266e+01, 1.73761946e+00, ...,
        1.46911519e+02, 6.46627566e+02, 6.54054054e+03],
       [2.53807107e+00, 1.99146515e+01, 1.73761946e+00, ...,
        1.31886477e+02, 6.73020528e+02, 7.13513514e+03]],
      shape=(140256, 370))

In [7]:
# Split the full dataset temporally, reusing the `split` proportions
# (0.7, 0.2, 0.1) defined for the dummy example so that both splits stay in
# sync instead of repeating the literals here.
(part1, part2, part3) = ts.split(SplittingStrategy.Temporal, *split)

# The three parts together must account for every row of `data`.
assert len(part1) + len(part2) + len(part3) == len(data), "Split lost or duplicated rows."

len(part1), len(part2), len(part3)

(98179, 28051, 14026)

## Usage of RustDataModule


In [8]:
from wrapper import RustDataModule

In [9]:
# Build a RustDataModule over the full dataset array, using the forecasting
# dataset type. Note: `data` was modified in place by ts.set_to_100() in an
# earlier cell, so element [0, 0] is 100 here (it shows up in the first batch
# printed further down).
rust_dm = RustDataModule(data, DatasetType.Forecasting)

In [10]:
# Run the data module's setup step before requesting dataloaders.
# NOTE(review): presumably this is where the train/val/test split is
# prepared — confirm against the RustDataModule implementation in wrapper.
rust_dm.setup()

In [None]:
# Obtain the training dataloader from the data module and display its repr
# (the recorded output shows a torch.utils.data DataLoader object).
train_dl = rust_dm.train_dataloader()

train_dl

<torch.utils.data.dataloader.DataLoader at 0x1638411cf20>

In [13]:
# Test whether the dataloader works: fetch only the first batch, print the
# batch followed by its shape (one per line), then stop iterating.
for batch in train_dl:
    print(batch, batch.shape, sep="\n")
    break

tensor([[100.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        ...,
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.]], dtype=torch.float64)
torch.Size([32, 370])
