# Select Data

In [2]:
from spflows.configs_classes.forecasting_configs import ForecastingModelConfig
from spflows.data.datamodules import ForecastingDataModule

from gluonts.dataset.field_names import FieldName
from gluonts.transform import (
    Transformation,
    Chain,
    InstanceSplitter,
    ExpectedNumInstanceSampler,
    ValidationSplitSampler,
    TestSplitSampler,
    RenameFields,
    AsNumpyArray,
    ExpandDimArray,
    AddObservedValuesIndicator,
    AddTimeFeatures,
    VstackFeatures,
    SetFieldIfNotPresent,
    TargetDimIndicator,
)
from gluonts.dataset.multivariate_grouper import MultivariateGrouper
from gluonts.dataset.repository.datasets import get_dataset
from itertools import islice

  from .autonotebook import tqdm as notebook_tqdm


# Datasets

In [4]:
# Build the forecasting configuration. batch_size=19 is the value that shows
# up as the leading dimension of the batch tensors inspected later on.
config = ForecastingModelConfig(
    prefetch_factor=None,
    batch_size=19,
)

# Create the datamodule from that configuration and prepare it.
datamodule = ForecastingDataModule(config)
datamodule.setup()

# Fetch the datasets through the class-level helper; it also returns a config,
# which rebinds `config` here.
# NOTE(review): the (training, test, validation) unpacking order is taken as-is
# from the helper's return value — confirm against its implementation.
config, all_datasets = ForecastingDataModule.get_data_and_update_config(config)
training_data, test_data, validation_data = all_datasets

# GluonTS repository dataset, used below to inspect metadata and raw series.
dataset = get_dataset(config.dataset_str_name, regenerate=False)

In [7]:
# List the fields carried by the first entry of the training data.
training_data[0].keys()

dict_keys(['target', 'start', 'feat_static_cat'])

In [8]:
# Inspect the static categorical feature stored on the first training entry.
training_data[0]["feat_static_cat"]

array([0])

In [9]:
# Show the metadata attached to the repository dataset (frequency, feature
# descriptions, prediction length, ...).
dataset.metadata

MetaData(freq='h', target=None, feat_static_cat=[CategoricalFeatureInfo(name='feat_static_cat_0', cardinality='370')], feat_static_real=[], feat_dynamic_real=[], feat_dynamic_cat=[], prediction_length=24)

In [10]:
# Print the target shape of the first raw training series only.
for first_entry in islice(dataset.train, 1):
    print(first_entry["target"].shape)


(5521,)


# Study Transforms

In [11]:
from pts.dataset.loader import TransformedIterableDataset

In [12]:
# Per-entry preprocessing pipeline; the steps run in the order listed and the
# leading number on each comment is the step's position in the chain.
training_transforms = Chain(
    [
        # 0: turn the target into a NumPy array that must be 2-dimensional.
        AsNumpyArray(field=FieldName.TARGET, expected_ndim=2),
        # 1: intended to map a one-dimensional target to (1, T).
        # NOTE(review): with axis=None this step may leave the array
        # unchanged — confirm against the GluonTS ExpandDimArray docs.
        ExpandDimArray(field=FieldName.TARGET, axis=None),
        # 2: derive an observed-values indicator from the target.
        AddObservedValuesIndicator(
            target_field=FieldName.TARGET,
            output_field=FieldName.OBSERVED_VALUES,
        ),
        # 3: compute the configured time features from the start field,
        # passing the configured prediction length as pred_length.
        AddTimeFeatures(
            start_field=FieldName.START,
            target_field=FieldName.TARGET,
            output_field=FieldName.FEAT_TIME,
            time_features=config.time_features,
            pred_length=config.prediction_length,
        ),
        # 4: stack the time features; FEAT_TIME is both the only input
        # field and the output field.
        VstackFeatures(
            input_fields=[FieldName.FEAT_TIME],
            output_field=FieldName.FEAT_TIME,
        ),
        # 5: fall back to [0] when an entry has no static categorical feature.
        SetFieldIfNotPresent(field=FieldName.FEAT_STATIC_CAT, value=[0]),
        # 6: add the "target_dimension_indicator" field based on the target.
        TargetDimIndicator(
            field_name="target_dimension_indicator",
            target_field=FieldName.TARGET,
        ),
        # 7: turn the static categorical feature into a 1-dimensional array.
        AsNumpyArray(field=FieldName.FEAT_STATIC_CAT, expected_ndim=1),
    ]
)

In [32]:
# Ask the datamodule for the instance splitter it builds for the "training"
# mode; it is appended to the preprocessing chain in the next cell.
transformations_2 = datamodule.create_instance_splitter("training")

In [21]:
# Wrap the training data in an iterable dataset whose transform is the
# preprocessing chain followed by the training instance splitter.
training_iter_dataset = TransformedIterableDataset(
    dataset=training_data,
    transform=training_transforms + transformations_2,
    is_train=True,
    shuffle_buffer_length=config.shuffle_buffer_length,
    cache_data=config.cache_data,
)

In [28]:
# Pull one transformed training instance and list the fields whose name
# contains "feat".
databatch = next(iter(training_iter_dataset))
[field for field in databatch.keys() if "feat" in field]

['feat_static_cat', 'past_time_feat', 'future_time_feat']

In [31]:
# Static categorical feature of the sampled instance after the transforms.
databatch["feat_static_cat"]

array([0.], dtype=float32)

# Dataloaders

In [30]:
# Fetch a training batch from the datamodule. This rebinds `databatch`, which
# previously held the single instance drawn from training_iter_dataset.
databatch = datamodule.get_train_databatch()

In [32]:
# Fields present in the batch returned by the datamodule.
databatch.keys()

dict_keys(['target_dimension_indicator', 'past_time_feat', 'past_target_cdf', 'past_observed_values', 'past_is_pad', 'future_time_feat', 'future_target_cdf', 'future_observed_values'])

In [34]:
# Shape of the past-target tensor in the batch.
databatch["past_target_cdf"].shape

torch.Size([19, 192, 370])

In [58]:
# Shape of the future observed-values tensor in the batch.
databatch["future_observed_values"].shape

torch.Size([19, 24, 370])

In [25]:
# Shape of the target-dimension indicator in the datamodule batch.
# Reads from `databatch` (bound by datamodule.get_train_databatch()); the
# former `data_batch` name is never assigned in the visible notebook, so the
# cell only worked through leftover kernel state and fails on a fresh run.
databatch["target_dimension_indicator"].shape

torch.Size([19, 370])

In [60]:
# Prediction length held by the config; the value shown below (24) equals the
# second dimension of the future_observed_values shape printed earlier.
config.prediction_length

24