In [1]:
# transformer-decoder with no Seq2Seq component (autoregressive)

# no value embedding
# sine-cosine positional encoding on the hour, day, and month of timestamp
# modified transformer-encoder layer for masked self-attention

# conduct an architecture test similar to the one on the transformer-encoder in
# deep_transformer_model_for_tsf_XX.ipynb

In [1]:
import numpy as np
import pandas as pd

In [2]:
import tensorflow as tf

In [3]:
print(tf.__version__)

2.4.1


In [5]:
# required for TFA MultiHeadAttention
import typing
import warnings

In [6]:
# MHA class from TensorFlow AddOns source
# it is compatible with TF 1.15 for CloudTPU usage

In [7]:
class MultiHeadAttention(tf.keras.layers.Layer):
    r"""MultiHead Attention layer.
    Defines the MultiHead Attention operation as described in
    [Attention Is All You Need](https://arxiv.org/abs/1706.03762) which takes
    in the tensors `query`, `key`, and `value`, and returns the dot-product attention
    between them:
    >>> mha = MultiHeadAttention(head_size=128, num_heads=12)
    >>> query = np.random.rand(3, 5, 4) # (batch_size, query_elements, query_depth)
    >>> key = np.random.rand(3, 6, 5) # (batch_size, key_elements, key_depth)
    >>> value = np.random.rand(3, 6, 6) # (batch_size, key_elements, value_depth)
    >>> attention = mha([query, key, value]) # (batch_size, query_elements, value_depth)
    >>> attention.shape
    TensorShape([3, 5, 6])
    If `value` is not given then internally `value = key` will be used:
    >>> mha = MultiHeadAttention(head_size=128, num_heads=12)
    >>> query = np.random.rand(3, 5, 5) # (batch_size, query_elements, query_depth)
    >>> key = np.random.rand(3, 6, 10) # (batch_size, key_elements, key_depth)
    >>> attention = mha([query, key]) # (batch_size, query_elements, key_depth)
    >>> attention.shape
    TensorShape([3, 5, 10])
    Args:
        head_size: int, dimensionality of the `query`, `key` and `value` tensors
            after the linear transformation.
        num_heads: int, number of attention heads.
        output_size: int, dimensionality of the output space, if `None` then the
            input dimension of `value` or `key` will be used,
            default `None`.
        dropout: float, `rate` parameter for the dropout layer that is
            applied to attention after softmax,
        default `0`.
        use_projection_bias: bool, whether to use a bias term after the linear
            output projection.
        return_attn_coef: bool, if `True`, return the attention coefficients as
            an additional output argument.
        kernel_initializer: initializer, initializer for the kernel weights.
        kernel_regularizer: regularizer, regularizer for the kernel weights.
        kernel_constraint: constraint, constraint for the kernel weights.
        bias_initializer: initializer, initializer for the bias weights.
        bias_regularizer: regularizer, regularizer for the bias weights.
        bias_constraint: constraint, constraint for the bias weights.
    Call Args:
        inputs:  List of `[query, key, value]` where
            * `query`: Tensor of shape `(..., query_elements, query_depth)`
            * `key`: `Tensor of shape '(..., key_elements, key_depth)`
            * `value`: Tensor of shape `(..., key_elements, value_depth)`, optional, if not given `key` will be used.
        mask: a binary Tensor of shape `[batch_size?, num_heads?, query_elements, key_elements]`
        which specifies which query elements can attend to which key elements,
        `1` indicates attention and `0` indicates no attention.
    Output shape:
        * `(..., query_elements, output_size)` if `output_size` is given, else
        * `(..., query_elements, value_depth)` if `value` is given, else
        * `(..., query_elements, key_depth)`
    """

    def __init__(
        self,
        head_size: int,
        num_heads: int,
        output_size: int = None,
        dropout: float = 0.0,
        use_projection_bias: bool = True,
        return_attn_coef: bool = False,
        kernel_initializer: typing.Union[str, typing.Callable] = "glorot_uniform",
        kernel_regularizer: typing.Union[str, typing.Callable] = None,
        kernel_constraint: typing.Union[str, typing.Callable] = None,
        bias_initializer: typing.Union[str, typing.Callable] = "zeros",
        bias_regularizer: typing.Union[str, typing.Callable] = None,
        bias_constraint: typing.Union[str, typing.Callable] = None,
        **kwargs,
    ):
        warnings.warn(
            "`MultiHeadAttention` will be deprecated in Addons 0.13. "
            "Please use `tf.keras.layers.MultiHeadAttention` instead.",
            DeprecationWarning,
        )

        super().__init__(**kwargs)

        if output_size is not None and output_size < 1:
            raise ValueError("output_size must be a positive number")

        self.head_size = head_size
        self.num_heads = num_heads
        self.output_size = output_size
        self.use_projection_bias = use_projection_bias
        self.return_attn_coef = return_attn_coef

        self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)
        self.kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
        self.kernel_constraint = tf.keras.constraints.get(kernel_constraint)
        self.bias_initializer = tf.keras.initializers.get(bias_initializer)
        self.bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
        self.bias_constraint = tf.keras.constraints.get(bias_constraint)

        self.dropout = tf.keras.layers.Dropout(dropout)
        self._dropout_rate = dropout

    def build(self, input_shape):

        num_query_features = input_shape[0][-1]
        num_key_features = input_shape[1][-1]
        num_value_features = (
            input_shape[2][-1] if len(input_shape) > 2 else num_key_features
        )
        output_size = (
            self.output_size if self.output_size is not None else num_value_features
        )

        self.query_kernel = self.add_weight(
            name="query_kernel",
            shape=[self.num_heads, num_query_features, self.head_size],
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
        )
        self.key_kernel = self.add_weight(
            name="key_kernel",
            shape=[self.num_heads, num_key_features, self.head_size],
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
        )
        self.value_kernel = self.add_weight(
            name="value_kernel",
            shape=[self.num_heads, num_value_features, self.head_size],
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
        )
        self.projection_kernel = self.add_weight(
            name="projection_kernel",
            shape=[self.num_heads, self.head_size, output_size],
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
        )

        if self.use_projection_bias:
            self.projection_bias = self.add_weight(
                name="projection_bias",
                shape=[output_size],
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
            )
        else:
            self.projection_bias = None

        super().build(input_shape)

    def call(self, inputs, training=None, mask=None):

        # einsum nomenclature
        # ------------------------
        # N = query elements
        # M = key/value elements
        # H = heads
        # I = input features
        # O = output features

        query = inputs[0]
        key = inputs[1]
        value = inputs[2] if len(inputs) > 2 else key

        # verify shapes
        if key.shape[-2] != value.shape[-2]:
            raise ValueError(
                "the number of elements in 'key' must be equal to the same as the number of elements in 'value'"
            )

        if mask is not None:
            if len(mask.shape) < 2:
                raise ValueError("'mask' must have atleast 2 dimensions")
            if query.shape[-2] != mask.shape[-2]:
                raise ValueError(
                    "mask's second to last dimension must be equal to the number of elements in 'query'"
                )
            if key.shape[-2] != mask.shape[-1]:
                raise ValueError(
                    "mask's last dimension must be equal to the number of elements in 'key'"
                )

        # Linear transformations
        query = tf.einsum("...NI , HIO -> ...NHO", query, self.query_kernel)
        key = tf.einsum("...MI , HIO -> ...MHO", key, self.key_kernel)
        value = tf.einsum("...MI , HIO -> ...MHO", value, self.value_kernel)

        # Scale dot-product, doing the division to either query or key
        # instead of their product saves some computation
        depth = tf.constant(self.head_size, dtype=query.dtype)
        query /= tf.sqrt(depth)

        # Calculate dot product attention
        logits = tf.einsum("...NHO,...MHO->...HNM", query, key)

        # apply mask
        if mask is not None:
            mask = tf.cast(mask, tf.float32)

            # possibly expand on the head dimension so broadcasting works
            if len(mask.shape) != len(logits.shape):
                mask = tf.expand_dims(mask, -3)

            logits += -10e9 * (1.0 - mask)

        attn_coef = tf.nn.softmax(logits)

        # attention dropout
        attn_coef_dropout = self.dropout(attn_coef, training=training)

        # attention * value
        multihead_output = tf.einsum("...HNM,...MHI->...NHI", attn_coef_dropout, value)

        # Run the outputs through another linear projection layer. Recombining heads
        # is automatically done.
        output = tf.einsum(
            "...NHI,HIO->...NO", multihead_output, self.projection_kernel
        )

        if self.projection_bias is not None:
            output += self.projection_bias

        if self.return_attn_coef:
            return output, attn_coef
        else:
            return output

    def compute_output_shape(self, input_shape):
        num_value_features = (
            input_shape[2][-1] if len(input_shape) > 2 else input_shape[1][-1]
        )
        output_size = (
            self.output_size if self.output_size is not None else num_value_features
        )

        output_shape = input_shape[0][:-1] + (output_size,)

        if self.return_attn_coef:
            num_query_elements = input_shape[0][-2]
            num_key_elements = input_shape[1][-2]
            attn_coef_shape = input_shape[0][:-2] + (
                self.num_heads,
                num_query_elements,
                num_key_elements,
            )

            return output_shape, attn_coef_shape
        else:
            return output_shape

    def get_config(self):
        config = super().get_config()

        config.update(
            head_size=self.head_size,
            num_heads=self.num_heads,
            output_size=self.output_size,
            dropout=self._dropout_rate,
            use_projection_bias=self.use_projection_bias,
            return_attn_coef=self.return_attn_coef,
            kernel_initializer=tf.keras.initializers.serialize(self.kernel_initializer),
            kernel_regularizer=tf.keras.regularizers.serialize(self.kernel_regularizer),
            kernel_constraint=tf.keras.constraints.serialize(self.kernel_constraint),
            bias_initializer=tf.keras.initializers.serialize(self.bias_initializer),
            bias_regularizer=tf.keras.regularizers.serialize(self.bias_regularizer),
            bias_constraint=tf.keras.constraints.serialize(self.bias_constraint),
        )

        return config

In [8]:
# the autoregressive version of the transformer-decoder does not use the Seq2Seq intermediate layer
# as there is no transformer-encoder component sending hidden states, therefore
# having only a self-attention layer and a position-wise feed-forward layer,
# the autoregressive transformer-decoder is, in fact, a transformer-encoder

# the only important modification is the masked self-attention layer

# masked self-attention layer seems to be already implemented in
# MHA module from TensorFlow AddOns, then will be added to the EncoderLayer class

In [9]:
# base transformer encoder layer from # https://keras.io/examples/nlp/text_classification_with_transformer/
# modified to include masked self attention

# ToDo: get the number of timesteps from the input shape
# in the meantime, pass this value as an argument for the encoder layer
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, n_timesteps, embed_dim, num_heads, ff_dim, dropout=0.1):
        super(EncoderLayer, self).__init__()
        # multi-head attention initialization
        self.attention_layer = MultiHeadAttention(head_size=embed_dim, num_heads=num_heads)
        self.ff_layer = tf.keras.Sequential(
            [tf.keras.layers.Dense(ff_dim, activation="relu"),
             tf.keras.layers.Dense(embed_dim)]
        )
        self.add_norm_layer_1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.add_norm_layer_2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout_1 = tf.keras.layers.Dropout(dropout)
        self.dropout_2 = tf.keras.layers.Dropout(dropout)
        # mask for self-attention
        self.mask = tf.convert_to_tensor(np.tril(np.ones([n_timesteps, n_timesteps]), 0), dtype=tf.float32)

        
    def call(self, inputs, training):
        # mask for self-attention is passed to MHA on call
        attention_output = self.attention_layer([inputs, inputs], mask=self.mask)
        attention_output = self.dropout_1(attention_output, training=training)
        input_to_ffn = self.add_norm_layer_1(inputs + attention_output)
        ffn_output = self.ff_layer(input_to_ffn)
        ffn_output = self.dropout_2(ffn_output, training=training)
        return self.add_norm_layer_2(input_to_ffn + ffn_output)

In [10]:
ts = pd.read_pickle("/home/developer/gcp/cbidmltsf/timeseries/CPE04115_H_kw_20201021084001/ts.pkl")

In [11]:
ts.head()

Unnamed: 0_level_0,kw_scaled
timestamp,Unnamed: 1_level_1
2016-01-01 00:00:00,0.274317
2016-01-01 01:00:00,0.217363
2016-01-01 02:00:00,0.168545
2016-01-01 03:00:00,0.122996
2016-01-01 04:00:00,0.08044


In [12]:
# length of time series
total_lectures = ts['kw_scaled'].count()
total_lectures

22629

In [13]:
# build a training dataset
# features: m consecutive lectures with their timestamps
# target: m consecutive lectures (lectures in features, shifted by 1 to the future)

In [14]:
# FEATURE ENGINEERING

In [15]:
# define the embedding dimension, the lenght of tranining, evaluation, and test datasets
# use the most of the 22K+ lectures in the original time series

In [16]:
# length of input sequence, hours in a week
m = 168

In [17]:
# prepare sine-cosine positional encoding for the input sequence
hours_in_day = 24
days_in_month = 30
months_in_year = 12

In [18]:
total_lectures*np.array([0.7, 0.2, 0.1])

array([15840.3,  4525.8,  2262.9])

In [19]:
# from the previous dataset split, use the following indexes for building datasets

# 15000 rows in training dataset
train_start = 0
train_end = 15000

# 4000 rows in evaluation dataset
eval_start = 16000
eval_end = 20000

# 1000 rows in test dataset
test_start = 21000
test_end = 22000

In [26]:
# get a collection of examples for training
# use a range on the time series index

train_range = np.arange(train_start, train_end)

features_list = list()
targets_list = list()

for start in train_range:
    end = start + m
    values = np.expand_dims(ts[start:end]['kw_scaled'].values, axis=1)
    target_values = np.expand_dims(ts[1+start:1+end]['kw_scaled'].values, axis=1)
    
    timestamps_hour = ts[start:end].index.hour
    timestamps_day = ts[start:end].index.day
    timestamps_month = ts[start:end].index.month
    
    sin_hour = np.expand_dims(np.sin(2*np.pi*timestamps_hour/hours_in_day), axis=1)
    cos_hour = np.expand_dims(np.sin(2*np.pi*timestamps_hour/hours_in_day), axis=1)
    sin_day = np.expand_dims(np.sin(2*np.pi*timestamps_day/days_in_month), axis=1)
    cos_day = np.expand_dims(np.cos(2*np.pi*timestamps_day/days_in_month), axis=1)
    sin_month = np.expand_dims(np.sin(2*np.pi*timestamps_month/months_in_year), axis=1)
    cos_month = np.expand_dims(np.cos(2*np.pi*timestamps_month/months_in_year), axis=1)
    
    feature_row = np.concatenate((values, sin_hour, cos_hour, sin_day, cos_day, sin_month, cos_month), axis=1)    

    features_list.append(feature_row)
    targets_list.append(target_values)

x_train = np.array(features_list)
y_train = np.array(targets_list)

In [69]:
# save the training NumPy arrays
with open('data/x_train.npy', 'wb') as filename:
    np.save(filename, x_train)

with open('data/y_train.npy', 'wb') as filename:
    np.save(filename, y_train)

In [20]:
# load the training NumPy arrays
with open('data/x_train.npy', 'rb') as filename:
    x_train = np.load(filename)

with open('data/y_train.npy', 'rb') as filename:
    y_train = np.load(filename)

In [21]:
x_train.shape, y_train.shape

((15000, 168, 7), (15000, 168, 1))

In [30]:
# get a collection of examples for evaluation
# use a range on the time series index
eval_range = np.arange(eval_start, eval_end)

features_list = list()
targets_list = list()

for start in eval_range:
    end = start + m
    values = np.expand_dims(ts[start:end]['kw_scaled'].values, axis=1)
    target_values = np.expand_dims(ts[1+start:1+end]['kw_scaled'].values, axis=1)
    
    timestamps_hour = ts[start:end].index.hour
    timestamps_day = ts[start:end].index.day
    timestamps_month = ts[start:end].index.month
    
    sin_hour = np.expand_dims(np.sin(2*np.pi*timestamps_hour/hours_in_day), axis=1)
    cos_hour = np.expand_dims(np.sin(2*np.pi*timestamps_hour/hours_in_day), axis=1)
    sin_day = np.expand_dims(np.sin(2*np.pi*timestamps_day/days_in_month), axis=1)
    cos_day = np.expand_dims(np.cos(2*np.pi*timestamps_day/days_in_month), axis=1)
    sin_month = np.expand_dims(np.sin(2*np.pi*timestamps_month/months_in_year), axis=1)
    cos_month = np.expand_dims(np.cos(2*np.pi*timestamps_month/months_in_year), axis=1)
    
    feature_row = np.concatenate((values, sin_hour, cos_hour, sin_day, cos_day, sin_month, cos_month), axis=1)    

    features_list.append(feature_row)
    targets_list.append(target_values)

x_eval = np.array(features_list)
y_eval = np.array(targets_list)

In [67]:
# save the evaluation NumPy arrays
with open('data/x_eval.npy', 'wb') as filename:
    np.save(filename, x_eval)

with open('data/y_eval.npy', 'wb') as filename:
    np.save(filename, y_eval)

In [22]:
# load the evaluation NumPy arrays
with open('data/x_eval.npy', 'rb') as filename:
    x_eval = np.load(filename)

with open('data/y_eval.npy', 'rb') as filename:
    y_eval = np.load(filename)

In [23]:
x_eval.shape, y_eval.shape

((4000, 168, 7), (4000, 168, 1))

In [62]:
# get a collection of examples for test
# use a range on the time series index
test_range = np.arange(test_start, test_end)

features_list = list()
targets_list = list()

for start in test_range:
    end = start + m
    values = np.expand_dims(ts[start:end]['kw_scaled'].values, axis=1)
    target_values = np.expand_dims(ts[1+start:1+end]['kw_scaled'].values, axis=1)
    
    timestamps_hour = ts[start:end].index.hour
    timestamps_day = ts[start:end].index.day
    timestamps_month = ts[start:end].index.month
    
    sin_hour = np.expand_dims(np.sin(2*np.pi*timestamps_hour/hours_in_day), axis=1)
    cos_hour = np.expand_dims(np.sin(2*np.pi*timestamps_hour/hours_in_day), axis=1)
    sin_day = np.expand_dims(np.sin(2*np.pi*timestamps_day/days_in_month), axis=1)
    cos_day = np.expand_dims(np.cos(2*np.pi*timestamps_day/days_in_month), axis=1)
    sin_month = np.expand_dims(np.sin(2*np.pi*timestamps_month/months_in_year), axis=1)
    cos_month = np.expand_dims(np.cos(2*np.pi*timestamps_month/months_in_year), axis=1)
    
    feature_row = np.concatenate((values, sin_hour, cos_hour, sin_day, cos_day, sin_month, cos_month), axis=1)    

    features_list.append(feature_row)
    targets_list.append(target_values)

x_test = np.array(features_list)
y_test = np.array(targets_list)

In [65]:
# save the test NumPy arrays
with open('data/x_test.npy', 'wb') as filename:
    np.save(filename, x_test)
    
with open('data/y_test.npy', 'wb') as filename:
    np.save(filename, y_test)

In [24]:
# load the test NumPy arrays
with open('data/x_test.npy', 'rb') as filename:
    x_test = np.load(filename)

with open('data/y_test.npy', 'rb') as filename:
    y_test = np.load(filename)

In [25]:
x_test.shape, y_test.shape

((1000, 168, 7), (1000, 168, 1))

In [26]:
# architecture details according to the Klingenbrunn experiment
# (including notes to further modifications on the basic autoregressive model)

In [85]:
# number of timesteps is the length of the input sequence,
# is the embedding dimension from SLDB
num_timesteps = m

In [86]:
# number of features is the active load value (main feature)
# plus the six components of the sine-cosine positional encoding on hour, day, month

# important: there is no value embedding, therefore d_model is very low
d_model = 7

# ToDo: use value embedding to a high-dimensional space and compare results
# ToDo: use a different positional encoding system and compare results

In [87]:
# as long as there is no value embedding, neither convolutional nor dense layers are required

In [88]:
# input layer for Keras functional
input_layer = tf.keras.layers.Input(shape=(num_timesteps, d_model))
input_layer

<KerasTensor: shape=(None, 168, 7) dtype=float32 (created by layer 'input_2')>

In [89]:
input_to_transformer_block = input_layer
input_to_transformer_block

<KerasTensor: shape=(None, 168, 7) dtype=float32 (created by layer 'input_2')>

In [90]:
num_heads = 2
ff_dim = 1024
dropout = 0.2

In [91]:
encoder_layer_1 = EncoderLayer(n_timesteps=num_timesteps,
                               embed_dim=d_model,
                               num_heads=num_heads,
                               ff_dim=ff_dim,
                               dropout=dropout)



In [92]:
encoder_layer_2 = EncoderLayer(n_timesteps=num_timesteps,
                               embed_dim=d_model,
                               num_heads=num_heads,
                               ff_dim=ff_dim,
                               dropout=dropout)



In [93]:
output_from_encoder_1 = encoder_layer_1(input_to_transformer_block)
output_from_encoder_1

<KerasTensor: shape=(None, 168, 7) dtype=float32 (created by layer 'encoder_layer_2')>

In [94]:
output_from_encoder_2 = encoder_layer_2(output_from_encoder_1)
output_from_encoder_2

<KerasTensor: shape=(None, 168, 7) dtype=float32 (created by layer 'encoder_layer_3')>

In [95]:
# Klingenbrunn uses a linear layer to decode the output_from_encoder
# from (?, num_timesteps, num_features) to (?, num_timesteps, 1)

# the equivalent operation in TensorFlow is a TimeDistributed Dense layer to 1

In [96]:
units_in_first_dense = 1
first_dense = tf.keras.layers.Dense(units_in_first_dense, activation="sigmoid")

In [97]:
distributed_first_dense = tf.keras.layers.TimeDistributed(first_dense)(output_from_encoder_2)
distributed_first_dense

<KerasTensor: shape=(None, 168, 1) dtype=float32 (created by layer 'time_distributed_1')>

In [98]:
model = tf.keras.Model(inputs=input_layer, outputs=distributed_first_dense)

In [99]:
model.compile("adam", "mse", metrics=[tf.keras.metrics.RootMeanSquaredError()])

In [100]:
history = model.fit(
    x_train, y_train, batch_size=32, epochs=100, validation_data=(x_eval, y_eval)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [101]:
# inference process on unseen data, that means on the test dataset

In [102]:
# define a forecast window to guide the iterative prediction process
# start with a hourly, day-ahead process
forecast_window = 24

In [103]:
# use naming conventions from Klingenbrunn

In [104]:
# the true active load values, plus the six positional encodings
# in the first row of the test dataset features (original ts indexes are 21000-21167)
src = tf.expand_dims(x_test[0, :, :], axis=0)
src.shape

TensorShape([1, 168, 7])

In [105]:
# the true active load values, plus the six positional encodings
# in the forecast-window-sized timesteps following the source (original ts indexes are 21168-21191)

In [106]:
target = tf.expand_dims(x_test[168, :forecast_window, :], axis=0)
target.shape

TensorShape([1, 24, 7])

In [107]:
# verify that active load values in the source and the target are contiguous in the time series
# that means, target is composed with the first timesteps of features
# in the rows 168-191 of test dataset

In [108]:
x_test[168:192, 0, 0]

array([0.26612255, 0.23641237, 0.24432548, 0.32326286, 0.36856605,
       0.45449848, 0.56519907, 0.68640037, 0.74005594, 0.84452149,
       0.86219931, 0.86331182, 0.87358708, 0.94115296, 0.89783385,
       0.90333212, 0.8544241 , 0.89447384, 0.90186633, 0.83024086,
       0.67798756, 0.54712923, 0.42006322, 0.33738621])

In [109]:
target[0, :, 0]

<tf.Tensor: shape=(24,), dtype=float64, numpy=
array([0.26612255, 0.23641237, 0.24432548, 0.32326286, 0.36856605,
       0.45449848, 0.56519907, 0.68640037, 0.74005594, 0.84452149,
       0.86219931, 0.86331182, 0.87358708, 0.94115296, 0.89783385,
       0.90333212, 0.8544241 , 0.89447384, 0.90186633, 0.83024086,
       0.67798756, 0.54712923, 0.42006322, 0.33738621])>

In [110]:
# therefore, source and target tensors are ready for inference process

In [111]:
next_input_model = src

In [112]:
# do not use the all_predictions array from Klingenbrunn, just the predictions on the forecast window
predictions_list = list()

In [113]:
# once tested, put the following lines on an iterative cycle
# for i in range(forecast_window -1): ??

In [114]:
for i in range(forecast_window):
    
    # get a prediction (1, 168, 1) from input (1, 168, 7)
    prediction = model.predict(
        next_input_model, batch_size=None, verbose=0, steps=None, callbacks=None, max_queue_size=10,
        workers=1, use_multiprocessing=False
    )
    
    # get the value of the most recent prediction (last timestep) into the predictions list
    predictions_list.append(prediction[:, -1, :][0][0])
    
    # from the source tensor, get the positional encodings for ti+1 to t167 (that is 168-i-1 values)
    pos_encoding_old_values = src[:, i+1:, 1:]

    # from target tensor, get the positional encodings for t168 to t168+i (that is i+1 values)
    pos_encoding_new_val = target[:, :i+1, 1:]

    # build new positional encodings with 168 values
    pos_encodings = tf.concat([pos_encoding_old_values, pos_encoding_new_val], axis=1)
    pos_encodings = tf.cast(pos_encodings, dtype=tf.float32)

    # build the values feature for the next input to the model
    # pop i+1 values at the beginning of the previous input
    value_feature_old_values = tf.expand_dims(src[:, i+1:, 0], axis=-1)
    value_feature_old_values = tf.cast(value_feature_old_values, dtype=tf.float32)

    # current predictions_list to NumPy array
    value_feature_new_values = np.array(predictions_list[:i+1])

    # current prediction array to tensor
    value_feature_new_values = tf.convert_to_tensor(value_feature_new_values)

    # expand dimensions of current prediction tensor to single-value feature
    value_feature_new_values = tf.expand_dims(value_feature_new_values, axis=-1)

    # expand dimensions of current prediction tensor to single-value batch
    value_feature_new_values = tf.expand_dims(value_feature_new_values, axis=0)
    
    # build the value feature tensor
    next_input_model = tf.concat([value_feature_old_values, value_feature_new_values], axis=1)
    
    # build the next input tensor for the model
    next_input_model = tf.concat([next_input_model, pos_encodings], axis=2)




In [115]:
# review results

In [116]:
np.array(predictions_list)

array([0.27063882, 0.24955139, 0.26786205, 0.32904452, 0.3623683 ,
       0.40988988, 0.4909711 , 0.5777742 , 0.65716684, 0.73376966,
       0.78400385, 0.82222307, 0.8517444 , 0.8649932 , 0.8633987 ,
       0.8349567 , 0.8171438 , 0.82490766, 0.8359086 , 0.7671213 ,
       0.6175975 , 0.47953865, 0.37069687, 0.31055808], dtype=float32)

In [117]:
np.array(target[0, :, 0])

array([0.26612255, 0.23641237, 0.24432548, 0.32326286, 0.36856605,
       0.45449848, 0.56519907, 0.68640037, 0.74005594, 0.84452149,
       0.86219931, 0.86331182, 0.87358708, 0.94115296, 0.89783385,
       0.90333212, 0.8544241 , 0.89447384, 0.90186633, 0.83024086,
       0.67798756, 0.54712923, 0.42006322, 0.33738621])

In [118]:
# symmetrical mean absolute percentage error
def smape(targets, predictions):
    '''
    predictions: a list with the predicted values
    targets: a list with the actual values
    '''
    import numpy as np
    # lists to NumPy arrays
    targets, predictions = np.array(targets), np.array(predictions)
    # verify predictions and targets have the same shape
    if predictions.shape == targets.shape:
            return(np.sum(2*np.abs(predictions - targets) /
                          (np.abs(targets) + np.abs(predictions)))/predictions.shape[0])

In [119]:
smape(np.array(target[0, :, 0]), np.array(predictions_list))

0.08151349767533242