<a href="https://colab.research.google.com/github/YangyangFu/transformer-time-series/blob/main/examples/train_informer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import os
import random
import numpy as np

# GPU/TPOU setup


In [2]:
runtime = "GPU"
if runtime == "TPU":
  resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
  tf.config.experimental_connect_to_cluster(resolver)
  # This is the TPU initialization code that has to be at the beginning.
  tf.tpu.experimental.initialize_tpu_system(resolver)
  print("All devices: ", tf.config.list_logical_devices('TPU'))
else:
  pass


# Install Dependency

In [3]:
!pip install git+https://github.com/YangyangFu/transformer-time-series@main

Collecting git+https://github.com/YangyangFu/transformer-time-series@main
  Cloning https://github.com/YangyangFu/transformer-time-series (to revision main) to /tmp/pip-req-build-aai9vrwg
  Running command git clone --filter=blob:none --quiet https://github.com/YangyangFu/transformer-time-series /tmp/pip-req-build-aai9vrwg
  Resolved https://github.com/YangyangFu/transformer-time-series to commit ba1a9ffc0f4d36db3a28340bbf6a8d8c930191f7
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: tsl
  Building wheel for tsl (setup.py) ... [?25l[?25hdone
  Created wheel for tsl: filename=tsl-1.0-py3-none-any.whl size=41953 sha256=f7d1547ab5a9739bd814abee25b4dbb1fc7a3ce4926bd6547ded521af4b7a870
  Stored in directory: /tmp/pip-ephem-wheel-cache-rkt01pdg/wheels/1e/96/45/2b21250d4fcf59625a099e8fb90e3b43278918abd7b7747e49
Successfully built tsl
Installing collected packages: tsl
Successfully installed tsl-1.0


In [4]:
from tsl.dataloader.batch_on_time import DataLoader
from tsl.transformers.informer import Informer
from tsl.utils.utils import seed_everything

seed_everything(42)


Random seed set as 42


# Download Data

In [5]:
!apt install subversion
!svn checkout https://github.com/YangyangFu/transformer-time-series/trunk/datasets-raw
!bash ./datasets-raw/download_data.sh

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libapr1 libaprutil1 libserf-1-1 libsvn1 libutf8proc2
Suggested packages:
  db5.3-util libapache2-mod-svn subversion-tools
The following NEW packages will be installed:
  libapr1 libaprutil1 libserf-1-1 libsvn1 libutf8proc2 subversion
0 upgraded, 6 newly installed, 0 to remove and 16 not upgraded.
Need to get 2,672 kB of archives.
After this operation, 10.5 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 libapr1 amd64 1.7.0-8ubuntu0.22.04.1 [108 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 libaprutil1 amd64 1.6.1-5ubuntu4.22.04.1 [92.6 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libserf-1-1 amd64 1.3.9-10ubuntu2 [50.0 kB]
Get:4 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libutf8proc2 amd64 2.7.0-3 [73.9 kB]
Get:5 http://archive.

# Experiment Settings


In [29]:
embed_dim = 512
source_seq_len = 360
pred_len = 24
target_seq_len = 168 + pred_len
target_cols=['HUFL','HULL','MUFL','MULL','LUFL','LULL','OT']
num_cov_cols=['HUFL','HULL','MUFL','MULL','LUFL','LULL','OT']
n_num_covs = len(num_cov_cols)
n_targets = len(target_cols)

MAX_EPOCHS = 10

# Create Data Loader

In [32]:
# get data path
data_path = "./ETT-small/ETTh1"

ts_file = 'ts.joblib'

# create dataloader
dataloader = DataLoader(
        data_path,
        ts_file,
        num_cov_global_file=None,
        cat_cov_global_file=None,
        num_cov_local_variant_file=[],
        cat_cov_local_variant_file=[],
        num_cov_local_invariant_file=[],
        cat_cov_local_invariant_file=[],
        num_cov_local_variant_names=[],
        cat_cov_local_variant_names=[],
        target_cols=target_cols,
        train_range=(0, 24*30*12),
        val_range=(24*30*12, 24*30*16),
        test_range=(24*30*16, 24*30*20),
        hist_len=source_seq_len,
        token_len=target_seq_len-pred_len,
        pred_len=pred_len,
        batch_size=32,
        freq='H',
        normalize=True,
        use_time_features=True,
        use_holiday=True,
        use_holiday_distance=False,
        normalize_time_features=False,
        use_history_for_covariates=False
)

train_ds = dataloader.generate_dataset(mode="train", shuffle=True, seed=1)
val_ds = dataloader.generate_dataset(mode="validation", shuffle=False, seed=1)
test_ds = dataloader.generate_dataset(mode="test", shuffle=False, seed=1)


Generating time features.................


100%|██████████| 17420/17420 [04:15<00:00, 68.23it/s]


In [33]:
# create informer model
model = Informer(output_dim=n_targets,
                pred_len=pred_len,
                num_layers_encoder=2,
                num_heads_encoder=8,
                key_dim_encoder=64,
                value_dim_encoder=64,
                output_dim_encoder=512,
                hidden_dim_encoder=2048,
                factor_encoder=5,
                num_layers_decoder=1,
                num_heads_decoder=8,
                key_dim_decoder=64,
                value_dim_decoder=64,
                output_dim_decoder=512,
                hidden_dim_decoder=2048,
                factor_decoder=5,
                num_cat_cov=0,
                cat_cov_embedding_size=[],
                cat_cov_embedding_dim=16,
                freq='H',
                use_holiday=True,
                dropout_rate=0.2,)

# training settings
loss_fn = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

train_metrics = [tf.keras.metrics.MeanAbsoluteError()]
val_metrics = [tf.keras.metrics.MeanAbsoluteError()]

# train step
@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:
        x_enc, x_dec = x
        y_pred = model(x_enc, x_dec, training=True)
        loss = loss_fn(y, y_pred)

    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    # update metrics
    for metric in train_metrics:
            metric.update_state(y, y_pred)
    return loss

# validation step
@tf.function
def val_step(x, y):
    x_enc, x_dec = x
    y_pred = model(x_enc, x_dec, training=False)
    loss = loss_fn(y, y_pred)
    for metric in val_metrics:
        metric.update_state(y, y_pred)
    return loss

In [None]:
# main loop
for epoch in range(MAX_EPOCHS):
    # take a batch
    for batch in train_ds:
        enc, dec = batch
        (ts_enc, num_global_enc, cat_global_enc, 
         num_local_variant_enc, cat_local_variant_enc, 
         num_local_invariant_enc, cat_local_invariant_enc, time_features_enc) = enc
        (ts_dec, num_global_dec, cat_global_dec, 
         num_local_variant_dec, cat_local_variant_dec, 
         num_local_invariant_dec, cat_local_invariant_dec, time_features_dec) = dec
                
        #try: 
        # zero for target 
        token_dec = ts_dec[:, :-pred_len, :]
        zeros = tf.zeros_like(ts_dec[:, -pred_len:, :])
        token_target_dec = tf.concat([token_dec, zeros], axis=1)
        
        # feed model
        x_enc = (ts_enc, None, time_features_enc)
        x_dec = (time_features_dec, token_target_dec)
        
        # train step
        loss = train_step((x_enc, x_dec), ts_dec[:, -pred_len:, :])

    # print loss every epoch
    print(f"Epoch {epoch+1}/{MAX_EPOCHS} training loss: {loss:.4f}, MAE: {train_metrics[0].result():.4f}")

    # reset train metrics
    for metric in train_metrics:
        metric.reset_states()

    # run validation loop
    # how to run validaiton loop without batching?
    for val_batch in val_ds:
        enc, dec = val_batch
        (ts_enc, num_global_enc, cat_global_enc, 
         num_local_variant_enc, cat_local_variant_enc, 
         num_local_invariant_enc, cat_local_invariant_enc, time_features_enc) = enc
        (ts_dec, num_global_dec, cat_global_dec, 
         num_local_variant_dec, cat_local_variant_dec, 
         num_local_invariant_dec, cat_local_invariant_dec, time_features_dec) = dec
        

        # zero for target 
        token_dec = ts_dec[:, :-pred_len, :]
        zeros = tf.zeros_like(ts_dec[:, -pred_len:, :])
        token_target_dec = tf.concat([token_dec, zeros], axis=1)
        
        # feed model
        x_enc = (ts_enc, None, time_features_enc)
        x_dec = (time_features_dec, token_target_dec)
        
        # calculate loss
        loss_val = val_step((x_enc, x_dec), ts_dec[:, -pred_len:, :])

    # print loss every epoch
    print(f"Epoch {epoch+1}/{MAX_EPOCHS} validation loss: {loss_val:.4f}, MAE: {val_metrics[0].result():.4f}")

    # reset val metrics
    for metric in val_metrics:
        metric.reset_states()

Epoch 1/50 training loss: 0.4252, MAE: 0.9885
Epoch 1/50 validation loss: 8.1311, MAE: 2.2756
Epoch 2/50 training loss: 0.3818, MAE: 0.4422
Epoch 2/50 validation loss: 0.0345, MAE: 0.5210
Epoch 3/50 training loss: 0.3992, MAE: 0.4024
Epoch 3/50 validation loss: 0.0736, MAE: 0.7503
Epoch 4/50 training loss: 0.3198, MAE: 0.3772
Epoch 4/50 validation loss: 1.5730, MAE: 1.6392
Epoch 5/50 training loss: 0.3401, MAE: 0.3641
Epoch 5/50 validation loss: 0.4194, MAE: 1.4245


# Test

In [14]:
test_metrics = [tf.keras.metrics.MeanSquaredError(), tf.keras.metrics.MeanAbsoluteError()]

# test step
@tf.function
def test_step(x, y):
    x_enc, x_dec = x
    y_pred = model(x_enc, x_dec, training=False)
    loss = loss_fn(y, y_pred)
    for metric in test_metrics:
        metric.update_state(y, y_pred)
    return loss


In [17]:
# reset metrics
for metric in val_metrics:
    metric.reset_states()

for test_batch in test_ds:
    enc, dec = test_batch
    (ts_enc, num_global_enc, cat_global_enc, 
        num_local_variant_enc, cat_local_variant_enc, 
        num_local_invariant_enc, cat_local_invariant_enc, time_features_enc) = enc
    (ts_dec, num_global_dec, cat_global_dec, 
        num_local_variant_dec, cat_local_variant_dec, 
        num_local_invariant_dec, cat_local_invariant_dec, time_features_dec) = dec
    

    # zero for target 
    token_dec = ts_dec[:, :-pred_len, :]
    zeros = tf.zeros_like(ts_dec[:, -pred_len:, :])
    token_target_dec = tf.concat([token_dec, zeros], axis=1)

    # feed model
    x_enc = (ts_enc, None, time_features_enc)
    x_dec = (time_features_dec, token_target_dec)

    # calculate loss
    loss_test = test_step((x_enc, x_dec), ts_dec[:, -pred_len:, :])
    
# print loss every epoch
print(f"Test loss: {test_metrics[0].result():.4f}, MAE: {test_metrics[1].result():.4f}")



Test loss: 0.4283, MAE: 0.5234
