# Time Series Prediction

In [2]:
import sys
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Add project root directory to the path
sys.path.append('/content/drive/My Drive/CS7643 - Final Project')

## Create Conda Environment

In [5]:
!pip install --upgrade pip
!pip install darts==0.29.0
!pip install optuna==2.0.0

Collecting pip
  Downloading pip-24.0-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.1.2
    Uninstalling pip-23.1.2:
      Successfully uninstalled pip-23.1.2
Successfully installed pip-24.0
Collecting darts==0.29.0
  Downloading darts-0.29.0-py3-none-any.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Collecting nfoursid>=1.0.0 (from darts==0.29.0)
  Downloading nfoursid-1.0.1-py3-none-any.whl.metadata (1.9 kB)
Collecting pmdarima>=1.8.0 (from darts==0.29.0)
  Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (7.8 kB)
Collecting pyod>=0.9.5 (from darts==0.29.0)
  Downloading pyod-1.1.3.tar.gz (160 kB)
[2K     [90m━━━━━━━━

## LSTM Model

In [6]:
import pickle
import numpy as np
import pandas as pd
from models import LSTMModel
from darts.metrics import rmsle

INPUT_CHUNK_LENGTH = 131
VAL_LEN = 16 # days
BATCH_SIZE = 128
N_EPOCH = 1
HIDDEN_DIM = 39
RNN_LAYERS = 3
DROPOUT = 0
MAX_SAMPLES_PER_TS = 60
LR = 0.0019971227090605087

In [7]:
# Load Model Covariates and Inputs
with open('/content/drive/My Drive/CS7643 - Final Project/data/LSTM_covariates.pkl', 'rb') as f:
    LSTM_covariates = pickle.load(f)
with open('/content/drive/My Drive/CS7643 - Final Project/data/LSTM_target.pkl', 'rb') as f:
    LSTM_target = pickle.load(f)
with open('/content/drive/My Drive/CS7643 - Final Project/data/LSTM_train.pkl', 'rb') as f:
    LSTM_train = pickle.load(f)

In [11]:
val_set = [s[-((2 * VAL_LEN) + INPUT_CHUNK_LENGTH) : -VAL_LEN] for s in LSTM_target]

lstm_model = LSTMModel(val_len=VAL_LEN, batch_size=BATCH_SIZE, lr=LR, n_epoch=N_EPOCH,
                  hidden_dim=HIDDEN_DIM, n_rnn_layers=RNN_LAYERS,
                  dropout=DROPOUT)

model = lstm_model.build(input_chunk_length=INPUT_CHUNK_LENGTH)

model.fit(
        series=LSTM_train,
        val_series=val_set,
        future_covariates=LSTM_covariates,
        val_future_covariates=LSTM_covariates,
        max_samples_per_ts=MAX_SAMPLES_PER_TS,
        num_loader_workers=4,
    )

# Reload best model
model = model.load_from_checkpoint("lstm_model")

# Generate Forecasts for the Test Data
test_data = [ts[:-16] for ts in LSTM_target]
preds = model.predict(series=test_data, future_covariates=LSTM_covariates, n=VAL_LEN)

with open('/content/drive/My Drive/CS7643 - Final Project/data/train_pipeline.pkl', 'rb') as file:
    train_pipeline = pickle.load(file)
with open('/content/drive/My Drive/CS7643 - Final Project/data/actual_series.pkl', 'rb') as file:
    actual_series = pickle.load(file)

# Transform Back
forecasts_back = train_pipeline.inverse_transform(preds, partial=True)

# Zero Forecasting
for n in range(0,len(forecasts_back)):
  if (LSTM_target[n][:-16].univariate_values()[-14:] == 0).all():
        forecasts_back[n] = forecasts_back[n].map(lambda x: x * 0)

LSTM_rmsle = rmsle(actual_series = actual_series,
                   pred_series = forecasts_back,
                   n_jobs = -1,
                   series_reduction=np.mean)

print("The mean RMSLE for the Global LSTM Model over 1782 series is {:.5f}.".format(LSTM_rmsle))


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 35.4 K
4 | V             | Linear           | 40    
---------------------------------------------------
35.5 K    Trainable params
0         Non-trainable params
35.5 K    Total params
0.142     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()


Training: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved. New best score: 0.015
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  pid = os.fork()


The mean RMSLE for the Global LSTM Model over 1782 series is 0.41740.


## N-HiTS Model

In [14]:
from models import NHitsModel

INPUT_CHUNK_LENGTH = 266
OUTPUT_CHUNK_LENGTH = 16
VAL_LEN = 16 # days
NUM_STACKS = 3
NUM_BLOCK = 3
NUM_LAYERS = 2
LAYER_EXP = 8
BATCH_SIZE = 128
N_EPOCH = 1
DROPOUT = 0.01
MAX_SAMPLES_PER_TS = 180
LR = 0.002996870143374216


# Load Model Covariates and Inputs
with open('/content/drive/My Drive/CS7643 - Final Project/data/NHiTS_covariates.pkl', 'rb') as f:
    NHiTS_covariates = pickle.load(f)
with open('/content/drive/My Drive/CS7643 - Final Project/data/training_transformed.pkl', 'rb') as f:
    training_transformed = pickle.load(f)
with open('/content/drive/My Drive/CS7643 - Final Project/data/NHiTS_train.pkl', 'rb') as f:
    NHiTS_train = pickle.load(f)


val_set = [s[-((2 * VAL_LEN) + INPUT_CHUNK_LENGTH) : -VAL_LEN] for s in training_transformed]


nhits_model = NHitsModel(batch_size=BATCH_SIZE, lr=LR,
                         n_epoch=N_EPOCH, num_stack=NUM_STACKS,
                         num_blocks=NUM_BLOCK, num_layers=NUM_LAYERS,
                         layer_exp=LAYER_EXP, dropout=DROPOUT)


model = nhits_model.build(input_chunk_length=INPUT_CHUNK_LENGTH,
                          output_chunk_length=OUTPUT_CHUNK_LENGTH)


model.fit(
        series=NHiTS_train,
        val_series=val_set,
        past_covariates=NHiTS_covariates,
        val_past_covariates=NHiTS_covariates,
        max_samples_per_ts=MAX_SAMPLES_PER_TS,
        num_loader_workers=4,
    )

# Reload best model
model = model.load_from_checkpoint("nhits_model")

# Generate Forecasts for the Test Data
test_data = [ts[:-16] for ts in training_transformed]

preds = model.predict(series=test_data, past_covariates=NHiTS_covariates, n=VAL_LEN)

with open('/content/drive/My Drive/CS7643 - Final Project/data/train_pipeline.pkl', 'rb') as file:
    train_pipeline = pickle.load(file)
with open('/content/drive/My Drive/CS7643 - Final Project/data/actual_series.pkl', 'rb') as file:
    actual_series = pickle.load(file)

# Transform Back
forecasts_back = train_pipeline.inverse_transform(preds, partial=True)

# Zero Forecasting
for n in range(0,len(forecasts_back)):
  if (actual_series[n][:-16].univariate_values()[-14:] == 0).all():
        forecasts_back[n] = forecasts_back[n].map(lambda x: x * 0)

NHiTS_rmsle = rmsle(actual_series = actual_series,
                   pred_series = forecasts_back,
                   n_jobs = -1,
                   series_reduction=np.mean)

print("The mean RMSLE for the Global NHiTS Model over 1782 series is {:.5f}.".format(NHiTS_rmsle))

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | stacks        | ModuleList       | 15.6 M
---------------------------------------------------
13.8 M    Trainable params
1.8 M     Non-trainable params
15.6 M    Total params
62.458    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()


Training: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved. New best score: 0.014
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  pid = os.fork()


The mean RMSLE for the Global NHiTS Model over 1782 series is 0.42961.


## TFT Model

In [15]:
from models import TFTModel

INPUT_CHUNK_LENGTH = 230
OUTPUT_CHUNK_LENGTH = 16
VAL_LEN = 16 # days
LSTM_LAYERS = 3
HIDDEN_SIZE = 16
N_HEAD = 4
FULL_ATTENTION = True
HIDDEN_CONT_SIZE = 16
BATCH_SIZE = 128
N_EPOCH = 1
DROPOUT = 0.01
MAX_SAMPLES_PER_TS = 7
LR = 0.009912733600616069


# Load Model Covariates and Inputs
with open('/content/drive/My Drive/CS7643 - Final Project/data/TFT_train.pkl', 'rb') as f:
    train = pickle.load(f)
with open('/content/drive/My Drive/CS7643 - Final Project/data/TFT_past_cov.pkl', 'rb') as f:
    tft_past_cov = pickle.load(f)
with open('/content/drive/My Drive/CS7643 - Final Project/data/TFT_fut_cov.pkl', 'rb') as f:
    tft_fut_cov = pickle.load(f)
with open('/content/drive/My Drive/CS7643 - Final Project/data/training_transformed.pkl', 'rb') as f:
    training_transformed = pickle.load(f)

val_set = [s[-((2 * VAL_LEN) + INPUT_CHUNK_LENGTH) : -VAL_LEN] for s in training_transformed]

tft_model = TFTModel(batch_size=BATCH_SIZE, lr=LR,
                     n_epoch=N_EPOCH, hidden_size=HIDDEN_SIZE,
                     lstm_layers=LSTM_LAYERS, n_head=N_HEAD,
                     full_attention=FULL_ATTENTION, dropout=DROPOUT,
                     hidden_continuous_size=HIDDEN_CONT_SIZE)


model = tft_model.build(input_chunk_length=INPUT_CHUNK_LENGTH,
                        output_chunk_length=OUTPUT_CHUNK_LENGTH)


model.fit(
    series=train,
    val_series=val_set,
    past_covariates=tft_past_cov,
    val_past_covariates=tft_past_cov,
    future_covariates=tft_fut_cov,
    val_future_covariates=tft_fut_cov,
    max_samples_per_ts=MAX_SAMPLES_PER_TS,
    num_loader_workers=4,
)

# Reload best model
model = model.load_from_checkpoint("tft_model")

# Generate Forecasts for the Test Data
test_data = [ts[:-16] for ts in training_transformed]

preds = model.predict(series=test_data, past_covariates=tft_past_cov, future_covariates=tft_fut_cov, n=VAL_LEN)

with open('/content/drive/My Drive/CS7643 - Final Project/data/train_pipeline.pkl', 'rb') as file:
    train_pipeline = pickle.load(file)
with open('/content/drive/My Drive/CS7643 - Final Project/data/actual_series.pkl', 'rb') as file:
    actual_series = pickle.load(file)

# Transform Back
forecasts_back = train_pipeline.inverse_transform(preds, partial=True)

# Zero Forecasting
for n in range(0,len(forecasts_back)):
  if (actual_series[n][:-16].univariate_values()[-14:] == 0).all():
        forecasts_back[n] = forecasts_back[n].map(lambda x: x * 0)

TFT_rmsle = rmsle(actual_series = actual_series,
                   pred_series = forecasts_back,
                   n_jobs = -1,
                   series_reduction=np.mean)

print("The mean RMSLE for the Global TFT Model over 1782 series is {:.5f}.".format(TFT_rmsle))


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name                              | Type                             | Params
----------------------------------------------------------------------------------------
0  | criterion                         | MSELoss                          | 0     
1  | train_metrics                     | MetricCollection                 | 0     
2  | val_metrics                       | MetricCollection                 | 0     
3  | input_embeddings                  | _MultiEmbedding                  | 0     
4  | static_covariates_vsn             | 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()


Training: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved. New best score: 0.093
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=1` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


The mean RMSLE for the Global TFT Model over 1782 series is 1.17080.
