# Demand forecasting with the Temporal Fusion Transformer


In [1]:
import os
import warnings

warnings.filterwarnings("ignore")  # avoid printing out absolute paths

os.chdir("../../..")

In [2]:
import copy
from pathlib import Path
import warnings

import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger
import numpy as np
import pandas as pd
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import MAE, SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader, random_split

In [4]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import statsmodels.api as sm
import statsmodels.tsa.api as tsa
import matplotlib.pyplot as plt
import sympy

In [5]:
path = os.getcwd() + '/docs/source/tutorials/project_data/'

# Predicting

In [19]:
data = pd.read_csv(path+'MiningProcess_Flotation_Plant_Database_hourly.csv', index_col=0)
data = data.drop(['time_idx', 'hour', '% Iron Concentrate'],axis=1)
data

Unnamed: 0,% Iron Feed,% Silica Feed,Starch Flow,Amina Flow,Ore Pulp Flow,Ore Pulp pH,Ore Pulp Density,Flotation Column 01 Air Flow,Flotation Column 02 Air Flow,Flotation Column 03 Air Flow,...,Flotation Column 06 Air Flow,Flotation Column 07 Air Flow,Flotation Column 01 Level,Flotation Column 02 Level,Flotation Column 03 Level,Flotation Column 04 Level,Flotation Column 05 Level,Flotation Column 06 Level,Flotation Column 07 Level,% Silica Concentrate
0,55.20,16.98,3162.625026,578.786678,398.753368,10.113487,1.729558,251.166672,250.226086,250.178287,...,251.232529,250.208184,450.383776,446.891845,450.474523,449.912259,455.792161,464.383310,450.532747,1.31
1,55.20,16.98,3133.256389,537.219661,399.871822,10.129742,1.667784,249.880589,250.214050,250.033317,...,249.909494,249.897572,449.373361,450.249356,450.081222,450.328806,448.722983,455.501528,451.387700,1.11
2,55.20,16.98,3479.482944,591.906744,398.763806,10.048403,1.732711,250.161328,250.104167,250.046350,...,250.242161,250.484183,449.972878,450.868711,450.901822,451.145822,451.134189,459.981311,450.296722,1.27
3,55.20,16.98,3228.036436,593.170106,399.866983,9.918614,1.731056,250.208772,250.204761,250.120861,...,249.825122,250.157622,487.940706,491.462111,487.387206,494.528183,495.664011,502.763850,494.939889,1.36
4,55.20,16.98,3327.280739,619.710806,399.615089,9.746029,1.765879,249.917800,250.160494,250.013500,...,250.249600,250.078639,549.031539,549.983156,549.459572,549.975483,549.512533,560.696300,550.271772,1.34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4092,49.75,23.20,3327.047776,497.211189,380.847200,9.176166,1.660871,301.565561,300.169133,299.900722,...,335.658144,298.736661,397.781583,498.753311,403.858956,398.930467,502.971728,399.468911,400.559511,1.65
4093,49.75,23.20,4225.800333,508.963856,381.112889,9.387541,1.691996,300.050172,299.967839,299.906550,...,348.172800,303.599269,399.928567,499.648950,399.208611,399.967122,501.624533,398.905006,400.486233,1.71
4094,49.75,23.20,2808.214692,517.748822,381.064411,9.771277,1.735647,299.814289,299.801561,299.973161,...,349.422803,309.875439,399.961433,500.484917,471.827489,399.931033,500.223311,401.899806,400.556333,1.80
4095,49.75,23.20,3191.497672,492.511228,380.445006,9.782121,1.716644,300.122756,299.679472,299.927028,...,349.613589,305.254558,399.601383,498.614494,836.277192,400.455389,500.406239,402.703283,401.769072,1.96


In [20]:
A = data.drop(['% Silica Concentrate'], axis = 1)
B = data[['% Silica Concentrate']]

In [21]:
A_2 = A.drop(A.index[-1:], axis=0)
B_2 = B.drop(B.index[:1], axis=0)
B_3 = B_2.reset_index(drop=True)
df = pd.concat([A_2,B_3], axis=1 )

# Size 4096

In [22]:
train = df[:4032]
test = df[4032:]

In [23]:
from sklearn.pipeline import Pipeline
from feature_engine.selection import DropConstantFeatures, DropDuplicateFeatures,SmartCorrelatedSelection


pip = Pipeline([('constant', DropConstantFeatures(tol=0.90)), ('duplicate', DropDuplicateFeatures()), ('correlated', SmartCorrelatedSelection(threshold=0.8))])
train_pro = pip.fit_transform(train.drop(['% Silica Concentrate'], axis=1))
train_pro['% Silica Concentrate'] = train['% Silica Concentrate']

In [24]:
columns = train_pro.columns
test_pro = test[columns]
test_pro['% Silica Concentrate'] = test['% Silica Concentrate']
    

In [25]:
error =[]
VAR_model = sm.tsa.VAR(train_pro)
results = VAR_model.fit(1)
forecast = results.forecast(train_pro.values[-1:], 64)
    
a= 0
for j in range(64) :
    a += (abs(forecast[j][-1] - test_pro['% Silica Concentrate'].iloc[j]))
    
error.append(a/64)
    
error

[0.5016390965170712]

In [26]:
sum(error)/len(error)

0.5016390965170712

# Size 2048

In [27]:
batc = []

for i in range(2) :
    batc.append(data[(data.index <= 2048*(i+1) ) & (data.index >= 2048*i)])

In [28]:
batch = [0 for i in range(2)]

for i in range(2) :
    X = batc[i].drop(['% Silica Concentrate'], axis = 1)
    Y = batc[i][['% Silica Concentrate']]
    X_2 = X.drop(X.index[-1:], axis=0)
    X_3 = X_2.reset_index(drop=True)
    Y_2 = Y.drop(Y.index[:1], axis=0)
    Y_3 = Y_2.reset_index(drop=True)
    batch[i] = pd.concat([X_3,Y_3], axis=1 )

In [29]:
train = []
test = []

for i in range(2) :
    train.append(batch[i][:2016])
    test.append(batch[i][2016:])

In [30]:
from sklearn.pipeline import Pipeline
from feature_engine.selection import DropConstantFeatures, DropDuplicateFeatures,SmartCorrelatedSelection

train_pro=[]

for i in range(2) :
    pip = Pipeline([('constant', DropConstantFeatures(tol=0.90)), ('duplicate', DropDuplicateFeatures()), ('correlated', SmartCorrelatedSelection(threshold=0.8))])
    a = pip.fit_transform(train[i].drop(['% Silica Concentrate'], axis=1))
    train_pro.append(a)
    train_pro[i]['% Silica Concentrate'] = train[i]['% Silica Concentrate']

In [31]:
test_pro = [0 for i in range(2)]

for i in range(2) :
    columns = train_pro[i].columns
    test_pro[i] = test[i][columns]
    test_pro[i]['% Silica Concentrate'] = test[i]['% Silica Concentrate']
    

In [32]:
error = []

for i in range(2) :
    VAR_model = sm.tsa.VAR(train_pro[i])
    results = VAR_model.fit(1)
    forecast = results.forecast(train_pro[i].values[-1:], 32)
    
    a= 0
    for j in range(32) :
        a += (abs(forecast[j][-1] - test_pro[i]['% Silica Concentrate'].iloc[j]))
    error.append(a/32)
    
error

[0.7210267678651082, 0.5815933812860486]

In [33]:
sum(error)/len(error)

0.6513100745755784

# Size 1024

In [34]:
batc = []

for i in range(4) :
    batc.append(data[(data.index <= 1024*(i+1) ) & (data.index >= 1024*i)])

In [35]:
batch = [0 for i in range(4)]

for i in range(4) :
    X = batc[i].drop(['% Silica Concentrate'], axis = 1)
    Y = batc[i][['% Silica Concentrate']]
    X_2 = X.drop(X.index[-1:], axis=0)
    X_3 = X_2.reset_index(drop=True)
    Y_2 = Y.drop(Y.index[:1], axis=0)
    Y_3 = Y_2.reset_index(drop=True)
    batch[i] = pd.concat([X_3,Y_3], axis=1 )

In [36]:
train = []
test = []

for i in range(4) :
    train.append(batch[i][:1008])
    test.append(batch[i][1008:])

In [37]:
from sklearn.pipeline import Pipeline
from feature_engine.selection import DropConstantFeatures, DropDuplicateFeatures,SmartCorrelatedSelection

train_pro=[]

for i in range(4) :
    pip = Pipeline([('constant', DropConstantFeatures(tol=0.90)), ('duplicate', DropDuplicateFeatures()), ('correlated', SmartCorrelatedSelection(threshold=0.8))])
    a = pip.fit_transform(train[i].drop(['% Silica Concentrate'], axis=1))
    train_pro.append(a)
    train_pro[i]['% Silica Concentrate'] = train[i]['% Silica Concentrate']

In [38]:
test_pro = [0 for i in range(4)]

for i in range(4) :
    columns = train_pro[i].columns
    test_pro[i] = test[i][columns]
    test_pro[i]['% Silica Concentrate'] = test[i]['% Silica Concentrate']
    

In [39]:
error = []

for i in range(4) :
    VAR_model = sm.tsa.VAR(train_pro[i])
    results = VAR_model.fit(1)
    forecast = results.forecast(train_pro[i].values[-1:], 16)
    
    a= 0
    for j in range(16) :
        a += (abs(forecast[j][-1] - test_pro[i]['% Silica Concentrate'].iloc[j]))
    error.append(a/16)
    
error

[0.7517950616278205,
 0.2902952847240423,
 1.1742546061135593,
 0.37507151098529584]

In [40]:
sum(error)/len(error)

0.6478541158626795

# Size 512

In [41]:
batc = []

for i in range(8) :
    batc.append(data[(data.index <= 512*(i+1) ) & (data.index >= 512*i)])

In [42]:
batch = [0 for i in range(8)]

for i in range(8) :
    X = batc[i].drop(['% Silica Concentrate'], axis = 1)
    Y = batc[i][['% Silica Concentrate']]
    X_2 = X.drop(X.index[-1:], axis=0)
    X_3 = X_2.reset_index(drop=True)
    Y_2 = Y.drop(Y.index[:1], axis=0)
    Y_3 = Y_2.reset_index(drop=True)
    batch[i] = pd.concat([X_3,Y_3], axis=1 )

In [43]:
train = []
test = []

for i in range(8) :
    train.append(batch[i][:504])
    test.append(batch[i][504:])

In [44]:
from sklearn.pipeline import Pipeline
from feature_engine.selection import DropConstantFeatures, DropDuplicateFeatures,SmartCorrelatedSelection

train_pro=[]

for i in range(8) :
    pip = Pipeline([('constant', DropConstantFeatures(tol=0.90)), ('duplicate', DropDuplicateFeatures()), ('correlated', SmartCorrelatedSelection(threshold=0.8))])
    a = pip.fit_transform(train[i].drop(['% Silica Concentrate'], axis=1))
    train_pro.append(a)
    train_pro[i]['% Silica Concentrate'] = train[i]['% Silica Concentrate']

In [45]:
test_pro = [0 for i in range(8)]

for i in range(8) :
    columns = train_pro[i].columns
    test_pro[i] = test[i][columns]
    test_pro[i]['% Silica Concentrate'] = test[i]['% Silica Concentrate']
    

In [46]:
error = []

for i in range(8) :
    VAR_model = sm.tsa.VAR(train_pro[i])
    results = VAR_model.fit(1)
    forecast = results.forecast(train_pro[i].values[-1:], 8)
    
    a= 0
    for j in range(8) :
        a += (abs(forecast[j][-1] - test_pro[i]['% Silica Concentrate'].iloc[j]))
    error.append(a/8)
    
error

[0.667331970504103,
 0.7334049305512231,
 0.7735419260698279,
 0.25376812135192917,
 0.7093293846017079,
 0.8588572755886824,
 0.7760431922239461,
 0.6394180491501408]

In [47]:
sum(error)/len(error)

0.6764618562551951

# Size 256

In [48]:
batc = []

for i in range(16) :
    batc.append(data[(data.index <= 256*(i+1) ) & (data.index >= 256*i)])

In [49]:
batch = [0 for i in range(16)]

for i in range(16) :
    X = batc[i].drop(['% Silica Concentrate'], axis = 1)
    Y = batc[i][['% Silica Concentrate']]
    X_2 = X.drop(X.index[-1:], axis=0)
    X_3 = X_2.reset_index(drop=True)
    Y_2 = Y.drop(Y.index[:1], axis=0)
    Y_3 = Y_2.reset_index(drop=True)
    batch[i] = pd.concat([X_3,Y_3], axis=1 )

In [50]:
train = []
test = []

for i in range(16) :
    train.append(batch[i][:252])
    test.append(batch[i][252:])

In [51]:
from sklearn.pipeline import Pipeline
from feature_engine.selection import DropConstantFeatures, DropDuplicateFeatures,SmartCorrelatedSelection

train_pro=[]

for i in range(16) :
    pip = Pipeline([('constant', DropConstantFeatures(tol=0.90)), ('duplicate', DropDuplicateFeatures()), ('correlated', SmartCorrelatedSelection(threshold=0.8))])
    a = pip.fit_transform(train[i].drop(['% Silica Concentrate'], axis=1))
    train_pro.append(a)
    train_pro[i]['% Silica Concentrate'] = train[i]['% Silica Concentrate']

In [52]:
test_pro = [0 for i in range(16)]

for i in range(16) :
    columns = train_pro[i].columns
    test_pro[i] = test[i][columns]
    test_pro[i]['% Silica Concentrate'] = test[i]['% Silica Concentrate']
    

In [53]:
error = []

for i in range(16) :
    VAR_model = sm.tsa.VAR(train_pro[i])
    results = VAR_model.fit(1)
    forecast = results.forecast(train_pro[i].values[-1:], 4)
    
    a= 0
    for j in range(4) :
        a += (abs(forecast[j][-1] - test_pro[i]['% Silica Concentrate'].iloc[j]))
    error.append(a/4)
    
error

[0.7588692856502232,
 0.5758279330827402,
 0.17422432111756625,
 0.9470243974710877,
 0.5049323463972796,
 0.12076840795101929,
 1.4585873611663653,
 0.15066258638099378,
 0.6756814310608223,
 2.629760605062894,
 0.18288279487065007,
 1.2039687999997877,
 0.3369364318122243,
 0.994147021998484,
 0.8068629852927021,
 0.33388816041197467]

In [54]:
sum(error)/len(error)

0.740939054357926

# Size128

In [55]:
batc = []

for i in range(32) :
    batc.append(data[(data.index <= 128*(i+1) ) & (data.index >= 128*i)])

In [56]:
batch = [0 for i in range(32)]

for i in range(32) :
    X = batc[i].drop(['% Silica Concentrate'], axis = 1)
    Y = batc[i][['% Silica Concentrate']]
    X_2 = X.drop(X.index[-1:], axis=0)
    X_3 = X_2.reset_index(drop=True)
    Y_2 = Y.drop(Y.index[:1], axis=0)
    Y_3 = Y_2.reset_index(drop=True)
    batch[i] = pd.concat([X_3,Y_3], axis=1 )

In [57]:
train = []
test = []

for i in range(32) :
    train.append(batch[i][:126])
    test.append(batch[i][126:])

In [58]:
from sklearn.pipeline import Pipeline
from feature_engine.selection import DropConstantFeatures, DropDuplicateFeatures,SmartCorrelatedSelection

train_pro=[]

for i in range(32) :
    pip = Pipeline([('constant', DropConstantFeatures(tol=0.90)), ('duplicate', DropDuplicateFeatures()), ('correlated', SmartCorrelatedSelection(threshold=0.8))])
    a = pip.fit_transform(train[i].drop(['% Silica Concentrate'], axis=1))
    train_pro.append(a)
    train_pro[i]['% Silica Concentrate'] = train[i]['% Silica Concentrate']

In [59]:
test_pro = [0 for i in range(32)]

for i in range(32) :
    columns = train_pro[i].columns
    test_pro[i] = test[i][columns]
    test_pro[i]['% Silica Concentrate'] = test[i]['% Silica Concentrate']
    

In [60]:
error = []

for i in range(32) :
    VAR_model = sm.tsa.VAR(train_pro[i])
    results = VAR_model.fit(1)
    forecast = results.forecast(train_pro[i].values[-1:], 2)
    
    a= 0
    for j in range(2) :
        a += (abs(forecast[j][-1] - test_pro[i]['% Silica Concentrate'].iloc[j]))
    error.append(a/2)
    
error

[0.4044749599918225,
 0.6272813192522686,
 0.45384037150249834,
 0.38020877126596897,
 0.5941708900204254,
 0.15732389351886722,
 0.4867485826393698,
 0.14968421135189647,
 0.8260609030558888,
 0.7431408011888019,
 0.47339053509832185,
 0.18965768972660413,
 0.04249806827610403,
 0.804391532373494,
 0.32902732743118657,
 0.10521758878244247,
 0.5411414582225831,
 0.8296751673616746,
 0.5187189824834039,
 0.7981034825799765,
 0.11492615448904542,
 0.2293011735145295,
 0.2478782258684734,
 1.665381213899621,
 0.10630062198468959,
 0.22061806758577518,
 0.11766432969553642,
 1.9450691904746853,
 0.048399462993863884,
 0.8767765827427412,
 0.3832943862709437,
 0.25901156409608017]

In [61]:
sum(error)/len(error)

0.4896680471793621

# Size64

In [62]:
batc = []

for i in range(64) :
    batc.append(data[(data.index <= 64*(i+1) ) & (data.index >= 64*i)])

In [63]:
batch = [0 for i in range(64)]

for i in range(64) :
    X = batc[i].drop(['% Silica Concentrate'], axis = 1)
    Y = batc[i][['% Silica Concentrate']]
    X_2 = X.drop(X.index[-1:], axis=0)
    X_3 = X_2.reset_index(drop=True)
    Y_2 = Y.drop(Y.index[:1], axis=0)
    Y_3 = Y_2.reset_index(drop=True)
    batch[i] = pd.concat([X_3,Y_3], axis=1 )

In [64]:
train = []
test = []

for i in range(64) :
    train.append(batch[i][:62])
    test.append(batch[i][62:])

In [65]:
from sklearn.pipeline import Pipeline
from feature_engine.selection import DropConstantFeatures, DropDuplicateFeatures,SmartCorrelatedSelection

train_pro=[]

for i in range(64) :
    pip = Pipeline([('constant', DropConstantFeatures(tol=0.90)), ('duplicate', DropDuplicateFeatures()), ('correlated', SmartCorrelatedSelection(threshold=0.8))])
    a = pip.fit_transform(train[i].drop(['% Silica Concentrate'], axis=1))
    train_pro.append(a)
    train_pro[i]['% Silica Concentrate'] = train[i]['% Silica Concentrate']

In [66]:
test_pro = [0 for i in range(64)]

for i in range(64) :
    columns = train_pro[i].columns
    test_pro[i] = test[i][columns]
    test_pro[i]['% Silica Concentrate'] = test[i]['% Silica Concentrate']
    

In [67]:
error = []

a= 0
for i in range(64) :
    VAR_model = sm.tsa.VAR(train_pro[i])
    results = VAR_model.fit(1)
    forecast = results.forecast(train_pro[i].values[-1:], 2)
    
    a= 0
    for j in range(2) :
        a += (abs(forecast[j][-1] - test_pro[i]['% Silica Concentrate'].iloc[j]))
    error.append(a/2)
    
error

ValueError: x contains one or more constant columns. Column(s) 11 are constant. Adding a constant with trend='c' is not allowed.

In [None]:
sum(error)/len(error)