In [1]:
import sys
sys.path.insert(0, '..')

In [2]:
import pandas as pd
from torch.utils.data import DataLoader,Dataset, Subset
import numpy as np
from numpy import ndarray
import tft_model
from tft_model import TFT
from data_formatters import ts_dataset
from data_formatters.ts_dataset import TSDataset
import data_formatters.base
import expt_settings.configs
import importlib
from data_formatters import utils
import torch.optim as optim
import torch
from torch import Tensor
from pandas import DataFrame
from typing import List, Dict
import os
from datetime import datetime

EXTRACT TEST SET

In [3]:
# Build the experiment configuration for the electricity dataset and use its
# data formatter to split the raw CSV into train / validation / test frames.
# NOTE: the name `config` is rebound to a plain dict in the "SET CONFIG" cell.
ExperimentConfig = expt_settings.configs.ExperimentConfig

config = ExperimentConfig('electricity', '../outputs')
data_formatter = config.make_data_formatter()


print("*** Training from defined parameters for {} ***".format('electricity'))
data_csv_path = '../data/hourly_electricity.csv'
print("Loading & splitting data...")
raw_data: DataFrame = pd.read_csv(data_csv_path, index_col=0)
train, valid, test = data_formatter.split_data(raw_data)
# Pin nanosecond resolution explicitly: later cells decode the raw time values
# with pd.Timestamp(<number>), which interprets a bare number as nanoseconds.
# Since pandas 2.0, 'datetime64[s]' is honoured literally (second resolution),
# whereas older versions silently converted it to nanoseconds.
# `assign` returns a new frame, so the object handed back by split_data is not
# mutated in place.
test = test.assign(date=test['date'].astype('datetime64[ns]'))

*** Training from defined parameters for electricity ***
Loading & splitting data...


  mask |= (ar1 == a)


Formatting train-valid-test splits.
Setting scalers with training data...


In [4]:
# Peek at the first rows of the test split.
test.head()

Unnamed: 0,power_usage,t,days_from_start,categorical_id,date,id,hour,day,day_of_week,month,hours_from_start,categorical_day_of_week,categorical_hour
23208,3.313606,31968.0,1332,0,2014-08-25 00:00:00,MT_001,-1.661325,25,-1.503741,8,2.001283,0,0
23209,3.160683,31969.0,1332,0,2014-08-25 01:00:00,MT_001,-1.516862,25,-1.503741,8,2.001942,0,1
23210,3.160683,31970.0,1332,0,2014-08-25 02:00:00,MT_001,-1.372399,25,-1.503741,8,2.002601,0,2
23211,3.313606,31971.0,1332,0,2014-08-25 03:00:00,MT_001,-1.227936,25,-1.503741,8,2.00326,0,3
23212,3.237144,31972.0,1332,0,2014-08-25 04:00:00,MT_001,-1.083473,25,-1.503741,8,2.00392,0,4


In [5]:
# Column dtypes of the test split.
test.dtypes

power_usage                       float64
t                                 float64
days_from_start                     int64
categorical_id                      int32
date                       datetime64[ns]
id                                 object
hour                              float64
day                                 int64
day_of_week                       float64
month                               int64
hours_from_start                  float64
categorical_day_of_week             int64
categorical_hour                    int64
dtype: object

In [6]:
# test = test.sort_values(by=['categorical_id', 'date'], ascending=True, ignore_index=True)

In [7]:
# Re-inspect the first rows; the sort in the previous cell is commented out,
# so the frame is unchanged.
test.head()

Unnamed: 0,power_usage,t,days_from_start,categorical_id,date,id,hour,day,day_of_week,month,hours_from_start,categorical_day_of_week,categorical_hour
23208,3.313606,31968.0,1332,0,2014-08-25 00:00:00,MT_001,-1.661325,25,-1.503741,8,2.001283,0,0
23209,3.160683,31969.0,1332,0,2014-08-25 01:00:00,MT_001,-1.516862,25,-1.503741,8,2.001942,0,1
23210,3.160683,31970.0,1332,0,2014-08-25 02:00:00,MT_001,-1.372399,25,-1.503741,8,2.002601,0,2
23211,3.313606,31971.0,1332,0,2014-08-25 03:00:00,MT_001,-1.227936,25,-1.503741,8,2.00326,0,3
23212,3.237144,31972.0,1332,0,2014-08-25 04:00:00,MT_001,-1.083473,25,-1.503741,8,2.00392,0,4


In [8]:
# Re-inspect the column dtypes; the sort cell above is commented out.
test.dtypes

power_usage                       float64
t                                 float64
days_from_start                     int64
categorical_id                      int32
date                       datetime64[ns]
id                                 object
hour                              float64
day                                 int64
day_of_week                       float64
month                               int64
hours_from_start                  float64
categorical_day_of_week             int64
categorical_hour                    int64
dtype: object

In [9]:
# (rows, columns) of the test, train and validation splits, in that order.
test.shape, train.shape, valid.shape

((123984, 13), (1923536, 13), (274536, 13))

SET CONFIG

In [10]:
# Column groupings; only `static_cols` is read below (for its length).
static_cols = ['meter']
categorical_cols = ['hour']
real_cols = ['power_usage', 'hour', 'day']

# Hyper-parameters handed to the TFT constructor. The key spellings
# (including 'categoical' and 'vailid') are kept exactly as written, because
# they are looked up by name outside this notebook.
config = {
    'static_variables': len(static_cols),
    'time_varying_categoical_variables': 1,
    'time_varying_real_variables_encoder': 4,
    'time_varying_real_variables_decoder': 3,
    'num_masked_series': 1,
    'static_embedding_vocab_sizes': [369],
    'time_varying_embedding_vocab_sizes': [369],
    'embedding_dim': 8,
    'lstm_hidden_dimension': 160,
    'lstm_layers': 1,
    'dropout': 0.05,
    # Use the first GPU when one is available, otherwise fall back to CPU.
    'device': torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
    'batch_size': 64,
    'encode_length': 168,
    'attn_heads': 4,
    'num_quantiles': 3,
    'vailid_quantiles': [0.1, 0.5, 0.9],
    'seq_length': 192,
}

LOAD TRAINED MODEL

In [11]:
# Directory holding the trained checkpoint. Set the TFT_MODEL_DIR environment
# variable to point at a different location; when it is unset, the original
# machine-specific default is used.
model_path = os.environ.get(
    'TFT_MODEL_DIR',
    r'C:\Users\Lorenzo\savedmodels_tft_pytorch_electricity',
)

In [12]:
def load_model(config: Dict, model_path: str) -> TFT:
    """Build a TFT from ``config`` and restore its trained weights for inference.

    Args:
        config: Hyper-parameter dictionary passed to the ``TFT`` constructor.
            When it contains a ``'device'`` entry, the checkpoint tensors are
            mapped onto that device while loading.
        model_path: Directory containing
            ``TemporalFusionTransformer_electricity.pt``.

    Returns:
        The model with the checkpoint's state dict loaded, switched to
        evaluation mode.
    """
    model: TFT = TFT(config)

    checkpoint_file = os.path.join(model_path, 'TemporalFusionTransformer_electricity.pt')
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only
    # machine; a missing 'device' entry yields None, i.e. torch's default.
    state_dict = torch.load(checkpoint_file, map_location=config.get('device'))
    model.load_state_dict(state_dict)

    # This notebook only runs inference: eval() turns off dropout, which
    # torch.no_grad() alone does not do.
    model.eval()

    return model

In [13]:
# Instantiate the TFT from `config` and restore the weights stored under `model_path`.
model = load_model(config, model_path)

  "num_layers={}".format(dropout, num_layers))


PUT TEST DATA IN DATASET

In [14]:
# Column roles used when building the test dataset.
id_col = 'id'
time_col = 'date'
target_col = 'power_usage'
static_cols = ['categorical_id']
input_cols = [
    'power_usage',
    'hour',
    'day_of_week',
    'hours_from_start',
    'categorical_id',
]

# Window geometry: 192 total steps, of which the first 168 are encoder steps.
time_steps = 192
num_encoder_steps = 168

output_size = 1
input_size = len(input_cols)  # 5 input features
# NOTE(review): the TSDataset construction below passes the literal 25000,
# not this value - confirm which sample count is intended.
max_samples = 10000

In [15]:
# Wrap the test split in the sampling dataset. Arguments are passed
# positionally, in the same order as before.
elect_test: TSDataset = TSDataset(
    id_col,
    static_cols,
    time_col,
    input_cols,
    target_col,
    time_steps,
    25000,  # sample count (the run log reports "Extracting 25000 samples...")
    input_size,
    num_encoder_steps,
    len(static_cols),
    output_size,
    test,
)

Getting valid sampling locations.
Extracting 25000 samples...
10000 of 25000 samples done...
20000 of 25000 samples done...


In [16]:
batch_size = 64
# Serve the test samples in their stored order (no shuffling), using two
# worker processes.
test_loader = DataLoader(elect_test, batch_size=batch_size, shuffle=False, num_workers=2)

In [17]:
# Pull a single batch to sanity-check the tensor shapes the dataset yields.
# next(iter(...)) fetches exactly one batch and raises StopIteration if the
# loader is empty, instead of silently leaving the names below undefined.
# The names `batch`, `inputs`, `outputs`, `identifier` and `time` stay bound
# because later cells inspect them.
batch = next(iter(test_loader))
inputs: Tensor = batch.get('inputs')
outputs: Tensor = batch.get('outputs')
identifier: Tensor = batch.get('identifier')
time: Tensor = batch.get('time')
print("Inputs shape is ", inputs.shape)
print("Outputs shape is ", outputs.shape)
print("Identifier shape is ", identifier.shape)
print("Time shape is ", time.shape)

Inputs shape is  torch.Size([64, 192, 5])
Outputs shape is  torch.Size([64, 24, 1])
Identifier shape is  torch.Size([64, 192, 1])
Time shape is  torch.Size([64, 192, 1])


In [23]:
# For the first sample of the batch, show everything after the encoder steps
# (num_encoder_steps == 168): the input features, the identifier values and
# the time values rendered as readable timestamps.
(
    inputs[0, num_encoder_steps:, :],
    identifier[0, num_encoder_steps:, :],
    [
        pd.Timestamp(raw_time).strftime('%Y-%m-%d %X')
        for raw_time in time[0, num_encoder_steps:, 0].detach().cpu().numpy().tolist()
    ],
)

(tensor([[-1.3505e-01,  2.1669e-01,  5.0430e-01,  2.1838e+00,  1.4800e+02],
         [ 1.0566e+00,  3.6116e-01,  5.0430e-01,  2.1845e+00,  1.4800e+02],
         [ 1.1650e+00,  5.0562e-01,  5.0430e-01,  2.1852e+00,  1.4800e+02],
         [ 9.4831e-01,  6.5008e-01,  5.0430e-01,  2.1858e+00,  1.4800e+02],
         [ 8.1623e-02,  7.9455e-01,  5.0430e-01,  2.1865e+00,  1.4800e+02],
         [ 8.3998e-01,  9.3901e-01,  5.0430e-01,  2.1871e+00,  1.4800e+02],
         [ 1.0566e+00,  1.0835e+00,  5.0430e-01,  2.1878e+00,  1.4800e+02],
         [-2.6713e-02,  1.2279e+00,  5.0430e-01,  2.1885e+00,  1.4800e+02],
         [ 1.1650e+00,  1.3724e+00,  5.0430e-01,  2.1891e+00,  1.4800e+02],
         [ 1.1650e+00,  1.5169e+00,  5.0430e-01,  2.1898e+00,  1.4800e+02],
         [ 8.1623e-02,  1.6613e+00,  5.0430e-01,  2.1904e+00,  1.4800e+02],
         [ 1.5983e+00, -1.6613e+00,  1.0063e+00,  2.1911e+00,  1.4800e+02],
         [ 5.1497e-01, -1.5169e+00,  1.0063e+00,  2.1918e+00,  1.4800e+02],
         [ 4

In [21]:
# Cross-check against the raw frame: first row for categorical_id 148 at
# 2014-08-29 14:00:00.
test.loc[
    (test['categorical_id'] == 148) & (test['date'] == '2014-08-29 14:00:00')
].iloc[0]

power_usage                           2.031676
t                                      32078.0
days_from_start                           1336
categorical_id                             148
date                       2014-08-29 14:00:00
id                                      MT_149
hour                                  0.361158
day                                         29
day_of_week                           0.504303
month                                        8
hours_from_start                      2.073782
categorical_day_of_week                      4
categorical_hour                            14
Name: 3557325, dtype: object

In [None]:
# Identifier values for every time step of the first sample in the batch.
identifier[0,:,:]

In [None]:
# Run the model over the whole test loader and collect, per forecast step,
# the median prediction and the matching timestamp string.
outputs = []
timestamps = []

# Number of forecast steps at the end of each window (192 - 168 = 24).
decoder_steps = time_steps - num_encoder_steps

# Inference only: make sure dropout is disabled. torch.no_grad() switches off
# gradient tracking but does not change the train/eval mode.
model.eval()
with torch.no_grad():
    for batch in test_loader:
        # Raw time values of the forecast steps, flattened across the batch.
        batch_times: Tensor = batch.get('time')
        raw_ts: List = batch_times[:, -decoder_steps:, 0].detach().cpu().numpy().flatten().tolist()
        timestamps.extend(pd.Timestamp(ts).strftime('%Y-%m-%d %X') for ts in raw_ts)

        # Only `output` is consumed here; the other returned values are
        # unpacked under their original names but unused in this cell.
        output, encoder_ouput, decoder_output, attn, attn_weights, emb_enc, emb_dec = model(batch)
        # extract 50th percentile (index 1 of the quantiles [0.1, 0.5, 0.9])
        predictions: List = output[:, :, 1].detach().cpu().numpy().flatten().tolist()
        outputs.extend(predictions)

In [None]:
# Number of collected predictions and of collected timestamps.
len(outputs), len(timestamps)

In [None]:
# Show the first ten formatted timestamps.
print(timestamps[:10])

In [None]:
# TODO: check whether `timestamps` (and `outputs`) contain duplicate entries.


In [None]:
# pd.Timestamp(1.4092164e+18)