In [1]:
import sys
sys.path.insert(0, '..')

In [2]:
import pandas as pd
from torch.utils.data import DataLoader,Dataset, Subset
import numpy as np
from numpy import ndarray
import tft_model
from tft_model import TFT
from data_formatters import ts_dataset
from data_formatters.ts_dataset import TSDataset
import data_formatters.base
import expt_settings.configs
import importlib
from data_formatters import utils
import torch.optim as optim
import torch
from torch import Tensor
from pandas import DataFrame
from typing import List, Dict
import os
from datetime import datetime

EXTRACT TESTSET

In [3]:
ExperimentConfig = expt_settings.configs.ExperimentConfig

config = ExperimentConfig('electricity', '../outputs')
data_formatter = config.make_data_formatter()


print("*** Training from defined parameters for {} ***".format('electricity'))
data_csv_path = '../data/hourly_electricity.csv'
print("Loading & splitting data...")
raw_data: DataFrame = pd.read_csv(data_csv_path, index_col=0)
train, valid, test = data_formatter.split_data(raw_data)
test['date'] = test['date'].astype('datetime64[s]')

*** Training from defined parameters for electricity ***
Loading & splitting data...


  mask |= (ar1 == a)


Formatting train-valid-test splits.
Setting scalers with training data...


In [4]:
test.head()

Unnamed: 0,power_usage,t,days_from_start,categorical_id,date,id,hour,day,day_of_week,month,hours_from_start,categorical_day_of_week,categorical_hour
23208,3.313606,31968.0,1332,0,2014-08-25 00:00:00,MT_001,-1.661325,25,-1.503741,8,2.001283,0,0
23209,3.160683,31969.0,1332,0,2014-08-25 01:00:00,MT_001,-1.516862,25,-1.503741,8,2.001942,0,1
23210,3.160683,31970.0,1332,0,2014-08-25 02:00:00,MT_001,-1.372399,25,-1.503741,8,2.002601,0,2
23211,3.313606,31971.0,1332,0,2014-08-25 03:00:00,MT_001,-1.227936,25,-1.503741,8,2.00326,0,3
23212,3.237144,31972.0,1332,0,2014-08-25 04:00:00,MT_001,-1.083473,25,-1.503741,8,2.00392,0,4


In [5]:
test.dtypes

power_usage                       float64
t                                 float64
days_from_start                     int64
categorical_id                      int32
date                       datetime64[ns]
id                                 object
hour                              float64
day                                 int64
day_of_week                       float64
month                               int64
hours_from_start                  float64
categorical_day_of_week             int64
categorical_hour                    int64
dtype: object

In [6]:
# test = test.sort_values(by=['categorical_id', 'date'], ascending=True, ignore_index=True)

In [7]:
test.head()

Unnamed: 0,power_usage,t,days_from_start,categorical_id,date,id,hour,day,day_of_week,month,hours_from_start,categorical_day_of_week,categorical_hour
23208,3.313606,31968.0,1332,0,2014-08-25 00:00:00,MT_001,-1.661325,25,-1.503741,8,2.001283,0,0
23209,3.160683,31969.0,1332,0,2014-08-25 01:00:00,MT_001,-1.516862,25,-1.503741,8,2.001942,0,1
23210,3.160683,31970.0,1332,0,2014-08-25 02:00:00,MT_001,-1.372399,25,-1.503741,8,2.002601,0,2
23211,3.313606,31971.0,1332,0,2014-08-25 03:00:00,MT_001,-1.227936,25,-1.503741,8,2.00326,0,3
23212,3.237144,31972.0,1332,0,2014-08-25 04:00:00,MT_001,-1.083473,25,-1.503741,8,2.00392,0,4


In [8]:
test.dtypes

power_usage                       float64
t                                 float64
days_from_start                     int64
categorical_id                      int32
date                       datetime64[ns]
id                                 object
hour                              float64
day                                 int64
day_of_week                       float64
month                               int64
hours_from_start                  float64
categorical_day_of_week             int64
categorical_hour                    int64
dtype: object

In [9]:
test.shape, train.shape, valid.shape

((123984, 13), (1923536, 13), (274536, 13))

SET CONFIG

In [10]:
static_cols = ['meter']
categorical_cols = ['hour']
real_cols = ['power_usage','hour', 'day']
config = {}
config['static_variables'] = len(static_cols)
config['time_varying_categoical_variables'] = 1
config['time_varying_real_variables_encoder'] = 4
config['time_varying_real_variables_decoder'] = 3
config['num_masked_series'] = 1
config['static_embedding_vocab_sizes'] = [369]
config['time_varying_embedding_vocab_sizes'] = [369]
config['embedding_dim'] = 8
config['lstm_hidden_dimension'] = 160
config['lstm_layers'] = 1
config['dropout'] = 0.05
config['device'] = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
config['batch_size'] = 64
config['encode_length'] = 168
config['attn_heads'] = 4
config['num_quantiles'] = 3
config['vailid_quantiles'] = [0.1,0.5,0.9]
config['seq_length'] = 192

LOAD TRAINED MODEL

In [11]:
model_path = r'C:\Users\Lorenzo\savedmodels_tft_pytorch_electricity'

In [12]:
def load_model(config: Dict, model_path: str) -> TFT:
    model: TFT = TFT(config)
    model.load_state_dict(torch.load(os.path.join(model_path, 'TemporalFusionTransformer_electricity.pt')))
    
    return model

In [13]:
model = load_model(config, model_path)

  "num_layers={}".format(dropout, num_layers))


PUT TEST DATA IN DATASET

In [14]:
id_col = 'id'
time_col='date'
input_cols =['power_usage', 'hour', 'day_of_week', 'hours_from_start', 'categorical_id']
target_col = 'power_usage'
static_cols = ['categorical_id']
time_steps=192
num_encoder_steps = 168
output_size = 1
max_samples = 10000
input_size = 5

In [15]:
elect_test: TSDataset = ts_dataset.TSDataset(id_col, static_cols, time_col, input_cols,
                                                  target_col, time_steps, 25000,
                                                  input_size, num_encoder_steps, len(static_cols), output_size, test)

Getting valid sampling locations.
Extracting 25000 samples...
10000 of 25000 samples done...
20000 of 25000 samples done...


In [16]:
batch_size=64
test_loader = DataLoader(
            elect_test,
            batch_size=batch_size,
            num_workers=2,
            shuffle=False
        )

In [38]:
j = 0
with torch.no_grad():
    for batch in test_loader:
        inputs: Tensor = batch.get('inputs')
        outputs: Tensor = batch.get('outputs')
        identifier: Tensor = batch.get('identifier')
        time: Tensor = batch.get('time')
        print("Inputs shape is ", inputs.shape)
        print("Outputs shape is ", outputs.shape)
        print("Identifier shape is ", identifier.shape)
        print("Time shape is ", time.shape)
        output, encoder_ouput, decoder_output, attn, attn_weights, emb_enc, emb_dec = model(batch)
        t1: List = output[:,0,1].detach().cpu().numpy().tolist()
        t2: List = output[:,1,1].detach().cpu().numpy().tolist()
        t3: List = output[:,2,1].detach().cpu().numpy().tolist()
        t4: List = output[:,3,1].detach().cpu().numpy().tolist()
        t5: List = output[:,4,1].detach().cpu().numpy().tolist()
        t6: List = output[:,5,1].detach().cpu().numpy().tolist()
        t7: List = output[:,6,1].detach().cpu().numpy().tolist()
        t8: List = output[:,7,1].detach().cpu().numpy().tolist()
        t9: List = output[:,8,1].detach().cpu().numpy().tolist()
        t10: List = output[:,9,1].detach().cpu().numpy().tolist()
        t11: List = output[:,10,1].detach().cpu().numpy().tolist()
        t12: List = output[:,11,1].detach().cpu().numpy().tolist()
        t13: List = output[:,12,1].detach().cpu().numpy().tolist()
        t14: List = output[:,13,1].detach().cpu().numpy().tolist()
        t15: List = output[:,14,1].detach().cpu().numpy().tolist()
        t16: List = output[:,15,1].detach().cpu().numpy().tolist()
        t17: List = output[:,16,1].detach().cpu().numpy().tolist()
        t18: List = output[:,17,1].detach().cpu().numpy().tolist()
        t19: List = output[:,18,1].detach().cpu().numpy().tolist()
        t20: List = output[:,19,1].detach().cpu().numpy().tolist()
        t21: List = output[:,20,1].detach().cpu().numpy().tolist()
        t22: List = output[:,21,1].detach().cpu().numpy().tolist()
        t23: List = output[:,22,1].detach().cpu().numpy().tolist()
        t24: List = output[:,23,1].detach().cpu().numpy().tolist()
        break

Inputs shape is  torch.Size([64, 192, 5])
Outputs shape is  torch.Size([64, 24, 1])
Identifier shape is  torch.Size([64, 192, 1])
Time shape is  torch.Size([64, 192, 1])


In [20]:
batch_prediction_times: List = [pd.Timestamp(i).strftime('%Y-%m-%d %X') for i in time[:,167,0].detach().cpu().numpy().tolist()]

In [23]:
batch_identifiers: List = [int(i) for i in identifier[:,0,0].detach().cpu().numpy().tolist()]

In [39]:
data: DataFrame = DataFrame({'prediction_ts': batch_prediction_times, 'identifier': batch_identifiers, 't1': t1, 't2': t2, 't3': t3, 't4': t4, 
                             't5': t5, 't6': t6, 't7': t7, 't8': t8, 't9': t9, 't10': t10, 't11': t11, 't12': t12, 't13': t13,
                            't14': t14, 't15': t15, 't16': t16, 't17': t17, 't18': t18, 't19': t19, 't20': t20, 't21': t21, 
                             't22': t22, 't23': t23, 't24': t24})

In [40]:
data

Unnamed: 0,prediction_ts,identifier,t1,t2,t3,t4,t5,t6,t7,t8,...,t15,t16,t17,t18,t19,t20,t21,t22,t23,t24
0,2014-09-05 13:00:00,293,0.664330,0.674312,0.831760,0.954479,1.115294,0.877375,0.806636,0.425319,...,-1.212566,-1.101851,-0.421896,0.201461,0.161605,0.914541,1.056299,1.200662,1.178375,1.075056
1,2014-09-04 21:00:00,281,0.677726,-0.053359,-0.926792,-1.113827,-1.301150,-1.597138,-1.446874,-1.418132,...,0.770133,0.823105,0.762902,0.826889,0.886053,0.826117,0.984079,0.948428,0.986278,0.919410
2,2014-09-04 17:00:00,313,0.666584,1.171833,1.129977,0.914023,-0.023720,-0.888714,-1.318206,-1.284654,...,-0.258192,0.432018,0.615242,0.816919,1.483193,1.260792,0.871367,0.706135,0.883221,0.886884
3,2014-09-03 20:00:00,269,0.692616,0.446119,-0.255468,-1.343559,-1.517584,-1.644229,-1.742829,-1.714427,...,0.851947,1.082472,0.879465,0.823283,0.931387,1.000018,1.083994,1.124100,1.124258,0.913123
4,2014-09-03 14:00:00,146,1.459434,1.444598,1.426630,1.336490,1.223527,1.279622,1.272123,0.675841,...,-0.217227,-0.139248,0.313830,0.509848,0.941778,1.105010,0.650329,1.381721,1.463401,1.354505
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,2014-09-03 20:00:00,223,0.353561,0.208051,-0.399741,-1.213230,-1.484370,-1.524887,-1.531624,-1.522007,...,0.524089,0.968793,0.595579,0.637755,0.459226,0.968369,1.041623,1.315105,0.610791,0.414136
60,2014-09-05 03:00:00,104,-1.115715,-1.090907,-1.059091,-1.018141,-0.713959,-0.330141,0.712739,1.006475,...,1.573520,1.196876,1.049011,0.482169,-0.057345,-0.446531,-1.066156,-1.103370,-1.141290,-1.169959
61,2014-09-03 19:00:00,247,0.689136,0.018444,-0.838082,-1.139033,-1.168990,-1.203106,-1.209110,-1.247422,...,0.415964,0.486450,0.791185,0.583518,0.466436,0.705560,0.679836,0.808588,0.853415,0.807823
62,2014-09-06 11:00:00,169,1.099511,0.921264,0.907997,0.854949,0.954490,1.048596,1.087947,0.857590,...,-1.346628,-1.375201,-1.429575,-1.328127,-0.760011,-0.126257,0.095028,0.857789,1.205497,1.280085


In [30]:
# instantiate prediction dataframe
predictions_df: DataFrame = DataFrame(data=None, columns=['prediction_ts', 'identifier', 't1', 't2', 't3', 't4', 't5', 't6', 't7', 't8', 't9', 't10', 't11', 't12'])

In [None]:
with torch.no_grad():
    for batch in test_loader:
#         outputs: Tensor = batch.get('outputs')
        identifier: Tensor = batch.get('identifier')
        time: Tensor = batch.get('time')
        # save predictions timestamps
        batch_prediction_times: List = [pd.Timestamp(i).strftime('%Y-%m-%d %X') for i in time[:,167,0].detach().cpu().numpy().tolist()]
        batch_identifiers: List = [int(i) for i in identifier[:,0,0].detach().cpu().numpy().tolist()] 
        # forward pass
        output, encoder_ouput, decoder_output, attn, attn_weights, emb_enc, emb_dec = model(batch)
        # extract 50th percentile
        predictions: List = output[:,:,1].detach().cpu().numpy().flatten().tolist()
        # fill dataframe
        predictions_df.append()

In [None]:
len(outputs), len(timestamps)

In [None]:
print(timestamps[:10])

In [None]:
# any duplicates in lists?


In [None]:
# pd.Timestamp(1.4092164e+18)