In [None]:
# Put the parent of the current working directory first on the import path so
# that modules living one level above this notebook can be imported below.
import sys
sys.path.insert(0, '..')

In [None]:
import pandas as pd
from torch.utils.data import DataLoader,Dataset, Subset
import numpy as np
import tft_model
from data_formatters import ts_dataset
from data_formatters.ts_dataset import TSDataset
import data_formatters.base
import expt_settings.configs
import importlib
from data_formatters import utils
import torch.optim as optim
import torch
from pandas import DataFrame
from typing import List, Dict

In [None]:
# Show the installed PyTorch version for reproducibility.
torch.__version__

In [None]:
# Show up to 1000 columns when rendering DataFrames.
# The fully-qualified option name is required: the short pattern 'max_columns'
# matches more than one option on newer pandas releases and raises OptionError.
pd.set_option('display.max_columns', 1000)

In [None]:
# Re-import the local modules so edits made on disk are picked up without
# restarting the kernel. The second call is the cell's last expression, so the
# reloaded `utils` module object is what gets displayed.
importlib.reload(tft_model)
importlib.reload(utils)

In [None]:
# --- Experiment configuration -------------------------------------------------
ExperimentConfig = expt_settings.configs.ExperimentConfig
config = ExperimentConfig('electricity', '../outputs')
data_formatter = config.make_data_formatter()

print("*** Training from defined parameters for {} ***".format('electricity'))

# --- Data loading -------------------------------------------------------------
# The first CSV column is used as the DataFrame index.
data_csv_path = '../data/hourly_electricity.csv'
print("Loading & splitting data...")
raw_data: DataFrame = pd.read_csv(data_csv_path, index_col=0)

# The formatter decides how rows are assigned to the three splits.
train, valid, test = data_formatter.split_data(raw_data)

# Number of samples the formatter recommends for the train / validation sets.
train_samples, valid_samples = data_formatter.get_num_samples_for_calibration()

# --- Default parameters -------------------------------------------------------
fixed_params: Dict = data_formatter.get_experiment_params()
params: Dict = data_formatter.get_default_model_params()

In [None]:
# Display the calibration sample counts reported by the data formatter.
train_samples, valid_samples

In [None]:
# Number of distinct series identifiers present in the training split.
len(train['id'].unique())

In [None]:
# Preview the first rows of the training split.
train.head()

In [None]:
# List the column names available in the training split.
train.columns

In [None]:
# Column roles passed to TSDataset.
id_col = 'id'
time_col = 'hours_from_start'
input_cols = ['power_usage', 'hour', 'day_of_week', 'hours_from_start', 'categorical_id']
target_col = 'power_usage'
static_cols = ['categorical_id']

# Window geometry: 192 total steps, the first 168 of which feed the encoder.
time_steps = 192
num_encoder_steps = 168
output_size = 1

# Upper bound on the number of training samples requested from TSDataset.
max_samples = 10000

# Derived from the column list so the two can never drift apart (evaluates to 5).
input_size = len(input_cols)

In [None]:
def _make_dataset(frame: DataFrame, sample_cap: int) -> TSDataset:
    """Build a TSDataset over `frame`, requesting at most `sample_cap` samples.

    All other arguments come from the notebook-level constants defined in the
    previous cell and are forwarded positionally in the order TSDataset expects.
    NOTE(review): the literal ``1`` sits between ``num_encoder_steps`` and
    ``output_size``; its meaning is defined by TSDataset - confirm there.
    """
    return ts_dataset.TSDataset(
        id_col, static_cols, time_col, input_cols, target_col,
        time_steps, sample_cap, input_size, num_encoder_steps,
        1, output_size, frame,
    )


# The training set uses the configured sample cap; validation uses 1000.
elect_train: TSDataset = _make_dataset(train, max_samples)
elect_valid: TSDataset = _make_dataset(valid, 1000)

In [None]:
# Batches are drawn in dataset order (no shuffling) by two worker processes.
# NOTE(review): shuffle is disabled for the training loader - confirm that
# TSDataset already randomises its samples, otherwise consider shuffle=True.
batch_size = 64
train_loader = DataLoader(elect_train, batch_size=batch_size, shuffle=False, num_workers=2)

In [None]:
# Grab the first batch for inspection and for the forward-pass smoke test.
# next(iter(...)) raises StopIteration right here if the loader is empty,
# instead of leaving `batch` undefined and failing in a later cell.
batch = next(iter(train_loader))

In [None]:
# NOTE: this cell rebinds two names used earlier in the notebook:
# `static_cols` (previously ['categorical_id']) and `config` (previously the
# ExperimentConfig instance). Re-running earlier cells afterwards will see
# these new values.
static_cols = ['meter']
categorical_cols = ['hour']
real_cols = ['power_usage','hour', 'day']

# Hyper-parameters handed to tft_model.TFT.
# NOTE(review): the keys 'time_varying_categoical_variables' and
# 'vailid_quantiles' are misspelled; presumably tft_model looks them up under
# exactly these spellings - confirm before renaming.
config = {
    'static_variables': len(static_cols),
    'time_varying_categoical_variables': 1,
    'time_varying_real_variables_encoder': 4,
    'time_varying_real_variables_decoder': 3,
    'num_masked_series': 1,
    'static_embedding_vocab_sizes': [369],
    'time_varying_embedding_vocab_sizes': [369],
    'embedding_dim': 8,
    'lstm_hidden_dimension': 160,
    'lstm_layers': 1,
    'dropout': 0.05,
    # Use the first GPU when one is available, otherwise fall back to the CPU.
    'device': torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
    'batch_size': 64,
    'encode_length': 168,
    'attn_heads': 4,
    'num_quantiles': 3,
    'vailid_quantiles': [0.1,0.5,0.9],
    'seq_length': 192,
}

In [None]:
# Display the model configuration dictionary.
config

In [None]:
# Instantiate the TFT model from the configuration dictionary.
model = tft_model.TFT(config)


In [None]:
# Smoke-test one forward pass on the sampled batch.
# The module is called directly rather than through .forward() so that
# nn.Module.__call__ (and any registered hooks) runs, matching how the model
# is invoked in the training loop.
(output, encoder_output, decoder_output,
 attn, attn_output_weights, embeddings_encoder, embeddings_decoder) = model(batch)

In [None]:
# Shape of the first tensor returned by the forward pass.
output.shape

In [None]:
# Quantile loss over the same quantiles the model was configured with
# ([0.1, 0.5, 0.9]); read from the config so the two cannot diverge.
q_loss_func = tft_model.QuantileLoss(config['vailid_quantiles'])

In [None]:
# Train with Adam for a fixed number of epochs, recording the mean quantile
# loss of each epoch in `losses`.
optimizer = optim.Adam(model.parameters(), lr=0.0001)
model.train()
epochs = 100
losses = []
for i in range(epochs):
    epoch_loss = []
    for batch in train_loader:
        # Gradients accumulate across backward() calls by default; clear them
        # so every optimizer step uses only the current batch's gradients.
        optimizer.zero_grad()

        output, encoder_ouput, decoder_output, attn, attn_weights, emb_enc, emb_dec = model(batch)

        # Move the targets to wherever the predictions live.
        targets = batch['outputs'].to(output.device)

        # Flatten to (N, num_quantiles) predictions against N target values.
        loss = q_loss_func(
            output.view(-1, output.shape[-1]),
            targets[:, :, 0].flatten().float(),
        )
        loss.backward()
        optimizer.step()
        epoch_loss.append(loss.item())

    mean_epoch_loss = np.mean(epoch_loss)
    losses.append(mean_epoch_loss)
    print(mean_epoch_loss)
    

In [None]:
# Inference pass on the last batch seen during training.
# Switch to eval mode so dropout is disabled, and skip autograd bookkeeping
# since no gradients are needed for inspection/plotting. The training cell
# calls model.train() again if it is re-run.
model.eval()
with torch.no_grad():
    output, encoder_ouput, decoder_output, attn, attn_weights, emb_enc, emb_dec = model(batch)

In [None]:
# Shape of the predictions produced by the forward pass above.
output.shape

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Pick a random sample from the batch that `output` actually holds. The batch
# left over from the training loop can be smaller than the nominal batch size,
# so the upper bound comes from the tensor rather than a hard-coded 64.
ind = np.random.choice(output.shape[0])
print(ind)

# One curve per predicted quantile (last axis of `output`), plus the target.
sample_preds = output[ind].detach().cpu().numpy()
plt.plot(sample_preds[:, 0], label='pred_1')
plt.plot(sample_preds[:, 1], label='pred_5')
plt.plot(sample_preds[:, 2], label='pred_9')

plt.plot(batch['outputs'][ind,:,0], label='true')
plt.legend()

In [None]:
# Shape of the attention weights returned by the model.
attn_weights.shape

In [None]:
# Attention weights of the first entry along the leading axis, drawn as a matrix.
plt.matshow(attn_weights[0].detach().cpu().numpy())

In [None]:
# Same attention slice as an image (imshow defaults instead of matshow's).
plt.imshow(attn_weights[0].detach().cpu().numpy())