In [1]:
import numpy as np
import torch
from matplotlib import pyplot as plt

from data_handling.data_loader import load_mavir_data
from trainer_lib import Grid, transformer_grid_search, TrainerOptions, GridSearchOptions
from models import Transformer
import utils

In [2]:
# Read the MAVIR CSV and replace the 'Power' column with the output of
# utils.min_max_norm (presumably rescaled to [0, 1] — confirm in utils).
df = load_mavir_data('data/mavir_data/mavir.csv')
power_scaled = utils.min_max_norm(df['Power'])
df['Power'] = power_scaled

# Draw the working sample: 5000 entries with start_idx=0 (exact sampling
# semantics are defined by utils.sample).
sample = utils.sample(df, 5000, start_idx=0)

# Disabled experiment: EEMD decomposition of the sampled power signal.
# imfs, residue = utils.apply_eemd(sample['Power'].to_numpy(), spline_kind='akima')

In [3]:
# Univariate model input: the sampled 'Power' values as float32 with a
# trailing feature axis of size 1 appended (shape (n, 1) for a 1-D series).
power_values = sample['Power'].to_numpy()
training_data = np.expand_dims(power_values, axis=-1).astype(np.float32)

# Disabled experiments: inputs built from the EEMD components (need `imfs` /
# `residue` from the commented-out decomposition in the previous cell).
# training_data_decomp = np.array(np.c_[imfs.transpose(), residue[...,np.newaxis]], dtype=np.float32)
# training_data_decomp_rem = np.array(np.c_[imfs[1:].transpose(), residue[...,np.newaxis]], dtype=np.float32)
# training_data_combined = np.array(np.c_[sample['Power'].to_numpy()[...,np.newaxis], imfs.transpose(), residue[...,np.newaxis]], dtype=np.float32)
# training_data_combined_rem = np.array(np.c_[sample['Power'].to_numpy()[...,np.newaxis], imfs[1:].transpose(), residue[...,np.newaxis]], dtype=np.float32)
# print(training_data.shape)
# print(training_data.dtype)
# print(training_data_decomp.shape)
# print(training_data_decomp.dtype)

In [4]:
# Hyper-parameter search space. Each key maps to the list of candidate values
# handed to Grid; the trailing comments record values tried in earlier runs.
params = dict(
    src_size=[1],
    tgt_size=[1],
    d_model=[256],          # , 256, 512
    num_heads=[2],          # , 4, 8
    num_layers=[2],         # , 2, 3
    d_ff=[512, 1024],       # , 1024, 2048
    src_seq_length=[24],    # , 96
    tgt_seq_length=[1],
    src_window=[4, 8],
    tgt_window=[1],
    dropout=[0.2],          # , 0.1, 0.15, 0.2
)

grid = Grid(params)
# One generated name per grid entry; the second argument (42) is presumably a
# seed — confirm in utils.generate_name.
names = utils.generate_name(len(grid), 42)

# Per-run optimisation settings shared by every model in the grid.
training_opts = TrainerOptions(
    batch_size=8, epochs=30,
    learning_rate=1e-4, weight_decay=1e-4,
    warmup_steps=10, warmup_start_factor=1e-6,
    gradient_accumulation_steps=8,
    early_stopping_patience=5, early_stopping_min_delta=0.01,
    save_every_n_epochs=1,
    save_path='',
)

# Settings that apply to the search as a whole (output location, validation
# split, windowing step, seed, start-token usage).
grid_search_opts = GridSearchOptions(
    root_save_path='./trained/regular/',
    valid_split=0.2,
    window_step_size=4,
    random_seed=42,
    use_start_token=True,
)

# Run the search; the later cells read 'name', 'params', 'metrics' and 'model'
# from each element of the result.
models = transformer_grid_search(grid, training_data, training_opts, grid_search_opts)

Train size: 981, Validation size: 245
Epoch: 1; Learning rate: [1.0000090000000002e-05]; Train - MSE: 1.6198442498358292; Eval - MSE: 1.375135604412325, RMSE: 1.1698223876868619, MAE: 1.1401000695843848, MAPE: 1.7430111002032154
<class 'float'> <class 'float'> <class 'float'> <class 'float'>
Epoch: 2; Learning rate: [2.0000080000000004e-05]; Train - MSE: 1.2470769828897184; Eval - MSE: 0.231626127756411, RMSE: 0.47710820384691816, MAE: 0.4209099979169907, MAPE: 0.6470923845253022
<class 'float'> <class 'float'> <class 'float'> <class 'float'>
Epoch: 3; Learning rate: [3.000007000000001e-05]; Train - MSE: 0.5387142442348531; Eval - MSE: 0.2847975055056234, RMSE: 0.5250039394404606, MAE: 0.45712650687463824, MAPE: 0.676776083546002
<class 'float'> <class 'float'> <class 'float'> <class 'float'>
Epoch: 4; Learning rate: [4.000006000000001e-05]; Train - MSE: 0.5247753731664119; Eval - MSE: 0.1987481867113421, RMSE: 0.43613749143966335, MAE: 0.35645473339865286, MAPE: 0.5248547409385735
<cl



Epoch: 10; Learning rate: [0.0001]; Train - MSE: 0.25072620563754217; Eval - MSE: 0.0882181819648512, RMSE: 0.293912947885276, MAE: 0.27225491404533386, MAPE: 0.4085922823573557
<class 'float'> <class 'float'> <class 'float'> <class 'float'>
Epoch: 11; Learning rate: [9.900000000000001e-05]; Train - MSE: 0.2170159412108787; Eval - MSE: 0.06177007166608687, RMSE: 0.24560112442230458, MAE: 0.22092972695827484, MAPE: 0.33733100894721324
<class 'float'> <class 'float'> <class 'float'> <class 'float'>
Epoch: 12; Learning rate: [9.801e-05]; Train - MSE: 0.18963919122650363; Eval - MSE: 0.12190686358559517, RMSE: 0.34532467178993076, MAE: 0.30477214628650295, MAPE: 0.46951591020439143
<class 'float'> <class 'float'> <class 'float'> <class 'float'>
Epoch: 13; Learning rate: [9.70299e-05]; Train - MSE: 0.17372562751416273; Eval - MSE: 0.04544945013138555, RMSE: 0.21020193238336593, MAE: 0.18530190567816457, MAPE: 0.2876332571083627
<class 'float'> <class 'float'> <class 'float'> <class 'float'>

KeyboardInterrupt: 

In [None]:

# Disabled experiments: evaluation curves for the decomposition-based model
# sets (these lists are not created anywhere in the active cells).
#for model in models_decomp_combined:
#    plt.plot(model['metrics']['eval_loss'], label=f'{model["name"]} - {model["params"]["enc_seq_length"]} - combined')

#for model in models_decomp_combined_rem:
#    plt.plot(model['metrics']['eval_loss'], label=f'{model["name"]} - {model["params"]["enc_seq_length"]} - combined rem')

#for model in models_decomp_rem:
#    plt.plot(model['metrics']['eval_loss'], label=f'{model["name"]} - {model["params"]["enc_seq_length"]} - rem')

#for model in models_decomp:
#    plt.plot(model['metrics']['eval_loss'], label=f'{model["name"]} - {model["params"]["enc_seq_length"]} - decomp')

# Overlay the per-epoch evaluation and training MSE of every trained model and
# remember the longest curve so each epoch index gets its own x tick.
max_len = 0
for run in models:
    eval_mse = run['metrics']['eval']['MSE']
    train_mse = run['metrics']['train']['MSE']
    label_stem = f'{run["name"]} - {run["params"]["src_seq_length"]} - normal'

    plt.plot(eval_mse, label=f'{label_stem} - eval')
    plt.plot(np.arange(len(train_mse)), train_mse, label=f'{label_stem} - train')

    max_len = max(max_len, len(eval_mse), len(train_mse))

plt.xticks(np.arange(max_len))
plt.xlabel('epochs')
plt.ylabel('mse')
plt.legend()
plt.show()

In [None]:
from trainer_lib.datasets import TimeSeriesWindowedTensorDataset, TimeSeriesWindowedDatasetConfig

# shift: index of the first dataset item that is forecast.
# look_back: not read by this cell; kept so the notebook's globals stay unchanged.
# pred: number of decoder passes performed for each forecast.
shift, look_back, pred = 50, 24, 1
# Number of consecutive dataset items forecast per model. Previously the
# literal 24 was repeated in the loop bound and in both reshape calls.
horizon = 24


for model in models:
    model['model'].eval()
    # Rebuild the windowed dataset with this model's own window/sequence sizes.
    # The trailing positional arguments (1, False) are passed through as in the
    # original call; their meaning is defined by TimeSeriesWindowedDatasetConfig
    # (presumably step size and start-token flag — TODO confirm).
    dataset_config = TimeSeriesWindowedDatasetConfig(model['params']['src_window'],
                                                     model['params']['tgt_window'],
                                                     model['params']['src_seq_length'],
                                                     model['params']['tgt_seq_length'],
                                                     1,
                                                     False)
    dataset = TimeSeriesWindowedTensorDataset(training_data, dataset_config)
    # All-ones decoder start token whose last dimension matches a target item.
    ones = torch.ones(1, 1, dataset[0][1].shape[-1])

    with torch.no_grad():

        ground_truth = []
        predicted = []
        for shift_offset in range(shift, shift + horizon):
            # Fetch the item once instead of re-indexing the dataset on every
            # decoder pass; index 0 is the source, index 1 the target.
            item = dataset[shift_offset]
            source = item[0].unsqueeze(0)

            out = ones
            for _step in range(pred):
                # Feed the start token plus everything decoded so far, then
                # re-prepend the start token to the fresh model output.
                out = torch.concatenate((ones, model['model'](source, out)), dim=1)

            # Drop the start token before converting windows back to a sequence.
            predicted.append(dataset.get_sequence_from_y_windows(out[:, 1:, :].detach()))
            ground_truth.append(dataset.get_sequence_from_y_windows(item[1]))

    # One value per forecast is expected; reshape raises if that does not hold.
    predicted = np.array(predicted).reshape(horizon)
    ground_truth = np.array(ground_truth).reshape(horizon)
    # Prefix labels with the model name: all models share one figure, and
    # identical labels made the legend entries indistinguishable.
    plt.plot(ground_truth, label=f'{model["name"]} - ground truth')
    plt.plot(predicted, label=f'{model["name"]} - {horizon}h rolling one step')
    
    #output = model['model'](
    #    dataset[shift][0].unsqueeze(0),  
    #    torch.concat((ones, dataset[shift][1][:-1, :].unsqueeze(0)), dim=1)
    #)
    #
    #plt.plot(torch.concat(
    #    (dataset.get_sequence_from_x_windows(dataset[shift][0]), 
    #     dataset.get_sequence_from_y_windows(dataset[shift][1])), dim=0), label='original')
    #plt.plot(
    #    torch.concatenate(
    #        (dataset.get_sequence_from_x_windows(dataset[shift][0]),
    #         dataset.get_sequence_from_y_windows(output[:, :, :].detach())), dim=0), 
    #    label='full access - normal'
    #)
    #plt.plot(
    #    torch.concatenate(
    #        (dataset.get_sequence_from_x_windows(dataset[shift][0]), 
    #         dataset.get_sequence_from_y_windows(out[:, 1:, :].detach())), dim=0),
    #    label='predicted - normal'
    #)
    
#for model in models_decomp:
#    out = torch.ones(1,1,11)
#    for _ in range(25):
#        output = model['model'](torch.tensor(training_data_decomp[np.newaxis, 0:72,:]), out)# torch.tensor(training_data[np.newaxis, 73:97,:]))# torch.zeros((1, 24, 1)))
#        out = torch.concatenate((out, output[:,-1,:].unsqueeze(1)), axis=1)
#    plt.plot(out[:, 1:-1, :].detach().reshape((24,11)).sum(-1), label='predicted - decomp')

#for model in models_decomp_rem:
#    out = torch.ones(1,1,10)
#    for _ in range(25):
#        output = model['model'](torch.tensor(training_data_decomp_rem[np.newaxis, 0:72,:]), out)# torch.tensor(training_data[np.newaxis, 73:97,:]))# torch.zeros((1, 24, 1)))
#        out = torch.concatenate((out, output[:,-1,:].unsqueeze(1)), axis=1)
#    plt.plot(out[:, 1:-1, :].detach().reshape((24,10)).sum(-1), label='predicted - rem')

#for model in models_decomp_combined:
#    out = torch.ones(1,1,1)
#    for _ in range(25):
#        output = model['model'](torch.tensor(training_data_combined[np.newaxis, 0:72,:]), out)# torch.tensor(training_data[np.newaxis, 73:97,:]))# torch.zeros((1, 24, 1)))
#        out = torch.concatenate((out, output[:,-1,:].unsqueeze(1)), axis=1)
#    plt.plot(out[:, 1:-1, :].detach().reshape((24,)), label='predicted - combined')

#for model in models_decomp_combined_rem:
#    out = torch.ones(1,1,1)
#    for _ in range(25):
#        output = model['model'](torch.tensor(training_data_combined_rem[np.newaxis, 0:72,:]), out)# torch.tensor(training_data[np.newaxis, 73:97,:]))# torch.zeros((1, 24, 1)))
#        out = torch.concatenate((out, output[:,-1,:].unsqueeze(1)), axis=1)
#    plt.plot(out[:, 1:-1, :].detach().reshape((24,)), label='predicted - combined rem')

plt.legend()
plt.show()