In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils import clip_grad_norm_
from torch.autograd import Variable
from torchsummary import summary
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import torch.optim as optim
from sklearn.preprocessing import StandardScaler

import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing.sequence import TimeseriesGenerator

In [8]:
hyperparam = {
    "data" : {
        "stake_training_data" : 0.75, 
        "path" : '../data/vega_shrinkwrapper_original/NewBlade/'
    },
    "model" : {
        "input_size" : 7,
        "n_hidden" : 150,
        "sequence_size" : 20,
        "batch_size" : 1,
        "lstm_layer" : 3,
    },
    "cycling_lr" : {
        # step_size is the number of training iterations (total samples/batch_size) per half cycle. 
        # Authors suggest setting step_size 2-8 x training iterations in epoch.
        "step_size" : (1536/8)*2, 
        # Mode can be one of {triangular, triangular2, exp_range}
        "mode" : "triangular", 
        "gamma" : 0.9995,
        "base_lr" : 3e-3, 
        "max_lr" :0.1
    },
    "training": {
        "n_epochs" : 20,
        "patience" : 50,
    }
}

# Test TimeSeriesGenerator

In [9]:
class DataPreperator():
    def __init__(self, path):
        self.path = path
        
    def load_data(self):
        return pd.read_csv(self.path)
    
    def preprocess_data(self, train_data, validation_data):
        # Remove time feature
        train_data = train_data.drop(labels="Timestamp", axis=1)
        validation_data = validation_data.drop(labels="Timestamp", axis=1)
        # Initialise standard scaler
        scaler = StandardScaler()
        scaler.fit(train_data)
        # Transform data
        train_preprocessed = scaler.transform(train_data)
        validation_preprocessed = scaler.transform(validation_data)
        return train_preprocessed, validation_preprocessed 
        
    def provide_data(self, stake_training_data):
        dataset = self.load_data()
        amount_training_data = round(len(dataset)*stake_training_data)
        train_data = dataset.iloc[0:amount_training_data,:]
        validation_data = dataset.iloc[amount_training_data:,:]
        train_preprocessed, validation_preporcessed = self.preprocess_data(train_data, validation_data)
        
        return train_preprocessed, validation_preporcessed

In [10]:
train_loader = DataPreperator(path=hyperparam['data']['path']+'NewBlade001.csv')
train_data, validation_data = train_loader.provide_data(stake_training_data=hyperparam['data']['stake_training_data'])

In [13]:
class DataProvider(Dataset):
    def __init__(self, data, timesteps):
        # Data as numpy array is provided
        self.data = data
        # Data generator is initialized, batch_size=1 is indipendent of neural network's batch_size 
        self.generator = TimeseriesGenerator(self.data, self.data, length=timesteps, batch_size=1)

    def __getitem__(self, index):
        x, y = self.generator[index]
        x_torch = torch.from_numpy(x)
        # Dimension 0 with size 1 (created by TimeseriesGenerator because of batch_size=1) gets removed 
        # because DataLoader will add a dimension 0 with size=batch_size as well
        x_torch = torch.squeeze(x_torch) # torch.Size([1, timesteps, 7]) --> torch.Size([timesteps, 7])
        y_torch = torch.from_numpy(y)
        y_torch = torch.squeeze(y_torch)
        return (x_torch.float(), y_torch.float()) 

    def __len__(self):
        return len(self.generator)

In [14]:
# Data is shuffled because each mini batch is indipendent from each other, but samples of a minibatch are in chronological order
dataset_train = DataProvider(train_data, timesteps=3)
data_loader_training = DataLoader(dataset_train, batch_size=1, num_workers=1, shuffle=False, drop_last=True)

In [15]:
for batch_idx, data in enumerate(data_loader_training):
    x,y = data
    print(x)
    print("------")
    print(y)
    print(y.size())
    print("------")

tensor([[[-0.1649, -0.0654, -1.0356, -0.6074, -1.3080, -2.2818, -4.2927],
         [-0.0994, -0.1056, -1.0356, -0.5940, -1.3080, -2.2901, -4.3037],
         [-0.1734,  0.0551, -1.0356, -0.6127, -1.3080, -2.2735, -4.3148]]])
------
tensor([[-0.1640,  0.0952, -1.0356, -0.6181, -1.3080, -2.2901, -4.3037]])
torch.Size([1, 7])
------
tensor([[[-0.0994, -0.1056, -1.0356, -0.5940, -1.3080, -2.2901, -4.3037],
         [-0.1734,  0.0551, -1.0356, -0.6127, -1.3080, -2.2735, -4.3148],
         [-0.1640,  0.0952, -1.0356, -0.6181, -1.3080, -2.2901, -4.3037]]])
------
tensor([[-0.2181,  0.1354, -1.0356, -0.6127, -1.3080, -2.2860, -4.2927]])
torch.Size([1, 7])
------
tensor([[[-0.1734,  0.0551, -1.0356, -0.6127, -1.3080, -2.2735, -4.3148],
         [-0.1640,  0.0952, -1.0356, -0.6181, -1.3080, -2.2901, -4.3037],
         [-0.2181,  0.1354, -1.0356, -0.6127, -1.3080, -2.2860, -4.2927]]])
------
tensor([[-0.2174,  0.1354, -1.0356, -0.6074, -1.3080, -2.2860, -4.2927]])
torch.Size([1, 7])
------
tensor(

KeyboardInterrupt: 