In [1]:
import torch
import torch.nn as nn
from torch.nn.utils import clip_grad_norm_
from torch.utils.data import DataLoader
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# own Modules 
from models import LstmMseDropout
from data_loader import DataPreperator, DataSet
from trainer import Trainer
from loss_module import LossModuleMse, LossModuleMle

## Hyperparameters

In [2]:
hyperparam = {
    "data" : {
        "stake_training_data" : 0.75, 
        "path" : '../../../data/artifical_signals/NewBlade_with_sine.csv' 
    },
    "model" : {
        "input_size" : 8,
        "n_hidden_lstm" : 150,
        "sequence_size" : 50,
        "batch_size" : 8,
        "lstm_layer" : 2,
        "n_hidden_fc": 50,
        "dropout_rate": 0.2
    },
    "cycling_lr" : {
        # step_size is the number of training iterations (total samples/batch_size) per half cycle. 
        # Authors suggest setting step_size 2-8 x training iterations in epoch.
        "step_size" : (2048/8)*2, 
        # Mode can be one of {triangular, triangular2, exp_range}
        "mode" : "triangular", 
        "gamma" : 0.9995,
        "base_lr" : 0.016, 
        "max_lr" :0.1
    },
    "training": {
        "n_epochs" : 100,
        "patience" : 10,
    },
    "filed_location": {
        "trained_model" : "../../../models/MSE_model/tesssst.pt",
        "history" : "../../visualisation/files/history_training/history_MSE.csv"
    }
}

## Split raw data into train and validation data and scale it

In [3]:
train_loader = DataPreperator(path=hyperparam['data']['path'], first_order_difference=True)
train_data, validation_data = train_loader.provide_data(stake_training_data=hyperparam['data']['stake_training_data'])


### Mean and variance from scale process

In [4]:
mean, var = train_loader.provide_statistics()
print(mean)
print(var)

[-9.91425250e-05  1.20717099e-05  3.30086724e+00  2.28118387e+00
  5.70847231e+00  1.32484425e+00  1.20857364e-04 -5.39472122e-04]
[1.04538899e-02 1.32004086e-03 2.77006730e+01 6.11135598e+02
 5.13248993e+00 3.31823108e+02 1.11835724e-03 1.14592843e+00]


## Initialize Dataset 
Time series data must be transformed into a structure of samples with `input` and `target` components before it can be used to fit a supervised learning model. <br>
For a time series interested in one-step predictions, the observations at prior time steps, so-called lag observations, are used as `input` and the `target` is the observation at the current time step.

For example, a univariate series can be expressed as a supervised learning problem with three time steps for `input` and one step as `target`, as follows:

|input|target|
|-----|------|
[1, 2, 3]|[4]
[2, 3, 4]|[5]
[3, 4, 5]|[6]

The Keras deep learning library provides the `TimeseriesGenerator` to automatically transform both univariate and multivariate time series data into such a format. 

In [5]:
dataset_train = DataSet(train_data, timesteps=hyperparam["model"]["sequence_size"])
dataset_validation = DataSet(validation_data, timesteps=hyperparam["model"]["sequence_size"])

## Initialize DataLoader
Actually the data has a other size than in the table above because of multivariate time series data and because of using batches. <br>
__First dimension__: batch size --> Defines the number of samples that will be propagated through the network simultaneously. <br>
__Second dimension:__ timesteps --> Number of sequence which is passed into the LSTM <br>
__Third dimension:__ input_dim --> Is the number of features. In this case data from 7 sensors, collected at the same time. <br>

![](../../../knowledge/pictures/input_shape.png)

Data is shuffled because each mini batch is indipendent from each other, but samples of a minibatch are in chronological order.

In [6]:
data_loader_training = DataLoader(dataset_train, batch_size=hyperparam["model"]["batch_size"], 
                                  num_workers=1, shuffle=True, drop_last=True)
data_loader_validation = DataLoader(dataset_validation, batch_size=hyperparam["model"]["batch_size"], 
                                    num_workers=1, shuffle=True, drop_last=True)

In [7]:
for batch_idx, data in enumerate(data_loader_training):
    x,y = data
    print('Data of batch: {}'.format(batch_idx))
    print("Size of input data: {}".format(x.size()))
    print("Size of target data: {}".format(y.size()))
    if batch_idx >=1: break

Data of batch: 0
Size of input data: torch.Size([8, 50, 8])
Size of target data: torch.Size([8, 8])
Data of batch: 1
Size of input data: torch.Size([8, 50, 8])
Size of target data: torch.Size([8, 8])


## Initialize Neural Network
__Parameters for LSTM Modul:__
- input_size : The number of expected features in the input x
- hidden_size :The number of features in the hidden state h
- num_layers : Number of recurrent layers. E.g., setting num_layers=2 would mean stacking two LSTMs together to form a stacked LSTM, with the second LSTM taking in outputs of the first LSTM and computing the final results.
- batch_first : If True, then the input __and output__ tensors are provided as (batch, seq, feature).
- dropout – If non-zero, introduces a Dropout layer on the outputs of each LSTM layer except the last layer, with dropout probability equal to dropout. Default: 0

In [8]:
torch.manual_seed(0)
model = LstmMseDropout(batch_size=hyperparam['model']['batch_size'], 
                       input_dim=hyperparam['model']['input_size'], 
                       n_hidden_lstm=hyperparam['model']['n_hidden_lstm'], 
                       n_layers=hyperparam['model']['lstm_layer'],
                       dropout_rate= hyperparam['model']['dropout_rate'],
                        n_hidden_fc=hyperparam['model']['n_hidden_fc']
                       )

## Define MSE Loss function as torch module

In [9]:
criterion = LossModuleMse(hyperparam["model"]["input_size"], hyperparam["model"]["batch_size"])

## Initialize Optimizer and Cyclic Learning Rate Scheduler

In [10]:
optimizer = torch.optim.SGD(model.parameters(), lr=1.)  
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer=optimizer, 
                                              base_lr=hyperparam['cycling_lr']['base_lr'], 
                                              max_lr=hyperparam['cycling_lr']['max_lr'], 
                                              step_size_up=hyperparam['cycling_lr']['step_size'], 
                                              mode=hyperparam['cycling_lr']['mode'],
                                              gamma=hyperparam['cycling_lr']['gamma']
                                             )

## Initialize Trainer

In [11]:
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  scheduler=scheduler,
                  criterion=criterion, 
                  location_model=hyperparam["filed_location"]["trained_model"], 
                  location_stats=hyperparam["filed_location"]["history"], 
                  patience=hyperparam['training']['patience']
                 )

## Training Loop
An epoch consists of a learning cycle over all batches of training data and an evaluation of the most recent model with the testing data. 

In [12]:
# Create lists to save training loss and validation loss of each epoch
hist_loss = []
torch.manual_seed(0)

for epoch in range(hyperparam['training']['n_epochs']):
    # Train with batches 
    mean_epoch_training_loss = trainer.train(data_loader_training)

    # Evaluate
    mean_epoch_validation_loss = trainer.evaluate(data_loader_validation, hist_loss, epoch)

    # Cache History
    trainer.cache_history_training(hist_loss, epoch, mean_epoch_training_loss, mean_epoch_validation_loss)

    # Save model if its the best one since the last change in configuration of hyperparameters
    status_ok = trainer.save_model(epoch, mean_epoch_validation_loss, hyperparam['model']['input_size'], 
                                   hyperparam['model']['lstm_layer'], hyperparam['model']['n_hidden_lstm'], 
                                   hyperparam['model']['n_hidden_fc'])
    if not status_ok:
        break

        # Safe results to csv file
df = pd.DataFrame(hist_loss)
df.to_csv(hyperparam["filed_location"]["history"], sep=";", index=False)



-------- epoch_no. 0 finished with eval loss nan--------


TypeError: save_model() missing 1 required positional argument: 'seq_size'

## Visualization of cyclic learning rate

In [None]:
x = range(len(lr_find_lr))
data = pd.DataFrame(data={'y': lr_find_lr, 'x': x})
f, ax = plt.subplots(figsize=(7, 7))
sns.lineplot(x=data.x, y=data.y, ax=ax)
plt.show()