In [119]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# own Modules 
#from models import LstmMle
from data_loader import DataPreperator, DataSet
#from trainer import Trainer
#from loss_module import LossModuleMle

In [120]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import builtins

class Trainer():
    def __init__(self, model, optimizer, scheduler, scheduler_active, criterion, patience, location_model, location_stats):
        self.model = model
        # lr=1. because of scheduler (1*learning_rate_schedular)
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.scheduler_active = scheduler_active
        # initialize further variables
        self.criterion = criterion
        self.epoch_training_loss = []
        self.epoch_validation_loss = []
        self.lowest_loss = 99
        self.trails = 0
        self.patience = patience
        self.location_model = location_model
        self.location_stats = location_stats
    
    def train(self, data_loader_training):
        for batch_number, data in enumerate(data_loader_training):
            # The LSTM has to be reinitialised, otherwise the LSTM will treat a new batch 
            # as a continuation of a sequence. When batches of data are independent sequences, 
            # then you should reinitialise the hidden state before each batch. 
            # But if your data is made up of really long sequences and you cut it up into batches 
            # making sure that each batch follows on from the previous batch, then in that case 
            # you wouldn’t reinitialise the hidden state before each batch.
            # In the current workflow of class DataProvoider independent sequences are returned. 
            input_data, target_data = data
            
            self.model.train()
            hidden = self.model.init_hidden()

            # Zero out gradient, else they will accumulate between minibatches
            self.optimizer.zero_grad()

            # Forward propagation
            y_hat, tau = self.model(input_data, hidden)

            # Calculate loss
            loss = self.criterion(y_hat, tau, target_data)
            
            if loss >5:
                print("Mean loss in batchnumber {}: {}".format(batch_number, loss))
            
            self.epoch_training_loss.append(loss.item())

            # Backward pass
            loss.backward()

            # Update parameters
            self.optimizer.step()

            # Update LR if scheduler is active 
            if self.scheduler_active:
                self.scheduler.step()
            
        # Return mean of loss over all training iterations
        return sum(self.epoch_training_loss) / float(len(self.epoch_training_loss))
    
    def evaluate(self, data_loader_validation, hist_loss, epoch):
        for batch_number, data in enumerate(data_loader_validation):
            input_data, target_data = data
            self.model.eval()
            hidden = self.model.init_hidden()
            y_hat, tau = self.model(input_data, hidden)

            # Calculate loss
            loss = self.criterion(y_hat, tau, target_data)
            self.epoch_validation_loss.append(loss.item())
            
        # Return mean of loss over all validation iterations
        return sum(self.epoch_validation_loss) / float(len(self.epoch_validation_loss))
            
    def cache_history_training(self, hist_loss, epoch, mean_epoch_training_loss, mean_epoch_validation_loss):
        # Save training and validation loss to history
        print("-------- epoch_no. {} finished with eval loss {}--------".format(epoch, mean_epoch_validation_loss))
        return {'epoch': epoch, 'training': mean_epoch_training_loss, 'validation': mean_epoch_validation_loss}
            
        # Empty list for new epoch 
        self.epoch_training_loss = []
        self.epoch_validation_loss = []
        
    def save_model(self, epoch, mean_epoch_validation_loss, input_size, n_lstm_layer, n_hidden_lstm, n_hidden_fc, seq_size):
        if mean_epoch_validation_loss < self.lowest_loss:
            self.trials = 0
            self.lowest_loss = mean_epoch_validation_loss
            torch.save({
                'model_state_dict': self.model.state_dict(),
                'optimizer_state_dict': self.optimizer.state_dict(),
                'loss': mean_epoch_validation_loss
            }, self.location_model+"_InputSize"+str(input_size)+"_LayerLstm"+
                str(n_lstm_layer)+"_HiddenLstm"+str(n_hidden_lstm)+"_HiddenFc"+str(n_hidden_fc)+"_Seq"+str(seq_size)+".pt")
            print("Epoch {}: best model saved with loss: {}".format(epoch, mean_epoch_validation_loss))
            return True
    
        # Else: Increase trails by one and start new epoch as long as not too many epochs 
        # were unsuccessful (controlled by patience)
        else:
            self.trials += 1
            if self.trials >= self.patience :
                print("Early stopping on epoch {}".format(epoch))
                return False
            return True
    
    def save_statistic(self, hist_loss, sequenze_size, n_lstm_layer, n_hidden_lstm, n_hidden_fc, time):
        with open(self.location_stats, 'a') as file:
            file.write("\n"+str(round(min(hist_loss),2))+","+str(sequenze_size)+","+str(n_lstm_layer)+","+
            str(n_hidden_lstm)+","+str(n_hidden_fc)+","+str(round(time,1)))

In [121]:
import torch.nn.functional as F

class LstmMle(nn.Module):
    def __init__(self, batch_size, input_dim, n_hidden_lstm, n_layers, dropout_rate, n_hidden_fc):
        super(LstmMle, self).__init__()
        # Attributes for LSTM Network
        self.input_dim = input_dim
        self.n_hidden_lstm = n_hidden_lstm
        self.n_layers = n_layers
        self.batch_size = batch_size
        self.dropout_rate = dropout_rate
        self.n_hidden_fc = n_hidden_fc
        
        # Definition of NN layer
        # batch_first = True because dataloader creates batches and batch_size is 0. dimension
        self.lstm = nn.LSTM(input_size = self.input_dim, hidden_size = self.n_hidden_lstm, num_layers = self.n_layers, batch_first = True, dropout = self.dropout_rate)
        self.fc1 = nn.Linear(self.n_hidden_lstm, self.n_hidden_fc)
        self.dropout = nn.Dropout(p=self.dropout_rate)
        self.fc_y_hat = nn.Linear(self.n_hidden_fc, self.input_dim)
        self.fc_tau = nn.Linear(self.n_hidden_fc, self.input_dim)
        
    def forward(self, input_data, hidden):
        # Forward propagate LSTM
        # LSTM in Pytorch return two results: the first one usually called output 
        # and the second one (hidden_state, cell_state). 
        lstm_out, (hidden_state, cell_state) = self.lstm(input_data, hidden)

        # LSTM returns as output all the hidden_states for all the timesteps (seq), 
        # in other words all of the hidden states throughout
        # the sequence.
        # Thus we have to select the output from the last sequence (last hidden state of sequence)
        # Length of input data can varry 
        length_seq = input_data.size()[1]
        last_out = lstm_out[:,length_seq-1,:]

        # Forward path through the subsequent fully connected tanh activation 
        # neural network with 2q output channels
        out = self.fc1(last_out)
        out = self.dropout(out)
        out = F.tanh(out)
        y_hat = self.fc_y_hat(out)
        tau = self.fc_tau(out)
        
        #print("y_hat: {}".format(y_hat))
        #print("tau: {}".format(tau))
        #print("-------------------")
        
        return y_hat, tau
    
    def init_hidden(self):
        # This method is for initializing hidden state as well as cell state
        # We need to detach the hidden state to prevent exploding/vanishing gradients
        h0 = torch.zeros(self.n_layers, self.batch_size, self.n_hidden_lstm, requires_grad=False)
        c0 = torch.zeros(self.n_layers, self.batch_size, self.n_hidden_lstm, requires_grad=False)
        return [t for t in (h0, c0)]

In [122]:
class LossModuleMle(torch.nn.Module):
    def __init__(self, input_size, batch_size):
        """
        In the constructor we instantiate the module and assign them as
        member variables.
        """
        super(LossModuleMle, self).__init__()
        self.input_size = input_size
        self.batch_size = batch_size

    def forward(self, y_hat, tau, target_data):
        """
        In the forward function we accept a Tensor of input data and we must return
        a Tensor of output data.
        We are minimizing the the negative log likelihood loss function.
        We write σ_t = exp(τ_t) to guarantee σ > 0 and to provide numerical stability in the learning process.
        """
        # Extract elements from output
        
        #print("y_hat: {}".format(y_hat))
        #print("tau: {}".format(tau))
        
        # Compute loss
        term = torch.pow((target_data - y_hat) / torch.exp(tau), 2) + 2 * tau
        loss_batches = torch.sum(input=term, dim=1) / self.input_size
        mean_loss = torch.sum(loss_batches)/self.batch_size
        
        #print("Term: {}".format(term))
        #print("Loss per batch: {}".format(loss_batches))
        #print("Mean loss: {}".format(mean_loss))
        #if mean_loss >5 or mean_loss <0:
            #print("target_data: {}".format(target_data))
            #print("y_hat: {}".format(y_hat))
            #print("tau: {}".format(tau))
            #print("Term: {}".format(term))
            #print("Mean loss: {}".format(mean_loss))
        
        return mean_loss

## Hyperparameters

In [123]:
param = {
    "data" : {
        "stake_training_data" : 0.75, 
        "path" : '../../../data/phm_data_challenge/01_M01_DC_preprocessed_grid_search.csv' ,
    },
    "preprocessing" : {
        "first_order_difference" : False,
        "droped_features": ["ID", "ongoing time", "up time", "RLU", "runnum"],
        "features_not_to_scale": ['FIXTURESHUTTERPOSITION_0.0', 'FIXTURESHUTTERPOSITION_1.0',
                                      'FIXTURESHUTTERPOSITION_2.0', 'FIXTURESHUTTERPOSITION_3.0',
                                      'FIXTURESHUTTERPOSITION_255.0']
    },
    "model" : {
        "input_size" : 21,
        "n_hidden_lstm" : 100,
        "sequence_size" : 50,
        "batch_size" : 8,
        "lstm_layer" : 2,
        "n_hidden_fc": 50,
        "dropout_rate": 0.2
    },
    "cycling_lr" : {
        "scheduler_active" : True, 
        # step_size is the number of training iterations (total samples/batch_size) per half cycle. 
        # Authors suggest setting step_size 2-8 x training iterations in epoch.
        "step_size" : (12500/8)*2, 
        # Mode can be one of {triangular, triangular2, exp_range}
        "mode" : "triangular", 
        "gamma" : 0.9995,
        "base_lr" : 0.005,
        "max_lr" :0.03
    },
    "training": {
        "n_epochs" : 100,
        "patience" : 10,
    },
    "filed_location": {
        "trained_model" : "../../../models/MLE_model/xxxxxxxx",
        "history" : "../../visualisation/files/history_training/MLExxxxxxxxx.csv"
    }
}

## Preprocessing
1. First order difference (if selected)
2. Split data into train and validation data
3. Scale train and validation data with train's mean and variance

In [124]:
train_loader = DataPreperator(path=param['data']['path'], 
                              ignored_features=param['preprocessing']['droped_features'],
                              stake_training_data=param['data']['stake_training_data'],
                              features_not_to_scale=param['preprocessing']['features_not_to_scale'],
                              first_order_difference=param["preprocessing"]["first_order_difference"])
train_data, validation_data = train_loader.prepare_data()
print(len(train_data)+len(validation_data))

6251


### Mean and variance from scale process (only of continious features)

In [125]:
mean, var = train_loader.provide_statistics()
print(mean)
print(var)

[-0.00616403  0.0014136  -0.00775649  0.02678833  0.11496844  0.01260359
 -0.03450196  0.01716666  0.00792266  0.05113533  0.15137018 -0.15249734
 -0.06415179 -0.25143178  0.87424645 -0.30155094]
[9.54757947e-01 9.65299841e-01 9.79715208e-01 9.95428027e-01
 1.20228023e+00 9.84873713e-01 4.43684229e-01 1.01021270e+00
 9.70240421e-01 6.98134612e-01 1.44322416e+00 9.29744851e-12
 1.95742556e-05 1.38008019e-05 4.47681049e-05 1.81650562e-01]


## Initialize Dataset 
Time series data must be transformed into a structure of samples with `input` and `target` components before it can be used to fit a supervised learning model. <br>
For a time series interested in one-step predictions, the observations at prior time steps, so-called lag observations, are used as `input` and the `target` is the observation at the current time step.

For example, a univariate series can be expressed as a supervised learning problem with three time steps for `input` and one step as `target`, as follows:

|input|target|
|-----|------|
[1, 2, 3]|[4]
[2, 3, 4]|[5]
[3, 4, 5]|[6]

The Keras deep learning library provides the `TimeseriesGenerator` to automatically transform both univariate and multivariate time series data into such a format. 

In [126]:
dataset_train = DataSet(train_data, timesteps=param["model"]["sequence_size"])
dataset_validation = DataSet(validation_data, timesteps=param["model"]["sequence_size"])

## Initialize DataLoader
Actually the data has a other size than in the table above because of multivariate time series data and because of using batches. <br>
__First dimension__: batch size --> Defines the number of samples that will be propagated through the network simultaneously. <br>
__Second dimension:__ timesteps --> Number of sequence which is passed into the LSTM <br>
__Third dimension:__ input_dim --> Is the number of features. In this case data from 7 sensors, collected at the same time. <br>

![](../../../knowledge/pictures/input_shape.png)

Data is shuffled because each mini batch is indipendent from each other, but samples of a minibatch are in chronological order.

In [127]:
data_loader_training = DataLoader(dataset_train, 
                                  batch_size=param["model"]["batch_size"], 
                                  num_workers=4, 
                                  shuffle=True, 
                                  drop_last=True
                                 )
data_loader_validation = DataLoader(dataset_validation, 
                                    batch_size=param["model"]["batch_size"], 
                                    num_workers=4, 
                                    shuffle=True, 
                                    drop_last=True
                                   )

In [128]:
for batch_idx, data in enumerate(data_loader_training):
    x,y = data
    print('Data of batch: {}'.format(batch_idx))
    print("Size of input data: {}".format(x.size()))
    print("Size of target data: {}".format(y.size()))
    if batch_idx >=1: break

Data of batch: 0
Size of input data: torch.Size([8, 50, 21])
Size of target data: torch.Size([8, 21])
Data of batch: 1
Size of input data: torch.Size([8, 50, 21])
Size of target data: torch.Size([8, 21])


## Initialize Neural Network
__Parameters for LSTM Modul:__
- input_size : The number of expected features in the input x
- hidden_size :The number of features in the hidden state h
- num_layers : Number of recurrent layers. E.g., setting num_layers=2 would mean stacking two LSTMs together to form a stacked LSTM, with the second LSTM taking in outputs of the first LSTM and computing the final results.
- batch_first : If True, then the input __and output__ tensors are provided as (batch, seq, feature).
- dropout – If non-zero, introduces a Dropout layer on the outputs of each LSTM layer except the last layer, with dropout probability equal to dropout. Default: 0

In [129]:
torch.manual_seed(0)
model = LstmMle(batch_size=param['model']['batch_size'], 
                input_dim=param['model']['input_size'], 
                n_hidden_lstm=param['model']['n_hidden_lstm'], 
                n_layers=param['model']['lstm_layer'],
                dropout_rate= param['model']['dropout_rate'],
                n_hidden_fc=param['model']['n_hidden_fc']
                )

## Define MSE Loss function as torch module

In [130]:
criterion = LossModuleMle(param["model"]["input_size"], param["model"]["batch_size"])

## Initialize Optimizer and Cyclic Learning Rate Scheduler

In [131]:
optimizer = torch.optim.SGD(model.parameters(), lr=1.)  
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer=optimizer, 
                                              base_lr=param['cycling_lr']['base_lr'], 
                                              max_lr=param['cycling_lr']['max_lr'], 
                                              step_size_up=param['cycling_lr']['step_size'], 
                                              mode=param['cycling_lr']['mode'],
                                              gamma=param['cycling_lr']['gamma']
                                             )

## Initialize Trainer

In [132]:
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  scheduler=scheduler,
                  scheduler_active = param["cycling_lr"]["scheduler_active"],
                  criterion=criterion, 
                  location_model=param["filed_location"]["trained_model"], 
                  location_stats=param["filed_location"]["history"], 
                  patience=param['training']['patience']
                 )

## Training Loop
An epoch consists of a learning cycle over all batches of training data and an evaluation of the most recent model with the testing data. 

In [133]:
# Create lists to save training loss and validation loss of each epoch
hist_loss = []
torch.manual_seed(0)

for epoch in range(param['training']['n_epochs']):
    # Train with batches 
    mean_epoch_training_loss = trainer.train(data_loader_training)
    
    # Evaluate
    mean_epoch_validation_loss = trainer.evaluate(data_loader_validation, hist_loss, epoch)

    # Cache History
    history = trainer.cache_history_training(hist_loss, epoch, mean_epoch_training_loss, mean_epoch_validation_loss)
    hist_loss.append(history)

    # Save model if its the best one since the last change in configuration of hyperparameters
    status_ok = trainer.save_model(epoch, mean_epoch_validation_loss, param['model']['input_size'], 
                                   param['model']['lstm_layer'], param['model']['n_hidden_lstm'], 
                                   param['model']['n_hidden_fc'], param["model"]["sequence_size"])
    if not status_ok:
        break

# Safe results to csv file
df = pd.DataFrame(hist_loss)
df.to_csv(param["filed_location"]["history"], sep=";", index=False)



Mean loss in batchnumber 347: 30.261371612548828
Mean loss in batchnumber 372: 5.352893352508545
Mean loss in batchnumber 373: 8.840778350830078
Mean loss in batchnumber 374: 7.8138427734375
Mean loss in batchnumber 402: 31.865032196044922
Mean loss in batchnumber 405: 6.212568283081055
Mean loss in batchnumber 418: 1099.4146728515625
Mean loss in batchnumber 419: 46.77112579345703
-------- epoch_no. 0 finished with eval loss nan--------


AttributeError: 'Trainer' object has no attribute 'trials'