In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# own Modules 
from models_mle import LstmMle_1
from data_set import DataSet
from data_preperator import DataPreperator
from trainer import Trainer
from loss_module import LossMle

## Hyperparameters

In [2]:
param = {
    "data" : {
        "stake_training_data" : 0.75, 
        "path" : '../../../data/artifical_signals/MLE_analysis/artifical_2_signals.csv' ,
    },
    "preprocessing" : {
        "first_order_difference" : False,
        "droped_features" : ["ID"
                           ],
        "features_not_to_scale": []
    },
    "model" : {
        "input_size" : 2,
        "n_hidden_lstm" : 15,
        "sequence_size" : 25,
        "batch_size" : 8,
        "lstm_layer" : 1,
        "n_hidden_fc": 75,
        "dropout_rate_lstm": 0.0,
        "dropout_rate_fc": 0.2
    },
    "cycling_lr" : {
        "scheduler_active" : True, 
        # step_size is the number of training iterations (total samples/batch_size) per half cycle. 
        # Authors suggest setting step_size 2-8 x training iterations in epoch.
        "step_size" : (9500/8)*2, 
        # Mode can be one of {triangular, triangular2, exp_range}
        "mode" : "triangular", 
        "gamma" : 0.9995,
        "base_lr" : 0.0001,
        "max_lr" :0.0005
    },
    "training": {
        "n_epochs" : 100,
        "patience" : 10,
    },
    "filed_location": {
        "trained_model" : "../../../models/MLE_model/artifical_2_signals",
        "history" : "../../visualisation/files/history_training/history_MLE_artifical_training.csv"
    }
}

## Preprocessing
1. First order difference (if selected)
2. Split data into train and validation data
3. Scale train and validation data with train's mean and variance

In [3]:
train_loader = DataPreperator(path=param['data']['path'], 
                              ignored_features=param['preprocessing']['droped_features'],
                              stake_training_data=param['data']['stake_training_data'],
                              features_not_to_scale=param['preprocessing']['features_not_to_scale'],
                              first_order_difference=param["preprocessing"]["first_order_difference"])
train_data, validation_data = train_loader.prepare_data()
print(len(train_data))

9000


### Mean and variance from scale process (only of continious features)

In [4]:
mean, var = train_loader.provide_statistics()
print(mean)
print(var)

[-0.00393712 -0.01294209]
[49.18936568  0.34270256]


## Initialize Dataset 
Time series data must be transformed into a structure of samples with `input` and `target` components before it can be used to fit a supervised learning model. <br>
For a time series interested in one-step predictions, the observations at prior time steps, so-called lag observations, are used as `input` and the `target` is the observation at the current time step.

For example, a univariate series can be expressed as a supervised learning problem with three time steps for `input` and one step as `target`, as follows:

|input|target|
|-----|------|
[1, 2, 3]|[4]
[2, 3, 4]|[5]
[3, 4, 5]|[6]

The Keras deep learning library provides the `TimeseriesGenerator` to automatically transform both univariate and multivariate time series data into such a format. 

In [5]:
dataset_train = DataSet(train_data, timesteps=param["model"]["sequence_size"])
dataset_validation = DataSet(validation_data, timesteps=param["model"]["sequence_size"])

## Initialize DataLoader
Actually the data has a other size than in the table above because of multivariate time series data and because of using batches. <br>
__First dimension__: batch size --> Defines the number of samples that will be propagated through the network simultaneously. <br>
__Second dimension:__ timesteps --> Number of sequence which is passed into the LSTM <br>
__Third dimension:__ input_dim --> Is the number of features. In this case data from 7 sensors, collected at the same time. <br>

![](../../../knowledge/pictures/input_shape.png)

Data is shuffled because each mini batch is indipendent from each other, but samples of a minibatch are in chronological order.

In [6]:
data_loader_training = DataLoader(dataset_train, 
                                  batch_size=param["model"]["batch_size"], 
                                  num_workers=0, 
                                  shuffle=True, 
                                  drop_last=True
                                 )
data_loader_validation = DataLoader(dataset_validation, 
                                    batch_size=param["model"]["batch_size"], 
                                    num_workers=0, 
                                    shuffle=True, 
                                    drop_last=True
                                   )

In [7]:
for batch_idx, data in enumerate(data_loader_training):
    x,y = data
    print('Data of batch: {}'.format(batch_idx))
    print("Size of input data: {}".format(x.size()))
    print("Size of target data: {}".format(y.size()))
    if batch_idx >=1: break

Data of batch: 0
Size of input data: torch.Size([8, 25, 2])
Size of target data: torch.Size([8, 2])
Data of batch: 1
Size of input data: torch.Size([8, 25, 2])
Size of target data: torch.Size([8, 2])


# Training Phase 1: Only consider mu
Set tau=0 --> sigma=1

In [8]:
K_phase_1=0

## Initialize Neural Network

In [9]:
torch.manual_seed(0)
model = LstmMle_1(batch_size=param['model']['batch_size'], 
                 input_dim=param['model']['input_size'], 
                 n_hidden_lstm=param['model']['n_hidden_lstm'], 
                 n_layers=param['model']['lstm_layer'],
                 dropout_rate_lstm= param['model']['dropout_rate_lstm'],
                 dropout_rate_fc= param['model']['dropout_rate_fc'],
                 n_hidden_fc=param['model']['n_hidden_fc'],
                 K = K_phase_1
                 )

## Initialize MSE Loss function as torch module

In [10]:
criterion = LossMle(param["model"]["input_size"], param["model"]["batch_size"])

## Initialize Optimizer and Cyclic Learning Rate Scheduler

In [11]:
optimizer = torch.optim.SGD(model.parameters(), lr=1.)  
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer=optimizer, 
                                              base_lr=param['cycling_lr']['base_lr'], 
                                              max_lr=param['cycling_lr']['max_lr'], 
                                              step_size_up=param['cycling_lr']['step_size'], 
                                              mode=param['cycling_lr']['mode'],
                                              gamma=param['cycling_lr']['gamma']
                                             )

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import builtins

class Trainer():
    def __init__(self, model, optimizer, scheduler, scheduler_active, criterion, patience, location_model, location_stats):
        self.model = model
        # lr=1. because of scheduler (1*learning_rate_schedular)
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.scheduler_active = scheduler_active
        # initialize further variables
        self.criterion = criterion
        self.epoch_training_loss = []
        self.epoch_validation_loss = []
        self.lowest_loss = 99
        self.trials = 0
        self.fold = "Fold xx"
        self.patience = patience
        self.location_model = location_model
        self.location_stats = location_stats
    
    def train(self, data_loader_training):
        for batch_number, (input_data, target_data) in enumerate(data_loader_training):
            # The LSTM has to be reinitialised, otherwise the LSTM will treat a new batch 
            # as a continuation of a sequence. When batches of data are independent sequences, 
            # then you should reinitialise the hidden state before each batch. 
            # But if your data is made up of really long sequences and you cut it up into batches 
            # making sure that each batch follows on from the previous batch, then in that case 
            # you wouldn’t reinitialise the hidden state before each batch.
            # In the current workflow of class DataProvoider independent sequences are returned. 
            self.model.train()
            hidden = self.model.init_hidden()

            # Zero out gradient, else they will accumulate between minibatches
            self.optimizer.zero_grad()

            # Forward propagation
            output = self.model(input_data, hidden)

            # Calculate loss
            loss = self.criterion(output, target_data)
            self.epoch_training_loss.append(loss.item())

            # Backward pass
            loss.backward()

            # Update parameters
            self.optimizer.step()

            # Update LR if scheduler is active 
            if self.scheduler_active:
                self.scheduler.step()
            
        # Return mean of loss over all training iterations
        return sum(self.epoch_training_loss) / float(len(self.epoch_training_loss))
    
    def evaluate(self, data_loader_validation, hist_loss, epoch):
        for batch_number, data in enumerate(data_loader_validation):
            with torch.no_grad():
                input_data, target_data = data
                self.model.eval()
                hidden = self.model.init_hidden()
                output = self.model(input_data, hidden)

                # Calculate loss
                loss = self.criterion(output, target_data)
                self.epoch_validation_loss.append(loss.item())
            
        # Return mean of loss over all validation iterations
        return sum(self.epoch_validation_loss) / float(len(self.epoch_validation_loss))
            
    def cache_history_training(self, hist_loss, epoch, mean_epoch_training_loss, mean_epoch_validation_loss):
        # Save training and validation loss to history
        history = {'epoch': epoch, 'training': mean_epoch_training_loss, 'validation': mean_epoch_validation_loss}
        hist_loss.append(history)     
        print("-------- epoch_no. {} finished with eval loss {}--------".format(epoch, mean_epoch_validation_loss))
            
        # Empty list for new epoch 
        self.epoch_training_loss = []
        self.epoch_validation_loss = []
                 
    def save_model(self, epoch, mean_epoch_validation_loss, input_size, n_lstm_layer, n_hidden_lstm, n_hidden_fc, seq_size):
        if mean_epoch_validation_loss < self.lowest_loss:
            self.trials = 0
            self.lowest_loss = mean_epoch_validation_loss
            torch.save({
                'model_state_dict': self.model.state_dict(),
                'optimizer_state_dict': self.optimizer.state_dict(),
                'loss': mean_epoch_validation_loss
            }, self.location_model+self.fold+"_InputSize"+str(input_size)+"_LayerLstm"+
                str(n_lstm_layer)+"_HiddenLstm"+str(n_hidden_lstm)+"_HiddenFc"+str(n_hidden_fc)+"_Seq"+str(seq_size)+".pt")
            print("Epoch {}: best model saved with loss: {}".format(epoch, mean_epoch_validation_loss))
            return True
    
        # Else: Increase trails by one and start new epoch as long as not too many epochs 
        # were unsuccessful (controlled by patience)
        else:
            self.trials += 1
            if self.trials >= self.patience :
                print("Early stopping on epoch {}".format(epoch))
                return False
            return True
    
    def save_statistic(self, hist_loss, sequenze_size, n_lstm_layer, n_hidden_lstm, n_hidden_fc, time):
        with open(self.location_stats, 'a') as file:
            file.write("\n"+str(round(min(hist_loss),2))+","+str(sequenze_size)+","+str(n_lstm_layer)+","+ \
                       str(n_hidden_lstm)+","+str(n_hidden_fc)+","+str(round(time,1)))

## Initialize Trainer

In [13]:
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  scheduler=scheduler,
                  scheduler_active = param["cycling_lr"]["scheduler_active"],
                  criterion=criterion, 
                  location_model=param["filed_location"]["trained_model"], 
                  location_stats=param["filed_location"]["history"], 
                  patience=param['training']['patience']
                 )

## Training Loop
An epoch consists of a learning cycle over all batches of training data and an evaluation of the most recent model with the testing data. 

In [14]:
# Create lists to save training loss and validation loss of each epoch
hist_loss = []

for epoch in range(param['training']['n_epochs']):
    # Train with batches 
    mean_epoch_training_loss = trainer.train(data_loader_training)
    
    # Evaluate
    mean_epoch_validation_loss = trainer.evaluate(data_loader_validation, hist_loss, epoch)

    # Cache History
    trainer.cache_history_training(hist_loss, epoch, mean_epoch_training_loss, mean_epoch_validation_loss)

    # Save model if its the best one since the last change in configuration of hyperparameters
    status_ok = trainer.save_model(epoch, mean_epoch_validation_loss, param['model']['input_size'], 
                                   param['model']['lstm_layer'], param['model']['n_hidden_lstm'], 
                                   param['model']['n_hidden_fc'], param["model"]["sequence_size"])
    if not status_ok:
        break

# Safe results to csv file
df = pd.DataFrame(hist_loss)
df.to_csv(param["filed_location"]["history"], sep=";", index=False)

-------- epoch_no. 0 finished with eval loss 0.6965288090898663--------
Epoch 0: best model saved with loss: 0.6965288090898663
-------- epoch_no. 1 finished with eval loss 0.41500057962866166--------
Epoch 1: best model saved with loss: 0.41500057962866166
-------- epoch_no. 2 finished with eval loss 0.33616098931255045--------
Epoch 2: best model saved with loss: 0.33616098931255045
-------- epoch_no. 3 finished with eval loss 0.3079480937508881--------
Epoch 3: best model saved with loss: 0.3079480937508881
-------- epoch_no. 4 finished with eval loss 0.2902742980946749--------
Epoch 4: best model saved with loss: 0.2902742980946749
-------- epoch_no. 5 finished with eval loss 0.2674855360324492--------
Epoch 5: best model saved with loss: 0.2674855360324492
-------- epoch_no. 6 finished with eval loss 0.24910440946605328--------
Epoch 6: best model saved with loss: 0.24910440946605328


KeyboardInterrupt: 

# Training Phase 2: Consider mu and sigma, take pre-trained model form phase 1 

In [15]:
param_phase_2 = {
    "model" : {
        "path" : "../../../models/MLE_model/artifical_2_signalsFold xx_InputSize2_LayerLstm1_HiddenLstm15_HiddenFc75_Seq25.pt"
    }
}

In [16]:
K_phase_2 = 1

## Load pre-trained model

In [17]:
model = LstmMle_1(batch_size=param['model']['batch_size'], 
                 input_dim=param['model']['input_size'], 
                 n_hidden_lstm=param['model']['n_hidden_lstm'], 
                 n_layers=param['model']['lstm_layer'],
                 dropout_rate_lstm= param['model']['dropout_rate_lstm'],
                 dropout_rate_fc= param['model']['dropout_rate_fc'],
                 n_hidden_fc=param['model']['n_hidden_fc'],
                 K = K_phase_2
                 )
checkpoint = torch.load(param_phase_2["model"]["path"])
model.load_state_dict(checkpoint['model_state_dict'])

RuntimeError: Error(s) in loading state_dict for LstmMle_1:
	size mismatch for lstm.weight_ih_l0: copying a param with shape torch.Size([60, 13]) from checkpoint, the shape in current model is torch.Size([60, 2]).
	size mismatch for fc_y_hat.weight: copying a param with shape torch.Size([13, 75]) from checkpoint, the shape in current model is torch.Size([2, 75]).
	size mismatch for fc_y_hat.bias: copying a param with shape torch.Size([13]) from checkpoint, the shape in current model is torch.Size([2]).
	size mismatch for fc_tau.weight: copying a param with shape torch.Size([13, 75]) from checkpoint, the shape in current model is torch.Size([2, 75]).
	size mismatch for fc_tau.bias: copying a param with shape torch.Size([13]) from checkpoint, the shape in current model is torch.Size([2]).

## Initialize MSE Loss function

In [65]:
criterion = LossMle(param["model"]["input_size"], param["model"]["batch_size"])

## Initialize Optimizer and Cyclic Learning Rate Scheduler

In [66]:
optimizer = torch.optim.SGD(model.parameters(), lr=1.)  
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer=optimizer, 
                                              base_lr=param['cycling_lr']['base_lr'], 
                                              max_lr=param['cycling_lr']['max_lr'], 
                                              step_size_up=param['cycling_lr']['step_size'], 
                                              mode=param['cycling_lr']['mode'],
                                              gamma=param['cycling_lr']['gamma']
                                             )

## Initialize Trainer

In [67]:
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  scheduler=scheduler,
                  scheduler_active = param["cycling_lr"]["scheduler_active"],
                  criterion=criterion, 
                  location_model=param["filed_location"]["trained_model"], 
                  location_stats=param["filed_location"]["history"], 
                  patience=param['training']['patience']
                 )

## Training Loop
An epoch consists of a learning cycle over all batches of training data and an evaluation of the most recent model with the testing data. 

In [68]:
# Create lists to save training loss and validation loss of each epoch
hist_loss = []
for epoch in range(param['training']['n_epochs']):
    # Train with batches 
    mean_epoch_training_loss = trainer.train(data_loader_training)
    
    # Evaluate
    mean_epoch_validation_loss = trainer.evaluate(data_loader_validation, hist_loss, epoch)

    # Cache History
    trainer.cache_history_training(hist_loss, epoch, mean_epoch_training_loss, mean_epoch_validation_loss)

    # Save model if its the best one since the last change in configuration of hyperparameters
    status_ok = trainer.save_model(epoch, mean_epoch_validation_loss, param['model']['input_size'], 
                                   param['model']['lstm_layer'], param['model']['n_hidden_lstm'], 
                                   param['model']['n_hidden_fc'], param["model"]["sequence_size"])
    if not status_ok:
        break

# Safe results to csv file
df = pd.DataFrame(hist_loss)
df.to_csv(param["filed_location"]["history"], sep=";", index=False)

-------- epoch_no. 0 finished with eval loss -1.1801360681922552--------
Epoch 0: best model saved with loss: -1.1801360681922552
-------- epoch_no. 1 finished with eval loss -2.310223784711626--------
Epoch 1: best model saved with loss: -2.310223784711626
-------- epoch_no. 2 finished with eval loss -2.6696690996333916--------
Epoch 2: best model saved with loss: -2.6696690996333916
-------- epoch_no. 3 finished with eval loss -2.88374100051801--------
Epoch 3: best model saved with loss: -2.88374100051801
-------- epoch_no. 4 finished with eval loss -2.9297657501943966--------
Epoch 4: best model saved with loss: -2.9297657501943966
-------- epoch_no. 5 finished with eval loss -2.859773428018087--------
-------- epoch_no. 6 finished with eval loss -3.3086625034956967--------
Epoch 6: best model saved with loss: -3.3086625034956967
-------- epoch_no. 7 finished with eval loss -3.5183817225217204--------
Epoch 7: best model saved with loss: -3.5183817225217204
-------- epoch_no. 8 fin

KeyboardInterrupt: 