In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from random import randint

# own Modules 
from models_mle import LstmMle
from data_set import DataSetSensors
from data_preperator import DataPreperator
from trainer import Trainer
from loss_module import LossMle
from logger import Logger

# Train final model after cross validation showed generalisation ability of given model with its hyperparameters

## Parameters phm data

In [None]:
# Configuration for the PHM data challenge dataset (training file of recipe 67).
# NOTE: every "Parameters ..." cell of this notebook binds the same name `param`,
# so whichever of those cells was executed last decides the configuration used below.
param = {
    "data": {
        "path": "../../../data/phm_data_challenge/recipe/dataset_for_each_recipe/training/training_recipe_67.csv",
    },
    "preprocessing": {
        "first_order_difference": False,
        # Columns handed to DataPreperator as `ignored_features`
        "droped_features": [
            "ID",
            "stage",
            "Lot",
            "runnum",
            "recipe",
            "recipe_step",
            "up time",
            "ongoing time",
            "ETCHSOURCEUSAGE",
            "ETCHAUXSOURCETIMER",
            "ETCHAUX2SOURCETIMER",
            "FIXTURESHUTTERPOSITION",
            "ROTATIONSPEED",
        ],
        "features_not_to_scale": [],
    },
    "model": {
        "input_size": 12,
        "n_hidden_lstm": 15,
        "sequence_size": 25,
        "batch_size": 8,
        "lstm_layer": 1,
        "n_hidden_fc_1": 75,
        "n_hidden_fc_2": 25,
        "dropout_rate_lstm": 0.0,
        "dropout_rate_fc": 0.2,
    },
    "cycling_lr": {
        "scheduler_active": True,
        # Mode can be one of {triangular, triangular2, exp_range}
        "mode": "triangular",
        "gamma": 0.9995,
        "base_lr": 0.0001,
        "max_lr": 0.0005,
    },
    "training": {
        "n_epochs": 30,
        "patience": 5,
    },
    # Path prefixes for the saved model and for the log file
    "filed_location": {
        "trained_model": "../../../models/MLE_model/phm67_",
        "log_file": "../../../models/MLE_model/0_logs/phm_",
    },
}

## Parameters artificial signal

In [None]:
# Configuration for the artificial two-signal dataset.
# NOTE: every "Parameters ..." cell of this notebook binds the same name `param`,
# so whichever of those cells was executed last decides the configuration used below.
param = {
    "data": {
        "path": "../../../data/artifical_signals/artifical_2_signals.csv",
    },
    "preprocessing": {
        "first_order_difference": False,
        # Columns handed to DataPreperator as `ignored_features`
        "droped_features": [
            "ID",
        ],
        "features_not_to_scale": [],
    },
    "model": {
        "input_size": 2,
        "n_hidden_lstm": 15,
        "sequence_size": 25,
        "batch_size": 8,
        "lstm_layer": 1,
        "n_hidden_fc_1": 75,
        "n_hidden_fc_2": 25,
        "dropout_rate_lstm": 0.0,
        "dropout_rate_fc": 0.2,
    },
    "cycling_lr": {
        "scheduler_active": True,
        # Mode can be one of {triangular, triangular2, exp_range}
        "mode": "triangular",
        "gamma": 0.9995,
        "base_lr": 0.0001,
        "max_lr": 0.0005,
    },
    "training": {
        "n_epochs": 2,
        "patience": 5,
    },
    # Path prefixes for the saved model and for the log file
    "filed_location": {
        "trained_model": "../../../models/MLE_model/art2_",
        "log_file": "../../../models/MLE_model/0_logs/artifical_",
    },
}

## Parameters cpps Data

In [None]:
# Configuration for the CPPS degradation dataset (10 observed features).
# NOTE: every "Parameters ..." cell of this notebook binds the same name `param`,
# so whichever of those cells was executed last decides the configuration used below.
param = {
    "data": {
        "path": "../../../data/cpps_degradation_new/data_obs10/train/obs_space_train_sinusiod_preprocessed.csv",
    },
    "preprocessing": {
        "first_order_difference": False,
        # Columns handed to DataPreperator as `ignored_features`
        "droped_features": [
            "ID",
        ],
        "features_not_to_scale": [],
    },
    "model": {
        "input_size": 10,
        "n_hidden_lstm": 75,
        "sequence_size": 25,
        "batch_size": 8,
        "lstm_layer": 1,
        "n_hidden_fc_1": 75,
        "n_hidden_fc_2": 25,
        "dropout_rate_lstm": 0.0,
        "dropout_rate_fc": 0.2,
    },
    "cycling_lr": {
        "scheduler_active": True,
        # Mode can be one of {triangular, triangular2, exp_range}
        "mode": "triangular",
        "gamma": 0.9995,
        "base_lr": 0.0001,
        "max_lr": 0.0005,
    },
    "training": {
        "n_epochs": 30,
        "patience": 5,
    },
    # Path prefixes for the saved model and for the log file
    "filed_location": {
        "trained_model": "../../../models/MLE_model/cpps_A2_",
        "log_file": "../../../models/MLE_model/0_logs/cpps_A2_",
    },
}

## Parameters variance dataset

In [None]:
# Configuration for the univariate "increasing variance" dataset.
# NOTE: every "Parameters ..." cell of this notebook binds the same name `param`.
# This is the last of those cells, so on a top-to-bottom run it is the one in effect.
param = {
    "data": {
        "path": "../../../data/variation_in_variance/dataset_increasing_variance.csv",
    },
    "preprocessing": {
        "first_order_difference": False,
        # Columns handed to DataPreperator as `ignored_features`
        "droped_features": [
            "ID",
            "time",
            "mu",
            "sigma",
        ],
        "features_not_to_scale": [],
    },
    "model": {
        "input_size": 1,
        "n_hidden_lstm": 2,
        "sequence_size": 36,
        "batch_size": 2,
        "lstm_layer": 1,
        "n_hidden_fc_1": 5,
        "n_hidden_fc_2": 25,
        "dropout_rate_lstm": 0.0,
        "dropout_rate_fc": 0.2,
    },
    "cycling_lr": {
        "scheduler_active": True,
        # Mode can be one of {triangular, triangular2, exp_range}
        "mode": "triangular",
        "gamma": 0.9995,
        "base_lr": 0.0001,
        "max_lr": 0.0005,
    },
    "training": {
        "n_epochs": 30,
        "patience": 5,
    },
    # Path prefixes for the saved model and for the log file
    "filed_location": {
        "trained_model": "../../../models/MLE_model/variance_dataset_increasing_",
        "log_file": "../../../models/MLE_model/0_logs/variance_dataset_increasing_",
    },
}

## Preprocessing
1. First order difference (if selected)
2. Split data into train and validation data
3. Scale train and validation data with train's mean and variance

In [None]:
# Build the preprocessing object from the active configuration and run it.
# prepare_data() returns two values; only the first one (the training data) is kept here.
train_loader = DataPreperator(
    path=param["data"]["path"],
    ignored_features=param["preprocessing"]["droped_features"],
    features_not_to_scale=param["preprocessing"]["features_not_to_scale"],
    first_order_difference=param["preprocessing"]["first_order_difference"],
    # presumably the share of rows assigned to the training split -- TODO confirm in DataPreperator
    stake_training_data=0.8,
)
train_data, _ = train_loader.prepare_data()

# Number of entries in the training data
print(len(train_data))

### Mean and variance from the scaling process (only of continuous features)

In [None]:
# Fetch the statistics kept by the preprocessing object and show them, one per line.
mean, var = train_loader.provide_statistics()
print(mean, var, sep="\n")

## Initialize Dataset 
Time series data must be transformed into a structure of samples with `input` and `target` components before it can be used to fit a supervised learning model. <br>
For one-step-ahead prediction on a time series, the observations at prior time steps, so-called lag observations, are used as `input` and the `target` is the observation at the current time step.

For example, a univariate series can be expressed as a supervised learning problem with three time steps for `input` and one step as `target`, as follows:

|input|target|
|-----|------|
[1, 2, 3]|[4]
[2, 3, 4]|[5]
[3, 4, 5]|[6]

The Keras deep learning library provides the `TimeseriesGenerator` to automatically transform both univariate and multivariate time series data into such a format. In this PyTorch notebook, the project's own `DataSetSensors` class is used for this step instead.

In [None]:
# Wrap the preprocessed training data in the project's dataset class;
# the window length (`timesteps`) is the configured sequence size.
dataset_train = DataSetSensors(
    train_data,
    timesteps=param["model"]["sequence_size"],
)

## Initialize DataLoader
In practice the data has a different shape than in the table above, because the time series is multivariate and because batches are used. <br>
__First dimension__: batch size --> Defines the number of samples that will be propagated through the network simultaneously. <br>
__Second dimension:__ timesteps --> Number of time steps in each sequence that is passed into the LSTM (`sequence_size` in the parameters). <br>
__Third dimension:__ input_dim --> Is the number of features, i.e. the number of sensor signals collected at the same time (`input_size` in the parameters). <br>

The data is shuffled because the mini-batches are independent of each other; the time steps inside each sample remain in chronological order.

In [None]:
# Batch the windowed training samples for the training loop.
data_loader_training = DataLoader(
    dataset_train,
    batch_size=param["model"]["batch_size"],
    shuffle=True,    # draw the samples in a new random order every epoch
    drop_last=True,  # skip a trailing batch that is smaller than batch_size
    num_workers=0,   # load the data in the main process
)

In [None]:
# Sanity check: print the tensor sizes of the first two batches, then stop.
for batch_idx, (x, y) in enumerate(data_loader_training):
    print('Data of batch: {}'.format(batch_idx))
    print("Size of input data: {}".format(x.size()))
    print("Size of target data: {}".format(y.size()))
    if batch_idx >= 1:
        break

# Training Phase 1: Only consider mu
Set tau=0 --> sigma=1

In [None]:
# Value passed as `K` to the LstmMle constructor for training phase 1
K_phase_1 = 0

## Initialize Neural Network
### Options 1-4:
#### Architecture 1 - One FCNN (2 layers, last split for mu and sigma)
#### Architecture 2 - Two separate FCNNs, one each for mu and sigma (2 layers)
#### Architecture 3 - Two separate FCNNs, one each for mu and sigma (3 layers)
#### Architecture 4 - Two completely separate subnetworks (from LSTM layer to last FC layer)

In [None]:
# Seed torch's random number generator before the model is constructed.
torch.manual_seed(0)

# Phase-1 network: architecture option 1, built with the phase-1 K.
model = LstmMle(
    input_dim=param["model"]["input_size"],
    batch_size=param["model"]["batch_size"],
    n_layers=param["model"]["lstm_layer"],
    n_hidden_lstm=param["model"]["n_hidden_lstm"],
    n_hidden_fc_1=param["model"]["n_hidden_fc_1"],
    dropout_rate_lstm=param["model"]["dropout_rate_lstm"],
    dropout_rate_fc=param["model"]["dropout_rate_fc"],
    K=K_phase_1,
    option=1,
)

# Description of the chosen architecture; written to the log file later on
log_message_architecture = "Architecture: LSTM module and a subsequent FCNN (2 layers, last splited for mu and sigma)"

## Initialize MLE Loss function as torch module

In [None]:
# MLE loss module, constructed from the configured input size and batch size
criterion = LossMle(
    param["model"]["input_size"],
    param["model"]["batch_size"],
)

## Initialize Optimizer and Cyclic Learning Rate Scheduler

In [None]:
# Plain SGD; lr=1. is only the constructor value, the cyclic scheduler below
# works with the configured base_lr / max_lr bounds.
optimizer = optim.SGD(model.parameters(), lr=1.)

scheduler = optim.lr_scheduler.CyclicLR(
    optimizer=optimizer,
    base_lr=param["cycling_lr"]["base_lr"],
    max_lr=param["cycling_lr"]["max_lr"],
    # Authors of Cyclic LR suggest setting step_size 2-8 x training iterations in epoch.
    step_size_up=(len(train_data)/param['model']['batch_size'])*2,
    mode=param["cycling_lr"]["mode"],
    gamma=param["cycling_lr"]["gamma"],
)

## Initialize Trainer

In [None]:
# Bundle model, loss, optimizer and scheduler into the project's Trainer for phase 1.
trainer = Trainer(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    scheduler_active=param["cycling_lr"]["scheduler_active"],
    patience=param["training"]["patience"],
    location_model=param["filed_location"]["trained_model"],
)

## Training Loop
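An epoch consists of a learning cycle over all batches of training data. Because the final model is trained here, the loop below runs no separate evaluation on held-out data; saving the model and stopping early are both driven by the mean training loss of the epoch.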

In [None]:
# Initialise Logger
# A random five-digit id identifies this session; it is passed to the logger and to save_model().
session_id = str(randint(10000, 99999))
logger = Logger(param["filed_location"]["log_file"], session_id)

# Log model architecture and training configuration
logger.log_message("Architecture and Training configuration:")
logger.log_message("Loss function: MLE")
logger.log_message(log_message_architecture)
logger.log_message("Batch size: {}".format(param['model']['batch_size']))
logger.log_message("Input size: {}".format(param['model']['input_size']))
logger.log_message("Sequence length: {}".format(param["model"]["sequence_size"]))
logger.log_message("Hidden units LSTM: {}".format(param['model']['n_hidden_lstm']))
logger.log_message("Amount LSTM layer: {}".format(param['model']['lstm_layer']))
logger.log_message("Dropout rate LSTM: {}".format(param['model']['dropout_rate_lstm']))
logger.log_message("Dropout rate fc NN: {}".format(param['model']['dropout_rate_fc']))
logger.log_message("Hidden units fc1: {}".format(param['model']['n_hidden_fc_1']))
# Fixed: this line used to log n_hidden_fc_1 under the "fc2" label.
# NOTE(review): n_hidden_fc_2 is taken from the configuration; it is not passed to LstmMle in this notebook.
logger.log_message("Hidden units fc2: {}".format(param['model']['n_hidden_fc_2']))
logger.log_message("Cycling LR mode: {}".format(param['cycling_lr']['mode']))
logger.log_message("Cycling LR base LR: {}".format(param['cycling_lr']['base_lr']))
logger.log_message("Cycling LR max LR: {}".format(param['cycling_lr']['max_lr']))
logger.log_message("- -"*20)

print("Training phase 1 is started")
logger.log_message("Training phase 1 is started")

# Mean training loss of every epoch, in order
hist_loss = []
# Bound up front so the name exists even when the loop body never runs (n_epochs == 0)
path_model_phase_1 = None

for epoch in range(param['training']['n_epochs']):
    # Train with batches
    mean_epoch_training_loss = trainer.train(data_loader_training)
    hist_loss.append(mean_epoch_training_loss)

    # Save model if its the best one since the last change in configuration of hyperparameters.
    # save_model() returns a status flag and the path of the saved model.
    status_ok, path_model_phase_1 = trainer.save_model(epoch, mean_epoch_training_loss, session_id)

    # Log information of current epoch
    logger.log_current_statistics(epoch, mean_epoch_training_loss)

    # A falsy status ends training early
    if not status_ok:
        break

print("Training phase 1 is finished")
logger.log_message("Training phase 1 is finished")

# Training Phase 2: Consider mu and sigma, take pre-trained model from phase 1

In [None]:
# Settings that apply to training phase 2 only.
param_phase_2 = {
    "model": {
        # Model path returned by trainer.save_model() during phase 1
        "path": path_model_phase_1,
    },
    "cycling_lr": {
        "scheduler_active": True,
        # Mode can be one of {triangular, triangular2, exp_range}
        "mode": "triangular",
        "gamma": 0.9995,
        "base_lr": 0.0001,
        "max_lr": 0.0005,
    },
    "filed_location": {
        # NOTE(review): the file name mentions recipe 66 while the PHM configuration loads
        # recipe 67, and this key is not read by any cell visible in this notebook -- confirm.
        "history": "../../visualisation/files/history_training/MLE/phase2_phm_data_recipe_66.csv",
    },
}

In [None]:
# K value for training phase 2 (phase 1 uses K_phase_1 = 0)
K_phase_2 = 1

## Initialize Neural Network
### Options 1-4:
#### Architecture 1 - One FCNN (2 layers, last split for mu and sigma)
#### Architecture 2 - Two separate FCNNs, one each for mu and sigma (2 layers)
#### Architecture 3 - Two separate FCNNs, one each for mu and sigma (3 layers)
#### Architecture 4 - Two completely separate subnetworks (from LSTM layer to last FC layer)

In [None]:
# Seed torch's random number generator before the model is constructed.
torch.manual_seed(0)

# Phase-2 network: same class and architecture option as in phase 1.
# Fixed: the cell referenced `LstmMle_1`, a name that is neither imported nor defined in
# this notebook (NameError), and passed K_phase_1 although K_phase_2 is defined for this phase.
# TODO(review): the section title speaks of continuing from the phase-1 model, but the
# checkpoint at param_phase_2["model"]["path"] is not loaded here. The format written by
# Trainer.save_model() is not visible in this notebook, so no load call was added.
model = LstmMle(batch_size=param['model']['batch_size'],
                input_dim=param['model']['input_size'],
                n_hidden_lstm=param['model']['n_hidden_lstm'],
                n_layers=param['model']['lstm_layer'],
                dropout_rate_lstm=param['model']['dropout_rate_lstm'],
                dropout_rate_fc=param['model']['dropout_rate_fc'],
                n_hidden_fc_1=param['model']['n_hidden_fc_1'],
                K=K_phase_2,
                option=1
                )

# Description of the chosen architecture (same text as in phase 1)
log_message_architecture = "Architecture: LSTM module and a subsequent FCNN (2 layers, last splited for mu and sigma)"

## Initialize MLE Loss function

In [None]:
# Fresh MLE loss module for phase 2, built from the configured input size and batch size
criterion = LossMle(
    param["model"]["input_size"],
    param["model"]["batch_size"],
)

## Initialize Optimizer and Cyclic Learning Rate Scheduler

In [None]:
# New SGD optimizer for the phase-2 model; lr=1. is only the constructor value,
# the cyclic scheduler below works with the phase-2 base_lr / max_lr bounds.
optimizer = optim.SGD(model.parameters(), lr=1.)

scheduler = optim.lr_scheduler.CyclicLR(
    optimizer=optimizer,
    base_lr=param_phase_2["cycling_lr"]["base_lr"],
    max_lr=param_phase_2["cycling_lr"]["max_lr"],
    # Authors of Cyclic LR suggest setting step_size 2-8 x training iterations in epoch.
    step_size_up=(len(train_data)/param['model']['batch_size'])*2,
    mode=param_phase_2["cycling_lr"]["mode"],
    gamma=param_phase_2["cycling_lr"]["gamma"],
)

## Initialize Trainer

In [None]:
# Trainer for phase 2, wired to the phase-2 model, optimizer and scheduler.
# Fixed: `scheduler_active` is now read from param_phase_2, like every other cyclic-LR
# setting of this phase. Before, it came from the phase-1 `param`, so toggling the
# phase-2 flag had no effect.
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  scheduler=scheduler,
                  scheduler_active=param_phase_2["cycling_lr"]["scheduler_active"],
                  criterion=criterion,
                  location_model=param["filed_location"]["trained_model"],
                  patience=param['training']['patience']
                 )

## Training Loop
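An epoch consists of a learning cycle over all batches of training data. Because the final model is trained here, the loop below runs no separate evaluation on held-out data; saving the model and stopping early are both driven by the mean training loss of the epoch.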

In [None]:
# Start Training phase 2
# Reuses `logger` and `session_id` created in the phase-1 training cell.
logger.log_message("Training phase 2 is started")

# Bound up front so the name exists even when the loop body never runs (n_epochs == 0)
path_model_phase_2 = None

for epoch in range(param['training']['n_epochs']):
    # Train with batches
    mean_epoch_training_loss = trainer.train(data_loader_training)

    # Save model if its the best one since the last change in configuration of hyperparameters.
    # Fixed: save_model() returns (status, path), as the phase-1 loop shows. Binding the whole
    # tuple to `status_ok` made `not status_ok` always False, so early stopping never triggered.
    status_ok, path_model_phase_2 = trainer.save_model(epoch, mean_epoch_training_loss, session_id)

    # Log information of current epoch
    logger.log_current_statistics(epoch, mean_epoch_training_loss)

    # A falsy status ends training early
    if not status_ok:
        break

print("Training phase 2 is finished")
logger.log_message("Training phase 2 is finished")