In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# own Modules 
from models_mle import LstmMle_1, LstmMle_2, LstmMle_3
from data_set import DataSet
from data_preperator import DataPreperator
from trainer import Trainer
from loss_module import LossMle

# Train the final model after cross-validation has shown the generalisation ability of the given model and its hyperparameters

## Parameters phm data

In [2]:
# Hyperparameters and file locations for the PHM data challenge run.
# NOTE(review): the training file is recipe_67 while the trained model is stored
# under "phm_data_recipe_66" -- confirm that this mismatch is intended.
param = {
    "data": {
        "path": "../../../data/phm_data_challenge/recipe/dataset_for_each_recipe/training/training_recipe_67.csv",
    },
    "preprocessing": {
        "first_order_difference": False,
        # Passed to the data preparation step as the features to ignore.
        "droped_features": [
            "ID",
            "stage",
            "Lot",
            "runnum",
            "recipe",
            "recipe_step",
            "up time",
            "ongoing time",
            "ETCHSOURCEUSAGE",
            "ETCHAUXSOURCETIMER",
            "ETCHAUX2SOURCETIMER",
            "FIXTURESHUTTERPOSITION",
            "ROTATIONSPEED",
        ],
        "features_not_to_scale": [],
    },
    "model": {
        "input_size": 12,
        "n_hidden_lstm": 15,
        "sequence_size": 25,
        "batch_size": 8,
        "lstm_layer": 1,
        "n_hidden_fc_1": 75,
        "n_hidden_fc_2": 25,
        "dropout_rate_lstm": 0.0,
        "dropout_rate_fc": 0.2,
    },
    "cycling_lr": {
        "scheduler_active": True,
        # Mode can be one of {triangular, triangular2, exp_range}
        "mode": "triangular",
        "gamma": 0.9995,
        "base_lr": 0.0001,
        "max_lr": 0.0005,
    },
    "training": {
        "n_epochs": 20,
        "patience": 5,
    },
    "filed_location": {
        "trained_model": "../../../models/MLE_model/phm_data_recipe_66",
        # "xxx.csv" is the placeholder file name used for the training history.
        "history": "../../visualisation/files/history_training/MLE/xxx.csv",
    },
}

## Parameters artificial signal

In [None]:
# Hyperparameters and file locations for the artificial (sinusoid) signal run.
# Executing this cell replaces any `param` defined by another parameter cell.
param = {
    "data": {
        "path": "../../../data/cpps_degradation/train/samples_obs_space_train_sinusoid_20190902140748.csv",
    },
    "preprocessing": {
        "first_order_difference": False,
        # Passed to the data preparation step as the features to ignore.
        "droped_features": ["ID"],
        "features_not_to_scale": [],
    },
    "model": {
        "input_size": 2,
        "n_hidden_lstm": 15,
        "sequence_size": 25,
        "batch_size": 8,
        "lstm_layer": 1,
        "n_hidden_fc_1": 75,
        "n_hidden_fc_2": 25,
        "dropout_rate_lstm": 0.0,
        "dropout_rate_fc": 0.2,
    },
    "cycling_lr": {
        "scheduler_active": True,
        # Mode can be one of {triangular, triangular2, exp_range}
        "mode": "triangular",
        "gamma": 0.9995,
        "base_lr": 0.0001,
        "max_lr": 0.0005,
    },
    "training": {
        "n_epochs": 20,
        "patience": 5,
    },
    "filed_location": {
        "trained_model": "../../../models/MLE_model/artifical_2_signals",
        # "xxx.csv" is the placeholder file name used for the training history.
        "history": "../../visualisation/files/history_training/MLE/xxx.csv",
    },
}

## Parameters cpps Data

In [None]:
# Hyperparameters and file locations for the cpps degradation data run.
# Executing this cell replaces any `param` defined by another parameter cell.
param = {
    "data": {
        "path": "../../../data/cpps_degradation/cpps_data_degradation_training.csv",
    },
    "preprocessing": {
        "first_order_difference": False,
        # Passed to the data preparation step as the features to ignore.
        "droped_features": ["ID"],
        "features_not_to_scale": [],
    },
    "model": {
        "input_size": 10,
        "n_hidden_lstm": 15,
        "sequence_size": 25,
        "batch_size": 8,
        "lstm_layer": 1,
        "n_hidden_fc_1": 75,
        "n_hidden_fc_2": 25,
        "dropout_rate_lstm": 0.0,
        "dropout_rate_fc": 0.2,
    },
    "cycling_lr": {
        "scheduler_active": True,
        # Mode can be one of {triangular, triangular2, exp_range}
        "mode": "triangular",
        "gamma": 0.9995,
        "base_lr": 0.0001,
        "max_lr": 0.0005,
    },
    "training": {
        "n_epochs": 20,
        "patience": 5,
    },
    "filed_location": {
        "trained_model": "../../../models/MLE_model/degeneration_cpps_data",
        # "xxx.csv" is the placeholder file name used for the training history.
        "history": "../../visualisation/files/history_training/MLE/xxx.csv",
    },
}

## Preprocessing
1. First order difference (if selected)
2. Split data into train and validation data
3. Scale train and validation data with train's mean and variance

In [3]:
# Build the data preparation helper from the active `param` configuration.
# stake_training_data=0.99 is presumably the share of rows used for training
# (TODO confirm against DataPreperator); the second value returned by
# prepare_data() is not needed for the final training run and is discarded.
train_loader = DataPreperator(
    path=param["data"]["path"],
    stake_training_data=0.99,
    first_order_difference=param["preprocessing"]["first_order_difference"],
    ignored_features=param["preprocessing"]["droped_features"],
    features_not_to_scale=param["preprocessing"]["features_not_to_scale"],
)
train_data, _ = train_loader.prepare_data()

# Number of training rows, shown as a quick sanity check.
print(len(train_data))

29219


### Mean and variance from the scaling process (continuous features only)

In [4]:
# Statistics reported by the data preparation helper, printed one per line.
mean, var = train_loader.provide_statistics()
print(mean, var, sep="\n")

[ 0.00236702  0.3953089   0.48772743  0.3857511   0.49987399  0.06291772
 -0.03491417  0.43371134  0.23365129 -0.06136357 -0.12245359  0.2516167 ]
[3.62155978e-04 7.33502893e-01 8.30760891e-01 7.06805763e-01
 9.44752420e-01 4.07861536e-01 1.61079596e-01 8.46321709e-01
 3.65172841e-01 6.37131077e-01 4.60727666e-11 9.93519995e-01]


## Initialize Dataset 
Time series data must be transformed into samples with an `input` and a `target` component before it can be used to fit a supervised learning model. <br>
For one-step-ahead prediction, the observations at prior time steps (so-called lag observations) are used as `input`, and the observation at the current time step is the `target`.

For example, a univariate series can be expressed as a supervised learning problem with three time steps for `input` and one step as `target`, as follows:

|input|target|
|-----|------|
[1, 2, 3]|[4]
[2, 3, 4]|[5]
[3, 4, 5]|[6]

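The Keras deep learning library provides the `TimeseriesGenerator` to automatically transform both univariate and multivariate time series data into such a format. In this notebook the windowing is handled by the project's own `DataSet` class, which is given `sequence_size` as its number of time steps.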

In [None]:
# Wrap the prepared training data in the project's DataSet; the window length
# is the configured sequence size.
dataset_train = DataSet(
    train_data,
    timesteps=param["model"]["sequence_size"],
)

## Initialize DataLoader
In practice the data has a different shape than in the table above, because the time series is multivariate and because batches are used. <br>
__First dimension:__ batch size --> Defines the number of samples that will be propagated through the network simultaneously. <br>
__Second dimension:__ timesteps --> Number of time steps in each sequence that is passed into the LSTM. <br>
__Third dimension:__ input_dim --> The number of features, i.e. the sensor signals collected at the same time step. It must match `param['model']['input_size']`. <br>

![](../../../knowledge/pictures/input_shape.png)

The samples are shuffled because the mini-batches are independent of each other; the time steps within each sample remain in chronological order.

In [None]:
# Mini-batch loader for training: samples are drawn in shuffled order, loading
# happens in the main process (num_workers=0), and a trailing incomplete batch
# is dropped so that every batch has exactly batch_size samples.
data_loader_training = DataLoader(
    dataset_train,
    batch_size=param["model"]["batch_size"],
    shuffle=True,
    drop_last=True,
    num_workers=0,
)

In [None]:
# Peek at the first two mini-batches to check the tensor shapes.
for batch_idx, data in enumerate(data_loader_training):
    x, y = data
    print(f"Data of batch: {batch_idx}")
    print(f"Size of input data: {x.size()}")
    print(f"Size of target data: {y.size()}")
    # Stop once batch 0 and batch 1 have been shown.
    if batch_idx >= 1:
        break

# Training Phase 1: Only consider mu
Set tau=0 --> sigma=1

In [None]:
# Value handed to the models as `K` during phase 1 (the section heading
# describes this phase as tau=0, i.e. sigma=1).
K_phase_1 = 0

## Initialize Neural Network
### Architecture 1 - Normal 

In [None]:
# Architecture 1 for phase 1. The torch RNG is seeded before the model is
# constructed. Running this cell (re)binds `model`; presumably only one of the
# architecture cells is meant to be executed per run.
torch.manual_seed(0)
model = LstmMle_1(
    input_dim=param["model"]["input_size"],
    batch_size=param["model"]["batch_size"],
    n_layers=param["model"]["lstm_layer"],
    n_hidden_lstm=param["model"]["n_hidden_lstm"],
    n_hidden_fc=param["model"]["n_hidden_fc_1"],
    dropout_rate_lstm=param["model"]["dropout_rate_lstm"],
    dropout_rate_fc=param["model"]["dropout_rate_fc"],
    K=K_phase_1,
)

### Architecture 2 - Separate network for mu and tau

In [None]:
# Architecture 2 for phase 1. The torch RNG is seeded before the model is
# constructed. Running this cell (re)binds `model`; presumably only one of the
# architecture cells is meant to be executed per run.
torch.manual_seed(0)
model = LstmMle_2(
    input_dim=param["model"]["input_size"],
    batch_size=param["model"]["batch_size"],
    n_layers=param["model"]["lstm_layer"],
    n_hidden_lstm=param["model"]["n_hidden_lstm"],
    n_hidden_fc=param["model"]["n_hidden_fc_1"],
    dropout_rate_lstm=param["model"]["dropout_rate_lstm"],
    dropout_rate_fc=param["model"]["dropout_rate_fc"],
    K=K_phase_1,
)

### Architecture 3 - Separate network for mu and tau which is deeper than LstmMle_2

In [None]:
# Architecture 3 for phase 1 (takes two fully connected layer sizes). The torch
# RNG is seeded before the model is constructed. Running this cell (re)binds
# `model`; presumably only one of the architecture cells is meant to be
# executed per run.
torch.manual_seed(0)
model = LstmMle_3(
    input_dim=param["model"]["input_size"],
    batch_size=param["model"]["batch_size"],
    n_layers=param["model"]["lstm_layer"],
    n_hidden_lstm=param["model"]["n_hidden_lstm"],
    n_hidden_fc_1=param["model"]["n_hidden_fc_1"],
    n_hidden_fc_2=param["model"]["n_hidden_fc_2"],
    dropout_rate_lstm=param["model"]["dropout_rate_lstm"],
    dropout_rate_fc=param["model"]["dropout_rate_fc"],
    K=K_phase_1,
)

## Initialize MLE Loss function as torch module

In [None]:
# MLE loss module, constructed from the number of input features and the batch size.
criterion = LossMle(
    param["model"]["input_size"],
    param["model"]["batch_size"],
)

## Initialize Optimizer and Cyclic Learning Rate Scheduler

In [None]:
# Plain SGD. The lr passed here is only a placeholder: CyclicLR sets the
# optimizer's learning rate to base_lr when it is constructed and cycles it
# between base_lr and max_lr afterwards.
# NOTE(review): if "scheduler_active" were switched off, lr=1. could be what the
# Trainer ends up using -- confirm against Trainer.
optimizer = torch.optim.SGD(model.parameters(), lr=1.)

# Authors of Cyclic LR suggest setting step_size 2-8 x training iterations in epoch.
# The iterations per epoch are the batches the loader actually yields (it drops
# the last incomplete batch), so len(data_loader_training) is used instead of
# len(train_data) / batch_size. This also keeps step_size_up an integer, as
# documented for CyclicLR.
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer=optimizer,
                                              base_lr=param['cycling_lr']['base_lr'],
                                              max_lr=param['cycling_lr']['max_lr'],
                                              step_size_up=2 * len(data_loader_training),
                                              mode=param['cycling_lr']['mode'],
                                              gamma=param['cycling_lr']['gamma']
                                             )

## Initialize Trainer

In [None]:
# Phase 1 trainer: bundles model, loss, optimizer and LR scheduler with the
# output locations and the patience value from `param`.
trainer = Trainer(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    scheduler_active=param["cycling_lr"]["scheduler_active"],
    patience=param["training"]["patience"],
    location_model=param["filed_location"]["trained_model"],
    location_stats=param["filed_location"]["history"],
)

## Training Loop
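An epoch consists of one learning cycle over all batches of the training data. No separate evaluation is run here: the final model is trained on nearly all available data, and the mean training loss of the epoch is passed to `save_model`.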

In [None]:
# Mean training loss of every completed epoch of phase 1
hist_loss = []

for epoch in range(param['training']['n_epochs']):
    # Train with batches
    mean_epoch_training_loss = trainer.train(data_loader_training)
    # Keep the loss history; the list was created for this but never filled before.
    hist_loss.append(mean_epoch_training_loss)

    # Save model if its the best one since the last change in configuration of hyperparameters.
    # save_model returns a status flag and a model path; the path is needed in
    # phase 2 to load the pre-trained weights.
    status_ok, path_model_phase_1 = trainer.save_model(epoch, mean_epoch_training_loss, param['model']['input_size'],
                                                       param['model']['lstm_layer'], param['model']['n_hidden_lstm'],
                                                       param['model']['n_hidden_fc_1'], param["model"]["sequence_size"])
    # A falsy status ends training early (presumably patience exhausted -- confirm in Trainer).
    if not status_ok:
        break
print("Finished training phase 1")

# Training Phase 2: Consider mu and sigma, take pre-trained model from phase 1 

In [None]:
# Settings that apply to phase 2 only. The model path is the one returned by
# save_model in the phase 1 training loop.
# NOTE(review): the history file name is fixed to the PHM recipe 66 run, whichever
# parameter cell was used for `param` -- adjust it when training on other data.
param_phase_2 = {
    "model": {
        "path": path_model_phase_1,
    },
    "cycling_lr": {
        "scheduler_active": True,
        # Mode can be one of {triangular, triangular2, exp_range}
        "mode": "triangular",
        "gamma": 0.9995,
        "base_lr": 0.0001,
        "max_lr": 0.0005,
    },
    "filed_location": {
        "history": "../../visualisation/files/history_training/MLE/phase2_phm_data_recipe_66.csv",
    },
}

In [None]:
# Value intended as the models' `K` argument in phase 2, where mu and sigma are
# both considered (phase 1 used 0).
K_phase_2 = 1

## Initialize Neural Network
### Architecture 1 - Normal 

In [None]:
# Architecture 1 for phase 2: rebuild the network with K = K_phase_2 and start
# from the weights saved in phase 1.
# Seed the torch RNG as every other model cell does, so all architectures enter
# phase 2 with the same RNG state.
torch.manual_seed(0)
model = LstmMle_1(batch_size=param['model']['batch_size'], 
                 input_dim=param['model']['input_size'], 
                 n_hidden_lstm=param['model']['n_hidden_lstm'], 
                 n_layers=param['model']['lstm_layer'],
                 dropout_rate_lstm= param['model']['dropout_rate_lstm'],
                 dropout_rate_fc= param['model']['dropout_rate_fc'],
                 n_hidden_fc=param['model']['n_hidden_fc_1'],
                 K = K_phase_2
                 )
# Load the checkpoint written in phase 1 and restore its weights.
checkpoint = torch.load(param_phase_2["model"]["path"])
model.load_state_dict(checkpoint['model_state_dict'])

### Architecture 2 - Separate network for mu and tau

In [None]:
# Architecture 2 for phase 2: rebuild the network and start from the weights
# saved in phase 1.
torch.manual_seed(0)
model = LstmMle_2(batch_size=param['model']['batch_size'], 
                 input_dim=param['model']['input_size'], 
                 n_hidden_lstm=param['model']['n_hidden_lstm'], 
                 n_layers=param['model']['lstm_layer'],
                 dropout_rate_lstm= param['model']['dropout_rate_lstm'],
                 dropout_rate_fc= param['model']['dropout_rate_fc'],
                 n_hidden_fc=param['model']['n_hidden_fc_1'],
                 # Phase 2 must use K_phase_2; passing K_phase_1 would repeat
                 # the phase 1 setting.
                 K = K_phase_2
                 )
# Load the checkpoint written in phase 1 and restore its weights.
checkpoint = torch.load(param_phase_2["model"]["path"])
model.load_state_dict(checkpoint['model_state_dict'])

### Architecture 3 - Separate network for mu and tau which is deeper than LstmMle_2

In [None]:
# Architecture 3 for phase 2: rebuild the network and start from the weights
# saved in phase 1.
torch.manual_seed(0)
model = LstmMle_3(batch_size=param['model']['batch_size'], 
                 input_dim=param['model']['input_size'], 
                 n_hidden_lstm=param['model']['n_hidden_lstm'], 
                 n_layers=param['model']['lstm_layer'],
                 dropout_rate_lstm= param['model']['dropout_rate_lstm'],
                 dropout_rate_fc= param['model']['dropout_rate_fc'],
                 n_hidden_fc_1=param['model']['n_hidden_fc_1'], 
                 n_hidden_fc_2=param['model']['n_hidden_fc_2'],
                 # Phase 2 must use K_phase_2; passing K_phase_1 would repeat
                 # the phase 1 setting.
                 K = K_phase_2
                 )
# Load the checkpoint written in phase 1 and restore its weights, as the other
# two architecture cells do; without this, phase 2 would start from a freshly
# initialised network.
checkpoint = torch.load(param_phase_2["model"]["path"])
model.load_state_dict(checkpoint['model_state_dict'])

## Initialize MLE Loss function

In [None]:
# MLE loss module for phase 2, constructed from the number of input features
# and the batch size.
criterion = LossMle(
    param["model"]["input_size"],
    param["model"]["batch_size"],
)

## Initialize Optimizer and Cyclic Learning Rate Scheduler

In [None]:
# Plain SGD for phase 2. The lr passed here is only a placeholder: CyclicLR sets
# the optimizer's learning rate to base_lr when it is constructed and cycles it
# between base_lr and max_lr afterwards.
# NOTE(review): if "scheduler_active" were switched off, lr=1. could be what the
# Trainer ends up using -- confirm against Trainer.
optimizer = torch.optim.SGD(model.parameters(), lr=1.)

# Authors of Cyclic LR suggest setting step_size 2-8 x training iterations in epoch.
# The iterations per epoch are the batches the loader actually yields (it drops
# the last incomplete batch), so len(data_loader_training) is used instead of
# len(train_data) / batch_size. This also keeps step_size_up an integer, as
# documented for CyclicLR.
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer=optimizer,
                                              base_lr=param_phase_2['cycling_lr']['base_lr'],
                                              max_lr=param_phase_2['cycling_lr']['max_lr'],
                                              step_size_up=2 * len(data_loader_training),
                                              mode=param_phase_2['cycling_lr']['mode'],
                                              gamma=param_phase_2['cycling_lr']['gamma']
                                             )

## Initialize Trainer

In [None]:
# Phase 2 trainer. The scheduler switch and the history file are taken from
# param_phase_2, which defines them specifically for this phase (previously the
# phase 1 values were used and the phase 2 history path was ignored).
# Model location and patience are not redefined for phase 2 and are still read
# from `param`.
# NOTE(review): phase 2 therefore saves under the same model location as
# phase 1 -- confirm whether Trainer distinguishes the files.
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  scheduler=scheduler,
                  scheduler_active = param_phase_2["cycling_lr"]["scheduler_active"],
                  criterion=criterion, 
                  location_model=param["filed_location"]["trained_model"], 
                  location_stats=param_phase_2["filed_location"]["history"], 
                  patience=param['training']['patience']
                 )

## Training Loop
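An epoch consists of one learning cycle over all batches of the training data. No separate evaluation is run here: the final model is trained on nearly all available data, and the mean training loss of the epoch is passed to `save_model`.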

In [None]:
# Mean training loss of every completed epoch of phase 2
hist_loss = []
for epoch in range(param['training']['n_epochs']):
    # Train with batches
    mean_epoch_training_loss = trainer.train(data_loader_training)
    # Keep the loss history; the list was created for this but never filled before.
    hist_loss.append(mean_epoch_training_loss)

    # Save model if its the best one since the last change in configuration of hyperparameters.
    # Only a training loss exists in this notebook, so that is what is passed on
    # (the former `mean_epoch_validation_loss` was never defined).
    # save_model returns a status flag and a model path; both are unpacked so
    # that the early-stopping check below tests the flag and not a tuple, which
    # would always be truthy.
    status_ok, path_model_phase_2 = trainer.save_model(epoch, mean_epoch_training_loss, param['model']['input_size'],
                                                       param['model']['lstm_layer'], param['model']['n_hidden_lstm'],
                                                       param['model']['n_hidden_fc_1'], param["model"]["sequence_size"])
    # A falsy status ends training early (presumably patience exhausted -- confirm in Trainer).
    if not status_ok:
        break

print("Finished training phase 2")