In [2]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from random import randint
import os

# own Modules 
from models_mse import LstmMse
from data_set import DataSetSensors
from loss_module import LossMse
from data_preperator import DataPreperator, DataPreperatorPrediction
from trainer import Trainer
from logger import Logger
from anomaly_classifier import AnomalyClassifier
from predictor import PredictorMse

# Train final model after cross validation showed generalisation ability of given model with its hyperparameters

## Parameters

In [3]:
param = {
    "data" : {
        "path" : '../../../data/phm_data_challenge/recipe/dataset_for_each_recipe/training/training_recipe_67.csv' ,
    },
    "preprocessing" : {
        "first_order_difference" : False,
        "droped_features_training": ["ID", "stage", "Lot", "runnum", "recipe", "recipe_step",
                                     "up time", "ongoing time", 
                                     "ETCHSOURCEUSAGE", "ETCHAUXSOURCETIMER", 
                                     "ETCHAUX2SOURCETIMER", "FIXTURESHUTTERPOSITION", "ROTATIONSPEED"
                                     ],
        "droped_features_prediction": ["stage", "Lot", "runnum", "recipe", "recipe_step",
                            "up time", "ongoing time", 
                            "ETCHSOURCEUSAGE", "ETCHAUXSOURCETIMER", 
                            "ETCHAUX2SOURCETIMER", "FIXTURESHUTTERPOSITION", "ROTATIONSPEED"
                           ],
        "features_not_to_scale": []
    },
    "model" : {
        "input_size" : 12,
        "n_hidden_lstm" : 75,
        "sequence_size" : 75,
        "batch_size" : 8,
        "lstm_layer" : 1,
        "n_hidden_fc": 50,
        "dropout_rate_lstm": 0.0,
        "dropout_rate_fc": 0.2
    },
    "cycling_lr" : {
        "scheduler_active" : True, 
        # Mode can be one of {triangular, triangular2, exp_range}
        "mode" : "triangular", 
        "gamma" : 0.9995,
        "base_lr" : 0.001, 
        "max_lr" :0.05
    },
    "training": {
        "n_epochs" : 50,
        "patience" : 10,
    },
    "filed_location": {
        "trained_model" : "../../../models/MSE_model/phm_data_",
        "log_file" : "../../../models/MSE_model/0_logs/phm_",
        "temp_path" : "./temp_file.csv"
    },
}

## Preprocessing
1. First order difference (if selected)
2. Split data into train and validation data
3. Scale train and validation data with train's mean and variance

In [4]:
train_loader = DataPreperator(path=param['data']['path'], 
                              ignored_features=param['preprocessing']['droped_features_training'],
                              stake_training_data=0.8,
                              features_not_to_scale=param['preprocessing']['features_not_to_scale'],
                              first_order_difference=param["preprocessing"]["first_order_difference"])
train_data, _ = train_loader.prepare_data()
print(len(train_data))

23611


### Mean and variance from scale process (only of continious features)

In [5]:
mean_training_data, var_training_data = train_loader.provide_statistics()
print(mean_training_data)
print(var_training_data)

[ 0.00219541  0.39942487  0.48849002  0.39137344  0.49645521  0.06358841
 -0.03362983  0.42847005  0.23177262 -0.05862687 -0.12245322  0.26657224]
[3.63299444e-04 7.33308098e-01 8.24869101e-01 7.06758840e-01
 9.33803748e-01 4.13773967e-01 1.64392960e-01 8.44223904e-01
 3.70581250e-01 6.34987303e-01 4.49881967e-11 1.02046564e+00]


## Initialize Dataset 
Time series data must be transformed into a structure of samples with `input` and `target` components before it can be used to fit a supervised learning model. <br>
For a time series interested in one-step predictions, the observations at prior time steps, so-called lag observations, are used as `input` and the `target` is the observation at the current time step.

For example, a univariate series can be expressed as a supervised learning problem with three time steps for `input` and one step as `target`, as follows:

|input|target|
|-----|------|
[1, 2, 3]|[4]
[2, 3, 4]|[5]
[3, 4, 5]|[6]

The Keras deep learning library provides the `TimeseriesGenerator` to automatically transform both univariate and multivariate time series data into such a format. 

In [6]:
dataset_train = DataSetSensors(train_data, timesteps=param["model"]["sequence_size"])

TypeError: __init__() got an unexpected keyword argument 'target'

## Initialize DataLoader
Actually the data has a other size than in the table above because of multivariate time series data and because of using batches. <br>
__First dimension__: batch size --> Defines the number of samples that will be propagated through the network simultaneously. <br>
__Second dimension:__ timesteps --> Number of sequence which is passed into the LSTM <br>
__Third dimension:__ input_dim --> Is the number of features. In this case data from 7 sensors, collected at the same time. <br>

![](../../../knowledge/pictures/input_shape.png)

Data is shuffled because each mini batch is indipendent from each other, but samples of a minibatch are in chronological order.

In [7]:
data_loader_training = DataLoader(dataset_train, 
                                  batch_size=param["model"]["batch_size"], 
                                  num_workers=0, 
                                  shuffle=True, 
                                  drop_last=True
                                 )

In [8]:
for batch_idx, data in enumerate(data_loader_training):
    x,y = data
    print('Data of batch: {}'.format(batch_idx))
    print("Size of input data: {}".format(x.size()))
    print("Size of target data: {}".format(y.size()))
    if batch_idx >=1: break

Data of batch: 0
Size of input data: torch.Size([8, 25, 2])
Size of target data: torch.Size([8, 2])
Data of batch: 1
Size of input data: torch.Size([8, 25, 2])
Size of target data: torch.Size([8, 2])


## Initialize Neural Network
__Parameters for LSTM Modul:__
- input_size : The number of expected features in the input x
- hidden_size :The number of features in the hidden state h
- num_layers : Number of recurrent layers. E.g., setting num_layers=2 would mean stacking two LSTMs together to form a stacked LSTM, with the second LSTM taking in outputs of the first LSTM and computing the final results.
- batch_first : If True, then the input __and output__ tensors are provided as (batch, seq, feature).
- dropout – If non-zero, introduces a Dropout layer on the outputs of each LSTM layer except the last layer, with dropout probability equal to dropout. Default: 0

In [21]:
torch.manual_seed(0)
model = LstmMse(batch_size=param['model']['batch_size'], 
                input_dim=param['model']['input_size'], 
                n_hidden_lstm=param['model']['n_hidden_lstm'], 
                n_layers=param['model']['lstm_layer'],
                dropout_rate_lstm= param['model']['dropout_rate_lstm'],
                dropout_rate_fc= param['model']['dropout_rate_fc'],
                n_hidden_fc=param['model']['n_hidden_fc']
                )

## Define MSE Loss function as torch module

In [22]:
criterion = LossMse(param["model"]["input_size"], param["model"]["batch_size"])

## Initialize Optimizer and Cyclic Learning Rate Scheduler

In [23]:
optimizer = torch.optim.SGD(model.parameters(), lr=1.)  
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer=optimizer, 
                                              base_lr=param['cycling_lr']['base_lr'], 
                                              max_lr=param['cycling_lr']['max_lr'], 
                                              step_size_up=(len(train_data)/param['model']['batch_size'])*2, # Authors of Cyclic LR suggest setting step_size 2-8 x training iterations in epoch.
                                              mode=param['cycling_lr']['mode'],
                                              gamma=param['cycling_lr']['gamma']
                                             )

## Initialize Trainer

In [24]:
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  scheduler=scheduler,
                  scheduler_active = param["cycling_lr"]["scheduler_active"],
                  criterion=criterion, 
                  location_model=param["filed_location"]["trained_model"], 
                  patience=param['training']['patience']
                 )

## Training Loop
An epoch consists of a learning cycle over all batches of training data and an evaluation of the most recent model with the testing data. 

In [25]:
# Initialise Logger
session_id = str(randint(10000, 99999))
logger = Logger(param["filed_location"]["log_file"], session_id)

# Log model architecture and training configuration
logger.log_message("Architecture and Training configuration:")
logger.log_message("Loss function: MSE")
logger.log_message("Batch size: {}".format(param['model']['batch_size']))
logger.log_message("Input size: {}".format(param['model']['input_size']))
logger.log_message("Sequence length: {}".format(param["model"]["sequence_size"]))
logger.log_message("Hidden units LSTM: {}".format(param['model']['n_hidden_lstm']))
logger.log_message("Amount LSTM layer: {}".format(param['model']['lstm_layer']))
logger.log_message("Dropout rate LSTM: {}".format(param['model']['dropout_rate_lstm']))
logger.log_message("Dropout rate fc NN: {}".format(param['model']['dropout_rate_fc']))
logger.log_message("Hidden units fc: {}".format(param['model']['n_hidden_fc']))
logger.log_message("Cycling LR mode: {}".format(param['cycling_lr']['mode']))
logger.log_message("Cycling LR base LR: {}".format(param['cycling_lr']['base_lr']))
logger.log_message("Cycling LR max LR: {}".format(param['cycling_lr']['max_lr']))
logger.log_message("- -"*20)

print("Training phase is started")
logger.log_message("Training phase is started")
torch.manual_seed(0)

for epoch in range(param['training']['n_epochs']):
    # Train with batches 
    mean_epoch_training_loss = trainer.train(data_loader_training)

    # Save model if its the best one since the last change in configuration of hyperparameters
    status_ok = trainer.save_model(epoch, mean_epoch_training_loss, session_id)
    
    # Log information of current epoch
    logger.log_current_statistics(epoch, mean_epoch_training_loss)
    
    if not status_ok:
        break

print("Training phase is finished")
logger.log_message("Training phase is finished")

Training phase is started
Epoch 0: best model saved with loss: 0.16020687000072864


KeyboardInterrupt: 

## Get Mean Reconstruction Error of Training Set

In [26]:
# Load trained model
model = LstmMse(batch_size=param['model']['batch_size'], 
                input_dim=param['model']['input_size'], 
                n_hidden_lstm=param['model']['n_hidden_lstm'], 
                n_hidden_fc=param['model']['n_hidden_fc'], 
                n_layers=param['model']['lstm_layer'], 
                dropout_rate_lstm= param['model']['dropout_rate_lstm'],
                dropout_rate_fc= param['model']['dropout_rate_fc']
                )
checkpoint = torch.load(param["filed_location"]["trained_model"]+"id"+str(session_id))
model.load_state_dict(checkpoint['model_state_dict'])

# Initialize data loader
data_preperator = DataPreperatorPrediction(path=param['data']['path'], 
                                           ignored_features = param["preprocessing"]["droped_features_prediction"],
                                           mean_training_data=mean_training_data, 
                                           var_training_data=var_training_data, 
                                           first_order_difference=False 
                                          )                                  
preprocessed_data = data_preperator.prepare_data()
dataset = DataSet(preprocessed_data, 
                  timesteps=param["model"]["sequence_size"])
data_loader = DataLoader(dataset, 
                         batch_size=param['model']['batch_size'], 
                         num_workers=0, 
                         shuffle=False, 
                         drop_last=True)

# Initialize predictor
predictor = PredictorMse(model=model,
                         path_data=param["data"]["path"],
                         columns_to_ignore=param["preprocessing"]["droped_features_prediction"]
                         )

print("Start predicting.")
# Write header
with open(param["filed_location"]["temp_path"], "a+") as file:
            [file.write(column+";") for column in predictor.create_column_names_result()]
            file.write("\n")

for batch_number, (input_data, target_data) in enumerate(data_loader):
    # Predict sensor values in mini-batches
    batch_results = predictor.predict(input_data, target_data)
    
    # Write results to csv file
    with open(param["filed_location"]["temp_path"], "a") as file:
        for batch in batch_results:
            # Each result component of a singe prediction (ID, target, prediction, loss, latent space ...) is stored in lists
            # thus we have to unpack the list and seperate values with ;
            for value in batch:
                file.write(str(value)+";")
            file.write("\n")

    # Print status 
    if (batch_number*param['model']['batch_size'])%5000 == 0:
        print("Current status: " + str(param['model']['batch_size']*batch_number) + " samples are predicted.")

print("End of prediction.")

Start predicting.
Current status: 0 samples are predicted.
Current status: 5000 samples are predicted.
Current status: 10000 samples are predicted.
Current status: 15000 samples are predicted.
Current status: 20000 samples are predicted.
Current status: 25000 samples are predicted.
Current status: 30000 samples are predicted.
Current status: 35000 samples are predicted.
Current status: 40000 samples are predicted.
Current status: 45000 samples are predicted.
End of prediction.


In [27]:
# Get mean reconstruction error for subsequent anomaly detection phase
anomaly_classifier = AnomalyClassifier()
threshold_1, threshold_2 = anomaly_classifier.get_threshold(param["filed_location"]["temp_path"], 
                                                            param['model']['input_size'],
                                                            each_sensor=True)
print("Mean Reconstruction Error: {}". format(threshold_1))
logger.log_message("- -"*20)
logger.log_message("Mean Reconstruction Error: {}". format(threshold_1))
print("Max Reconstruction Error: {}". format(threshold_2))
logger.log_message("Max Reconstruction Error: {}". format(threshold_2))

# Delete temp file with training prediction
os.remove(param["filed_location"]["temp_path"])
print("File Removed!")

Mean Reconstruction Error: [0.267524793317451, 0.25217210557904907, 0.3347350508075944, 0.18631805760505732, 0.27893246511567554, 0.2513034659414993, 0.15804045863122743, 0.3206700669012139, 0.06309502507275307, 0.22265255676046522]
Max Reconstruction Error: [3.896674990653992, 3.16760790348053, 4.0532697439193734, 3.5825040340423584, 4.072380781173706, 3.623179197311402, 3.470192790031433, 3.4810324907302856, 3.324822783470154, 3.0716266632080083]
File Removed!
