In [13]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time

# own Modules 
from models import LstmMse
from data_set import DataSet
from cross_validation import CrossValidationProvider
from scaler import DataScaler
from trainer import Trainer
from loss_module import LossMse

## Take care of these things before training:
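- Select the correct data path and define `droped_features` (the columns to ignore).
- Adjust the model parameters under `model`.
- Adjust `step_size` under `cycling_lr` to match the number of training iterations per epoch.
- Adjust the output paths under `filed_location`.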

## Hyperparameters

In [21]:
# Central configuration of the notebook: data source, preprocessing, model
# hyperparameter grid, cyclic learning rate, training setup and output paths.
param = {
    "data": {
        "path": "../../../data/phm_data_challenge/01_M01_DC_prediction_1.csv",
    },
    "preprocessing": {
        # Columns passed to CrossValidationProvider as `ignored_features`.
        "droped_features": [
            "ID", "stage", "Lot", "runnum", "recipe", "recipe_step",
            "up time", "ongoing time",
            "ETCHSOURCEUSAGE", "ETCHAUXSOURCETIMER",
            "ETCHAUX2SOURCETIMER", "FIXTURESHUTTERPOSITION",
        ],
        # Passed to DataScaler; empty list means no feature is exempted.
        "features_not_to_scale": [],
    },
    "model": {
        "input_size": 13,
        # List-valued entries are iterated over as a grid in the training cell.
        "n_hidden_lstm": [100],
        "sequence_size": [100],
        "batch_size": 8,
        "lstm_layer": [2],
        "n_hidden_fc": [50],
        "dropout_rate": 0.2,
    },
    "cycling_lr": {
        "scheduler_active": True,
        # step_size is the number of training iterations (total samples / batch_size)
        # per half cycle. The authors suggest 2-8 x the training iterations of one epoch.
        # NOTE(review): 12500 and 8 are hard-coded here; keep them in sync with the
        # actual amount of training data and with model.batch_size.
        "step_size": (12500 / 8) * 2,
        # Mode can be one of {triangular, triangular2, exp_range}.
        "mode": "triangular",
        "gamma": 0.9995,
        "base_lr": 0.016,
        "max_lr": 0.75,
    },
    "training": {
        "stake_training_data": 0.75,
        "total_number": 50000,
        "n_folds_cv": 2,
        "n_epochs": 100,
        "patience": 5,
    },
    "filed_location": {
        "trained_model": "../../../models/MSE_model/phm_data_large",
        "history": "../../visualisation/files/history_training/phm_data_large.csv",
    },
}

![](../../../knowledge/pictures/nested_cv.png)

## Split Data into folds

In [22]:
# Configure the provider from the central `param` dict: data file, number of
# folds, amount of data, training share and the columns to ignore.
cv_provider = CrossValidationProvider(
    path=param["data"]["path"],
    no_folds=param["training"]["n_folds_cv"],
    amount_data=param["training"]["total_number"],
    stake_training_data=param["training"]["stake_training_data"],
    ignored_features=param["preprocessing"]["droped_features"],
)

# `folds` is sliced and concatenated by the cross-validation cell below;
# `test_data` is kept aside for the final test.
test_data, folds = cv_provider.provide_data()

## Cross Validation Training

In [23]:
# One summary dict per (fold split, hyperparameter configuration).
statistics_folds = []

# Iteration i trains on folds[:i] and validates on the fold that follows them.
for iteration in range(1, param["training"]["n_folds_cv"]):
    # Select folds for current iteration
    training_folds = folds[:iteration]
    validation_fold = folds[iteration:iteration + 1]
    print("Training Data : Fold 1-{}".format(iteration))
    print("Validation Data : Fold {}".format(iteration + 1))

    # Concatenate the training folds and unpack the single validation fold
    raw_training_data = pd.concat(training_folds, axis=0, ignore_index=True)
    raw_validation_data = validation_fold[0]
    print("Amount Training Data: {}".format(raw_training_data.shape[0]))
    print("Amount Validation Data: {}".format(raw_validation_data.shape[0]))
    print("- -" * 30)

    # Scale training data and validation data (validation data with mean and variance of training data)
    scaler = DataScaler(features_not_to_scale=param["preprocessing"]["features_not_to_scale"])
    train_data_scaled, validation_data_scaled = scaler.scale_data(raw_training_data, raw_validation_data)

    # Grid search over the list-valued entries of param["model"]
    for n_lstm_layer in param["model"]["lstm_layer"]:
        for sequence_size in param["model"]["sequence_size"]:
            # Data sets and loaders depend only on the sequence size, so they are
            # built once here and shared by the inner hyperparameter loops.
            dataset_train = DataSet(train_data_scaled, timesteps=sequence_size)
            dataset_validation = DataSet(validation_data_scaled, timesteps=sequence_size)

            # drop_last=True keeps every batch at exactly batch_size samples;
            # the model and the loss are both constructed with that fixed batch size.
            data_loader_training = DataLoader(
                dataset_train,
                batch_size=param["model"]["batch_size"],
                num_workers=0,
                shuffle=True,
                drop_last=True,
            )
            # NOTE(review): shuffling the validation loader together with drop_last
            # means the dropped remainder can differ between epochs — confirm intended.
            data_loader_validation = DataLoader(
                dataset_validation,
                batch_size=param["model"]["batch_size"],
                num_workers=0,
                shuffle=True,
                drop_last=True,
            )

            for n_hidden_lstm in param["model"]["n_hidden_lstm"]:
                for n_hidden_fc in param["model"]["n_hidden_fc"]:
                    print("Start with new hyperparameters in grid search: ")
                    print("Sequence_size: {}".format(sequence_size))
                    print("Number LSTM Layers: {}".format(n_lstm_layer))
                    print("LSTM Number Hidden Dimensions: {}".format(n_hidden_lstm))
                    print("FC NN Number Hidden Dimensions: {}".format(n_hidden_fc))

                    # History of training and validation loss per epoch
                    hist_loss = []

                    # Re-seed before building each model so every configuration
                    # starts from the same random state.
                    torch.manual_seed(0)
                    model = LstmMse(
                        batch_size=param["model"]["batch_size"],
                        input_dim=param["model"]["input_size"],
                        n_hidden_lstm=n_hidden_lstm,
                        n_layers=n_lstm_layer,
                        dropout_rate=param["model"]["dropout_rate"],
                        n_hidden_fc=n_hidden_fc,
                    )

                    # Define Loss Function
                    criterion = LossMse(param["model"]["input_size"], param["model"]["batch_size"])

                    # Initialize Optimizer and Cyclic Learning Rate Scheduler.
                    # The lr given to SGD is a placeholder; CyclicLR sets the
                    # learning rate from base_lr / max_lr.
                    optimizer = torch.optim.SGD(model.parameters(), lr=1.)
                    scheduler = torch.optim.lr_scheduler.CyclicLR(
                        optimizer=optimizer,
                        base_lr=param["cycling_lr"]["base_lr"],
                        max_lr=param["cycling_lr"]["max_lr"],
                        step_size_up=param["cycling_lr"]["step_size"],
                        mode=param["cycling_lr"]["mode"],
                        gamma=param["cycling_lr"]["gamma"],
                    )

                    # Initialize Trainer
                    trainer = Trainer(
                        model=model,
                        optimizer=optimizer,
                        scheduler=scheduler,
                        scheduler_active=param["cycling_lr"]["scheduler_active"],
                        criterion=criterion,
                        location_model=param["filed_location"]["trained_model"],
                        location_stats=param["filed_location"]["history"],
                        patience=param["training"]["patience"],
                    )

                    # Measure training time for current configuration
                    start = time.time()

                    for epoch in range(param["training"]["n_epochs"]):
                        # Train
                        mean_epoch_training_loss = trainer.train(data_loader_training)

                        # Evaluate
                        mean_epoch_validation_loss = trainer.evaluate(data_loader_validation, hist_loss, epoch)

                        # Cache History
                        trainer.cache_history_training(hist_loss, epoch, mean_epoch_training_loss,
                                                       mean_epoch_validation_loss)

                        # Save model if its the best one since the last change in configuration of hyperparameters
                        status_ok = trainer.save_model(epoch, mean_epoch_validation_loss,
                                                       param["model"]["input_size"],
                                                       n_lstm_layer, n_hidden_lstm, n_hidden_fc, sequence_size)
                        if not status_ok:
                            # The trainer signalled that training should stop for this configuration.
                            break

                    # Time in minutes
                    execution_time = (time.time() - start) / 60

                    # Record the result of this configuration unconditionally.
                    # Previously this happened only inside the early-stop branch, so a
                    # run that used all n_epochs never appeared in statistics_folds.
                    statistics = {
                        "training_folds": "fold 1-{}".format(iteration),
                        "validation_folds": "fold {}".format(iteration + 1),
                        "lowest_loss": trainer.lowest_loss,
                        "n_hidden_lstm": n_hidden_lstm,
                        "sequence_size": sequence_size,
                        "lstm_layer": n_lstm_layer,
                        "n_hidden_fc": n_hidden_fc,
                    }
                    statistics_folds.append(statistics)

                    # Save training statistics
                    #trainer.save_statistic(hist_loss, sequence_size, n_lstm_layer, n_hidden_lstm, n_hidden_fc, execution_time)

                    print("# #" * 30)
    

Training Data : Fold 1-1
Validation Data : Fold 2
Amount Training Data: 9375
Amount Validation Data: 9375
- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -
Start with new hyperparameters in grid search: 
Sequence_size: 100
Number LSTM Layers: 2
LSTM Number Hidden Dimensions: 100
FC NN Number Hidden Dimensions: 50
-------- epoch_no. 0 finished with eval loss 0.037689984713803984--------
Epoch 0: best model saved with loss: 0.037689984713803984


KeyboardInterrupt: 

## Test best Model with unseen data

In [None]:

    # NOTE(review): this cell looks like an unfinished copy of the cross-validation
    # training cell rather than a test on unseen data.
    # TODO: replace it with an evaluation of the best saved model on `test_data`.
    # Known problems as written:
    # - Every line is indented although the cell is at top level, so running it
    #   raises an IndentationError.
    # - n_hidden_lstm, n_hidden_fc, sequence_size, iteration, data_loader_training,
    #   data_loader_validation and statistics_folds are not defined in this cell;
    #   they would only exist as leftovers of the previous cell.
    # - `scaler` is created below but never used in this cell.

    # Scale training data and validation data (validation data with mean and variance of training data)
    scaler = DataScaler(features_not_to_scale= param['preprocessing']['features_not_to_scale'])
    
    # Start Training
    for n_lstm_layer in param["model"]["lstm_layer"]:


                    # Create lists to save training loss and validation loss of each epoch
                    hist_loss = []
                    torch.manual_seed(0)
                    model = LstmMse(batch_size=param['model']['batch_size'], 
                                    input_dim=param['model']['input_size'], 
                                    n_hidden_lstm=n_hidden_lstm, 
                                    n_layers=n_lstm_layer,
                                    dropout_rate= param['model']['dropout_rate'],
                                    n_hidden_fc=n_hidden_fc
                                    )

                    # Define Loss Function
                    criterion = LossMse(param["model"]["input_size"], param["model"]["batch_size"])

                    # Initialize Optimizer and Cyclic Learning Rate Scheduler
                    optimizer = torch.optim.SGD(model.parameters(), lr=1.)  
                    scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer=optimizer, 
                                                                  base_lr=param['cycling_lr']['base_lr'], 
                                                                  max_lr=param['cycling_lr']['max_lr'], 
                                                                  step_size_up=param['cycling_lr']['step_size'], 
                                                                  mode=param['cycling_lr']['mode'],
                                                                  gamma=param['cycling_lr']['gamma']
                                                                  )
                    # Initialize Trainer
                    trainer = Trainer(model=model,
                                      optimizer=optimizer,
                                      scheduler=scheduler,
                                      scheduler_active = param["cycling_lr"]["scheduler_active"],
                                      criterion=criterion, 
                                      location_model=param["filed_location"]["trained_model"], 
                                      location_stats=param["filed_location"]["history"], 
                                      patience=param['training']['patience']
                                     )
                    
                    # Measure training time for current configuration
                    start = time.time()

                    for epoch in range(param['training']['n_epochs']):
                        # Train
                        mean_epoch_training_loss = trainer.train(data_loader_training)

                        # Evaluate
                        mean_epoch_validation_loss = trainer.evaluate(data_loader_validation, hist_loss, epoch)

                        # Cache History
                        trainer.cache_history_training(hist_loss, epoch, mean_epoch_training_loss, mean_epoch_validation_loss)

                        # Save model if its the best one since the last change in configuration of hyperparameters
                        status_ok = trainer.save_model(epoch, mean_epoch_validation_loss, param['model']['input_size'], 
                                                       n_lstm_layer, n_hidden_lstm, n_hidden_fc, sequence_size)
                        # NOTE(review): statistics are recorded only in this early-stop
                        # branch; a run that completes all epochs appends nothing.
                        if not status_ok:
                            statistics = {"training_folds": "fold "+str(1)+"-"+str(iteration),
                                          "validation_folds":"fold "+str(iteration+1),
                                          "lowest_loss": trainer.lowest_loss, 
                                          "n_hidden_lstm" : n_hidden_lstm,
                                          "sequence_size" : sequence_size,
                                          "lstm_layer" : n_lstm_layer,
                                          "n_hidden_fc": n_hidden_fc
                                         }
                            statistics_folds.append(statistics)
                            break

                    # Time in minutes
                    execution_time = (time.time() - start)/60

                    # Save training statistics 
                    #trainer.save_statistic(hist_loss, sequence_size, n_lstm_layer, n_hidden_lstm, n_hidden_fc, execution_time)
                    
                    print("# #"*30)
    