In [22]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
import pandas as pd
import numpy as np

# own Modules 
from models_mse import LstmMse
from models_mle import LstmMle_1, LstmMle_2, LstmMle_3, LstmMle_4
from data_preperator import DataPreperatorPrediction
from data_set import DataSet
from predictor import PredictorMse, PredictorMle
from predictor_cell_state import PredictorMleCellState
from anomaly_classifier import AnomalyClassifier

# Mode: 

In [23]:
# Selects the model family used by the rest of the notebook. The value is
# interpolated into the checkpoint path and the result path of every
# parameter cell via "...{}...".format(mode).
# presumably "MSE" is the other supported value (see the LstmMse / PredictorMse cells) - confirm
mode = "MLE"

## Parameters phm data

In [9]:
# Run configuration for the PHM data challenge test set (recipe 67).
# Every "Parameters ..." cell assigns the same name `param`, so only the
# most recently executed one is used by the cells further down.
# NOTE(review): the checkpoint and result file names contain "SQ36" while
# sequence_size is 8 - confirm that sequence_size matches the value the
# checkpoint was trained with.
param = {
    "data" : {
        "path" : '../../data/phm_data_challenge/recipe/dataset_for_each_recipe/test/test_recipe_67_labeled_short.csv',
        # Columns handed to DataPreperatorPrediction (ignored_features) and to
        # the predictors (columns_to_ignore).
        "droped_feature" : ["stage", "Lot", "runnum", "recipe", "recipe_step",
                            "up time", "ongoing time", 
                            "ETCHSOURCEUSAGE", "ETCHAUXSOURCETIMER", 
                            "ETCHAUX2SOURCETIMER", "FIXTURESHUTTERPOSITION", "ROTATIONSPEED", "status"
                           ],
        # Not referenced by any of the cells shown in this notebook.
        "features_not_to_scale": []
    },
    "model" : {
        # Checkpoint file; `mode` selects the model sub-directory.
        "path" : "../../models/{}_model/phm67_SQ36_id40787.pt".format(mode),
        # Equals the number of entries in the phm mean/variance lists (12).
        "input_size" : 12,
        "n_hidden_lstm" : 13,
        "n_hidden_fc_1" : 55,
        # Only passed to LstmMle_3 in the model cells.
        "n_hidden_fc_2" : 55,
        # Passed to DataSet as `timesteps`.
        "sequence_size" : 8,
        # Used for the DataLoader and for the model constructors.
        "batch_size" : 50,
        "lstm_layer" : 1,
        "dropout_rate_lstm": 0.0,
        "dropout_rate_fc": 0.2,
        # Passed as `K` to the LstmMle_* constructors; meaning not visible here.
        "K":1
    },
    "results": {
        # CSV file the prediction loop writes to.
        "path" : "../visualisation/files/prediction/{}/2_research_question/phm_67_SQ36.csv".format(mode),
    }
}

## Parameters artificial data

In [None]:
# Run configuration for the artificial two-signal dataset with injected errors.
# Every "Parameters ..." cell assigns the same name `param`, so only the
# most recently executed one is used by the cells further down.
param = {
    "data" : {
        "path" : '../../data/artifical_signals/artifical_2_signals_errors.csv',
        # Columns handed to DataPreperatorPrediction (ignored_features) and to
        # the predictors (columns_to_ignore).
        "droped_feature" : ["anomaly"
                           ],
        # Not referenced by any of the cells shown in this notebook.
        "features_not_to_scale": []
    },
    "model" : {
        # Checkpoint file; `mode` selects the model sub-directory.
        "path" : "../../models/{}_model/art2_id82478.pt".format(mode),
        # Equals the number of entries in the artificial mean/variance lists (2).
        "input_size" : 2,
        "n_hidden_lstm" : 8,
        "n_hidden_fc_1" : 55,
        # Only passed to LstmMle_3 in the model cells.
        "n_hidden_fc_2" : 25,
        # Passed to DataSet as `timesteps`.
        "sequence_size" : 21,
        # Used for the DataLoader and for the model constructors.
        "batch_size" : 50,
        "lstm_layer" : 1,
        "dropout_rate_lstm": 0.0,
        "dropout_rate_fc": 0.2,
        # Passed as `K` to the LstmMle_* constructors; meaning not visible here.
        "K":1
    },
    "results": {
        # CSV file the prediction loop writes to.
        "path" : "../visualisation/files/prediction/{}/artfic.csv".format(mode),
    }
}

## Parameters cpps data

In [None]:
# Run configuration for the cpps degradation test set (obs10, sinusoid error).
# Every "Parameters ..." cell assigns the same name `param`, so only the
# most recently executed one is used by the cells further down.
param = {
    "data" : {
        "path" : '../../data/cpps_degradation_new/data_obs10/test/obs_space_error_sinusiod_preprocessed.csv',
        # Columns handed to DataPreperatorPrediction (ignored_features) and to
        # the predictors (columns_to_ignore).
        "droped_feature" : ["status"
                           ],
        # Not referenced by any of the cells shown in this notebook.
        "features_not_to_scale": []
    },
    "model" : {
        # Checkpoint file; `mode` selects the model sub-directory.
        "path" : "../../models/{}_model/cpps_id61644.pt".format(mode),
        # Equals the number of entries in the cpps mean/variance lists (10).
        "input_size" : 10,
        "n_hidden_lstm" : 21,
        "n_hidden_fc_1" : 55,
        # Only passed to LstmMle_3 in the model cells.
        "n_hidden_fc_2" : 25,
        # Passed to DataSet as `timesteps`.
        "sequence_size" : 8,
        # Used for the DataLoader and for the model constructors.
        "batch_size" : 50,
        "lstm_layer" : 1,
        "dropout_rate_lstm": 0.0,
        "dropout_rate_fc": 0.2,
        # Passed as `K` to the LstmMle_* constructors; meaning not visible here.
        "K":1
    },
    "results": {
        # CSV file the prediction loop writes to.
        "path" : "../visualisation/files/prediction/{}/cpps.csv".format(mode), 
    }
}

## Parameters ramming data

In [19]:
# Run configuration for the ramming dataset.
# Every "Parameters ..." cell assigns the same name `param`, so only the
# most recently executed one is used by the cells further down.
# NOTE(review): the data path points at data_train_preprocessed.csv and the
# result file is ramming_train.csv, i.e. predictions are produced for the
# training file - presumably intentional, confirm.
param = {
    "data" : {
        "path" : '../../data/ramming_data/data_train_preprocessed.csv',
        # Columns handed to DataPreperatorPrediction (ignored_features) and to
        # the predictors (columns_to_ignore).
        "droped_feature" : ["status"
                           ],
        # Not referenced by any of the cells shown in this notebook.
        "features_not_to_scale": []
    },
    "model" : {
        # Checkpoint file; `mode` selects the model sub-directory.
        "path" : "../../models/{}_model/ramming_id59939.pt".format(mode),
        # Equals the number of entries in the ramming mean/variance lists (50).
        "input_size" : 50,
        "n_hidden_lstm" : 100,
        "n_hidden_fc_1" : 100,
        # Only passed to LstmMle_3 in the model cells.
        "n_hidden_fc_2" : 25,
        # Passed to DataSet as `timesteps`.
        "sequence_size" : 8,
        # Used for the DataLoader and for the model constructors.
        "batch_size" : 50,
        "lstm_layer" : 1,
        "dropout_rate_lstm": 0.0,
        "dropout_rate_fc": 0.2,
        # Passed as `K` to the LstmMle_* constructors; meaning not visible here.
        "K":1
    },
    "results": {
        # CSV file the prediction loop writes to.
        "path" : "../visualisation/files/prediction/{}/ramming_train.csv".format(mode), 
    }
}

## Parameters variance data

In [24]:
# Run configuration for the "increasing variance" dataset.
# Every "Parameters ..." cell assigns the same name `param`, so only the
# most recently executed one is used by the cells further down.
param = {
    "data" : {
        "path" : '../../data/variation_in_variance/dataset_increasing_variance.csv',
        # Columns handed to DataPreperatorPrediction (ignored_features) and to
        # the predictors (columns_to_ignore).
        "droped_feature" : ["time", "mu", "sigma" 
                           ],
        # Not referenced by any of the cells shown in this notebook.
        "features_not_to_scale": []
    },
    "model" : {
        # Checkpoint file; `mode` selects the model sub-directory.
        "path" : "../../models/{}_model/variance_dataset_increasing_id30944.pt".format(mode),
        # Equals the number of entries in the variance mean/variance lists (1).
        "input_size" : 1,
        "n_hidden_lstm" : 2,
        "n_hidden_fc_1" : 5,
        # Only passed to LstmMle_3 in the model cells.
        "n_hidden_fc_2" : 5,
        # Passed to DataSet as `timesteps`.
        "sequence_size" : 36,
        # Used for the DataLoader and for the model constructors.
        "batch_size" : 8,
        "lstm_layer" : 1,
        "dropout_rate_lstm": 0.0,
        "dropout_rate_fc": 0.2,
        # Passed as `K` to the LstmMle_* constructors; meaning not visible here.
        "K":1
    },
    "results": {
        # CSV file the prediction loop writes to.
        "path" : "../visualisation/files/prediction/{}/variance_data_new.csv".format(mode), 
    }
}

## Standardize Data
First we have to normalise the data, because the model works on the representation given by its input vectors and the scale of those numbers is part of that representation.
We must apply exactly the same scaling that was applied to the training data. That means storing the scale and offset computed from the training data and reusing them here. <br>
__The mean and variance of each feature of the training data with which the model was trained (training share: 0.75):__

### Mean and variance from phm dataset - recipe 67 (29.219 samples)

In [4]:
# Per-feature mean and variance (12 values each) for the phm recipe 67 model;
# passed to DataPreperatorPrediction as mean_training_data / var_training_data.
# presumably the order must match the feature columns that remain after the
# ignored columns are dropped - confirm against DataPreperatorPrediction.
# NOTE(review): the 11th variance is ~4.6e-11; if the scaling divides by
# sqrt(var), that feature is amplified by roughly 1.5e5 - confirm this is intended.
mean_training_data = [0.00236702, 0.3953089,0.48772743 ,0.3857511, 0.49987399, 0.06291772, -0.03491417,
                      0.43371134,  0.23365129, -0.06136357, -0.12245359,  0.2516167]
var_training_data =[3.62155978e-04, 7.33502893e-01, 8.30760891e-01, 7.06805763e-01, 9.44752420e-01,
                    4.07861536e-01, 1.61079596e-01, 8.46321709e-01, 3.65172841e-01, 6.37131077e-01,
                    4.60727666e-11, 9.93519995e-01]

### Mean and variance from artificial dataset

In [None]:
# Per-feature mean and variance (2 values each) for the artificial dataset model;
# passed to DataPreperatorPrediction as mean_training_data / var_training_data.
# presumably the order must match the feature columns that remain after the
# ignored columns are dropped - confirm against DataPreperatorPrediction.
mean_training_data= [-0.00526595, -0.00968424]
var_training_data = [49.30277603, 0.4232726 ]

### Mean and Variance from cpps dataset

In [None]:
# Per-feature mean and variance (10 values each) for the cpps dataset model;
# passed to DataPreperatorPrediction as mean_training_data / var_training_data.
# presumably the order must match the feature columns that remain after the
# ignored columns are dropped - confirm against DataPreperatorPrediction.
mean_training_data = [-0.06942446, -0.03770532, -0.01556928, -0.04071856, -0.04357261, -0.06645444, -0.06779275, 
                      -0.01516744, -0.04079089, -0.03428566]
var_training_data = [0.15114678, 0.1605483,  0.07761351, 0.06546521, 0.09442645, 0.10429262,
                     0.09060877, 0.07936068, 0.06720505, 0.09224595]

### Mean and Variance from ramming dataset

In [4]:
# Per-feature mean and variance (50 values each) for the ramming dataset model;
# passed to DataPreperatorPrediction as mean_training_data / var_training_data.
# presumably the order must match the feature columns that remain after the
# ignored columns are dropped - confirm against DataPreperatorPrediction.
mean_training_data = [ 0.66994451,  1.3511098,  -1.04790224,  1.37593449,  1.52857629,  0.83683179,
                      -0.59174875,  0.2276904,  -0.30149707,  0.00659975, -0.61039718, -1.40061621,
                      -1.53808693,  0.78614893, -2.94451175, -0.79364646,  1.07591868,  2.14055191,
                      -1.10448315,  0.76984998, -0.84240901,  0.21670491, -0.9561242,   0.61804757,
                      -1.98817668, -0.99106694, -0.54200903,  1.12792316, -0.31659197,  2.63421421,
                      1.13675707, -2.27478105,  2.37692224,  2.35124433,  1.50218261, -2.04472942,
                      2.50210775, -1.04667747, -0.95624398,  1.22236255,  0.41518363,  1.33544657,
                      0.75903185, -0.41838231, -0.4369283,   0.60359194,  1.64544459, -2.07289658,
                      2.64145022,  2.16104454]
var_training_data =[10.35878551, 11.88827756, 21.27251956,  6.79734976, 14.07217738, 10.10726723,
                    15.16597587, 18.13221091,  4.27889808,  6.80582467,  2.44855596, 10.04390927,
                    7.28379418, 20.37918374, 28.1458679,   8.56124384, 18.35279778,  9.91740212,
                    17.30631408,  6.05122757,  4.67344851,  9.97122612,  6.89223197,  8.30739432,
                    22.54071533, 13.78669072,  3.27853119,  5.49280397, 15.4525496,  36.09376821,
                    5.22595375, 36.02409948, 26.91623789, 13.31752889, 23.78231544,  9.16123925,
                    19.53221726,  7.35945676,  5.67204309,  5.67213501, 11.63879868,  3.3638181,
                    12.8887632,  15.69187744, 10.40778802,  5.5671719,  10.16745852, 17.93333693,
                    8.56480474, 15.36198149]

### Mean and Variance from variance dataset

In [25]:
# Mean and variance (1 value each) for the variance dataset model;
# passed to DataPreperatorPrediction as mean_training_data / var_training_data.
mean_training_data =[0.06844859]
var_training_data = [112.93894129]

## Create DataLoader

In [26]:
# Read the CSV selected in `param` and prepare it with the mean / variance of
# the training data (presumably standardisation - see DataPreperatorPrediction).
data_preperator = DataPreperatorPrediction(
    path=param["data"]["path"],
    ignored_features=param["data"]["droped_feature"],
    first_order_difference=False,
    var_training_data=var_training_data,
    mean_training_data=mean_training_data,
)
preprocessed_data = data_preperator.prepare_data()
print(preprocessed_data.shape)

# `timesteps` is the sequence length the model expects.
dataset = DataSet(preprocessed_data, timesteps=param["model"]["sequence_size"])

# shuffle=False keeps the samples in their original order.
# drop_last=True discards a final incomplete batch, so the trailing samples
# of the file receive no prediction.
data_loader = DataLoader(
    dataset=dataset,
    batch_size=param["model"]["batch_size"],
    shuffle=False,
    drop_last=True,
    num_workers=0,
)

(3000, 2)


In [27]:
# Sanity check: report the tensor shapes of the first two mini-batches only.
for batch_idx, data in enumerate(data_loader):
    x, y = data
    report = [
        'Data of batch: {}'.format(batch_idx),
        "Size of input data: {}".format(x.size()),
        "Size of target data: {}".format(y.size()),
    ]
    print("\n".join(report))
    if batch_idx >= 1:
        break

Data of batch: 0
Size of input data: torch.Size([8, 36, 2])
Size of target data: torch.Size([8, 2])
Data of batch: 1
Size of input data: torch.Size([8, 36, 2])
Size of target data: torch.Size([8, 2])


## Define Model and load Parameters of trained model
### Model for MSE 

In [17]:
# Build the MSE variant of the LSTM with the hyper-parameters from `param`
# and restore the trained weights from the checkpoint.
# NOTE(review): the recorded RuntimeError shows a checkpoint with
# fc_y_hat / fc_tau layers being loaded into LstmMse - presumably an MLE
# checkpoint; run this cell only when param["model"]["path"] points at an
# MSE checkpoint.
model = LstmMse(batch_size=param['model']['batch_size'],
                input_dim=param['model']['input_size'],
                n_hidden_lstm=param['model']['n_hidden_lstm'],
                n_hidden_fc=param['model']['n_hidden_fc_1'],
                n_layers=param['model']['lstm_layer'],
                dropout_rate_lstm=param['model']['dropout_rate_lstm'],
                dropout_rate_fc=param['model']['dropout_rate_fc']
                )

# map_location="cpu" lets a checkpoint that was saved on a GPU be opened on a
# CPU-only machine; load_state_dict copies the values into the model's own
# parameters afterwards.
checkpoint = torch.load(param["model"]["path"], map_location="cpu")
model.load_state_dict(checkpoint['model_state_dict'])

RuntimeError: Error(s) in loading state_dict for LstmMse:
	Missing key(s) in state_dict: "fc2.weight", "fc2.bias". 
	Unexpected key(s) in state_dict: "fc_y_hat.weight", "fc_y_hat.bias", "fc_tau.weight", "fc_tau.bias". 

### Model for MLE
#### Architecture 1 - One FCNN (2 layers, last split for mu and sigma)

In [28]:
# Build MLE architecture 1 with the hyper-parameters from `param` and restore
# the trained weights from the checkpoint.
model = LstmMle_1(batch_size=param['model']['batch_size'],
                  input_dim=param['model']['input_size'],
                  n_hidden_lstm=param['model']['n_hidden_lstm'],
                  n_hidden_fc_1=param['model']['n_hidden_fc_1'],
                  n_layers=param['model']['lstm_layer'],
                  dropout_rate_lstm=param['model']['dropout_rate_lstm'],
                  dropout_rate_fc=param['model']['dropout_rate_fc'],
                  K=param['model']['K'])

# map_location="cpu" lets a checkpoint that was saved on a GPU be opened on a
# CPU-only machine; load_state_dict copies the values into the model's own
# parameters afterwards.
checkpoint = torch.load(param["model"]["path"], map_location="cpu")
model.load_state_dict(checkpoint['model_state_dict'])

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

#### Architecture 2 - Two separate FCNN for mu and sigma each (2 layers)

In [11]:
# Build MLE architecture 2 with the hyper-parameters from `param` and restore
# the trained weights from the checkpoint.
model = LstmMle_2(batch_size=param['model']['batch_size'],
                  input_dim=param['model']['input_size'],
                  n_hidden_lstm=param['model']['n_hidden_lstm'],
                  n_layers=param['model']['lstm_layer'],
                  dropout_rate_lstm=param['model']['dropout_rate_lstm'],
                  dropout_rate_fc=param['model']['dropout_rate_fc'],
                  n_hidden_fc_1=param['model']['n_hidden_fc_1'],
                  K=param['model']['K']
                  )

# map_location="cpu" lets a checkpoint that was saved on a GPU be opened on a
# CPU-only machine; load_state_dict copies the values into the model's own
# parameters afterwards.
checkpoint = torch.load(param["model"]["path"], map_location="cpu")
model.load_state_dict(checkpoint['model_state_dict'])

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

#### Architecture 3 - Two separate FCNN for mu and sigma each (3 layers)

In [20]:
# Build MLE architecture 3 with the hyper-parameters from `param` and restore
# the trained weights from the checkpoint. This is the only architecture that
# receives n_hidden_fc_2.
# The seed fixes torch's global RNG before the model is created.
torch.manual_seed(0)
model = LstmMle_3(batch_size=param['model']['batch_size'],
                  input_dim=param['model']['input_size'],
                  n_hidden_lstm=param['model']['n_hidden_lstm'],
                  n_layers=param['model']['lstm_layer'],
                  dropout_rate_lstm=param['model']['dropout_rate_lstm'],
                  dropout_rate_fc=param['model']['dropout_rate_fc'],
                  n_hidden_fc_1=param['model']['n_hidden_fc_1'],
                  n_hidden_fc_2=param['model']['n_hidden_fc_2'],
                  K=param['model']['K']
                  )

# map_location="cpu" lets a checkpoint that was saved on a GPU be opened on a
# CPU-only machine; load_state_dict copies the values into the model's own
# parameters afterwards.
checkpoint = torch.load(param["model"]["path"], map_location="cpu")
model.load_state_dict(checkpoint['model_state_dict'])

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

#### Architecture 4 - Two completely separate subnetworks (from LSTM layer to last FC layer)

In [29]:
# Build MLE architecture 4 with the hyper-parameters from `param` and restore
# the trained weights from the checkpoint.
# The seed fixes torch's global RNG before the model is created.
torch.manual_seed(0)
model = LstmMle_4(batch_size=param['model']['batch_size'],
                  input_dim=param['model']['input_size'],
                  n_hidden_lstm=param['model']['n_hidden_lstm'],
                  n_layers=param['model']['lstm_layer'],
                  dropout_rate_lstm=param['model']['dropout_rate_lstm'],
                  dropout_rate_fc=param['model']['dropout_rate_fc'],
                  n_hidden_fc_1=param['model']['n_hidden_fc_1'],
                  K=param['model']['K']
                  )

# map_location="cpu" lets a checkpoint that was saved on a GPU be opened on a
# CPU-only machine; load_state_dict copies the values into the model's own
# parameters afterwards.
checkpoint = torch.load(param["model"]["path"], map_location="cpu")
model.load_state_dict(checkpoint['model_state_dict'])

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

## Initialize Predictor
### For MSE Model

In [29]:
# Predictor for a model built with LstmMse. Both predictor cells assign the
# name `predictor`, so the one executed last is used for prediction.
predictor = PredictorMse(
    columns_to_ignore=param["data"]["droped_feature"],
    path_data=param["data"]["path"],
    model=model,
)

### For MLE Model

In [30]:
# Predictor for a model built with one of the LstmMle_* classes. Both
# predictor cells assign the name `predictor`, so the one executed last is
# used for prediction.
predictor = PredictorMle(
    columns_to_ignore=param["data"]["droped_feature"],
    path_data=param["data"]["path"],
    model=model,
)

## Predict

In [31]:
# Run the predictor over every mini-batch and store the results as a
# ";"-separated text file (each value, including the last of a row, is
# followed by ";").
print("Start predicting.")

results_path = param["results"]["path"]
samples_per_batch = param['model']['batch_size']

# Mode "w" truncates a file left over from an earlier run. Appending would add
# a second header line and duplicate rows every time this cell is re-executed.
# The file is opened once for the whole run instead of once per mini-batch.
with open(results_path, "w") as file:
    # Header line
    file.write("".join(column + ";" for column in predictor.create_column_names_result()))
    file.write("\n")

    for batch_number, (input_data, target_data) in enumerate(data_loader):
        # Predict sensor values in mini-batches
        batch_results = predictor.predict(input_data, target_data)

        # Each element of batch_results holds the components of a single
        # prediction (ID, target, prediction, loss, latent space ...) as a
        # list; write them as one row.
        for single_result in batch_results:
            file.write("".join(str(value) + ";" for value in single_result))
            file.write("\n")

        # Print status whenever the running sample count is a multiple of 5000
        samples_done = batch_number * samples_per_batch
        if samples_done % 5000 == 0:
            print("Current status: " + str(samples_done) + " samples are predicted.")

print("End of prediction.")

Start predicting.
Current status: 0 samples are predicted.
End of prediction.
