In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# own modules
from models.models import LstmMse
from utils.data_loader import DataPreperatorPrediction, DataSet
from loss_module import LossMse, LossMle
from utils.predictor import PredictorMse

## Take care of these things before running the prediction:
- Select the correct data path and define the dropped features (`droped_feature`)
- Adjust the model parameters
- Change the file locations (the `path` entries in the parameters below)

## Parameters

In [2]:
# Central configuration of the notebook: input data, model architecture /
# checkpoint and the location of the prediction output.
param = dict(
    data=dict(
        # presumably the share of the data that was used for training -- it is
        # not read anywhere in the cells shown in this notebook
        stake_training_data=0.75,
        path='../../data/phm_data_challenge/01_M01_DC_preprocessed.csv',
        # columns that are passed as "ignored"/"columns to ignore" further down
        # and merged back into the prediction results at the end
        droped_feature=[
            "ongoing time",
            "up time",
            "RLU",
            "runnum",
            'FIXTURESHUTTERPOSITION_0.0',
            'FIXTURESHUTTERPOSITION_1.0',
            'FIXTURESHUTTERPOSITION_2.0',
            'FIXTURESHUTTERPOSITION_3.0',
            'FIXTURESHUTTERPOSITION_255.0',
            "ETCHSOURCEUSAGE",
            "ETCHAUXSOURCETIMER",
            "ETCHAUX2SOURCETIMER",
        ],
    ),
    model=dict(
        path="../../models/MSE_model/phm_data_InputSize13_LayerLstm2_HiddenLstm100_HiddenFc50_Seq100.pt",
        input_size=13,
        n_hidden_lstm=100,
        sequence_size=100,
        batch_size=1,  # Has to be 1 in prediction mode!!!
        lstm_layer=2,
        n_hidden_fc=50,
        dropout_rate=0.2,
    ),
    results=dict(
        path_prediction="../visualisation/files/prediction/phm_data_13_features.csv",
    ),
)

## Standardize Data
First we have to normalise the data, because the model works on the representation given by its input vectors, and the scale of those numbers is part of that representation.
We must apply exactly the same scaling as for the training data. That means storing the scale and offset computed on the training data and reusing them here. <br>
__The mean and variance of each feature of the training data with which the model was trained (stake: 0.75):__

### Mean and Variance from NewBlade Training Data

In [5]:
# Per-feature statistics of the NewBlade training data (7 features).
# NOTE: the following cells assign the same two names again, so on a
# top-to-bottom run only the last assignment in the notebook is effective.
mean_training_data = [
    -5.37536613e-02,
    -2.53111489e-04,
    -8.82854465e+05,
    7.79034183e+02,
    1.45531178e+04,
    1.37766733e+03,
    6.50149764e-01,
]
var_training_data = [
    1.25303578e-01,
    1.16898690e-03,
    2.86060835e+06,
    1.64515717e+06,
    6.85728371e+06,
    3.63196175e+05,
    8.21463343e-03,
]

### Mean and Variance from Artificial Training Data

In [4]:
# Per-feature statistics of the artificial training data (8 features).
# NOTE: other cells of this notebook assign the same two names; on a
# top-to-bottom run only the last assignment in the notebook is effective.
mean_training_data = [
    -5.31764899e-02,
    -3.98576146e-04,
    -8.82773455e+05,
    8.25672897e+02,
    1.47034247e+04,
    1.42685595e+03,
    6.62155736e-01,
    1.23172374e-02,
]
var_training_data = [
    1.28792583e-01,
    1.21258617e-03,
    2.90245238e+06,
    1.72279458e+06,
    6.83095901e+06,
    3.12357562e+05,
    3.89033076e-03,
    5.01164766e+01,
]

### Mean and Variance from Random Walk Data

In [5]:
# Per-feature statistics of the random walk training data (7 features).
# NOTE: other cells of this notebook assign the same two names; on a
# top-to-bottom run only the last assignment in the notebook is effective.
mean_training_data = [
    -5.31764899e-02,
    -3.98576146e-04,
    1.55255733e+03,
    8.25672897e+02,
    1.47034247e+04,
    1.42685595e+03,
    6.62155736e-01,
]
var_training_data = [
    1.28792583e-01,
    1.21258617e-03,
    2.04108811e+06,
    1.72279458e+06,
    6.83095901e+06,
    3.12357562e+05,
    3.89033076e-03,
]

### Mean and Variance from Random Walk Data with first-order difference

In [35]:
# Per-feature statistics of the random walk training data with first-order
# difference (7 features).
# NOTE: other cells of this notebook assign the same two names; on a
# top-to-bottom run only the last assignment in the notebook is effective.
mean_training_data = [
    -5.30145478e-02,
    -3.98852220e-04,
    1.57238159e+00,
    8.26196229e+02,
    1.47058099e+04,
    1.42778443e+03,
    6.62286323e-01,
]
var_training_data = [
    1.28839165e-01,
    1.21339499e-03,
    1.10982448e+03,
    1.72353307e+06,
    6.82698220e+06,
    3.11272820e+05,
    3.86734667e-03,
]

### Mean and Variance from PHM Dataset
Dropped features = ["ongoing time", "up time", "RLU", "runnum"] + <br>
['FIXTURESHUTTERPOSITION_0.0','FIXTURESHUTTERPOSITION_1.0', 'FIXTURESHUTTERPOSITION_2.0', 'FIXTURESHUTTERPOSITION_3.0', <br>
'FIXTURESHUTTERPOSITION_255.0', "ETCHSOURCEUSAGE", "ETCHAUXSOURCETIMER", "ETCHAUX2SOURCETIMER"]

In [3]:
# Per-feature statistics of the PHM training data: 13 values each, which
# matches param["model"]["input_size"].
# NOTE: earlier cells assign the same two names; this is the last assignment
# in the notebook, so it is the one in effect on a top-to-bottom run.
mean_training_data = [
    0.0632522,
    0.10388593,
    0.09563544,
    0.0777276,
    0.22081628,
    0.08311531,
    0.01382531,
    0.09862897,
    0.07814727,
    -0.0185826,
    0.1000127,
    -0.0161782,
    -0.22541928,
]
var_training_data = [
    0.90316232,
    0.97237671,
    0.98547017,
    0.92090347,
    1.18086523,
    0.92393987,
    0.41744699,
    0.97142703,
    0.92604794,
    0.68786855,
    1.25019607,
    0.50023143,
    0.69425608,
]

## Create DataLoader

In [4]:
# Prepare the raw CSV for inference: the preparator receives the columns to
# ignore and the mean/variance of the training data defined above.
data_preperator = DataPreperatorPrediction(
    path=param['data']['path'],
    ignored_features=param["data"]["droped_feature"],
    mean_training_data=mean_training_data,
    var_training_data=var_training_data,
    first_order_difference=False,
)
preprocessed_data = data_preperator.prepare_data()
print(preprocessed_data.shape)

# Wrap the prepared data in a DataSet with the configured sequence length and
# serve it in file order (no shuffling) with the configured batch size.
dataset = DataSet(
    preprocessed_data,
    timesteps=param["model"]["sequence_size"],
)
data_loader = DataLoader(
    dataset,
    batch_size=param['model']['batch_size'],
    shuffle=False,
    drop_last=True,
    num_workers=4,
)

(173175, 14)


## Define Model and load Parameters of trained model

In [5]:
# Rebuild the network with the hyper-parameters from `param["model"]`; they
# have to match the ones used for training, otherwise the stored weights
# cannot be loaded into the layers.
model = LstmMse(batch_size=param['model']['batch_size'], 
                input_dim=param['model']['input_size'], 
                n_hidden_lstm=param['model']['n_hidden_lstm'], 
                n_hidden_fc=param['model']['n_hidden_fc'], 
                n_layers=param['model']['lstm_layer'], 
                dropout_rate= param['model']['dropout_rate'])

# map_location="cpu" makes the checkpoint loadable on machines without a GPU
# even if it was saved from CUDA tensors. load_state_dict() copies the values
# into the model's existing parameters, so the model's own device is unaffected.
checkpoint = torch.load(param["model"]["path"], map_location="cpu")

# Inference only: switch off dropout (dropout_rate is non-zero) so that the
# predictions are deterministic.
# NOTE(review): PredictorMse may already call model.eval() -- harmless if so.
model.eval()

# Kept as the last expression so the cell still displays the key report.
model.load_state_dict(checkpoint['model_state_dict'])

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

## Define Loss Function

In [6]:
# Loss object handed to the predictor below; it is constructed from the
# model's input size and the batch size.
criterion = LossMse(
    param["model"]["input_size"],
    param["model"]["batch_size"],
)

## Initialize Predictor

In [7]:
# The predictor gets the loaded model, the loss object, the path of the raw
# data and the list of columns that are excluded from the model input.
predictor = PredictorMse(
    model=model,
    criterion=criterion,
    columns_to_ignore=param["data"]["droped_feature"],
    path_data=param["data"]["path"],
)

## Predict

In [None]:
# Run the prediction over everything the data loader serves; the predictor
# prints a status line while it works (see the cell output).
results = predictor.predict(data_loader)

# Preview of the first rows of the prediction table.
results.head(n=5)

Start predicting.




Current status: 5000 samples are predicted.
Current status: 10000 samples are predicted.
Current status: 15000 samples are predicted.
Current status: 20000 samples are predicted.
Current status: 25000 samples are predicted.
Current status: 30000 samples are predicted.
Current status: 35000 samples are predicted.
Current status: 40000 samples are predicted.
Current status: 45000 samples are predicted.
Current status: 50000 samples are predicted.
Current status: 55000 samples are predicted.
Current status: 60000 samples are predicted.
Current status: 65000 samples are predicted.
Current status: 70000 samples are predicted.
Current status: 75000 samples are predicted.
Current status: 80000 samples are predicted.
Current status: 85000 samples are predicted.
Current status: 90000 samples are predicted.
Current status: 95000 samples are predicted.


## Combine prediction data with the data that was not considered for inference

In [9]:
# Read the raw CSV again to get the columns that were withheld from the model.
original_sensor_data = pd.read_csv(param["data"]["path"])

# Keep only the dropped columns plus "ID", which serves as the join key.
data_of_droped_feature = original_sensor_data[param["data"]["droped_feature"] + ["ID"]]

# Inner join: only rows whose ID occurs in both tables are kept.
complete_data = pd.merge(results, data_of_droped_feature, how="inner", on="ID")

## Save data to csv file

In [10]:
# Create the target directory first: to_csv() raises if it does not exist,
# which would throw away the result of the long-running prediction above.
prediction_path = Path(param["results"]["path_prediction"])
prediction_path.parent.mkdir(parents=True, exist_ok=True)

# Semicolon-separated file without the DataFrame index.
complete_data.to_csv(prediction_path, sep=";", index=False)