In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils import clip_grad_norm_
from torch.autograd import Variable
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import torch.optim as optim
from sklearn.preprocessing import StandardScaler

import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing.sequence import TimeseriesGenerator

In [20]:
# Central configuration for this notebook.
hyperparam = dict(
    # Where the new-blade recordings live and which share of the rows
    # is treated as the training split.
    data=dict(
        stake_training_data=0.75,
        path='../../data/vega_shrinkwrapper_original/NewBlade/',
    ),
    # Settings handed to the LSTM constructor further below.
    model=dict(
        input_size=7,
        n_hidden=150,
        sequence_size=50,
        batch_size=1,
        lstm_layer=3,
    ),
)

# Predict worn blade data
## Standardize Worn Data
First we have to normalise the data, because the model works on the representation given by its input vectors and the scale of those numbers is part of that representation.
We must apply exactly the same scaling that was used for the training data. That means storing the scale and offset computed from the training data and reusing them here. <br>
__The mean and variance for each feature of the training data with which the model was trained (stake: 0.75):__

```python
mean_training_data = [-5.37536613e-02, -2.53111489e-04, -8.82854465e+05, 7.79034183e+02,1.45531178e+04, 1.37766733e+03, 6.50149764e-01]
variance_training_data = [1.25303578e-01, 1.16898690e-03, 2.86060835e+06, 1.64515717e+06, 6.85728371e+06, 3.63196175e+05, 8.21463343e-03]
```

In [21]:
# Fraction of the new-blade file that forms the training split. Same value as
# hyperparam["data"]["stake_training_data"]; DataPreperatorPrediction reads this
# module-level name directly when it fits its scaler.
stake_training_data = 0.75

In [27]:
class DataPreperatorTraining():
    """Load a new-blade CSV file and standardise its sensor columns.

    The scaler statistics are computed from the leading ``stake_training_data``
    share of the rows only (the training split) and then applied to every row.
    This gives the data the same mean/variance scaling as the one
    DataPreperatorPrediction applies to the worn-blade data, instead of
    statistics that also include the rows outside the training split.
    """

    def __init__(self, path, stake_training_data=0.75):
        """
        Args:
            path: Location of the CSV file. The file must contain a
                "timestamp" column, which is dropped before scaling.
            stake_training_data: Fraction (0 < value <= 1) of leading rows used
                to fit the scaler. Pass 1.0 to fit on the complete file.

        Raises:
            ValueError: If ``stake_training_data`` is outside (0, 1].
        """
        if not 0 < stake_training_data <= 1:
            raise ValueError(
                "stake_training_data must be in the interval (0, 1], got {!r}".format(stake_training_data)
            )
        self.path = path
        self.stake_training_data = stake_training_data

    def load_data(self):
        """Read the CSV file at ``self.path`` into a DataFrame."""
        return pd.read_csv(self.path)

    def preprocess_data(self, train_data):
        """Drop the time column and standardise the remaining features.

        Args:
            train_data: DataFrame with a "timestamp" column plus feature columns.

        Returns:
            The scaler's transform of all rows (every row of ``train_data`` is
            returned, not only the rows the scaler was fitted on).
        """
        # Remove time feature
        features = train_data.drop(labels="timestamp", axis=1)
        # Fit on the training split only, so that the statistics match the ones
        # the model saw during training; same rounding rule as DataPreperatorPrediction.
        n_fit_rows = round(len(features) * self.stake_training_data)
        scaler = StandardScaler()
        scaler.fit(features.iloc[:n_fit_rows])
        # Transform the complete dataset with the training-split statistics
        return scaler.transform(features)

    def provide_data(self):
        """Load the CSV file and return its standardised feature matrix."""
        return self.preprocess_data(self.load_data())

In [28]:
class DataPreperatorPrediction():
    """Scale worn-blade data with statistics taken from the training split.

    The scaler is fitted on the leading rows of the training file and then
    applied to the worn-blade file. The size of that leading part is governed
    by the module-level ``stake_training_data``.
    """

    def __init__(self, path_training, path_worn_blade):
        """Remember the CSV locations of the training and worn-blade recordings."""
        self.path_training, self.path_worn_blade = path_training, path_worn_blade

    def load_data(self, path):
        """Read the CSV file at ``path`` into a DataFrame."""
        frame = pd.read_csv(path)
        return frame

    def preprocess_data(self):
        """Return the worn-blade features standardised with training statistics.

        Both files must contain a "timestamp" column; it is dropped before scaling.
        """
        # Reference data: only the training share of the new-blade file
        reference_frame = self.load_data(self.path_training)
        n_training_rows = round(len(reference_frame) * stake_training_data)
        reference_frame = reference_frame.iloc[:n_training_rows]

        worn_frame = self.load_data(self.path_worn_blade)

        # Strip the time column and continue with plain arrays
        reference_values = reference_frame.drop(columns="timestamp").values
        worn_values = worn_frame.drop(columns="timestamp").values

        # Mean and variance come from the training rows, the transform is
        # applied to the worn-blade rows
        fitted_scaler = StandardScaler().fit(reference_values)
        return fitted_scaler.transform(worn_values)

### Load and scale training data

In [29]:
# Read the new-blade recording from the configured data directory and standardise it
train_loader = DataPreperatorTraining(
    path=f"{hyperparam['data']['path']}NewBlade001.csv",
)
preprocessed_new_blade = train_loader.provide_data()

### Load the worn blade dataset and scale it with the mean and variance of the training data

In [30]:
# CSV with the new-blade recording (source of the scaler statistics)
path_training = '../../data/vega_shrinkwrapper_original/NewBlade/NewBlade001.csv'
# CSV with the worn-blade recording (the data that gets scaled)
path_worn_blade = '../../data/vega_shrinkwrapper_original/WornBlade/WornBlade001.csv'

preprocessed_worn_blade = DataPreperatorPrediction(
    path_training=path_training,
    path_worn_blade=path_worn_blade,
).preprocess_data()

In [31]:
class DataSet(Dataset):
    """Sliding-window dataset: ``timesteps`` consecutive rows as input, the next row as target."""

    def __init__(self, data, timesteps):
        """
        Args:
            data: Array of already preprocessed samples, one row per time step.
            timesteps: Number of consecutive rows that form one input window.
        """
        self.data = data
        # Data generator is initialized; its batch_size=1 is independent of the
        # neural network's batch_size (batching is left to the DataLoader)
        self.generator = TimeseriesGenerator(self.data, self.data, length=timesteps, batch_size=1)

    def __getitem__(self, index):
        """Return ``(window, target)`` as float32 tensors.

        The window has the generator's leading batch axis removed; the target
        keeps it, exactly as the generator delivers it.

        Raises:
            IndexError: If ``index`` is outside the dataset. Without this check
                plain iteration (``for x, y in dataset``) would never stop.
        """
        n_windows = len(self)
        if index < 0:
            index += n_windows
        if not 0 <= index < n_windows:
            raise IndexError("DataSet index out of range")

        x, y = self.generator[index]
        # Remove only dimension 0 (size 1, created by the generator's batch_size=1)
        # because the DataLoader adds its own batch dimension. Squeezing a specific
        # axis keeps the time and feature axes intact even when one of them has
        # size 1 (timesteps=1 or a single feature).
        x_torch = torch.from_numpy(x).squeeze(0)
        y_torch = torch.from_numpy(y)
        return (x_torch.float(), y_torch.float())

    def __len__(self):
        """Number of windows the generator can produce."""
        return len(self.generator)

In [32]:
# Window length and batch size are taken from the shared configuration instead of
# being repeated as literals: the model is built with hyperparam['model']['batch_size']
# and sizes its initial hidden state with it, so the loaders must use the same value.
dataset_worn_blade = DataSet(preprocessed_worn_blade, timesteps=hyperparam['model']['sequence_size'])
dataset_new_blade = DataSet(preprocessed_new_blade, timesteps=hyperparam['model']['sequence_size'])

# shuffle=False keeps the windows in chronological order
data_loader_worn_blade = DataLoader(dataset_worn_blade, batch_size=hyperparam['model']['batch_size'],
                                    num_workers=1, shuffle=False, drop_last=True)
data_loader_new_blade = DataLoader(dataset_new_blade, batch_size=hyperparam['model']['batch_size'],
                                   num_workers=1, shuffle=False, drop_last=True)

In [33]:
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer that predicts the next sample.

    The network reads a window of shape (batch, sequence, input_dim) and returns
    one prediction of shape (batch, input_dim) computed from the last time step.
    """

    def __init__(self, batch_size, input_dim, n_hidden, n_layers):
        """
        Args:
            batch_size: Default batch size used by ``init_hidden``.
            input_dim: Number of features per time step (also the output size).
            n_hidden: Size of the LSTM hidden state.
            n_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        # Attributes for LSTM Network
        self.input_dim = input_dim
        self.n_hidden = n_hidden
        self.n_layers = n_layers
        self.batch_size = batch_size

        # Definition of NN layer
        # batch_first = True because the DataLoader puts the batch on dimension 0
        self.lstm = nn.LSTM(input_size=self.input_dim, hidden_size=self.n_hidden,
                            num_layers=self.n_layers, batch_first=True)
        self.fc_y_hat = nn.Linear(self.n_hidden, self.input_dim)

    def forward(self, input_data, hidden=None):
        """Predict the sample that follows the given window.

        Args:
            input_data: Tensor of shape (batch, sequence, input_dim); the
                sequence length may vary between calls.
            hidden: Initial ``(h0, c0)`` pair as returned by ``init_hidden``.
                When omitted, nn.LSTM starts from zero states.

        Returns:
            Tensor of shape (batch, input_dim).
        """
        if hidden is not None:
            # nn.LSTM documents the initial state as a tuple; init_hidden returns a list
            hidden = tuple(hidden)
        # nn.LSTM returns the hidden states of the top layer for every time step
        # and, separately, the final (hidden_state, cell_state); only the former
        # is needed here.
        lstm_out, _ = self.lstm(input_data, hidden)
        # Select the output of the last time step
        last_out = lstm_out[:, -1, :]
        return self.fc_y_hat(last_out)

    def init_hidden(self, batch_size=None):
        """Create zero-filled initial hidden and cell states.

        Args:
            batch_size: Batch size of the states; defaults to the value given
                to the constructor.

        Returns:
            List ``[h0, c0]``; each tensor has the axes
            (num_layers, batch_size, hidden_dim), does not require gradients and
            uses the dtype and device of the model parameters.
        """
        if batch_size is None:
            batch_size = self.batch_size
        # Allocate from an existing parameter so that dtype and device follow
        # the model (e.g. after .double() or .to(device))
        reference = next(self.parameters())
        state_shape = (self.n_layers, batch_size, self.n_hidden)
        h0 = reference.new_zeros(state_shape)
        c0 = reference.new_zeros(state_shape)
        return [h0, c0]

## Predict sensor data

In [48]:
# Initiate and load model
model = LSTM(batch_size=hyperparam['model']['batch_size'], input_dim=hyperparam['model']['input_size'],
             n_hidden=hyperparam['model']['n_hidden'], n_layers=hyperparam['model']['lstm_layer'])

PATH = "../../models/MSE_model/best_model_aws.pt"
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a CPU-only machine
checkpoint = torch.load(PATH, map_location="cpu")
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Column layout of the result file: window index, 7 target values, 7 predicted values
columns = ["timestamp", "cut_torque_target", "cut_lag error_target", "cut_position_target", "cut speed_target",
           "film_position_target", "film_speed_target", "film_lag_error_target", "cut_torque_predicted",
           "cut_lag error_predicted", "cut_position_predicted", "cut speed_predicted", "film_position_predicted",
           "film_speed_predicted", "film_lag_error_predicted"]

print("Start predicting")
##### Predict #####
# Rows are collected in a list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copies the whole frame on every call.
rows = []
# Inference only, so no autograd graph is needed
with torch.no_grad():
    for batch_number, (input_data, target_data) in enumerate(data_loader_new_blade):
        hidden = model.init_hidden()

        # Forward propagation
        output = model(input_data, hidden)

        # Flatten the single-sample batch into plain value lists
        target_values = target_data.reshape(-1).tolist()
        predicted_values = output.reshape(-1).tolist()
        # float() keeps the index column formatted as before ("0.0", "1.0", ...),
        # when the index was upcast inside an all-float Series
        rows.append([float(batch_number)] + target_values + predicted_values)

df = pd.DataFrame(rows, columns=columns)

# Save dataframe as csv file
df.to_csv("../visualisation/files/prediction_training_data.csv", sep=";", index=False)

print("Finished")

Start predicting
Finished


In [46]:
# Sanity check of the result frame: `columns` has 15 entries (index + 7 targets +
# 7 predictions) and the prediction loop adds one row per window.
df.shape

(1998, 15)