In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils import clip_grad_norm_
from torch.autograd import Variable
from torchsummary import summary
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import torch.optim as optim
from sklearn.preprocessing import StandardScaler

import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing.sequence import TimeseriesGenerator

# Predict worn blade data
## Standardize Worn Data
First we have to normalise the data, because the model works on the representation given by its input vectors, and the scale of those numbers is part of that representation.
We should apply the exact same scaling as for training data. That means storing the scale and offset used with your training data, and using that again. <br>
__Mean and variance of each feature of the training data on which the model was trained (stake, i.e. share of the data set used for training: 0.75):__

```python
mean_training_data = [-5.37536613e-02, -2.53111489e-04, -8.82854465e+05, 7.79034183e+02,1.45531178e+04, 1.37766733e+03, 6.50149764e-01]
variance_training_data = [1.25303578e-01, 1.16898690e-03, 2.86060835e+06, 1.64515717e+06, 6.85728371e+06, 3.63196175e+05, 8.21463343e-03]
```

In [21]:
# Share of the training CSV rows, counted from the top of the file, on which
# DataPreperatorPrediction.preprocess_data fits the StandardScaler.
stake_training_data = 0.75

In [22]:
class DataPreperatorPrediction():
    """Scale worn-blade data with the statistics of the training data.

    A ``StandardScaler`` is fitted on the leading share of the training CSV
    and then applied to the worn-blade CSV, so the data used for prediction
    is scaled with the mean and variance of the training data.

    Args:
        path_training: Path of the CSV file holding the training data.
        path_worn_blade: Path of the CSV file holding the worn-blade data.
        training_fraction: Share of the training rows, counted from the start
            of the file, that the scaler is fitted on. Must lie in (0, 1].
            ``None`` (default) falls back to the notebook-level
            ``stake_training_data`` at the time ``preprocess_data`` runs.

    Raises:
        ValueError: If ``training_fraction`` is given but outside (0, 1].
    """

    def __init__(self, path_training, path_worn_blade, training_fraction=None):
        # The chained comparison is also False for NaN, so NaN is rejected too.
        if training_fraction is not None and not 0 < training_fraction <= 1:
            raise ValueError(
                "training_fraction must be in the interval (0, 1], got {!r}".format(training_fraction)
            )
        self.path_training = path_training
        self.path_worn_blade = path_worn_blade
        self.training_fraction = training_fraction

    def load_data(self, path):
        """Read the CSV file at ``path`` and return it as a DataFrame."""
        return pd.read_csv(path)

    def preprocess_data(self):
        """Return the worn-blade data standardised with training statistics.

        Both CSV files must contain a ``Timestamp`` column; it is dropped
        before scaling.

        Returns:
            The array produced by ``StandardScaler.transform`` for the
            worn-blade data (all columns except ``Timestamp``).
        """
        fraction = self.training_fraction
        if fraction is None:
            # Backward-compatible default: use the notebook-level setting.
            fraction = stake_training_data

        # Load both files; only the leading share of the training file is
        # used to fit the scaler.
        dataset = self.load_data(self.path_training)
        amount_training_data = round(len(dataset) * fraction)
        training_data = dataset.iloc[0:amount_training_data, :]
        worn_blade_data = self.load_data(self.path_worn_blade)

        # Remove time feature
        training_data = training_data.drop(labels="Timestamp", axis=1).values
        worn_blade_data = worn_blade_data.drop(labels="Timestamp", axis=1).values

        # Fit on the training data only, so the worn-blade data is scaled
        # with the training mean and variance rather than its own.
        scaler = StandardScaler()
        scaler.fit(training_data)

        preprocessed_data = scaler.transform(worn_blade_data)
        return preprocessed_data

In [23]:
# CSV whose leading rows are used to fit the scaler
path_training = '../../data/vega_shrinkwrapper_original/NewBlade001.csv'
# CSV with the worn-blade measurements that get scaled for prediction
path_worn_blade = '../../data/vega_shrinkwrapper_original/WornBlade001.csv'

# Worn-blade data, standardised with the statistics of the training data
preprocessed_data = DataPreperatorPrediction(
    path_training=path_training,
    path_worn_blade=path_worn_blade,
).preprocess_data()

In [24]:
class DataSet(Dataset):
    """Sliding-window dataset backed by a Keras ``TimeseriesGenerator``.

    The generator receives the same array as samples and as targets, so each
    item pairs a window of ``timesteps`` consecutive rows with a target row
    taken from that same array.

    Args:
        data: Numpy array of the (already preprocessed) time series; the
            original comments describe 7 feature columns.
        timesteps: Number of rows per input window (``length`` of the
            generator).
    """

    def __init__(self, data, timesteps):
        # Keep a reference to the array handed in by the caller
        self.data = data
        # batch_size=1 here is independent of the neural network's batch size;
        # batching is left to the DataLoader
        self.generator = TimeseriesGenerator(self.data, self.data, length=timesteps, batch_size=1)

    def __getitem__(self, index):
        """Return ``(x, y)`` as float32 tensors for the window at ``index``.

        ``x`` has the generator's leading batch dimension removed, giving
        ``(timesteps, n_features)``; ``y`` is returned as the generator
        yields it, still with its leading dimension of size 1.
        """
        x, y = self.generator[index]
        # Drop ONLY dimension 0 (size 1, created by batch_size=1), because the
        # DataLoader adds its own batch dimension. A bare squeeze() would also
        # remove the time or feature axis whenever timesteps == 1 or the data
        # has a single feature, changing the rank of x.
        x_torch = torch.from_numpy(x).squeeze(0)  # [1, timesteps, n] --> [timesteps, n]
        y_torch = torch.from_numpy(y)
        return (x_torch.float(), y_torch.float())

    def __len__(self):
        """Number of windows the generator can produce."""
        return len(self.generator)

In [26]:
# Wrap the standardised worn-blade data in windows of 20 time steps
dataset_worn_blade = DataSet(data=preprocessed_data, timesteps=20)

# Keep the chronological order (no shuffling) and drop a trailing batch that
# has fewer than 8 windows
data_loader_worn_blade = DataLoader(
    dataset_worn_blade,
    batch_size=8,
    shuffle=False,
    num_workers=1,
    drop_last=True,
)