In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils import clip_grad_norm_
from torch.autograd import Variable
from torchsummary import summary
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import torch.optim as optim

import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing.sequence import TimeseriesGenerator

# Test TimeSeriesGenerator

In [11]:
class DataProvider(Dataset):
    def __init__(self, data, timesteps):
        # Data as numpy array is provided
        self.data = data
        # Data generator is initialized, batch_size=1 is indipendent of neural network's batch_size 
        self.generator = TimeseriesGenerator(self.data, self.data, length=timesteps, batch_size=1)

    def __getitem__(self, index):
        x, y = self.generator[index]
        x_torch = torch.from_numpy(x)
        # Dimension 0 with size 1 (created by TimeseriesGenerator because of batch_size=1) gets removed 
        # because DataLoader will add a dimension 0 with size=batch_size as well
        x_torch = torch.squeeze(x_torch) # torch.Size([1, timesteps, 7]) --> torch.Size([timesteps, 7])
        y_torch = torch.from_numpy(y)
        return (x_torch.float(), y_torch.float()) 

    def __len__(self):
        return len(self.generator)

In [31]:
data = np.array([[1,2,3,4,5,6],
                 [7,8,9,10,11,12],
                  [13,14,15,16,17,18],
                  [19,20,21,22,23,24],
                  [25,26,27,28,29,30]])

data.shape

(5, 6)

In [35]:
# Data is shuffled because each mini batch is indipendent from each other, but samples of a minibatch are in chronological order
dataset_train = DataProvider(data, timesteps=3)
data_loader_training = DataLoader(dataset_train, batch_size=1, num_workers=1, shuffle=False, drop_last=True)

ValueError: `start_index+length=3 > end_index=1` is disallowed, as no part of the sequence would be left to be used as current step.

In [36]:
for batch_idx, data in enumerate(data_loader_training):
    x,y = data
    print(x)
    print("------")
    print(y)
    print("------")

tensor([[[ 1.,  2.,  3.,  4.,  5.,  6.],
         [ 7.,  8.,  9., 10., 11., 12.]]])
------
tensor([[[13., 14., 15., 16., 17., 18.]]])
------
tensor([[[ 7.,  8.,  9., 10., 11., 12.],
         [13., 14., 15., 16., 17., 18.]]])
------
tensor([[[19., 20., 21., 22., 23., 24.]]])
------
tensor([[[13., 14., 15., 16., 17., 18.],
         [19., 20., 21., 22., 23., 24.]]])
------
tensor([[[25., 26., 27., 28., 29., 30.]]])
------


In [29]:
def loss_function(output, target_data):
    y_hat, sigma = output
    # target_data.size(batch_size,1,7) because timestep = 1
    # new target_data.size(batch_size,7)
    target_data = torch.squeeze(target_data)
    
    term = ((target_data-y_hat)/sigma).pow(2) + 2*torch.log(sigma)
    print(term)
    loss_batches = torch.sum(input=term, dim=1)
    
    # The value being returned by a loss function MUST BE a scalar value. Not a vector/tensor.
    mean_loss = torch.sum(loss_batches)
    
    # The value being returned must be a Variable. This is so that it can be used to update the parameters. 
    # A Variable is tracking the operations being done on it so that it can backpropagate to get the gradient.
    return mean_loss

In [30]:
y_hat = torch.tensor([[0.4, 0.6]])
sigma = torch.tensor([[0.3, 0.2]])
output = y_hat, sigma
target_data = torch.tensor([[[0.5, 0.4]]])

In [31]:
print("y_hat: {}".format(y_hat.size()))
print("sigma: {}".format(sigma.size()))
print("target_data: {}".format(target_data.size()))

y_hat: torch.Size([1, 2])
sigma: torch.Size([1, 2])
target_data: torch.Size([1, 1, 2])


In [32]:
loss=loss_function(output, target_data)
print(loss)

tensor([[-2.2968, -2.2189]])
tensor(-4.5157)


In [6]:
def my_loss(output, target):
    loss = torch.mean((output - target)**2)
    return loss

In [7]:
model = nn.Linear(2, 2)
x = torch.randn(1, 2)
target = torch.randn(1, 2)
output = model(x)
loss = my_loss(output, target)
print(loss)

tensor(2.6438, grad_fn=<MeanBackward0>)


In [8]:
loss.backward()
print(loss)

tensor(2.6438, grad_fn=<MeanBackward0>)


In [9]:
print(model.weight.grad)

tensor([[-4.2110, -0.6090],
        [ 2.2242,  0.3216]])


In [10]:
a = Variable(torch.ones(2, 2), requires_grad=True)
print(a)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


## Test shape of LSTM output layer

In [11]:
def split_data(stake_training_data, path):
    dataset = pd.read_csv(path)
    amount_training_data = round(len(dataset)*stake_training_data)
    train_data = dataset.iloc[0:amount_training_data,:]
    validation_data = dataset.iloc[amount_training_data:,:]
    return train_data, validation_data

In [12]:
def load_test_data(path):
    dataset = pd.read_csv(path)
    return dataset

In [13]:
test_data = load_test_data(path='../../data/vega_shrinkwrapper_original/WornBlade001.csv')

In [14]:
train_data, validation_data = split_data(stake_training_data=0.75, path='../../data/vega_shrinkwrapper_original/NewBlade001.csv')
train_data.head()

Unnamed: 0,Timestamp,pCut Motor: Torque,pCut CTRL Position controller: Lag error,pCut CTRL Position controller: Actual position,pCut CTRL Position controller: Actual speed,pSvolFilm CTRL Position controller: Actual position,pSvolFilm CTRL Position controller: Actual speed,pSvolFilm CTRL Position controller: Lag error
0,-0.188,-0.112131,-0.00249,-884606,0.0,11128,2.504289,0.261085
1,-0.184,-0.088931,-0.003863,-884606,17.166138,11128,-2.504289,0.260083
2,-0.18,-0.115141,0.00163,-884606,-6.866455,11128,7.513016,0.259081
3,-0.176,-0.111815,0.003003,-884606,-13.73291,11128,-2.504289,0.260083
4,-0.172,-0.13097,0.004376,-884606,-6.866455,11128,0.0,0.261085


In [15]:
class CSVDataset(Dataset):
    def __init__(self, data, timesteps):
        # All data are loaded from csv file and converted to an numpy array
        self.data = data.values
        # Data generator is initialized 
        self.generator = TimeseriesGenerator(self.data, self.data, length=timesteps, batch_size=1)

    def __getitem__(self, index):
        x, y = self.generator[index]
        x_torch = torch.from_numpy(x)
        # Dimension 0 with size 1 (created by TimeseriesGenerator because of batch_size=1) gets removed 
        # because DataLoader will add a dimension 0 with size=batch_size as well
        x_torch = torch.squeeze(x_torch) # torch.Size([1, timesteps, 7]) --> torch.Size([timesteps, 7])
        y_torch = torch.from_numpy(y)
        return (x_torch.float(), y_torch.float()) 

    def __len__(self):
        return len(self.generator)

In [37]:
dataset_train = CSVDataset(train_data, timesteps=4)
dataset_validation = CSVDataset(validation_data, timesteps=4)
dataset_test = CSVDataset(test_data, timesteps=4)

data_loader_training = DataLoader(dataset_train, batch_size=32, num_workers=1, shuffle=False)
data_loader_validation = DataLoader(dataset_validation, batch_size=32, num_workers=1, shuffle=False)
data_loader_test = DataLoader(dataset_test, batch_size=32, num_workers=1, shuffle=False)

In [38]:
class LSTM(nn.Module):
    def __init__(self, batch_size, input_dim, seq_len, n_hidden=256, n_layers=1):
        super(LSTM, self).__init__()
        # Attributes for LSTM Network
        self.input_dim = input_dim
        self.n_hidden = n_hidden
        self.n_layers = n_layers
        self.batch_size = batch_size
        self.seq_len = seq_len
        
        # Attribut for Gaussians
        self.n_gaussians = 2
        
        # Definition of NN layer
        # batch_first = True because dataloader creates batches and batch_size is 0. dimension
        self.lstm = nn.LSTM(input_size = self.input_dim, hidden_size = self.n_hidden, num_layers = self.n_layers, batch_first = True)
        self.fc1 = nn.Linear(self.n_hidden, self.n_gaussians * self.input_dim)
        
        
    def forward(self, input_data):
        # Forward propagate LSTM
        # LSTM in Pytorch return two results the first one usually called output and the second one (hidden_state, cell_state). 
        # As output the LSTM returns all the hidden_states for all the timesteps (seq), in other words all of the hidden states throughout
        # the sequence
        # As hidden_state the LSTM returns just the most recent hidden state
        lstm_out, (hidden_state, cell_state) = self.lstm(input_data)
        # Select the output from the last sequence 
        ln = input_data.size()[1]
        print(ln)
        last_out = lstm_out[:,ln-1,:]
        out = self.fc1(last_out)
        # Reshape out to shape torch.Size(batch_size, n_features, 2)
        raw_output = out.view(self.batch_size, self.input_dim, 2)
        # y_hat and tau alternate, y_hat and tau are next to each other for each feature 
        y_hat = raw_output[:,:,0]
        tau = raw_output[:,:,1]
        #σ = exp(τ) guarantees σ > 0 and provides numerical stability in the learning process
        sigma = torch.exp(tau)
        
        return y_hat, sigma
    
    def get_tensors(self, input_data):
        # Forward propagate LSTM
        # shape of self.hidden: (h, c), where h and c both have shape (num_layers, batch_size, hidden_dim)
        lstm_out, (hidden_state, cell_state) = self.lstm(input_data)
        last_out = lstm_out[:,self.seq_len -1,:]
        out = self.fc1(last_out)
        # Reshape out to shape torch.Size(batch_size, n_features, 2)
        raw_output = out.view(self.batch_size, self.input_dim, 2)
        y_hat = raw_output[:,:,0]
        tau = raw_output[:,:,1]
        #σ = exp(τ) guarantees σ > 0 and provides numerical stability in the learning process
        sigma = torch.exp(tau)
        
        return lstm_out, last_out, out, hidden_state, raw_output
    
    def init_hidden(self):
        # This is what we'll initialise our hidden state as well as our cell state
        # The axes semantics are (num_layers, minibatch_size, hidden_dim)
        h0 = torch.zeros(self.n_layers, self.batch_size, self.n_hidden)
        c0 = torch.zeros(self.n_layers, self.batch_size, self.n_hidden)
        return [t for t in (h0, c0)]

In [39]:
model = LSTM(batch_size=32, input_dim=8, seq_len=4, n_hidden=20, n_layers=1)
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [40]:
n_epochs = 1
patience = 100

In [41]:
def loss_function(output, target_data):
    y_hat, sigma = output
    # target_data.size(batch_size,1,7) because timestep = 1
    # new target_data.size(batch_size,7)
    target_data = torch.squeeze(target_data)
    
    term = ((target_data-y_hat)/sigma)**2 + 2*torch.log(sigma)
    return torch.sum(input=term, dim=1)

In [42]:
print("Start model training")
# Create lists to save training loss and validation loss of each epoch
hist_training_loss = []
hist_validation_loss = []

for epoch in range(1, n_epochs +1):
    # Empty list for recording performance 
    epoch_training_loss = []
    epoch_validation_loss = []
    
    # Also, we need to clear out the hidden state of the LSTM,
    # detaching it from its history on the last instance.
    hidden = model.init_hidden()
    
    ##### Training #####
    for batch_number, (input_data, target_data) in enumerate(data_loader_training):
        # Set model to training mode before train the neural network.
        model.train()
        
        # Zero out gradient, else they will accumulate between minibatches
        optimizer.zero_grad()
        
        # We need to detach the hidden state to prevent exploding/vanishing gradients
        hidden = [_.detach() for _ in hidden]
        
        # Forward propagation
        output = model(input_data)
        lstm_out, last_out, out, hidden_state, raw_output = model.get_tensors(input_data)
        #print("shape lstm_out: {}".format(lstm_out.size()))
        print("shape last_out: {}".format(last_out.size()))
        #print("shape hidden_state: {}".format(hidden_state.size()))
        #print("shape out: {}".format(out.size()))
        #print("raw_output: {}".format(raw_output.size()))
        #print("lstm_out")
        #print(lstm_out)
        #print("last_out")
        #print(last_out)
        #print("hidden_state")
        #print(hidden_state)
        #print("out")
        #print(out)
        #print("raw_output")
        #print(raw_output)
        print("-------- batch_no. {} --------".format(batch_number))
        
        # Calculate loss
        iteration_loss = loss_function(output, target_data)
        epoch_training_loss.append(iteration_loss.item())
    
        # Backward pass
        loss.backward()
        
        # Update LR
        scheduler.step()
        
        # Update parameters
        optimizer.step()
    
    # Save mean of loss over all training iterations
    mean_epoch_training_loss = sum(epoch_training_loss) / float(len(epoch_training_loss))
    hist_training_loss.append(mean_epoch_training_loss)
        
    ##### Evaluation #######
    for input_data, target_data in data_loader_validation:
        # Change model to evaluation (prediction) mode
        model.eval()
        
        # Predict 
        out = model(input_data)
        
         # Calculate loss
        iteration_loss = loss_function(output, target_data)
        epoch_validation_loss.append(iteration_loss.item())
        
    # Save mean of loss over all validation iterations to epoch history  
    mean_epoch_validation_loss = sum(epoch_validation_loss) / float(len(epoch_validation_loss))
    hist_validation_loss.append(mean_epoch_validation_loss)
        
    # Check after every evaluation whether the latest model is the best one or not
    # If this is the case, set current score to best_score, reset trails and save the model.
    if mean_epoch_validation_loss < lowest_loss:
        trials = 0
        lowest_loss = mean_epoch_validation_loss
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': mean_epoch_validation_loss
        }, "/best_model.pt")
        print("Epoch {}: best model saved with loss: {}".format(epoch, mean_epoch_validation_loss))
    
    # Else: Increase trails by one and start new epoch as long as not too many epochs 
    # were unsuccessful (controlled by patience)
    else:
        trials += 1
        if trials >= patience:
            print(f'Early stopping on epoch {epoch}')
            break  

Start model training
4
shape last_out: torch.Size([32, 20])
-------- batch_no. 0 --------


ValueError: only one element tensors can be converted to Python scalars

In [34]:
import torch
class MyReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return input.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        #print(grad_output)
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input

class MyLoss(torch.autograd.Function):  
    @staticmethod
    def forward(ctx, y, y_pred):
        ctx.save_for_backward(y, y_pred)
        return (y_pred - y).pow(2).sum()

    @staticmethod
    def backward(ctx, grad_output):
        yy, yy_pred = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input = torch.neg(2.0 * (yy_pred - yy))
        return grad_input, None

dtype = torch.float
device = torch.device("cpu")
N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6

for t in range(500):
    relu = MyReLU.apply
    myloss = MyLoss.apply 
    y_pred = relu(x.mm(w1)).mm(w2)
    loss = myloss(y_pred, y)
    print(t, loss.item())
    loss.backward()
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        w1.grad.zero_()
        w2.grad.zero_()

0 28747122.0
1 24992560.0
2 25456364.0
3 26002216.0
4 24025132.0
5 18750016.0
6 12396245.0
7 7170443.0
8 3959234.0
9 2227442.5
10 1353926.375
11 903256.9375
12 656832.125
13 508778.96875
14 410948.75
15 340759.125
16 287266.4375
17 244870.78125
18 210316.46875
19 181663.09375
20 157640.890625
21 137377.015625
22 120173.703125
23 105479.5078125
24 92875.4453125
25 82014.640625
26 72622.0546875
27 64475.6796875
28 57378.328125
29 51179.109375
30 45750.671875
31 40983.09765625
32 36785.42578125
33 33081.46484375
34 29807.4921875
35 26907.1171875
36 24327.73046875
37 22032.25390625
38 19984.181640625
39 18154.50390625
40 16517.63671875
41 15054.314453125
42 13741.15234375
43 12559.498046875
44 11494.630859375
45 10533.8994140625
46 9664.5986328125
47 8877.3798828125
48 8163.650390625
49 7515.51318359375
50 6926.6064453125
51 6390.4912109375
52 5901.822265625
53 5455.99169921875
54 5048.515625
55 4675.82080078125
56 4334.5615234375
57 4021.4599609375
58 3734.09765625
59 3470.0439453125
60 3