In [26]:
import import_ipynb
import torch
from Read_Data import Read_data
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
from tqdm import tqdm
import numpy as np
import Plots
import numpy as np
import torch.nn.functional as f

## Data Loader

In [27]:
class DataModule(Dataset):
  '''
  Pair the features (history windows) with their labels (future values) in a
  Dataset so the samples can be split and separated into batches.

  history: indexable collection of input windows; sample i is history[i].
  future: indexable collection of targets; sample i is future[i].
          The dataset length is taken from this collection.
  transform: optional callable applied to each history item when it is fetched.
  target_transform: optional callable applied to each future item when it is fetched.
  '''
  def __init__(self, history: torch.Tensor, future: torch.Tensor, transform=None, target_transform=None):
    self.history = history
    self.future = future
    self.transform = transform
    self.target_transform = target_transform

  def __len__(self) -> int:
    # The number of samples is driven by the labels.
    return len(self.future)

  def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]:
    '''
    Return the (history, future) pair at position idx, after the optional transforms.
    '''
    history_item = self.history[idx]
    future_item = self.future[idx]
    # Compare against None explicitly so a callable that happens to be falsy is still applied.
    if self.transform is not None:
      history_item = self.transform(history_item)
    if self.target_transform is not None:
      future_item = self.target_transform(future_item)
    return history_item, future_item

## Definition of the LSTM Neural Network

In [28]:
class LSTMNET(nn.Module):
  '''
  LSTM followed by a stack of fully connected (FC) layers with sigmoid activations
  between them. The outputs of every time step of the LSTM are flattened and fed
  to the FC stack, so the input length must equal seq_length.
  '''

  def __init__(self, input_size: int, hidden_size: int, num_layers: int, num_classes: int, seq_length: int, gear=None):
    '''
    input_size: how many kinds of features are in the input.
    hidden_size: hyper-parameter
    num_layers: hyper-parameter
    num_classes: the number of outputs each time.
    seq_length: the length of the input, which should be equal to the length of the sliding window.
    gear: None selects the default head (fc1 -> fc2 -> fc3). Otherwise it is the number of
          hidden fc layers requested in front of the output layer (there is always one output
          fc layer to reshape the outputs). The width is halved after each hidden layer and
          the stack is cut short as soon as the next width would be <= num_classes, so fewer
          than `gear` hidden layers may be built.
    '''
    super(LSTMNET, self).__init__()

    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.num_classes = num_classes
    self.seq_length = seq_length
    self.gear = gear
    # nn.ModuleList instead of a plain list: only registered sub-modules show up in
    # self.parameters() (and therefore get optimised) and follow the model in .to(device).
    self.fc_layers = nn.ModuleList()

    self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
    self.sig = nn.Sigmoid()
    if gear is None: # Default configuration.
      self.fc1 = nn.Linear(self.seq_length * self.hidden_size, self.hidden_size)
      self.fc2 = nn.Linear(self.hidden_size, self.hidden_size // 2)
      self.fc3 = nn.Linear(self.hidden_size // 2, self.num_classes)
    else:
      start_size = self.seq_length * self.hidden_size
      end_size = self.hidden_size
      # gear == 0 skips the loop and leaves only the output layer.
      for _ in range(gear):
        self.fc_layers.append(nn.Linear(start_size, end_size))
        start_size = end_size
        end_size = end_size // 2
        if end_size <= self.num_classes:
          break
      # Output layer, always the last element of fc_layers.
      self.fc_layers.append(nn.Linear(start_size, self.num_classes))

    self.trainer = None
    self.test_results = []

  def forward(self, input: torch.Tensor) -> torch.Tensor:
    '''
    input: batch-first tensor, (batch, seq_length, input_size).
    return: tensor of shape (batch, num_classes).
    '''
    # Without explicit states nn.LSTM starts from zeros on the input's own device,
    # which keeps evaluation and prediction deterministic.
    lstm_n, _ = self.lstm(input)
    y_n = lstm_n.reshape(lstm_n.shape[0], -1)
    if self.gear is None:
      y_n = self.sig(self.fc1(y_n))
      y_n = self.sig(self.fc2(y_n))
      return self.fc3(y_n)

    # Walk the layers that were actually built (may be fewer than `gear`);
    # every layer but the last is followed by a sigmoid.
    for i in range(len(self.fc_layers) - 1):
      y_n = self.sig(self.fc_layers[i](y_n))
    return self.fc_layers[-1](y_n)

  @staticmethod
  def _to_float(batch) -> list:
    # Cast every tensor of the batch to float32 without modifying the caller's sequence.
    return [item.float() for item in batch]

  def training_step(self, batch: list) -> torch.Tensor:
    '''
    One batch in training.
    batch: sequence of tensors, the model inputs followed by the target as the last element.
    return: the loss between the prediction of the current model and the target.
    '''
    batch = self._to_float(batch) # Inputs should be of type float32.
    return self.loss(self(*batch[:-1]), batch[-1]) # Predict and generate the loss at the same time.

  def validation_step(self, batch: list) -> torch.Tensor:
    '''
    One batch in validation; same computation as training_step.
    '''
    batch = self._to_float(batch)
    return self.loss(self(*batch[:-1]), batch[-1])

  def test_step(self, batch: list) -> torch.Tensor:
    '''
    Calculate the test loss and append the prediction results to self.test_results.
    '''
    batch = self._to_float(batch)
    outputs = self(*batch[:-1])
    loss = self.loss(outputs, batch[-1])
    self.test_results.append(outputs)
    return loss

  def prediction_step(self, window: np.ndarray) -> torch.Tensor:
    '''
    Predict from one un-batched numpy window; a batch dimension of 1 is added in front.
    '''
    # Follow the device the weights actually live on rather than self.device,
    # so the method also works when the model was never moved to the GPU.
    device = next(self.parameters()).device
    inputs = torch.from_numpy(window.astype('float64')).to(device)
    inputs = torch.unsqueeze(inputs, dim=0)
    inputs = inputs.float()
    return self(inputs)

  def loss(self, y_hat: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    '''
    Mean squared error between the prediction y_hat and the target y.
    '''
    l = nn.MSELoss()
    return l(y_hat, y)

  def configure_optimizers(self, lr: float) -> optim.Optimizer:
    '''
    Configure the hyper-parameters for the adam optimiser.
    '''
    return optim.Adam(self.parameters(), lr=lr)

## Training, Validation, Test and Prediction

In [29]:
class Trainer:
  def __init__(self, name: str, model: nn.Module, DataModule: DataModule, read_data: Read_data, max_epoch: int, lr: float, patience: int, warm: int):
    '''
    name: The name of the predictor, should be identical to the name of the input dataclass.
    model: neural network class.
    DataModule: DataModule class.
    read_data: The dataclass initialised by Read_data.
    max_epoch: Maximum number of epochs for training the neural network.
    lr: Learning rate.
    Patience: After num=patience epochs the training process stops.
    warm: The early stopping mechanism starts after num=warm epochs.
    '''
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.name = name
    self.model = model.to(self.device)
    self.DataModule = DataModule
    self.read_data = read_data
    self.max_epoch = max_epoch
    self.lr = lr
    self.patience = patience
    self.warm = warm

    self.best_val_loss = float('inf')
    self.patience_counter = 0

    self.features = read_data.history_t
    self.labels = read_data.future_t

    self.model.trainer = None
    self.optimiser = None
    self.train_loader = None
    self.val_loader = None
    self.test_loader = None   
    self.loss_train = []
    self.loss_val = []
    self.loss_test = []
    self.test_truth = []
    self.prediction_results = []
    
  def _prepare(self, portion_val: float, portion_test: float, batch_size: int):
    '''
    Split the dataset into trainset, validation set and testset, and format them into batches.
    self.train_loader, self.val_set and self.test_loader are used to store the sets after split.
    
    portion_val: Split ratio for the validation set.
    portion_test: Split ratio for the test set.
    batch_size: batch size.
    '''
    dataset = self.DataModule(self.features, self.labels)
    val_len = int(dataset.__len__() * portion_val)
    test_len = int(dataset.__len__() * portion_test)
    train_len = dataset.__len__() - val_len - test_len

    train_data = torch.utils.data.Subset(dataset, range(0, train_len))
    val_data = torch.utils.data.Subset(dataset, range(train_len, train_len + val_len))
    test_data = torch.utils.data.Subset(dataset, range(train_len + val_len, len(dataset)))
    
    self.train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=False)
    self.val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
    self.test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    self.model.trainer = self
    
  def fit(self, portion_val: float, portion_test: float, batch_size: int):
    '''
    Train the model until the early stopping. 
    The function calls the _prepare() automatically.
    '''
    self._prepare(portion_val, portion_test, batch_size)
    self.optim = self.model.configure_optimizers(self.lr)

    # with tqdm(total = self.max_epoch) as pbar:
    for epoch in range(self.max_epoch):
      # pbar.update(1)
      loss = self.fit_epoch()
      if epoch >= self.warm:
        if loss < self.best_val_loss:
          self.best_val_loss = loss
          self.patience_counter = 0
        else:
          self.patience_counter += 1

        if self.patience_counter >= self.patience:
          return
          
  def fit_epoch(self) -> float:
    '''
    The training and validation process in one epoch.

    return: The average validation loss for early stopping.
    '''
    # Training.
    self.model.train()
    loss_train_tmp = 0
    for batch in self.train_loader:
      batch = [item.to(self.device) for item in batch]
      loss = self.model.training_step(batch).cpu()
      loss_train_tmp += loss.detach()
      self.optim.zero_grad()
      with torch.no_grad():
        loss.backward()
        self.optim.step()
    self.loss_train.append(loss_train_tmp/len(self.train_loader))

    # Validation.
    if self.val_loader is None:
      return

    self.model.eval()
    loss_val_tmp = 0
    for batch in self.val_loader:
      batch = [item.to(self.device) for item in batch]
      with torch.no_grad():
        loss_val_tmp += self.model.validation_step(batch).cpu()
    avg_loss_val = loss_val_tmp/len(self.val_loader)
    self.loss_val.append(avg_loss_val)
    return avg_loss_val
      
  def test(self): 
    '''
    Test.
    '''
    self.model.eval()
    
    for batch in self.test_loader:
      batch = [item.to(self.device) for item in batch]
      self.test_truth.append(batch[-1])
      with torch.no_grad():
        loss = self.model.test_step(batch).cpu()
        self.loss_test.append(loss)

  def run(self, portion_val: float, portion_test, batch_size: int):
    '''
    Train, validate and test the LSTM network with calling the fit() and test() functions.   
    name: The name of this network.
    '''
    
    self.fit(portion_val, portion_test, batch_size)
    self.test()

  def test_plot(self):
    '''
    Plot two lines comparing the truth and predictions.
    The predictions are from self.test_results and the truth is from the test_loader.
    '''
    y_hat = torch.cat(self.model.test_results).cpu().numpy().reshape(1, -1)
    y = torch.cat(self.test_truth).cpu().numpy().reshape(1, -1)
    Plots.plot_lines(np.array([y_hat, y]).reshape(2, -1),
                     title='Truth VS Prediction of '+self.name+' LSTM', xlabel='Weeks', ylabel='values', label=['Prediction', 'Truth'], show_label=1)

  def predict(self):
    '''
    Predict the data.
    The number of the months predicted is equal to the read_data.predict_length.
    Finally, the predictions are transferred from torch.tensor to numpy numbers.
    '''
    self.model.eval()
    predictions_tmp = []
    with torch.no_grad():
      for time_step in range(self.read_data.predict_length):
        feature = self.read_data.data_normalised[-(self.read_data.window+self.read_data.window_interval)+time_step : -(self.read_data.window_interval)+time_step, 0].reshape(-1, 1)
        output = self.model.prediction_step(feature)
        predictions_tmp.append(self.read_data.scaler.inverse_transform(output.numpy()).reshape(-1)) # Reverse normalisation to get data in the real scale.
    self.prediction_results = [x.item() for x in predictions_tmp]


## Execution Example

In [30]:
# MUL_DEMAND = 1500
# MUL_PRICE = 1000/340.2
# MUL_YIELD = 2000
# TRUE_WEEKS = 12
# MAX_EPOCH = 10000

# hyper = {'window':          {'Demand': 17,      'Price': 12,     'Yield': 15    },
#          'hs':              {'Demand': 24,      'Price': 10,     'Yield': 16    },
#          'num_layers':      {'Demand': 1,       'Price': 2,      'Yield': 2     },
#          'lr':              {'Demand': 0.00005, 'Price': 0.0001, 'Yield': 0.0001},
#          'patience':        {'Demand': 1000,    'Price': 2000,   'Yield': 10000 },
#          'warm':            {'Demand': 500,     'Price': 200,    'Yield': 200   },
#          'portion_val':     {'Demand': 0.1,     'Price': 0.1,    'Yield': 0.1   },
#          'portion_test':    {'Demand': 0.1,     'Price': 0.1,    'Yield': 0.1   },
#          'batch_size':      {'Demand': 128,     'Price': 128,    'Yield': 128   }
#         }

# demand = Read_data('../Strawberry Demand.csv', 'Demand', true_weeks=TRUE_WEEKS, mul=MUL_DEMAND, window=hyper['window']['Demand'])
# price = Read_data('../Strawberry Price.csv', 'Price', true_weeks=TRUE_WEEKS, mul=MUL_PRICE, window=hyper['window']['Price'])
# syield = Read_data('../Yield.csv', 'Yield', true_weeks=TRUE_WEEKS, mul=MUL_YIELD, window=hyper['window']['Yield'])

In [31]:
# def neural_network_exe(data: Read_data, plot=0, gear=None) -> tuple:
#     '''
#     Execution of the training, validation and test of the neural network.
#     data: data class that is used as the prediction target.
#     plot: choose whether to plot the results, binary.
#     '''
#     name = data.name
#     model = LSTMNET(input_size=1, hidden_size=hyper['hs'][name], num_layers=hyper['num_layers'][name], num_classes=1, seq_length=hyper['window'][name], gear=gear)
#     trainer = Trainer(name, model, DataModule, data, MAX_EPOCH, hyper['lr'][name], hyper['patience'][name], hyper['warm'][name])
#     trainer.fit(hyper['portion_val'][name], hyper['portion_test'][name], hyper['batch_size'][name])
#     trainer.test()
#     trainer.predict()

#     if plot == 1:
#         Plots.plot_lines(np.array(trainer.loss_train).reshape(1, -1), title='Training Loss of ' + str(trainer.name) + ' LSTM', xlabel='Epochs', ylabel='MSE Loss', label=['Training Loss'], show_label=1)
#         Plots.plot_lines(np.array(trainer.loss_val).reshape(1, -1), title='Validation Loss of ' + str(trainer.name) + ' LSTM', xlabel='Epochs', ylabel='MSE Loss', label=['Validation Loss'], show_label=1)
#         trainer.test_plot()
#         print('Train loss of the ' + str(trainer.name) + ' LSTM: ' + str(trainer.loss_train[-1]))
#         print('Validation loss of the ' + str(trainer.name) + ' LSTM: ' + str(trainer.loss_val[-1]))
    
#     return trainer.loss_train, trainer.loss_val

In [32]:
# for data in [demand, syield, price]:
#     _, _ = neural_network_exe(data, plot=1)

## Structure Exploration

In [33]:
# val_loss_record = []

# for gear in range(5):
#     valLoss_total = 0
#     for _ in range(10):
#         _, val_loss = neural_network_exe(demand, plot=0, gear=gear)
#         valLoss_total += val_loss[-1]
#     val_loss_record.append(valLoss_total/10)

# Plots.plot_lines(np.array(val_loss_record).reshape(1, -1), x=np.arange(1, 6, 1), xticks=np.arange(1, 6, 1),
#                  title='Loss VS FC Layers Number', xlabel='The Number of FC Layers', ylabel='Validation Loss', label=['validation loss'], show_label=1)

## Grid Search

In [34]:
# losses = []
# for window in np.arange(15, 25):
#     demand = Read_data('../Strawberry Demand.csv', 'Demand', true_weeks=TRUE_WEEKS, mul=MUL_DEMAND, window=window)
#     name = demand.name
#     model = LSTMNET(input_size=1, hidden_size=hyper['hs'][name], num_layers=hyper['num_layers'][name], num_classes=1, seq_length=window)
#     trainer = Trainer(name, model, DataModule, demand, MAX_EPOCH, hyper['lr'][name], hyper['patience'][name], hyper['warm'][name])
#     trainer.fit(hyper['portion_val'][name], hyper['portion_test'][name], hyper['batch_size'][name])
#     trainer.test()
#     trainer.predict()
#     losses.append(trainer.loss_val[-1])
# Plots.plot_lines(np.array(losses).reshape(1, -1), x=np.arange(15, 25),
#                  title='Loss VS Window Length', xlabel='Window', ylabel='Validation Loss', label=['validation loss'], show_label=1)