# Sequential deep learning models

In [1]:
from copy import deepcopy
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
from datetime import datetime

### Vanilla RNN

In [87]:
class RNNModel(nn.Module):
    """Stacked vanilla RNN followed by a linear read-out of the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the hidden state of every recurrent layer.
        layer_dim: Number of stacked recurrent layers.
        output_dim: Size of the prediction produced per sequence.
        dropout_prob: Value forwarded to ``nn.RNN`` as its ``dropout`` argument.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, dropout_prob):
        super(RNNModel, self).__init__()

        # Kept on the instance because forward() needs them to size the
        # initial hidden state
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        # RNN layers (batch_first=True -> inputs are (batch, seq_len, input_dim))
        self.rnn = nn.RNN(
            input_dim, hidden_dim, layer_dim, batch_first=True, dropout=dropout_prob
        )
        # Fully connected layer mapping the last hidden output to the prediction
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return predictions of shape (batch_size, output_dim).

        Args:
            x: Input tensor laid out as (batch_size, seq_len, input_dim).
        """
        # Fresh all-zero hidden state for every call. new_zeros() inherits the
        # device AND dtype of x, so the model also works after .double()/.half().
        # The state is a constant, so no gradient tracking is involved.
        h0 = x.new_zeros(self.layer_dim, x.size(0), self.hidden_dim)

        # Re-compact the weights (relevant after the module was copied/moved)
        self.rnn.flatten_parameters()
        out, _ = self.rnn(x, h0)

        # out is (batch_size, seq_len, hidden_dim): keep only the last time step
        out = out[:, -1, :]

        # Project the last hidden output to (batch_size, output_dim)
        return self.fc(out)

### LSTM

In [88]:
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear read-out of the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the hidden and cell state of every LSTM layer.
        layer_dim: Number of stacked LSTM layers.
        output_dim: Size of the prediction produced per sequence.
        dropout_prob: Value forwarded to ``nn.LSTM`` as its ``dropout`` argument.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, dropout_prob):
        super(LSTMModel, self).__init__()

        # Kept on the instance because forward() needs them to size the
        # initial hidden and cell states
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        # LSTM layers (batch_first=True -> inputs are (batch, seq_len, input_dim))
        self.lstm = nn.LSTM(
            input_dim, hidden_dim, layer_dim, batch_first=True, dropout=dropout_prob
        )

        # Fully connected layer mapping the last hidden output to the prediction
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return predictions of shape (batch_size, output_dim).

        Args:
            x: Input tensor laid out as (batch_size, seq_len, input_dim).
        """
        # Hidden and cell state start from zeros on every call, so nothing is
        # carried over between batches. new_zeros() inherits the device AND
        # dtype of x, so the model also works after .double()/.half().
        state_shape = (self.layer_dim, x.size(0), self.hidden_dim)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)

        # Re-compact the weights (relevant after the module was copied/moved)
        self.lstm.flatten_parameters()
        out, _ = self.lstm(x, (h0, c0))

        # out is (batch_size, seq_len, hidden_dim): keep only the last time step
        out = out[:, -1, :]

        # Project the last hidden output to (batch_size, output_dim)
        return self.fc(out)

### GRU

In [89]:
class GRUModel(nn.Module):
    """Stacked GRU followed by a linear read-out of the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the hidden state of every GRU layer.
        layer_dim: Number of stacked GRU layers.
        output_dim: Size of the prediction produced per sequence.
        dropout_prob: Value forwarded to ``nn.GRU`` as its ``dropout`` argument.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, dropout_prob):
        super(GRUModel, self).__init__()

        # Kept on the instance because forward() needs them to size the
        # initial hidden state
        self.layer_dim = layer_dim
        self.hidden_dim = hidden_dim

        # GRU layers (batch_first=True -> inputs are (batch, seq_len, input_dim))
        self.gru = nn.GRU(
            input_dim, hidden_dim, layer_dim, batch_first=True, dropout=dropout_prob
        )

        # Fully connected layer mapping the last hidden output to the prediction
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return predictions of shape (batch_size, output_dim).

        Args:
            x: Input tensor laid out as (batch_size, seq_len, input_dim).
        """
        # Fresh all-zero hidden state for every call. new_zeros() inherits the
        # device AND dtype of x, so the model also works after .double()/.half().
        h0 = x.new_zeros(self.layer_dim, x.size(0), self.hidden_dim)

        # Re-compact the weights (relevant after the module was copied/moved)
        self.gru.flatten_parameters()
        out, _ = self.gru(x, h0)

        # out is (batch_size, seq_len, hidden_dim): keep only the last time step
        out = out[:, -1, :]

        # Project the last hidden output to (batch_size, output_dim)
        return self.fc(out)

### Model initialization

In [90]:
def get_model(model, model_params):
    """Instantiate one of the recurrent models by (case-insensitive) name.

    Args:
        model: Model name; one of "rnn", "lstm" or "gru" in any letter case.
        model_params: Dict of keyword arguments forwarded to the model
            constructor.

    Returns:
        A freshly constructed model instance.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    models = {
        "rnn": RNNModel,
        "lstm": LSTMModel,
        "gru": GRUModel,
    }
    model_cls = models.get(model.lower())
    if model_cls is None:
        # Fail with an explicit message instead of the opaque
        # "'NoneType' object is not callable"
        raise ValueError(
            "Unknown model {!r}; expected one of {}".format(model, sorted(models))
        )
    return model_cls(**model_params)

### Optimisation

In [91]:
class Optimization:
    """Training / evaluation helper for a model trained with a weighted loss.

    The loss function is called as ``loss_fn(y, yhat, weight)``. During
    training ``weight`` is a per-sample kernel weight that depends on how far
    a sample is from the end of the training set; during validation every
    sample gets the same weight ``1 / n_val``.

    Attributes set in ``__init__``:
        model, loss_fn, optimizer: the objects passed in.
        train_losses / val_losses: per-epoch summed batch losses, appended to
            by ``train``.
    """

    # Values of ``weight_type`` understood by train()
    _WEIGHT_TYPES = ('exponential', 'rayleigh', 'bartlett', 'parzen', 'tukey-hanning')

    def __init__(self, model, loss_fn, optimizer):
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.train_losses = []
        self.val_losses = []

    @staticmethod
    def _device_of(model):
        """Return the device of the model's first parameter (CPU if it has none)."""
        first_param = next(model.parameters(), None)
        return torch.device('cpu') if first_param is None else first_param.device

    @staticmethod
    def _sample_weights(weight_type, alpha, age):
        """Return the kernel weight of each sample given its age.

        ``age`` is ``obs_total - t``: the number of observations between the
        sample and the end of the training set. ``alpha`` is the decay rate
        (exponential, Rayleigh) or the bandwidth (Bartlett, Parzen,
        Tukey-Hanning).
        """
        age = np.asarray(age, dtype=np.float64)
        if weight_type == 'exponential':
            return np.exp(-alpha * age)
        if weight_type == 'rayleigh':
            return np.exp(-(1 / 2) * alpha * age ** 2)

        # The remaining kernels are functions of the age relative to alpha
        u = age / alpha
        if weight_type == 'bartlett':
            return np.where(u <= 1, 1 - u, 0.0)
        if weight_type == 'parzen':
            return np.select(
                [(u >= 0) & (u <= 0.5), (u > 0.5) & (u <= 1)],
                [1 - 6 * u ** 2 + 6 * u ** 3, 2 * (1 - u) ** 3],
                default=0.0,
            )
        if weight_type == 'tukey-hanning':
            return np.where(u <= 1, (1 + np.cos(np.pi * u)) / 2, 0.0)
        raise ValueError(
            "Unknown weight_type {!r}; expected one of {}".format(
                weight_type, Optimization._WEIGHT_TYPES
            )
        )

    def train_step(self, x, y, weight):
        """Run one optimisation step on a batch and return its loss as a float."""
        # Sets model to train mode
        self.model.train()

        # Makes predictions
        yhat = self.model(x)

        # Computes loss
        loss = self.loss_fn(y, yhat, weight)

        # Computes gradients
        loss.backward()

        # Updates parameters and zeroes gradients
        self.optimizer.step()
        self.optimizer.zero_grad()

        # Returns the loss
        return loss.item()

    def train(self, train_loader, val_loader, weight_type, alpha, batch_size=64, n_epochs=50, n_features=1):
        """Train the model and return the snapshot with the lowest validation loss.

        Args:
            train_loader: Loader of (x, y) training batches. The sample weights
                are derived from the running position of each sample, so the
                loader is expected to iterate in a fixed order.
            val_loader: Loader of (x, y) validation batches.
            weight_type: One of 'exponential', 'rayleigh', 'bartlett',
                'parzen' or 'tukey-hanning'.
            alpha: Decay rate / bandwidth of the chosen kernel.
            batch_size: Unused; kept for backward compatibility.
            n_epochs: Maximum number of epochs.
            n_features: Size of the last axis the inputs are reshaped to.

        Returns:
            Tuple ``(best_model, best_loss)``: a deep copy of the model at the
            epoch with the lowest validation loss, and that loss. If no epoch
            produced a loss below infinity (e.g. ``n_epochs == 0``), a copy of
            the current model is returned together with ``inf``.

        Raises:
            ValueError: If ``weight_type`` is not supported.
        """
        if weight_type not in self._WEIGHT_TYPES:
            raise ValueError(
                "Unknown weight_type {!r}; expected one of {}".format(
                    weight_type, self._WEIGHT_TYPES
                )
            )

        # Run on whatever device the model already lives on
        device = self._device_of(self.model)

        best_loss = np.inf
        best_train_loss = np.inf
        best_model = None
        stop_criterion = 1e-5  # minimum training-loss improvement that counts
        patience = 100  # epochs without such an improvement before stopping
        cnt = 0

        obs_total = len(train_loader.dataset)  # number of samples
        n_val = len(val_loader.dataset)
        # Validation uses equal weights; guard against an empty validation set
        val_weight = torch.tensor(1 / n_val).to(device) if n_val else None

        for epoch in range(1, n_epochs + 1):
            batch_losses = []
            step = 1  # 1-based position of the first sample of the current batch
            for x_batch, y_batch in train_loader:
                n_batch = len(y_batch)
                x_batch = x_batch.view([n_batch, -1, n_features]).to(device)
                y_batch = y_batch.view([n_batch, -1]).to(device)
                t = np.arange(step, step + n_batch)  # periods of this batch
                weight = self._sample_weights(weight_type, alpha, obs_total - t)
                b_loss = self.train_step(x_batch, y_batch, torch.tensor(weight).to(device))
                batch_losses.append(b_loss)
                step += n_batch
            training_loss = np.sum(batch_losses)
            self.train_losses.append(training_loss)

            self.model.eval()
            with torch.no_grad():
                batch_val_losses = []
                for x_val, y_val in val_loader:
                    x_val = x_val.view([len(y_val), -1, n_features]).to(device)
                    y_val = y_val.to(device)
                    yhat = self.model(x_val)
                    batch_val_losses.append(self.loss_fn(y_val, yhat, val_weight).item())
            val_loss = np.sum(batch_val_losses)  # MSE
            self.val_losses.append(val_loss)

            # save model with smallest validation loss
            if val_loss < best_loss:
                best_loss = val_loss
                best_model = deepcopy(self.model)

            # early stopping, monitored on the (weighted) training loss
            if (best_train_loss - training_loss) > stop_criterion:
                best_train_loss = training_loss
                cnt = 0
            else:
                cnt += 1
            if cnt == patience:
                break

        if best_model is None:
            # No epoch ran or no validation loss was ever below infinity
            best_model = deepcopy(self.model)

        return best_model, best_loss

    def evaluate(self, best_model, test_loader, batch_size=1, n_features=1):
        """Predict every batch of ``test_loader`` with a copy of ``best_model``.

        Args:
            best_model: Trained model; it is deep-copied, so its train/eval
                mode is left untouched.
            test_loader: Loader of (x, y) test batches.
            batch_size: Unused; kept for backward compatibility. The actual
                size of each batch is used for reshaping instead.
            n_features: Size of the last axis the inputs are reshaped to.

        Returns:
            Tuple ``(predictions, values)``: two lists with one NumPy array
            per batch.
        """
        model = deepcopy(best_model)
        model.eval()
        # Run on whatever device the model already lives on
        device = self._device_of(model)

        predictions = []
        values = []
        with torch.no_grad():
            for x_test, y_test in test_loader:
                # Reshape with the real batch size so that a partial or
                # differently sized batch is not folded into the sequence axis
                x_test = x_test.view([x_test.size(0), -1, n_features]).to(device)
                yhat = model(x_test)
                predictions.append(yhat.detach().cpu().numpy())
                values.append(y_test.detach().cpu().numpy())

        return predictions, values

    def plot_losses(self):
        """Plot the recorded training and validation losses per epoch.

        Uses the name ``plt``, which must be available in the enclosing
        namespace (presumably ``matplotlib.pyplot`` -- confirm against the
        notebook's imports).
        """
        plt.plot(self.train_losses, label="Training loss")
        plt.plot(self.val_losses, label="Validation loss")
        plt.legend()
        plt.title("Losses")
        plt.show()
        plt.close()

### Weighted loss function

In [None]:
class WeightedLoss(nn.Module):
    """Weighted sum of squared errors between two tensors."""

    def __init__(self):
        super().__init__()

    def forward(self, inputs, targets, weight=1):
        """Return ``sum(weight * (inputs - targets)^2)`` over all elements.

        Both tensors are flattened to 1-D before the difference is taken;
        ``weight`` is multiplied element-wise with the squared differences
        (the default ``1`` gives a plain sum of squared errors).
        """
        # Reference form noted with the original implementation:
        # l_{\text{EWS}} = \sum_{t=1}^{T}\alpha^{T-(t-1)}(y_t-\hat{y}_t)^2

        # residuals on the flattened tensors
        residual = inputs.view(-1) - targets.view(-1)

        # weight each squared residual, then reduce to a scalar
        weighted = weight * residual.pow(2)
        return weighted.sum()

### Training 

In [1]:
def train_loop(model_name, model_params,num_sim,str_sims,path,learning_rate,weight_decay,alphas,lags=1,weight_type='exponential', batch_size = 64,n_epochs = 500):
    """Train one model per candidate alpha on every dataset and score the best.

    For each dataset named in ``str_sims`` the arrays are loaded from
    ``path + 'Temp/sim<num_sim>_<name>.npz'``, one model is trained per entry
    of ``alphas``, and the model with the lowest validation loss is evaluated
    on the test split.

    Args:
        model_name: Name forwarded to ``get_model``.
        model_params: Constructor keyword arguments forwarded to ``get_model``;
            its 'input_dim' entry is also used as the number of features.
        num_sim: Simulation number used in the data file name.
        str_sims: Sequence of dataset names.
        path: Directory prefix of the data files (concatenated as-is, so it
            needs its trailing separator).
        learning_rate, weight_decay: Adam hyper-parameters.
        alphas: Candidate kernel parameters tried for every dataset.
        lags: Number of leading feature columns to keep.
        weight_type: Kernel name forwarded to ``Optimization.train``.
        batch_size: Batch size of the training and validation loaders.
        n_epochs: Maximum number of epochs per model.

    Returns:
        Tuple ``(df_sim, best_alphas)``: a DataFrame with one row per dataset
        and the columns rmse, mae, mape, r2, and an array holding, per
        dataset, the INDEX into ``alphas`` of the selected model (the index,
        not the alpha value).

    Raises:
        ValueError: If there are datasets to process but ``alphas`` is empty.
        RuntimeError: If no candidate produced a validation loss below
            infinity for a dataset.
    """
    if len(str_sims) > 0 and len(alphas) == 0:
        raise ValueError("alphas must contain at least one candidate value")

    # train on GPU when one is available, otherwise fall back to the CPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    metric_names = ['rmse', 'mae', 'mape', 'r2']
    metric_rows = []
    best_alphas = np.zeros(len(str_sims))

    # loop over all datasets in the simulation setup
    for j, sim_name in enumerate(str_sims):

        # print dataset
        print('Dataset: ', sim_name, '(', j + 1, '/', len(str_sims), ')')

        # load data
        data_name = 'sim'+str(num_sim)+'_'+sim_name
        with np.load(path+'Temp/'+data_name+'.npz') as data:
            # extract data and the given number of lagged features
            X_train = data['X_train'][:,:lags]
            X_val = data['X_val'][:,:lags]
            X_test = data['X_test'][:,:lags]
            y_train = data['y_train']
            y_val = data['y_val']
            y_test = data['y_test']

        # build tensor datasets (torch.Tensor converts the arrays to float32)
        train = TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train))
        val = TensorDataset(torch.Tensor(X_val), torch.Tensor(y_val))
        test = TensorDataset(torch.Tensor(X_test), torch.Tensor(y_test))

        # get batched data; order is preserved because the sample weights
        # depend on the position of each observation
        train_loader = DataLoader(train, batch_size=batch_size, shuffle=False, drop_last=False)
        val_loader = DataLoader(val, batch_size=batch_size, shuffle=False, drop_last=False)
        test_loader = DataLoader(test, batch_size=1, shuffle=False, drop_last=False)

        # reset the selection for every dataset so that a model chosen for a
        # previous dataset can never leak into this one
        best_loss = np.inf
        best_model = None

        # loop over all candidate alphas
        for i, alpha in enumerate(alphas):

            # initialise model
            model = get_model(model_name, model_params).to(device)

            # create loss function and optimizer
            loss_fn = WeightedLoss()
            optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
            opt = Optimization(model=model, loss_fn=loss_fn, optimizer=optimizer)

            # train the model
            best_model_out, val_loss = opt.train(train_loader, val_loader, weight_type=weight_type, alpha=alpha, batch_size=batch_size, n_epochs=n_epochs, n_features=model_params['input_dim'])
            if val_loss < best_loss:
                best_loss = val_loss
                # train() already returns an independent copy of the model
                best_model = best_model_out
                best_alphas[j] = i  # index into alphas, not the alpha value

        if best_model is None:
            raise RuntimeError(
                "No candidate alpha produced a finite validation loss for dataset "
                + repr(sim_name)
            )

        # evaluate on test set (format_predictions / calculate_metrics are
        # expected to be defined elsewhere in the notebook)
        predictions, values = opt.evaluate(best_model, test_loader, batch_size=1, n_features=model_params['input_dim'])
        df_result = format_predictions(predictions, values)
        result_metrics = calculate_metrics(df_result)

        # collect metrics on test set
        metric_rows.append([result_metrics[name] for name in metric_names])

    # build the result frame once instead of concatenating inside the loop
    df_sim = pd.DataFrame(metric_rows, columns=metric_names)

    return df_sim, best_alphas