In [26]:
# v1 was just vanilla.

# v2: 24.09.01.
#   1. Doing the CV (10 folds)
#   2. Early stopping with patience parameter.
#   3. Taking n steps into account.

# v3: not yet
#   1. Doing the CV (10 folds)
#   2. Early stopping with patience parameter.
#   3. Taking n steps into account, not by h(t-1) + h(t-2), but with different weights.

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import random
import scipy.io as sio
from sklearn.model_selection import KFold

# Default number of most recent hidden states that PainRNN sums to form its
# recurrent input (used as the default value of `num_previous_steps`).
take_n_steps = 5

# Function to set seeds for reproducibility
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's `random`, NumPy's global RNG and PyTorch (CPU and all CUDA
    devices) with the same value, then sets
    `torch.backends.cudnn.deterministic = True` and
    `torch.backends.cudnn.benchmark = False`.

    Args:
        seed: Integer seed handed unchanged to each generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

# Set the seed for reproducibility: seed every RNG once, up front.
# `seed` is also reused later as the `random_state` of the KFold splitter.
seed = 240802
set_seed(seed)

# Define the custom RNN model. Instead of using only h(t-1) (or the two previous
# hidden states, as in an earlier version), the recurrent input at each step is
# the sum of the last `num_previous_steps` hidden states.
class PainRNN(nn.Module):
    """RNN whose recurrent input is the sum of several past hidden states.

    At every time step the `nn.RNNCell` receives the current input together
    with the element-wise sum of the `num_previous_steps` most recent hidden
    states; zero tensors stand in for states before the start of the sequence.
    A linear read-out (`fc`) maps each hidden state to the output.

    Args:
        input_size: Number of input features per time step.
        hidden_size: Number of hidden units.
        output_size: Number of output features per time step.
        num_previous_steps: How many past hidden states are summed. `0` and
            `1` both yield the ordinary recurrence that uses only h(t-1).

    Raises:
        ValueError: If `num_previous_steps` is negative.
    """

    def __init__(self, input_size, hidden_size, output_size, num_previous_steps=take_n_steps):
        super().__init__()
        # Fail early with a clear message instead of an obscure error in forward().
        if num_previous_steps < 0:
            raise ValueError(
                f"num_previous_steps must be >= 0, got {num_previous_steps}"
            )

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_previous_steps = num_previous_steps  # Number of previous steps to consider

        # Define layers (creation order is kept so seeded initialisation is unchanged)
        self.rnn_cell = nn.RNNCell(input_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the network over a batch of sequences.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tuple `(outputs, hidden_states)` with shapes
            (batch, seq_len, output_size) and (batch, seq_len, hidden_size).

        Raises:
            ValueError: If `x` is not 3-dimensional.
        """
        if x.dim() != 3:
            raise ValueError(
                f"expected input of shape (batch, seq_len, input_size), got {tuple(x.shape)}"
            )
        batch_size, seq_len, _ = x.size()

        # Empty sequences: torch.stack cannot stack an empty list, so return
        # correctly shaped empty tensors instead.
        if seq_len == 0:
            return (
                x.new_zeros(batch_size, 0, self.output_size),
                x.new_zeros(batch_size, 0, self.hidden_size),
            )

        # A window of one state is the plain recurrence, which is also what
        # num_previous_steps == 0 means; one loop therefore serves both cases.
        window = max(self.num_previous_steps, 1)

        # Zero states for the steps before the sequence starts. new_zeros
        # follows the dtype and device of the input.
        past_hidden_states = [
            x.new_zeros(batch_size, self.hidden_size) for _ in range(window)
        ]

        outputs = []
        hidden_states = []
        for t in range(seq_len):
            # Current input
            x_t = x[:, t, :]

            # Sum the window oldest-to-newest (same accumulation order as the
            # built-in sum(), so results stay numerically reproducible).
            combined_hidden = past_hidden_states[0]
            for previous in past_hidden_states[1:]:
                combined_hidden = combined_hidden + previous

            # Update current hidden state
            h_t = self.rnn_cell(x_t, combined_hidden)

            # Slide the window: drop the oldest state, append the newest.
            past_hidden_states.pop(0)
            past_hidden_states.append(h_t)

            hidden_states.append(h_t)
            outputs.append(self.fc(h_t))

        # Stack along the time axis to match the batch-first layout of the input.
        return torch.stack(outputs, dim=1), torch.stack(hidden_states, dim=1)

In [27]:
# Define parameters
input_size = 1  # Input feature size
hidden_size = 8  # Number of features in the hidden state
output_size = 3  # Output feature size
learning_rate = 1e-3  # [0.001 or 0.0001]
num_epochs = 500
k_folds = 10
n_batch = 32
patience = 10  # Number of epochs to wait for improvement before stopping

# Load input and output data
inputoutputs = sio.loadmat('InOutputs')
Inputs = inputoutputs['Inputs']
Outputs = inputoutputs['Outputs']

# Build the sample matrices: for each condition, its input row is repeated once
# per row of that condition's target array so inputs and targets line up.
Xnp = []
ynp = []
for cond_i in range(len(Outputs)):
    Xnp.append(np.tile(Inputs[cond_i, :], (Outputs[cond_i][0].shape[0], 1)))
    ynp.append(Outputs[cond_i][0])
Xnp = np.expand_dims(np.vstack(Xnp), axis=2)  # add the trailing feature axis
ynp = np.vstack(ynp)
X = torch.from_numpy(Xnp).to(torch.float32)
y = torch.from_numpy(ynp).to(torch.float32)

# Combine inputs and outputs into a dataset
dataset = TensorDataset(X, y)

# Initialize k-fold cross-validation
kf = KFold(n_splits=k_folds, shuffle=True, random_state=seed)

# Lists to store training losses, validation losses, hidden states, and outputs
train_losses_all_epochs = []
val_losses_all_epochs = []
fold_hidden_states = []  # Store hidden states for each fold
fold_outputs = []  # Store outputs for each fold

# Model instantiation
device = 'cpu'
model = PainRNN(input_size, hidden_size, output_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Early stopping parameters
best_val_loss = float('inf')
epochs_without_improvement = 0
best_model_state = None

# NOTE(review): a single model/optimizer is trained across all folds inside every
# epoch, so each fold's validation samples are training samples of the other
# folds. The averaged "validation" loss is therefore not a held-out estimate.
# A strict k-fold CV would train a fresh model per fold; left unchanged here
# because it alters the experimental design.
for epoch in range(num_epochs):

    # Reset for each epoch
    epoch_train_loss = 0.0
    epoch_val_loss = 0.0
    # These buffers are re-created every epoch, so after the loop they hold the
    # training-batch values of the last epoch that ran (not of all epochs).
    hidden_states_all_epochs = []
    outputs_all_epochs = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):

        # Sample elements randomly from a given list of indices, no replacement.
        train_subsampler = torch.utils.data.SubsetRandomSampler(train_idx)
        val_subsampler = torch.utils.data.SubsetRandomSampler(val_idx)

        # Define data loaders for training and validation
        train_loader = DataLoader(dataset, batch_size=n_batch, sampler=train_subsampler)
        val_loader = DataLoader(dataset, batch_size=n_batch, sampler=val_subsampler)

        # Training loop for current fold
        model.train()  # Set the model to training mode
        fold_train_loss = 0.0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs, hidden_states = model(inputs)
            loss = criterion(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            fold_train_loss += loss.item()

            # Save hidden states and outputs
            hidden_states_all_epochs.append(hidden_states.detach().cpu().numpy())
            outputs_all_epochs.append(outputs.detach().cpu().numpy())

        fold_train_loss /= len(train_loader)
        epoch_train_loss += fold_train_loss

        # Validation loop for current fold
        model.eval()  # Set the model to evaluation mode
        fold_val_loss = 0.0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs, _ = model(inputs)
                loss = criterion(outputs, targets)
                fold_val_loss += loss.item()

        fold_val_loss /= len(val_loader)
        epoch_val_loss += fold_val_loss

    # Average train and validation loss across all folds
    epoch_train_loss /= k_folds
    epoch_val_loss /= k_folds

    train_losses_all_epochs.append(epoch_train_loss)
    val_losses_all_epochs.append(epoch_val_loss)

    print(f'Epoch [{epoch + 1}/{num_epochs}], Avg Train Loss: {epoch_train_loss:.4f}, Avg Val Loss: {epoch_val_loss:.4f}')

    # Early stopping check
    if epoch_val_loss < best_val_loss:
        best_val_loss = epoch_val_loss
        # state_dict() returns references to the live parameter tensors, which the
        # optimizer keeps updating in place. Clone them so the snapshot really
        # stays at the best epoch instead of drifting to the final weights.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1

    if epochs_without_improvement >= patience:
        print(f'Early stopping triggered after {epoch + 1} epochs.')
        break

# Restore the weights of the best epoch before producing the final outputs
if best_model_state is not None:
    model.load_state_dict(best_model_state)

# Final pass over the raw condition inputs: evaluation mode, no autograd graph,
# and on the same device as the model.
model.eval()
with torch.no_grad():
    final_inputs = torch.from_numpy(np.expand_dims(Inputs, axis=2)).to(torch.float32).to(device)
    out_t, h_t = model(final_inputs)
out_t = out_t.detach().cpu().numpy()
h_t = h_t.detach().cpu().numpy()

# Save the hidden states, outputs, and losses using scipy.io.savemat
sio.savemat('HiddenLayers.mat', {
    'out_t': out_t, 'h_t': h_t,
    'val_losses': val_losses_all_epochs,
    'hidden_states': hidden_states_all_epochs,
    'inputs': Inputs,
})
print("Training & Saving complete!")

Epoch [1/500], Avg Train Loss: 112.4835, Avg Val Loss: 110.4013
Epoch [2/500], Avg Train Loss: 85.7206, Avg Val Loss: 84.2121
Epoch [3/500], Avg Train Loss: 68.7737, Avg Val Loss: 67.8314
Epoch [4/500], Avg Train Loss: 55.4749, Avg Val Loss: 54.9152
Epoch [5/500], Avg Train Loss: 45.7992, Avg Val Loss: 45.2683
Epoch [6/500], Avg Train Loss: 38.9361, Avg Val Loss: 38.6707
Epoch [7/500], Avg Train Loss: 33.8515, Avg Val Loss: 33.7241
Epoch [8/500], Avg Train Loss: 30.0004, Avg Val Loss: 29.8403
Epoch [9/500], Avg Train Loss: 27.1780, Avg Val Loss: 27.1232
Epoch [10/500], Avg Train Loss: 25.3284, Avg Val Loss: 25.1390
Epoch [11/500], Avg Train Loss: 24.0373, Avg Val Loss: 23.9479
Epoch [12/500], Avg Train Loss: 23.1262, Avg Val Loss: 23.0725
Epoch [13/500], Avg Train Loss: 22.5230, Avg Val Loss: 22.5797
Epoch [14/500], Avg Train Loss: 22.0938, Avg Val Loss: 22.1978
Epoch [15/500], Avg Train Loss: 21.8780, Avg Val Loss: 22.1182
Epoch [16/500], Avg Train Loss: 21.6867, Avg Val Loss: 21.7296