# Data setup

## Imports

Import modules

In [None]:
!pip install torchmetrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from tqdm import tqdm
import torchmetrics as tm
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from torch import nn
import pandas as pd
import os
import shutil
import random
from torch.utils.data import Dataset, DataLoader
from scipy.io import loadmat
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, KFold
import pickle
from time import time
import warnings
warnings.filterwarnings("ignore")
torch.manual_seed(42)
random.seed(42)

import math
from copy import copy, deepcopy
from typing import Tuple
import torch
from torch import nn, Tensor
import torch.nn.functional as F
from torch.utils.data import dataset
from torch.nn import TransformerEncoderLayer, TransformerEncoder
from torch.nn import Linear, Dropout, LayerNorm
from torchvision.models.resnet import Bottleneck

## Get data

In [None]:
# Mount Google Drive under /content/drive so the dataset archive stored there can be read.
# NOTE: `google.colab` is only importable inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read features and labels vectors
filename = '/content/drive/MyDrive/TCC Dados/dados.zip' # Archive holding both .npy files
extract_dir = '/content/' # Where the archive is unpacked
svpth = '/content/' # Where the .npy files are expected after unpacking
featPath = os.path.join(svpth, 'features.npy')
lblPath = os.path.join(svpth, 'labels.npy')
# Unpack whenever EITHER file is missing; requiring both to be missing would skip
# the extraction after a partial unpack and make np.load fail below
if not (os.path.exists(featPath) and os.path.exists(lblPath)):
  shutil.unpack_archive(filename, extract_dir)
X = np.load(featPath) # features
y = np.load(lblPath) # labels

## Analyze data correlation
Plot autocorrelation function to verify redundancy in data

In [None]:
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
n_plots = 10 # Number of randomly chosen signals to inspect
lags = 100 # Number of samples to calculate autocorrelation
fig, ax = plt.subplots(n_plots, 1, figsize = (14, 6 * n_plots))
# Draw the indices from the real number of signals instead of a hard-coded 800,
# which raised IndexError on smaller datasets and ignored signals beyond index 799
idxs = np.random.randint(len(X), size = n_plots)
for i, idx in enumerate(idxs):
  # Scale every feature of the signal to [0, 1] so that the mean over features is not dominated by one of them
  Xsample = MinMaxScaler().fit_transform(X[idx])
  plot_acf(np.mean(Xsample, axis = 1), lags = lags, ax = ax[i])
  ax[i].set_xticks(np.arange(0, lags + 1, 5))
  ax[i].set_xticklabels(ax[i].get_xticks(), rotation = 90)
  ax[i].grid()
plt.show()

## Data processing and auxiliary functions

In [None]:
from torch.utils.data import Dataset, DataLoader

class DatasetAVQ(Dataset):
  '''
  Map-style dataset that pairs every feature entry with its label for AVQA
  '''
  def __init__(self, features, labels):
    '''
    Parameters:
    -----------------------------------
    features: np.ndarray
      array of features
    labels: np.ndarray
      array of labels

    Raises:
    -----------------------------------
    AssertionError
      If features and labels do not have the same length
    '''
    self.features = features
    self.labels = labels
    assert len(self.features) == len(self.labels), 'Features and labels should have equal length'

  def __len__(self):
    '''
    Number of (feature, label) pairs in the dataset
    '''
    return len(self.features)

  def __getitem__(self, idx):
    '''
    Return the (feature, label) pair stored at position idx
    '''
    return self.features[idx], self.labels[idx]

In [None]:
def applyTransform(X, y, transform):
    '''
    Fit a fresh transformer on the data and return the transformed data as tensors

    Parameters:
    -----------------------------------
    X: np.ndarray
      Array of features. Every axis but the last one is flattened before fitting, so
      the transformer sees one row per entry and one column per feature
    y: np.ndarray
      Array of labels (converted to a tensor, otherwise untouched)
    transform: sklearn.preprocessing class (MinMaxScaler, StandardScaler, etc)
      Class that is instantiated with no arguments and exposes fit_transform

    Returns:
    -----------------------------------
    Xout, yout: torch.Tensor
      Transformed features (same shape as X) and labels
    '''
    scaler = transform()
    n_features = X.shape[-1]
    # Collapse the leading axes so that the transformer works feature-wise
    scaled = scaler.fit_transform(np.reshape(X, (-1, n_features)))
    Xout = torch.Tensor(np.reshape(scaled, X.shape))
    yout = torch.Tensor(y)
    return Xout, yout

In [None]:
class AVQTransformerData():
  '''
  Data handler: reads features/labels, splits them into train/validation/test sets and
  wraps the splits into Pytorch data loaders
  '''
  def __init__(self, matPath, mosPath, featuresName = 'avFeatures', val_size = 0.1, test_size = 0.1, 
               transform = None, shuffle = True, debug = True):
    '''
    Parameters:
    -----------------------------------
    matPath: str
      Path to .mat files
    mosPath: str
      Path to labels (MOS) file
    featuresName: str, optional
      Name of features array in .mat file. Default: 'avFeatures'.
    val_size: float or int, optional
      Size of validation split. If float, value represents percentage of entire dataset to split. Default: 0.1
    test_size: float or int, optional
      Size of test split. If float, value represents percentage of entire dataset to split.
      A falsy value (0, 0.0 or None) disables the test split. Default: 0.1
    transform: sklearn.preprocessing function (MinMaxScaler, StandardScaler, etc), optional
      Function to transform data. If None (default), apply no transformation to data
    shuffle: bool, optional
      If True (default), shuffle data before splitting.
    debug: bool, optional
      If True (default), show data processing progress
    '''
    self.matPath = matPath
    self.mosPath = mosPath
    self.featuresName = featuresName
    self.val_size = val_size
    self.test_size = test_size
    self.transform = transform
    self.shuffle = shuffle
    self.debug = debug

  @staticmethod
  def _toTensor(arr):
    '''
    Return arr unchanged when it already is a tensor, otherwise convert it with torch.Tensor
    '''
    return arr if torch.is_tensor(arr) else torch.Tensor(arr)

  def readFeaturesLabels(self):
    '''
    Read features and labels data from .mat files

    Returns:
    -----------------------------------
      X: np.ndarray
        Features array of shape (n_videos, n_frames, n_features)
      y: np.ndarray
        Labels array of shape (n_videos,)
    '''
    if self.debug: print('Reading data from files...')
    # Read MOS data
    df = pd.read_csv(self.mosPath, sep = ';')
    df = df[['testFile', 'Mqs']].rename(columns = {'Mqs': 'MOS'})
    # Generate labels array (explicit copy, so that filling NaNs never writes back into df)
    y = df['MOS'].to_numpy(dtype = float, copy = True)
    # Treat NaN values: replace them by the mean MOS of the files whose name contains 'HRC'
    meanHRC = np.nanmean(df.loc[df['testFile'].str.contains('HRC'), 'MOS'].values)
    y[np.isnan(y)] = meanHRC
    # Read features data
    files = df['testFile'].apply(lambda x: os.path.join(self.matPath, x + '.mat')).values
    # Transpose array and clip to minimum length
    arrays = [np.transpose(loadmat(i)[self.featuresName]) for i in files]
    minLen = min(len(t) for t in arrays)
    X = np.stack([t[:minLen] for t in arrays], axis = 0)
    if self.debug:
      print(f'X shape: {X.shape}\ny shape: {y.shape}\n')
    self.X, self.y = X, y
    return X, y

  def dataPreparation(self, read_data = False, X = None, y = None, train_val_test_data = None):
    '''
    Split (and optionally transform) the data, returning every split as a tensor

    Parameters:
    -----------------------------------
    read_data: bool, optional
      If True, read data from .mat files. Else, read from X and y parameters. Ignored if train_val_test_data is not None. Default: False
    X: np.ndarray, optional
      Features array. Ignored if read_data is True or train_val_test_data is provided. Default: None
    y: np.ndarray, optional
      Labels array. Ignored if read_data is True or train_val_test_data is provided. Default: None
    train_val_test_data: list, optional
      List with arrays train, test and validation arrays, with format [X_train, X_val, X_test, y_train, y_val, y_test], if
      using test data, else [X_train, X_val, y_train, y_val]. Used as given (no splitting, no transformation)

    Returns:
    -----------------------------------
    X_train: torch.Tensor
      Training features array
    X_val: torch.Tensor
      Validation features array
    X_test: torch.Tensor
      Test features array, only if using test data
    y_train: torch.Tensor
      Training labels array
    y_val: torch.Tensor
      Validation labels array
    y_test: torch.Tensor
      Test labels array, only if using test data

    Raises:
    -----------------------------------
    ValueError
      If no data source is given, or if a test split is requested and test_size and val_size
      are not both integers or both floats
    '''
    use_test = bool(self.test_size)
    X_test = y_test = None
    if train_val_test_data:
      # Pre-split data provided: X, y and read_data are not needed
      if self.debug: print('Preparing data...')
      if use_test:
        X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_data
      else:
        X_train, X_val, y_train, y_val = train_val_test_data
      splits = [(X_train, y_train), (X_val, y_val)] + ([(X_test, y_test)] if use_test else [])
      splits = [(self._toTensor(i), self._toTensor(j)) for i, j in splits]
    else:
      # Read data from .mat files
      if read_data:
        X, y = self.readFeaturesLabels()
      elif X is None or y is None:
        raise ValueError('If "read_data" is False, X and y should be provided')
      if self.debug: print('Preparing data...')
      # Split data into train, validation and test
      tint = type(self.test_size) is int
      vint = type(self.val_size) is int
      # The size types only need to agree when a test split is actually requested
      if use_test and tint != vint:
        raise ValueError('test_size and val_size shoud both be integers or both be floats')
      X_train, X_val, y_train, y_val = train_test_split(X, y, test_size = self.val_size, shuffle = self.shuffle)
      if use_test:
        # A fractional test size refers to the whole dataset, so rescale it to the data left after the validation split
        rel_test = self.test_size if tint else self.test_size/(1 - self.val_size)
        X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size = rel_test, shuffle = self.shuffle)
      splits = [(X_train, y_train), (X_val, y_val)] + ([(X_test, y_test)] if use_test else [])
      if self.transform:
        # NOTE(review): a new transformer is fitted on every split separately; fitting on the
        # training split only and re-using it for validation/test is the usual practice - confirm intent
        splits = [applyTransform(X = i, y = j, transform = self.transform) for i, j in splits]
      else:
        # Without a transformation the splits are still NumPy arrays; downstream code expects tensors
        splits = [(self._toTensor(i), self._toTensor(j)) for i, j in splits]
    if use_test:
      (X_train, y_train), (X_val, y_val), (X_test, y_test) = splits
    else:
      (X_train, y_train), (X_val, y_val) = splits
    self.num_frames = X_train.shape[1]
    self.seq_len = X_val.shape[1]
    # Save train, validation and test tensors
    if use_test: self.train_val_test_data = [X_train, X_val, X_test, y_train, y_val, y_test]
    else: self.train_val_test_data = [X_train, X_val, y_train, y_val]
    if self.debug:
      if use_test: print(f'Train/val/test data shapes:\n{X_train.size()}, {X_val.size()}, {X_test.size()}\n{y_train.size()}, {y_val.size()}, {y_test.size()}\n')
      else: print(f'Train/val data shapes:\n{X_train.size()}, {X_val.size()}\n{y_train.size()}, {y_val.size()}\n')
    if use_test:
      return X_train, X_val, X_test, y_train, y_val, y_test
    else:
      return X_train, X_val, y_train, y_val

  def createDataloaders(self, batch_size, shuffle, read_data = False, X = None, y = None, train_val_test_data = None):
    '''
    Prepare the data (see dataPreparation) and wrap every split into a data loader

    Parameters:
    -----------------------------------
    batch_size: int
      Batch size for DataLoader
    shuffle: bool
      If True, shuffle batch data each iteration (applied to every data loader)
    read_data: bool, optional
      If True, read data from .mat files. Else, read from X and y parameters. Ignored if train_val_test_data is not None. Default: False
    X: np.ndarray, optional
      Features array. Ignored if read_data is True or train_val_test_data is provided. Default: None
    y: np.ndarray, optional
      Labels array. Ignored if read_data is True or train_val_test_data is provided. Default: None
    train_val_test_data: list, optional
      List with arrays train, test and validation arrays, with format [X_train, X_val, X_test, y_train, y_val, y_test], if
      using test data, else [X_train, X_val, y_train, y_val]

    Returns:
    -----------------------------------
    train_dataloader: torch.utils.data.DataLoader
      Pytorch train data loader
    val_dataloader: torch.utils.data.DataLoader
      Pytorch validation data loader
    test_dataloader: torch.utils.data.DataLoader
      Pytorch test data loader, only if using test data
    '''
    # Retrieve train, validation and test data
    if self.test_size:
      X_train, X_val, X_test, y_train, y_val, y_test = self.dataPreparation(read_data, X, y, train_val_test_data)
      self.X_train, self.X_val, self.X_test = X_train, X_val, X_test
      self.y_train, self.y_val, self.y_test = y_train, y_val, y_test
    else:
      X_train, X_val, y_train, y_val = self.dataPreparation(read_data, X, y, train_val_test_data)
      self.X_train, self.X_val = X_train, X_val
      self.y_train, self.y_val = y_train, y_val
    if self.debug: print('Creating dataloaders...')
    # Generate dataloaders
    train_dataloader = DataLoader(DatasetAVQ(X_train, y_train), batch_size = batch_size, shuffle = shuffle)
    val_dataloader = DataLoader(DatasetAVQ(X_val, y_val), batch_size = batch_size, shuffle = shuffle)
    if self.test_size:
      test_dataloader = DataLoader(DatasetAVQ(X_test, y_test), batch_size = batch_size, shuffle = shuffle)
    if self.debug:
      print('Dataloaders created!')
      if self.test_size: print(f'Dataloaders sizes:\n{len(train_dataloader)}, {len(val_dataloader)}, {len(test_dataloader)}')
      else: print(f'Dataloaders sizes:\n{len(train_dataloader)}, {len(val_dataloader)}')
    if self.test_size: return train_dataloader, val_dataloader, test_dataloader
    else: return train_dataloader, val_dataloader

In [None]:
# Auxiliary functions

def filterDict(dictionary, keys):
  '''
  Keep only the requested keys of a dictionary, rounding nested dictionaries

  Parameters:
  -----------------------------------
  dictionary: dict
    Dictionary to filter
  keys: list
    List of keys to keep in dictionary

  Returns:
  -----------------------------------
  dict2: dict
    Dictionary with the kept keys. Values that are dictionaries have their own
    values rounded to 4 decimal places; every other value is kept as is
  '''
  dict2 = {}
  for name, value in dictionary.items():
    if name not in keys:
      continue
    if type(value) == dict:
      value = {inner: round(number, 4) for inner, number in value.items()}
    dict2[name] = value
  return dict2

def cleanDir(path):
  '''
  Create an empty directory

  Parameters:
  -----------------------------------
  path: str
    Path to empty directory. If path exists, the directory is deleted and recreated with no contents on it.
    Missing parent directories are created as well.
  '''
  if os.path.isdir(path):
    shutil.rmtree(path)
  # makedirs (instead of mkdir) so that a nested path whose parents do not exist yet also works
  os.makedirs(path)

def save_object(obj, filename):
  '''
  Serialize a python object into a pickle file (highest available protocol)

  Parameters:
  -----------------------------------
  obj: any python object
    Python object to pickle
  filename: str
    Path of the pickle file to write (overwritten if it exists)
  '''
  with open(filename, 'wb') as handle:
    pickle.Pickler(handle, pickle.HIGHEST_PROTOCOL).dump(obj)

def load_object(filename):
  '''
  Read back a python object stored in a pickle file

  Parameters:
  -----------------------------------
  filename: str
    Path to pickle file

  Returns:
  -----------------------------------
    The unpickled python object
  '''
  # NOTE: unpickling can execute arbitrary code - only load files from a trusted source
  with open(filename, 'rb') as handle:
    return pickle.load(handle)

def elapsedTime(t0, t1):
  '''
  Format the time elapsed between two instants

  Parameters:
  -----------------------------------
  t0: float
    Initial time, in seconds (e.g. value returned by time.time())
  t1: float
    Final time, in seconds
  
  Returns:
  -----------------------------------
  String with elapsed time in minutes and seconds (fractions of a second are dropped)
  '''
  total_seconds = int(t1 - t0)
  minutes = total_seconds // 60
  seconds = total_seconds % 60
  return f'{minutes}m{seconds}s'

# Model

## Positional Encoding

In [None]:
class PositionalEncoding(nn.Module):
  '''
  Transformer Positional Encoding implementation (from https://pytorch.org/tutorials/beginner/transformer_tutorial.html)
  '''
  def __init__(self, d_model, dropout = 0.1, max_len = 20000, batch_first = False):
    '''
    Parameters:
    -----------------------------------
    d_model: int
      Dimension of model's embedding
    dropout: float
      Dropout value
    max_len: int
      Maximum length of input sequence
    batch_first: bool, optional
      If True, the input of forward is (batch, seq_len, d_model). If False (default), it is
      (seq_len, batch, d_model), as in the original tutorial.
    '''
    super().__init__()
    self.dropout = nn.Dropout(p=dropout)
    self.batch_first = batch_first
    position = torch.arange(max_len).unsqueeze(1)
    div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
    angles = position * div_term
    pe = torch.zeros(max_len, 1, d_model)
    pe[:, 0, 0::2] = torch.sin(angles)
    # For an odd d_model there is one cosine column less than sine columns
    pe[:, 0, 1::2] = torch.cos(angles)[:, :d_model // 2]
    # Buffer: saved and moved between devices together with the module, but never trained
    self.register_buffer('pe', pe)

  def forward(self, x):
    '''
    Parameters:
    -----------------------------------
    x: torch.Tensor
      Input embedding tensor, (seq_len, batch, d_model) or, if batch_first, (batch, seq_len, d_model)

    Returns:
    -----------------------------------
      Embedding with positional encoding (dropout applied), same shape as x
    '''
    if self.batch_first:
      # (seq_len, 1, d_model) -> (1, seq_len, d_model) so the encoding follows the sequence axis
      x = x + self.pe[:x.size(1)].transpose(0, 1)
    else:
      x = x + self.pe[:x.size(0)]
    return self.dropout(x)

## Transformer Model
4 different models were trained:

1. Classification for sample prediction (CS): The model is trained as a classification problem,
assigning class probabilities to each AV sample, processing the output probabilities to make
predictions, and averaging the predictions of all AV samples of the video.
2. Classification for video prediction (CV): The model is trained as a classification problem,
predicting the quality of the whole video as a single target score.
3. Regression for sample prediction (RS): The model is trained as a regression problem, predicting the quality of each AV sample and averaging the predictions of all AV samples of
the video.
4. Regression for video prediction (RV): The model is trained as a regression problem, predicting the quality of the whole video as a single target score.

In [None]:
# CV
# Input (bs, seq_len, num_feat) --> Embedding (bs, seq_len, embdim) --> Encoder (bs, seq_len, embdim) --> 
# --> LinearMap (bs, seq_len, 1) --> Squeeze (bs, seq_len) --> Decoder (bs, num_classes)

# CS
# Input (bs_smpl, num_feat, 1) --> Embedding (bs_smpl, num_feat, embdim) --> Encoder (bs_smpl, num_feat, embdim) --> 
# --> LinearMap (bs_smpl, num_feat, 1) --> Squeeze (bs_smpl, num_feat) --> Decoder (bs_smpl, num_classes)

# RV
# Input (bs, seq_len, num_feat) --> Embedding (bs, seq_len, embdim) --> Encoder (bs, seq_len, embdim) --> 
# --> LinearMap (bs, seq_len, 1) --> Squeeze (bs, seq_len) --> Decoder (bs, 1)

# RS
# Input (bs_smpl, num_feat, 1) --> Embedding (bs_smpl, num_feat, embdim) --> Encoder (bs_smpl, num_feat, embdim) --> 
# --> LinearMap (bs_smpl, num_feat, 1) --> Squeeze (bs_smpl, num_feat) --> Decoder (bs_smpl, 1)


In [None]:
class AVQTransformer(nn.Module):
  '''
  Transformer for AVQA Pytorch model implementation

  Pipeline: linear embedding -> positional encoding -> Transformer encoder ->
  linear mapping of every position to a scalar -> linear decoder over the positions
  '''
  def __init__(self, clf, framePred, emb_dim, seq_len, num_features, nhead, d_hid,
                nlayers, num_classes = 4, emb_activ = nn.GELU, linear_activ = nn.GELU, dropout = 0.2, batch_first = True):
    '''
    Parameters:
    -----------------------------------
    clf: bool
      If True, model is for classification (CV and CS). Else, model is for regression (RV and RS).
    framePred: bool
      If True, model is for sample prediction (RS and CS). Else, model is for video prediction (CV and RV).
    emb_dim: int
      Dimension of embedding
    seq_len: int
      Length of input sequence
    num_features: int
      Number of features on input data
    nhead: int
      Number of attention heads
    d_hid: int
      Dimension of the hidden FC FFN
    nlayers: int
      Number of encoder layers stacked in series
    num_classes: int, optional
      Number of classes for classification. Ignored if clf is False (the decoder then has a single output). Default: 4
    emb_activ: torch.nn activation function, optional
      Activation function for embedding linear layer. Default: torch.nn.GELU
    linear_activ: torch.nn activation function, optional
      Activation function for linear mapping layer. Default: torch.nn.GELU
    dropout: float, optional
      Dropout for positional encoding and Transformer encoder. Default: 0.2
    batch_first: bool, optional
      If True (default), consider first dimension of input tensor as batch size. Else, second dimension is batch size.
    '''
    super().__init__()
    self.emb_dim = emb_dim
    self.batch_first = batch_first
    # For sample prediction every feature is a position of the sequence, carrying a single value
    emb_in = 1 if framePred else num_features
    self.embedding = nn.Linear(emb_in, emb_dim)
    self.activ1 = emb_activ() if emb_activ else None
    self.pos_encoder = PositionalEncoding(emb_dim, dropout)
    encoder_layers = TransformerEncoderLayer(emb_dim, nhead, d_hid, dropout, batch_first = batch_first)
    self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
    self.linearmap = nn.Linear(emb_dim, 1)
    self.activ2 = linear_activ() if linear_activ else None
    dec_in = num_features if framePred else seq_len
    # Regression predicts a single score, so num_classes only applies to classification
    dec_out = num_classes if clf else 1
    self.decoder = nn.Linear(dec_in, dec_out)

  def forward(self, src, src_mask = None, debug = False):
    '''
    Parameters:
    -----------------------------------
    src: torch.Tensor
      Input tensor to the model
    src_mask: torch.Tensor, optional
      Tensor for masking input. If None (default), no masking is applied
    debug: bool, optional
      If True, print dimensions of the outputs of each layer. Default: False
    
    Returns:
    -----------------------------------
    output: torch.Tensor
      Output of the model: (batch, num_classes) for classification, (batch,) for regression
    '''
    if debug: print(f'\nInput shape: {src.size()}')
    # Embedding
    src = self.embedding(src) * math.sqrt(self.emb_dim)
    if self.activ1: src = self.activ1(src)
    if debug: print(f'Embedding output shape: {src.size()}')
    # Positional Encoding
    if self.batch_first:
      # The positional encoder indexes positions along dim 0 (sequence-first layout), so the
      # batch-first tensor is transposed around the call; otherwise the encoding would follow
      # the position inside the batch instead of the position inside the sequence
      src = self.pos_encoder(src.transpose(0, 1)).transpose(0, 1)
    else:
      src = self.pos_encoder(src)
    if debug: print(f'Positional Encoder output shape: {src.size()}')
    # Encoder
    src = self.transformer_encoder(src, src_mask)
    if debug: print(f'Encoder output shape: {src.size()}')
    # Linear Mapping
    src = self.linearmap(src)
    if self.activ2: src = self.activ2(src)
    src = src.squeeze(-1)
    # The decoder mixes the positions of the sequence, which must therefore be the last dimension
    if not self.batch_first: src = src.transpose(0, 1)
    if debug: print(f'Linear mapping output shape: {src.size()}')
    # Decoder
    output = self.decoder(src)
    # Drop the trailing singleton of single-output decoders without ever touching the batch dimension
    output = output.squeeze(-1)
    if debug: print(f'Model output shape: {output.size()}')
    return output

## Performance metrics

In [None]:
from scipy.stats import pearsonr, spearmanr

class RMSE(nn.Module):
    '''
    Root mean squared error between predictions and targets
    '''
    def __init__(self):
        super().__init__()
        # Mean squared error with the default (mean) reduction
        self.mse = nn.MSELoss()

    def forward(self, predictions, target):
        '''
        Parameters:
        -----------------------------------
        predictions: torch.Tensor
          Tensor of predictions
        target: torch.Tensor
          Tensor of targets
        
        Returns:
        -----------------------------------
          Tensor with the square root of the mean squared error
        '''
        mean_sq_err = self.mse(predictions, target)
        return mean_sq_err.sqrt()

from scipy.stats import pearsonr, spearmanr

class PearsonCorrelation(nn.Module):
  '''
  Pearson correlation coefficient (PCC) metric, computed with scipy on CPU copies of the tensors
  '''
  def __init__(self):
    super().__init__()

  def forward(self, predictions, target):
    '''
    Parameters:
    -----------------------------------
    predictions: torch.Tensor
      Tensor of predictions
    target: torch.Tensor
      Tensor of targets
    
    Returns:
    -----------------------------------
      Tensor with one element: PCC between prediction and target (0 when it is undefined)
    '''
    predictions, target = predictions.detach().to('cpu').numpy(), target.detach().to('cpu').numpy()
    corr = pearsonr(predictions, target)[0]
    # Treat nan values (e.g. constant input) as correlation 0. NaN never compares equal to
    # anything, not even to itself, so it can only be detected with isnan
    if math.isnan(corr):
      corr = 0
    return torch.Tensor([corr])

class SpearmanCorrelation(nn.Module):
  '''
  Spearman rank correlation coefficient (SCC) metric, computed with scipy on CPU copies of the tensors
  '''
  def __init__(self):
    super().__init__()

  def forward(self, predictions, target):
    '''
    Parameters:
    -----------------------------------
    predictions: torch.Tensor
      Tensor of predictions
    target: torch.Tensor
      Tensor of targets
    
    Returns:
    -----------------------------------
      Tensor with one element: SCC between prediction and target (0 when it is undefined)
    '''
    predictions, target = predictions.detach().to('cpu').numpy(), target.detach().to('cpu').numpy()
    corr = spearmanr(predictions, target)[0]
    # Treat nan values (e.g. constant input) as correlation 0. NaN never compares equal to
    # anything, not even to itself, so it can only be detected with isnan
    if math.isnan(corr):
      corr = 0
    return torch.Tensor([corr])

# Training setup

## Train/val/test epoch functions

In [None]:
# Auxiliary functions
def replicateValues(arr, num):
    '''
    Repeat every quality value once per AV sample of the signal and perturb the result with gaussian noise

    Parameters:
    -----------------------------------
    arr: np.ndarray
      Array with values to be replicated
    num: int
      Number of times each value is replicated
    
    Returns:
    -----------------------------------
      Float array of length len(arr)*num in which each value appears num consecutive times,
      plus gaussian noise (mean 0, standard deviation 0.1)
    '''
    repeated = np.repeat(np.asarray(arr, dtype = float), num)
    # Apply gaussian noise in the resultant array
    noise = np.random.normal(0, 0.1, repeated.shape)
    return repeated + noise

def checkCUDAusage():
  '''
  Report the current CUDA memory usage of device 0

  Returns:
  -----------------------------------
  usage: dict
    Allocated, reserved and maximum reserved memory, in GiB rounded to 5 decimal places
  '''
  bytes_per_gib = 1024 ** 3
  readers = (('allocated', torch.cuda.memory_allocated),
             ('reserved', torch.cuda.memory_reserved),
             ('max_reserved', torch.cuda.max_memory_reserved))
  usage = {label: round(reader(0) / bytes_per_gib, 5) for label, reader in readers}
  return usage

In [None]:
def outputProcessing(outputs, seq_len, clf = True, framePred = False):
  '''
  Turn the raw output of the Transformer model into quality predictions

  Parameters:
  -----------------------------------
  outputs: torch.Tensor
    Output of the Transformer model
  seq_len: int
    Number of consecutive outputs that belong to the same signal (used only if framePred is True)
  clf: bool, optional
    If True (default), treat as classification (CV and CS) output. Else, treat as regression (RV and RS) output.
  framePred: bool, optional
    If True, treat as AV sample prediction (CS and RS) output. Else, treat as video prediction (CV and RV) output.
  
  Returns:
  -----------------------------------
  outputs: torch.Tensor
    Processed outputs
  '''
  total = len(outputs)
  # If classification, apply softmax and assign prediction value as sum of class index + max probability + 1.
  if clf:
    class_probs = F.softmax(outputs, dim = 1)
    best = class_probs.max(dim = 1)
    outputs = best.values + best.indices + 1
  if not framePred:
    return outputs
  # If sample prediction, get average prediction for every AV sample of each signal
  signal_means = []
  for start in range(0, total, seq_len):
    signal_means.append(torch.mean(outputs[start:start + seq_len]).item())
  return torch.Tensor(np.array(signal_means))

In [None]:
from copy import copy

def trainEpoch(model, epoch, train_dl, device, criterion, optimizer, scheduler = None,
               clf = True, framePred = True, debug = False, print_CUDA = False, show_progress = True,
               **metrics):
  '''
  Function to perform a training epoch

  Parameters:
  -----------------------------------
  model: torch.nn.Module
    Transformer model class
  epoch: int
    Epoch number (zero-based)
  train_dl: torch.utils.data.DataLoader
    Train data loader
  device: str
    Device to train the model on. Either 'cuda' or 'cpu'.
  criterion: torch.nn.Module
    Loss function of the model (e.g. torch.nn.MSELoss)
  optimizer: torch.optim class
    Optimizer of the model (e.g. torch.optim.Adam)
  scheduler: torch.optim class, optional
    Learning rate scheduler (e.g. torch.optim.StepLR), stepped once at the end of the epoch
  clf: bool, optional
    If True (default), treat as classification (CV and CS) output. Else, treat as regression (RV and RS) output.
  framePred: bool, optional
    If True (default), treat as AV sample prediction (CS and RS) output. Else, treat as video prediction (CV and RV) output.
  debug: bool, optional
    If True, print tensor sizes for the first batch of the first epoch. Default: False
  print_CUDA: bool, optional
    If True, print CUDA memory usage at the end of the epoch. Default: False
  show_progress: bool, optional
    If True (default), show training progress with tqdm bar.
  metrics: keyword arguments
    Metrics to evaluate at the end of the epoch, as name = callable(outputs, labels).
  
  Returns:
  -----------------------------------
  totalLoss: list
    List with loss values for every batch
  totalMetrics: dict
    Dictionary {metric: []} with one (empty) list per metric; per-batch metrics are not computed
  epochMetrics: dict
    Dictionary {metric: value} with the metrics calculations for the entire epoch.
  allOutputs: torch.Tensor
    Concatenated processed outputs of every batch.
  allLabels: torch.Tensor
    Concatenated labels of every batch.
  '''
  # A previous validation pass may have left the model in eval mode; dropout must be active while training
  model.train()
  totalLoss = []
  totalMetrics = {i:[] for i in metrics}
  allOutputs = torch.tensor([])
  allLabels = torch.tensor([])
  epochMetrics = {}
  data_usage = {'allocated': [], 'reserved': [], 'max_reserved': []}
  last_idx = len(train_dl) - 1
  with tqdm(train_dl, unit="batch", disable = not show_progress) as tepoch:
    for idx, (inputs, labels) in enumerate(tepoch):
      if show_progress: tepoch.set_description(f"Epoch {epoch + 1} train")
      # Debug output is restricted to the very first batch, without overwriting the debug argument
      verbose = debug and (idx == 0) and (epoch == 0)
      seq_len = inputs.size(1)
      labels = labels.detach().to('cpu')
      allLabels = torch.cat([allLabels, labels], dim = 0)
      if verbose: print(f'Labels size: {labels.size()}')
      # clone (not a shallow copy) so that editing the target can never change the labels
      target = labels.clone()
      if clf:
        # Assign MOS values to classes
        # Avoid class confusion when MOS is 5 (class would be 5 even though classes should range from 1 to 4)
        target[target == 5] = 4.9999
        target = target.long() - 1
      if framePred:
        # Every feature vector becomes a sequence on its own
        inputs = inputs.reshape(-1, inputs.size(-1), 1)
        if clf:
          # Replicate the class index for all AV samples WITHOUT noise: adding noise and
          # truncating back to integers would move part of the labels to the previous class
          target = target.repeat_interleave(seq_len)
        else:
          # Replicate values for AV sample prediction (with gaussian noise)
          target = torch.FloatTensor(replicateValues(target, seq_len))
      if verbose:
        print(f'Num repetitions: {seq_len}')
        print(f'Target size: {target.size()}\n')
      # Send tensors to the training device
      inputs, target = inputs.to(device), target.to(device)
      # Optimization step
      optimizer.zero_grad()
      outputs = model(inputs, debug = verbose)
      loss = criterion(outputs, target)
      loss.backward()
      optimizer.step()
      # Save loss and output values
      lossValue = loss.item()
      totalLoss.append(lossValue)
      # Detach before accumulating, otherwise the autograd graph of every batch is kept alive until the end of the epoch
      outputs = outputProcessing(outputs = outputs.detach().to('cpu'), seq_len = seq_len, clf = clf, framePred = framePred)
      if verbose: print(f'Outputs processing size: {outputs.size()}')
      allOutputs = torch.cat([allOutputs, outputs], dim = 0)
      # Show loss/metrics progress bar
      if idx == last_idx:
        # If end of epoch, calculate mean loss and metrics
        for name, metric in metrics.items():
          epochMetrics[name] = metric(allOutputs, allLabels).item()
        if show_progress:
          metricsVals = {f'train_{name}': value for name, value in epochMetrics.items()}
          tepoch.set_postfix(train_loss = np.mean(totalLoss), **metricsVals)
      elif show_progress:
        tepoch.set_postfix(train_loss = lossValue)
      # Sample the CUDA memory usage every 10 batches
      if idx % 10 == 9:
        for k, v in checkCUDAusage().items(): data_usage[k].append(v)
  if print_CUDA:
    print(f"Allocated: {data_usage['allocated']}")
    print(f"Reserved: {data_usage['reserved']}")
    print(f"Max reserved: {data_usage['max_reserved']}")
  if scheduler:
    scheduler.step()
  return totalLoss, totalMetrics, epochMetrics, allOutputs, allLabels

def valEpoch(model, epoch, val_dl, device, criterion, clf = True, framePred = True, debug = False, 
             print_CUDA = False, show_progress = True, **metrics):
  '''
  Function to perform a validation epoch

  The whole loop runs under torch.no_grad(): nothing is optimized here, so no autograd 
  graph needs to be built or kept in memory.

  Parameters:
  -----------------------------------
  model: torch.nn.Module
    Transformer model class
  epoch: int
    Epoch number (0-based; displayed as epoch + 1)
  val_dl: torch.utils.data.DataLoader
    Validation data loader
  device: str
    Device to validate the model on. Either 'cuda' or 'cpu'.
  criterion: torch.nn.Module
    Loss function of the model (e.g. torch.nn.MSELoss)
  clf: bool, optional
    If True (default), treat as classification (CV and CS) output. Else, treat as regression (RV and RS) output.
  framePred: bool, optional
    If True (default), treat as AV sample prediction (CS and RS) output. Else, treat as video prediction (CV and RV) output.
  debug: bool, optional
    If True, print debugging for the first batch of the first epoch only. Default: False
  print_CUDA: bool, optional
    If True, print CUDA memory usage at the end of the epoch. Default: False
  show_progress: bool, optional
    If True (default), show validation progress with tqdm bar.
  metrics: keyword arguments
    Metrics to evaluate during validation, given as name = callable(outputs, labels).
  
  Returns:
  -----------------------------------
  totalLoss: list
    List with loss values for every batch
  totalMetrics: dict
    Dictionary {metric: []}. Kept for interface compatibility: the lists are never filled here.
  epochMetrics: dict
    Dictionary {metric: value} with the metrics calculations for the entire epoch.
  allOutputs: torch.Tensor
    Concatenated (processed) outputs of every batch.
  allLabels: torch.Tensor
    Concatenated labels of every batch.
  '''
  totalLoss = []
  totalMetrics = {i:[] for i in metrics}
  allOutputs = torch.tensor([])
  allLabels = torch.tensor([])
  epochMetrics = {}
  data_usage = {'allocated': [], 'reserved': [], 'max_reserved': []}
  with torch.no_grad(), tqdm(val_dl, unit="batch", disable = not show_progress) as tepoch:
    for idx, (inputs, labels) in enumerate(tepoch):
      if show_progress: tepoch.set_description(f"Epoch {epoch + 1} validation")
      seq_len = inputs.size(1)
      allLabels = torch.cat([allLabels, labels], dim = 0)
      # clone() gives the target its own buffer, so the in-place edit below never touches `labels`
      target = labels.clone()
      if clf:
        # Assign MOS values to classes. A MOS of exactly 5 would truncate to one class too many,
        # so it is nudged just below 5 before truncation; the -1 makes the class indexes 0-based
        target[target == 5] = 4.9999
        target = target.long() - 1
        if framePred:
          # Replicate values for AV sample prediction
          inputs = inputs.reshape(-1, inputs.size(-1), 1)
          target = torch.LongTensor(replicateValues(target, seq_len))
      elif framePred:
        # Replicate values for AV sample prediction
        inputs = inputs.reshape(-1, inputs.size(-1), 1)
        target = torch.FloatTensor(replicateValues(target, seq_len))
      # Send tensors to the evaluation device
      inputs, target = inputs.to(device), target.to(device)
      # Debug output is only wanted for the very first batch of the very first epoch
      debugBatch = debug and (idx == 0) and (epoch == 0)
      outputs = model(inputs, debug = debugBatch)
      loss = criterion(outputs, target)
      # Bring everything back to the CPU to save device memory
      inputs, labels, loss, outputs, target = [t.to('cpu') for t in (inputs, labels, loss, outputs, target)]
      # Save loss and output values
      totalLoss.append(loss.item())
      outputs = outputProcessing(outputs = outputs, seq_len = seq_len, clf = clf, framePred = framePred)
      if debugBatch: print(f'Outputs processing size: {outputs.size()}')
      allOutputs = torch.cat([allOutputs, outputs.to('cpu')], dim = 0)
      # Show loss/metrics progress bar
      if idx == len(val_dl) - 1:
        # If end of epoch, calculate mean loss and metrics over all the batches
        avgValLoss = np.mean(totalLoss)
        metricsVals = {}
        for name, metric in metrics.items():
          metricValue = metric(allOutputs, allLabels).to('cpu').item()
          metricsVals[f'val_{name}'] = metricValue
          epochMetrics[name] = metricValue
        if show_progress: tepoch.set_postfix(Val_Loss = avgValLoss, **metricsVals)
      else:
        if show_progress: tepoch.set_postfix(val_loss = loss.item())
      # Memory usage is sampled every batch but only recorded every 10th one
      cuda_usage = checkCUDAusage()
      if idx % 10 == 9:
        for k,v in cuda_usage.items(): data_usage[k].append(v)
      # Drop the per-batch references before the next batch is loaded
      del loss, outputs, inputs, labels, target
  if print_CUDA:
    print(f"Allocated: {data_usage['allocated']}")
    print(f"Reserved: {data_usage['reserved']}")
    print(f"Max reserved: {data_usage['max_reserved']}")
  return totalLoss, totalMetrics, epochMetrics, allOutputs, allLabels



## Pytorch Trainer

In [None]:
class PytorchTrainer():
  ''' Train a PyTorch model and record its metrics for every epoch'''
  def __init__(self):
    # One {metric: value} dictionary is appended to each list per epoch
    self.epochMetrics = {'Train': [], 'Val': []}

  @staticmethod
  def _isBetter(candidate, best, highestBest):
    '''Return True if `candidate` should replace `best` (None means there is no best yet)'''
    if best is None:
      return True
    # x != x is only True for NaN: a NaN candidate never replaces an existing best...
    if candidate != candidate:
      return False
    # ...while a NaN best is replaced by any valid value
    if best != best:
      return True
    return candidate >= best if highestBest else candidate <= best
  
  def train_validate(self, model, epochs, train_dl, val_dl, device, criterion, optimizer, savePath, 
                     scheduler = None, clf = False, framePred = False, debug = False, 
                     saveBest = 'PCC', highestBest = True, print_CUDA = False, show_progress = True,
                     **metrics):
    '''
    Function to perform training and validation of the model

    After every epoch the metrics are appended to self.epochMetrics. The epoch with the best
    validation metric is stored in self.best_state_dict and saved to 'best.pt'; the final epoch
    is stored in self.last_state_dict and saved to 'last.pt'.
    NOTE(review): the model weights are not part of these dictionaries (the 'model_state_dict'
    entry is commented out), only the epoch number and its metrics.

    Parameters:
    -----------------------------------
    model: torch.nn.Module
      Transformer model class
    epochs: int
      Number of epochs (at least 1)
    train_dl: torch.utils.data.DataLoader
      Train data loader
    val_dl: torch.utils.data.DataLoader
      Validation data loader
    device: str
      Device to train and validate the model on. Either 'cuda' or 'cpu'.
    criterion: torch.nn.Module
      Loss function of the model (e.g. torch.nn.MSELoss)
    optimizer: torch.optim class
      Optimizer of the model (e.g. torch.optim.Adam)
    savePath: str
      Directory to save results of training and validation. Created if it does not exist.
    scheduler: torch.optim class, optional
      Learning rate scheduler (e.g. torch.optim.StepLR)
    clf: bool, optional
      If True, treat as classification (CV and CS) output. Else (default), treat as regression (RV and RS) output.
    framePred: bool, optional
      If True, treat as AV sample prediction (CS and RS) output. Else (default), treat as video prediction (CV and RV) output.
    debug: bool, optional
      If True, print debugging. Default: False
    saveBest: str, optional
      Name of the metric to consider when evaluating best model. Must be one of the names given in metrics. Default: 'PCC'
    highestBest: bool, optional
      If True (default), model is considered best if metric (defined in saveBest) is the highest. If False, if its the lowest.
    print_CUDA: bool, optional
      If True, print CUDA memory usage at the end of the epoch. Default: False
    show_progress: bool, optional
      If True (default), show training and validation progress with tqdm bar.
    metrics: keyword arguments
      Metrics to evaluate during training and validation, given as name = metric.

    Raises:
    -----------------------------------
    ValueError
      If epochs is lower than 1.
    KeyError
      If saveBest is not one of the metrics names.
    '''
    # Fail before any training is done instead of after the first (possibly long) epoch
    if epochs < 1:
      raise ValueError(f'epochs should be at least 1, got {epochs}')
    if saveBest not in metrics:
      raise KeyError(f"saveBest metric '{saveBest}' is not among the given metrics: {list(metrics)}")
    os.makedirs(savePath, exist_ok = True)
    # No best metric yet: the first epoch is always recorded, even if its metric is NaN
    best_metric = None
    for epoch in range(epochs):
      # Train and validate model, saving the results
      t0 = time()
      model.train()
      train_loss, train_metrics, epoch_train_metrics, train_outputs, train_labels = trainEpoch(model, epoch, train_dl, device, 
                                                                                               criterion, optimizer, scheduler, 
                                                                                               clf, framePred, debug, 
                                                                                               print_CUDA, show_progress, 
                                                                                               **metrics)
      torch.cuda.empty_cache() 
      self.epochMetrics['Train'].append(epoch_train_metrics)
      model.eval()
      val_loss, val_metrics, epoch_val_metrics, val_outputs, val_labels = valEpoch(model, epoch, val_dl, device, criterion, 
                                                                                  clf, framePred, debug, print_CUDA, 
                                                                                   show_progress, **metrics)
      
      torch.cuda.empty_cache()
      # Check if resultant metric is the best so far (ties go to the latest epoch)
      checkMetric = epoch_val_metrics[saveBest]
      if self._isBetter(checkMetric, best_metric, highestBest):
        best_metric = checkMetric
        self.best_state_dict = {
              'epoch': epoch+1,
              # 'model_state_dict': {k: v.detach().to('cpu') for k, v in model.state_dict().items()},
              'epoch_val_metrics': epoch_val_metrics,
              'epoch_train_metrics': epoch_train_metrics
              }
        torch.save(self.best_state_dict, os.path.join(savePath, 'best.pt'))
      self.epochMetrics['Val'].append(epoch_val_metrics)
      if print_CUDA:
        print('='*200)
        print("Epoch allocated: %fGB"%(torch.cuda.memory_allocated(0)/1024/1024/1024))
        print("Epoch reserved: %fGB"%(torch.cuda.memory_reserved(0)/1024/1024/1024))
        print("Epoch memory_reserved: %fGB"%(torch.cuda.max_memory_reserved(0)/1024/1024/1024))
        print('='*200)
        print('='*200)
      t1 = time()
      if (not show_progress) and (epoch == 0): 
        print(f'Time for 1st epoch: {elapsedTime(t0, t1)}')
        # Scaling both timestamps by `epochs` scales their difference by `epochs`
        print(f'Estimated time for {epochs} epochs: {elapsedTime(epochs*t0, epochs*t1)}')
    # Results of the final epoch, whether or not it was the best one
    self.last_state_dict = {
              'epoch': epoch+1,
              # 'model_state_dict': {k: v.detach().to('cpu') for k, v in model.state_dict().items()},
              'epoch_val_metrics': epoch_val_metrics,
              'epoch_train_metrics': epoch_train_metrics
              }
    torch.save(self.last_state_dict, os.path.join(savePath, 'last.pt'))


## Progress plot functions

In [None]:
def plot_epoch_metrics(listEpochMetrics, metrics, prefixs, colors, plot_train = True, plot_val = True, 
                       num_div = 8, **kwargs):
  '''
  Plot metrics results per epoch, one subplot per metric and one color per model

  Parameters:
  -----------------------------------
  listEpochMetrics: list
    List of dictionaries {'Train': [...], 'Val': [...]}, one per model, where each list holds
    one {metric name: value} dictionary per epoch.
  metrics: dict
    Dictionary {metric name: metric class}. Only the names are used here.
  prefixs: list
    List with names of models being compared
  colors: list
    List of colors, one per model (train and validation lines of a model share the color)
  plot_train: bool, optional
    If True (default), plot training metrics
  plot_val: bool, optional
    If True (default), plot validation metrics
  num_div: int, optional
    Number of y-axis divisions used for the automatic y ticks. Default: 8
  kwargs: dict
    Optional arguments for plot: figsize, train_linestyle, val_linestyle, legendloc, suptitle,
    savePath, xticks, yticks, rotationx, rotationy, and the per-metric variants
    '<metric>_xticks', '<metric>_yticks', '<metric>_rotationx', '<metric>_rotationy', '<metric>_grid'.
  '''
  assert plot_train or plot_val, 'At leat one of plot_train or plot_val should be set to True'
  fig, ax = plt.subplots(len(metrics), 1, figsize = kwargs.get('figsize', (8, 8)))
  names = list(metrics.keys())
  # With a single metric `ax` is used as one Axes object rather than a sequence of them
  axes = [ax] if len(names) == 1 else ax
  for metric, axis in zip(names, axes):
    # Coordinates of every plotted line, used afterwards to choose the axis ticks
    plottedXs, plottedYs = [], []
    for idx, epochMetrics in enumerate(listEpochMetrics):
      y = [i[metric] for i in epochMetrics['Train']]
      val_y = [i[metric] for i in epochMetrics['Val']]
      x = np.arange(1, len(y) + 1)
      val_x = np.arange(1, len(val_y) + 1)

      if plot_train:
        axis.plot(x, y, color = colors[idx], label = f'{prefixs[idx]} Train', 
                  linestyle = kwargs.get('train_linestyle', 'solid'))
        plottedXs.append(x)
        plottedYs.append(y)
      if plot_val:
        axis.plot(val_x, val_y, color = colors[idx], label = f'{prefixs[idx]} Val', 
                  linestyle = kwargs.get('val_linestyle', 'dashed'))
        plottedXs.append(val_x)
        plottedYs.append(val_y)
    # The leading empty array keeps concatenate valid even when nothing was plotted
    allXs = np.concatenate([np.array([])] + plottedXs)
    allYs = np.concatenate([np.array([])] + plottedYs)

    # Automatic y ticks: num_div steps over the data range, with half a step of margin below.
    # A flat (or empty / NaN) curve has no usable range, so matplotlib's own ticks are kept
    ticks = None
    if allYs.size > 0:
      span = allYs.max() - allYs.min()
      if span > 0:
        step = span/num_div
        ticks = np.arange(allYs.min() - step/2, allYs.max() + step/2, step)

    axis.set_title(f'{metric} per epoch')
    axis.set_xlabel('Epoch')
    axis.set_ylabel(metric)
    if allXs.size > 0:
      lastEpoch = allXs.max()
      xstep = lastEpoch//10 if lastEpoch > 10 else 1
      # + 1 so that the last epoch can also get a tick (arange excludes its stop value)
      axis.set_xticks(np.arange(1, lastEpoch + 1, xstep))
    # Metric-specific options take precedence over the general ones
    if f'{metric}_xticks' in kwargs:
      axis.set_xticks(kwargs[f'{metric}_xticks'])
    elif 'xticks' in kwargs:
      axis.set_xticks(kwargs['xticks'])
    if f'{metric}_rotationx' in kwargs:
      axis.set_xticklabels(axis.get_xticks(), rotation = kwargs[f'{metric}_rotationx'])
    elif 'rotationx' in kwargs:
      axis.set_xticklabels(axis.get_xticks(), rotation = kwargs['rotationx'])
    if f'{metric}_yticks' in kwargs:
      axis.set_yticks(kwargs[f'{metric}_yticks'])
    elif 'yticks' in kwargs:
      axis.set_yticks(kwargs['yticks'])
    elif ticks is not None:
      axis.set_yticks(ticks)
      axis.set_yticklabels([round(i, 2) for i in ticks])
    if f'{metric}_rotationy' in kwargs:
      axis.set_yticklabels(axis.get_yticks(), rotation = kwargs[f'{metric}_rotationy'])
    elif 'rotationy' in kwargs:
      axis.set_yticklabels(axis.get_yticks(), rotation = kwargs['rotationy'])
    axis.legend(loc = kwargs.get('legendloc', 'best'))
    if kwargs.get(f'{metric}_grid', False):
      axis.grid()
  # The figure title is set once for the whole figure
  if kwargs.get('suptitle', False):
    plt.suptitle(kwargs['suptitle'])
  # pad is passed by keyword: it is a keyword-only argument in recent matplotlib versions
  plt.tight_layout(pad = 4)
  if kwargs.get('savePath', False):
    plt.savefig(kwargs['savePath'])
  plt.show()

## Training data setup

In [None]:
class TrainingDataSetup():
  '''Setup data for training'''
  def __init__(self, dataParams):
    '''
    Parameters:
    -----------------------------------
    dataParams: dict
      Parameters for data preparation (train_size, val_size, test_size, transform, ...)
    '''
    self.dataParams = dataParams
    # Set to True by getData once the dataloaders exist
    self.gotData = False
    
  def getData(self, X, y, train_val_test_data = None, sampling_rate = 1, plot = False, figsize = (6, 4)):
    '''
    Function to select the data to use and create its dataloaders

    Two sets of dataloaders are created: train_dl/val_dl(/test_dl), with batch size 
    dataParams['batch_size'], and train_dl_smpl/val_dl_smpl(/test_dl_smpl), with batch size
    dataParams['bs_smpl']. The test dataloaders only exist when dataParams['test_size'] != 0.

    Parameters:
    -----------------------------------
    X: array-like
      Features. Must support indexing with an array of indexes (e.g. numpy array or torch.Tensor)
    y: array-like
      Labels, with the same length as X
    train_val_test_data: list, optional
      List with arrays train, test and validation arrays, with format [X_train, X_val, X_test, y_train, y_val, y_test], if
      using test data, else [X_train, X_val, y_train, y_val]
    sampling_rate: int, optional
      Rate for downsampling the 3-dimensional tensors of train_val_test_data along their second 
      dimension (e.g. for sampling_rate=30, only one sample is retained every 30 samples). Default: 1
    plot: bool, optional
      If True, plot labels train and test distribution. Default: False
    figsize: tuple, optional
      Figure size of the plotted distribution (only used if plot=True)
    '''
    assert len(X) == len(y), 'Lengths of X and y should be equal'
    params = self.dataParams
    train_size, val_size, test_size = [params['train_size'], params['val_size'], 
                                       params['test_size']]
    if not params['allData']:
      if params['idxs'] is None:
        if not all(isinstance(size, (int, np.integer)) for size in (train_size, val_size, test_size)):
          raise ValueError('Train, val and test sizes should be integers')
        total = train_size + val_size + test_size
        # Draw distinct indexes, so that the same item cannot end up in more than one split.
        # Repetition is only allowed when more items are requested than are available
        self.idxs = np.random.choice(len(X), size = total, replace = total > len(X))
      else:
        self.idxs = params['idxs']
      self.X, self.y = X[self.idxs], y[self.idxs]
    else:
      self.X, self.y = copy(X), copy(y)

    self.dataHolder = AVQTransformerData(params['matPath'], params['mosPath'], params['featuresName'], 
                                          params['val_size'], params['test_size'], params['transform'], 
                                          params['shuffle'], params['debug'])
    if train_val_test_data:
      # Only the 3-dimensional tensors are downsampled; the others are passed through untouched
      train_val_test_data = [i[:, ::sampling_rate, :] if (i.dim() == 3) else i for i in train_val_test_data]
    loaderArgs = (params['shuffle'], params['read_data'], self.X, self.y)
    hasTest = params['test_size'] != 0
    loaders = self.dataHolder.createDataloaders(params['batch_size'], *loaderArgs, train_val_test_data)
    if hasTest:
      self.train_dl, self.val_dl, self.test_dl = loaders
    else:
      self.train_dl, self.val_dl = loaders
    # The second set reuses the split stored in the data holder
    # NOTE(review): presumably populated by the createDataloaders call above - confirm
    loaders = self.dataHolder.createDataloaders(params['bs_smpl'], *loaderArgs, 
                                                self.dataHolder.train_val_test_data)
    if hasTest:
      self.train_dl_smpl, self.val_dl_smpl, self.test_dl_smpl = loaders
    else:
      self.train_dl_smpl, self.val_dl_smpl = loaders
    if plot:
      self.plotLabelsDistribution(figsize = figsize)
    self.gotData = True

  def plotLabelsDistribution(self, figsize = (6, 4)):
    '''
    Function to plot the labels distribution of every split (train, validation and, if used, test)

    Parameters:
    -----------------------------------
    figsize: tuple, optional
      Figure size of the plot
    '''
    hasTest = self.dataParams['test_size'] != 0
    fig, ax = plt.subplots(3 if hasTest else 2, 1, figsize = figsize)
    # Histogram plot of the labels in every split
    sns.histplot(self.dataHolder.y_train, ax = ax[0])
    ax[0].set_title('Train')
    sns.histplot(self.dataHolder.y_val, ax = ax[1])
    ax[1].set_title('Validation')
    if hasTest:
      sns.histplot(self.dataHolder.y_test, ax = ax[2])
      ax[2].set_title('Test')
    # pad is passed by keyword: it is a keyword-only argument in recent matplotlib versions
    plt.tight_layout(pad = 2)
    plt.show()

## Training setup

In [None]:
class TrainingSetup(TrainingDataSetup):
  '''Setup training for the 4 model configurations'''
  def __init__(self, modelVidParams, modelSmplParams, dataParams, metrics, device):
    '''
    Keep the configuration of the four model variants and create them right away

    Parameters:
    -----------------------------------
    modelVidParams: dict
      Parameters of the Transformer model for video prediction (CV and RV).
    modelSmplParams: dict
      Parameters of the Transformer model for sample prediction (CS and RS).
    dataParams: dict
      Parameters for dataloaders creation
    metrics: dict
      Dictionary {metric: metric class} for metrics used in evaluation
    device: str
      Device to train the model on. Either 'cuda' or 'cpu'.
    '''
    # The data parameters are handled by the parent class
    super().__init__(dataParams)
    self.modelVidParams, self.modelSmplParams = modelVidParams, modelSmplParams
    self.device, self.metrics = device, metrics
    # The models are built from the parameters stored above
    self.createModels()

  def createModels(self):
    '''Build the parameters and the model of each configuration, in the order CV, CS, RV, RS'''
    # None of the models has been fitted yet
    self.CVFitted = self.CSFitted = self.RVFitted = self.RSFitted = False
    # (name, classification?, sample prediction?, user parameters) of every configuration
    configurations = (('CV', True, False, self.modelVidParams),
                      ('CS', True, True, self.modelSmplParams),
                      ('RV', False, False, self.modelVidParams),
                      ('RS', False, True, self.modelSmplParams))
    for name, isClf, isSample, userParams in configurations:
      # Classification uses 4 classes, regression a single output
      modelParams = {'clf': isClf, 'framePred': isSample, 'num_classes': 4 if isClf else 1}
      # User parameters are applied last, so they override the values above
      modelParams.update(userParams)
      # Stored as self.<name>Params and self.model<name> (e.g. self.CVParams, self.modelCV)
      setattr(self, f'{name}Params', modelParams)
      setattr(self, f'model{name}', AVQTransformer(**modelParams))

  def train_val_dl(self, framePred):
    '''
    Select the training and validation data loaders that match the prediction type
    
    Parameters:
    -----------------------------------
    framePred: bool
      If True, return the loaders for AV sample prediction (CS and RS). If False, the ones for 
      video prediction (CV and RV)
    
    Returns:
    -----------------------------------
    train loader: torch.utils.data.DataLoader
      self.train_dl_smpl if framePred, else self.train_dl
    validation loader: torch.utils.data.DataLoader
      self.val_dl_smpl if framePred, else self.val_dl

    Raises:
    -----------------------------------
    ValueError
      If the data has not been loaded yet (self.gotData is False)
    '''
    if not self.gotData:
      raise ValueError('Data not loaded yet')
    if framePred:
      return self.train_dl_smpl, self.val_dl_smpl
    return self.train_dl, self.val_dl

  def setupTrainer(self, model, criterion, scheduler = None, schedulerParams = None, optimizerLR = 1e-3,
                   epochs = None):
    '''
    Setup optimizer and scheduler for model

    The model is moved to self.device and gets an Adam optimizer. 

    Parameters:
    -----------------------------------
    model: torch.nn.Module
      Transformer model class
    criterion: torch.nn.Module
      Loss function of the model (e.g. torch.nn.MSELoss). Not used by this function.
    scheduler: any, optional
      Only its truth value is used: if truthy, a torch.optim.lr_scheduler.StepLR is created, 
      whatever was passed. If falsy (default: None), it is returned unchanged.
    schedulerParams: dict, optional
      Keyword arguments of StepLR. If empty or None, the learning rate is halved (gamma = 0.5) 
      every epochs//5 epochs (at least every 1 epoch).
    optimizerLR: float
      Initial learning rate of the optimizer
    epochs: int, optional
      Number of epochs of the training, only needed for the default scheduler parameters. 
      If None (default), a global variable named `epochs` is used when it exists (legacy behavior).

    Returns:
    -----------------------------------
    optimizer: torch.optim class
      Optimizer of the model
    scheduler: torch.optim class, optional
      Learning rate scheduler of the model

    Raises:
    -----------------------------------
    ValueError
      If the default scheduler parameters are needed and the number of epochs is unknown.
    '''
    model.to(self.device)
    optimizer = torch.optim.Adam(model.parameters(), lr = optimizerLR)
    if scheduler:
      if not schedulerParams:
        if epochs is None:
          # Legacy fallback: this function used to read a notebook-level `epochs` variable
          epochs = globals().get('epochs')
        if epochs is None:
          raise ValueError('Either schedulerParams or epochs is needed to create the scheduler')
        # step_size = 0 is not a valid period, which would happen for less than 5 epochs
        schedulerParams = {'step_size': max(1, epochs//5), 'gamma': 0.5}
      scheduler = torch.optim.lr_scheduler.StepLR(optimizer, **schedulerParams)
    return optimizer, scheduler

  def CVTrain(self, epochs, savePath, debug = True, scheduler = None, schedulerParams = None,
              optimizerLR = 1e-3, saveBest = 'PCC', highestBest = True, print_CUDA = False, 
              show_progress = True):
    '''
    Train CV model (classification, video prediction) with cross-entropy loss.
    The trainer is kept in self.trainerCV.

    Parameters:
    -----------------------------------
    epochs: int
      Number of epochs
    savePath: str
      Path to save results of training and validation
    debug: bool, optional
      If True (default), print debugging
    scheduler: any, optional
      If truthy, a learning rate scheduler is created by self.setupTrainer. Default: None
    schedulerParams: dict, optional
      Parameters of the scheduler. If empty or None, the learning rate is halved every 
      epochs//5 epochs (at least every 1 epoch).
    optimizerLR: float
      Initial learning rate of the optimizer
    saveBest: str, optional
      Metric to consider when evaluating best model. Either 'PCC' (default), 'SCC' or 'RMSE'.
    highestBest: bool, optional
      If True (default), model is considered best if metric (defined in saveBest) is the highest. If False, if its the lowest.
    print_CUDA: bool, optional
      If True, print CUDA memory usage at the end of the epoch. Default: False
    show_progress: bool, optional
      If True (default), show training and validation progress with tqdm bar.
    '''
    # Get data loaders
    if not self.gotData:
      raise ValueError('Data not loaded yet')
    train_dl, val_dl = self.train_val_dl(framePred = False)
    # The default scheduler period is derived from this call's epochs, instead of relying on 
    # a global variable with that name
    if scheduler and not schedulerParams:
      schedulerParams = {'step_size': max(1, epochs//5), 'gamma': 0.5}
    # Setup loss, optimizer and scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer, scheduler = self.setupTrainer(self.modelCV, criterion, scheduler, schedulerParams, optimizerLR)
    # Train model
    self.trainerCV = PytorchTrainer()
    self.trainerCV.train_validate(self.modelCV, epochs, train_dl, val_dl, self.device,
                                  criterion, optimizer, savePath, scheduler, self.CVParams['clf'], 
                                  self.CVParams['framePred'], debug, saveBest, highestBest, print_CUDA,
                                  show_progress, **self.metrics)

  def CSTrain(self, epochs, savePath, debug = True, scheduler = None, schedulerParams = None,
              optimizerLR = 1e-3, saveBest = 'PCC', highestBest = True, print_CUDA = False, 
              show_progress = True):
    '''
    Train CS model (classification, AV sample prediction) with cross-entropy loss.
    The trainer is kept in self.trainerCS.

    Parameters:
    -----------------------------------
    epochs: int
      Number of epochs
    savePath: str
      Path to save results of training and validation
    debug: bool, optional
      If True (default), print debugging
    scheduler: any, optional
      If truthy, a learning rate scheduler is created by self.setupTrainer. Default: None
    schedulerParams: dict, optional
      Parameters of the scheduler. If empty or None, the learning rate is halved every 
      epochs//5 epochs (at least every 1 epoch).
    optimizerLR: float
      Initial learning rate of the optimizer
    saveBest: str, optional
      Metric to consider when evaluating best model. Either 'PCC' (default), 'SCC' or 'RMSE'.
    highestBest: bool, optional
      If True (default), model is considered best if metric (defined in saveBest) is the highest. If False, if its the lowest.
    print_CUDA: bool, optional
      If True, print CUDA memory usage at the end of the epoch. Default: False
    show_progress: bool, optional
      If True (default), show training and validation progress with tqdm bar.
    '''
    # Get data loaders (raises ValueError if the data was not loaded yet)
    train_dl, val_dl = self.train_val_dl(framePred = True)
    # The default scheduler period is derived from this call's epochs, instead of relying on 
    # a global variable with that name
    if scheduler and not schedulerParams:
      schedulerParams = {'step_size': max(1, epochs//5), 'gamma': 0.5}
    # Setup loss, optimizer and scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer, scheduler = self.setupTrainer(self.modelCS, criterion, scheduler, schedulerParams, optimizerLR)
    # Train model
    self.trainerCS = PytorchTrainer()
    self.trainerCS.train_validate(self.modelCS, epochs, train_dl, val_dl, self.device,
                                  criterion, optimizer, savePath, scheduler, self.CSParams['clf'], 
                                  self.CSParams['framePred'], debug, saveBest, highestBest, print_CUDA,
                                  show_progress, **self.metrics)

  def RVTrain(self, epochs, savePath, debug = True, scheduler = None, schedulerParams = None,
              optimizerLR = 1e-3, saveBest = 'PCC', highestBest = True, print_CUDA = False, 
              show_progress = True):
    '''
    Train RV model (regression, video prediction) with mean squared error loss.
    The trainer is kept in self.trainerRV.

    Parameters:
    -----------------------------------
    epochs: int
      Number of epochs
    savePath: str
      Path to save results of training and validation
    debug: bool, optional
      If True (default), print debugging
    scheduler: any, optional
      If truthy, a learning rate scheduler is created by self.setupTrainer. Default: None
    schedulerParams: dict, optional
      Parameters of the scheduler. If empty or None, the learning rate is halved every 
      epochs//5 epochs (at least every 1 epoch).
    optimizerLR: float
      Initial learning rate of the optimizer
    saveBest: str, optional
      Metric to consider when evaluating best model. Either 'PCC' (default), 'SCC' or 'RMSE'.
    highestBest: bool, optional
      If True (default), model is considered best if metric (defined in saveBest) is the highest. If False, if its the lowest.
    print_CUDA: bool, optional
      If True, print CUDA memory usage at the end of the epoch. Default: False
    show_progress: bool, optional
      If True (default), show training and validation progress with tqdm bar.
    '''
    # Get data loaders (raises ValueError if the data was not loaded yet)
    train_dl, val_dl = self.train_val_dl(framePred = False)
    # The default scheduler period is derived from this call's epochs, instead of relying on 
    # a global variable with that name
    if scheduler and not schedulerParams:
      schedulerParams = {'step_size': max(1, epochs//5), 'gamma': 0.5}
    # Setup loss, optimizer and scheduler
    criterion = nn.MSELoss()
    optimizer, scheduler = self.setupTrainer(self.modelRV, criterion, scheduler, schedulerParams, optimizerLR)
    # Train model
    self.trainerRV = PytorchTrainer()
    self.trainerRV.train_validate(self.modelRV, epochs, train_dl, val_dl, self.device,
                                  criterion, optimizer, savePath, scheduler, self.RVParams['clf'], 
                                  self.RVParams['framePred'], debug, saveBest, highestBest, print_CUDA,
                                  show_progress, **self.metrics)

  def RSTrain(self, epochs, savePath, debug = True, scheduler = None, schedulerParams = None,
              optimizerLR = 1e-3, saveBest = 'PCC', highestBest = True, print_CUDA = False, 
              show_progress = True):
    '''
    Train RS model (regression, AV sample prediction) with mean squared error loss.
    The trainer is kept in self.trainerRS.

    Parameters:
    -----------------------------------
    epochs: int
      Number of epochs
    savePath: str
      Path to save results of training and validation
    debug: bool, optional
      If True (default), print debugging
    scheduler: any, optional
      If truthy, a learning rate scheduler is created by self.setupTrainer. Default: None
    schedulerParams: dict, optional
      Parameters of the scheduler. If empty or None, the learning rate is halved every 
      epochs//5 epochs (at least every 1 epoch).
    optimizerLR: float
      Initial learning rate of the optimizer
    saveBest: str, optional
      Metric to consider when evaluating best model. Either 'PCC' (default), 'SCC' or 'RMSE'.
    highestBest: bool, optional
      If True (default), model is considered best if metric (defined in saveBest) is the highest. If False, if its the lowest.
    print_CUDA: bool, optional
      If True, print CUDA memory usage at the end of the epoch. Default: False
    show_progress: bool, optional
      If True (default), show training and validation progress with tqdm bar.
    '''
    # Get data loaders (raises ValueError if the data was not loaded yet)
    train_dl, val_dl = self.train_val_dl(framePred = True)
    # The default scheduler period is derived from this call's epochs, instead of relying on 
    # a global variable with that name
    if scheduler and not schedulerParams:
      schedulerParams = {'step_size': max(1, epochs//5), 'gamma': 0.5}
    # Setup loss, optimizer and scheduler
    criterion = nn.MSELoss()
    optimizer, scheduler = self.setupTrainer(self.modelRS, criterion, scheduler, schedulerParams, optimizerLR)
    # Train model
    self.trainerRS = PytorchTrainer()
    self.trainerRS.train_validate(self.modelRS, epochs, train_dl, val_dl, self.device,
                                  criterion, optimizer, savePath, scheduler, self.RSParams['clf'], 
                                  self.RSParams['framePred'], debug, saveBest, highestBest, print_CUDA,
                                  show_progress, **self.metrics)
  
  def train(self, epochs, savePath, debug = True, scheduler = None, schedulerParams = None,
            optimizerLR = 1e-3, saveBest = 'PCC', highestBest = True, print_CUDA = False,
            saveEpochMetrics = True, show_progress = True):
    '''
    Train CV, CS, RV and RS models, one after another

    Each model gets its own '<name>Model/' folder inside savePath, which is passed
    through cleanDir before the model is trained.

    Parameters:
    -----------------------------------
    epochs: int
      Number of epochs
    savePath: str
      Path to save results of training and validation
    debug: bool, optional
      If True (default), show debugging of models' layers output shapes
    scheduler: learning rate scheduler class, optional
      Learning rate scheduler (e.g. torch.optim.lr_scheduler.StepLR)
    schedulerParams: dict, optional
      Parameters of the scheduler
    optimizerLR: float
      Initial learning rate of the optimizer
    saveBest: str, optional
      Metric to consider when evaluating best model. Either 'PCC' (default), 'SCC' or 'RMSE'.
    highestBest: bool, optional
      If True (default), model is considered best if metric (defined in saveBest) is the highest. If False, if its the lowest.
    print_CUDA: bool, optional
      If True, print CUDA memory usage at the end of the epoch. Default: False
    saveEpochMetrics: bool, optional
      If True (default), save metrics calculated every epoch to a pickle file (with path defined on savePath)
    show_progress: bool, optional
      If True (default), show training and validation progress with tqdm bar.
    '''
    # (setup name, training method) pairs, in the order the models are trained
    stages = [('CV', self.CVTrain), ('CS', self.CSTrain), ('RV', self.RVTrain), ('RS', self.RSTrain)]
    separator = '\n' + '='*100 + '\n' + '='*100 + '\n'
    for position, (name, trainStage) in enumerate(stages):
      isLast = position == len(stages) - 1
      t0 = time()
      print(f'Training {name}')
      if show_progress: print('\n')
      modelPath = os.path.join(savePath, f'{name}Model/')
      cleanDir(modelPath)
      trainStage(epochs, modelPath, debug, scheduler, schedulerParams,
                 optimizerLR, saveBest, highestBest, print_CUDA, show_progress)
      # The trainer attribute is read only after its stage has run
      trainer = getattr(self, f'trainer{name}')
      printDict = filterDict(trainer.best_state_dict, ["epoch", "epoch_val_metrics"])
      # Save metrics calculated every epoch
      if saveEpochMetrics:
        save_object(trainer.epochMetrics, os.path.join(modelPath, f'{name}EpochMetrics.pkl'))
      t1 = time()
      print(f'Finished {name} training with best metrics:\n{printDict}')
      if show_progress:
        # Progress bars already show timing, so only a separator is printed;
        # the closing message follows the last model
        print(separator + ('Finished training!' if isLast else ''))
      else:
        print(f'Time elapsed: {elapsedTime(t0, t1)}' + '\n' + '='*100)

  def load_trainedData(self, epochMetricsDict, bestStatesDict):
    '''
    Restore epoch metrics and best-epoch states of previously trained models

    Parameters:
    -----------------------------------
    epochMetricsDict: dict
      For each of 'CV', 'CS', 'RV' and 'RS', the value handed to load_object
      to read that setup's metrics for every epoch
    bestStatesDict: dict
      For each of 'CV', 'CS', 'RV' and 'RS', the value handed to torch.load
      to read that setup's state dictionary on the best epoch
    '''
    setups = ('CV', 'CS', 'RV', 'RS')
    # Three passes: create every trainer, then load every metrics object, then every state
    for setup in setups:
      setattr(self, f'trainer{setup}', PytorchTrainer())
    for setup in setups:
      getattr(self, f'trainer{setup}').epochMetrics = load_object(epochMetricsDict[setup])
    for setup in setups:
      getattr(self, f'trainer{setup}').best_state_dict = torch.load(bestStatesDict[setup])

  def testModel(self, state_dicts, device, debug = False):
    '''
    Test trained models

    Parameters:
    -----------------------------------
    state_dicts: dict
      Models' state dictionaries
    device: str
      Device to test the models on. Either 'cuda' or 'cpu'
    debug: bool, optional
      If True, show debugging of models' layers output shapes. Default: False
    '''
    for modelName, model in {'CV': self.modelCV, 'CS': self.modelCS, 'RV': self.modelRV, 'RS': self.modelRS}.items():
      try:
        model.load_state_dict(state_dicts[modelName])
      except:
        print(f'{modelName} doesn\'t exist!\n')
      model.to(device)
      # state_dicts = {m: {k: v.to(device) for k, v in sd.items()} for m, sd in state_dicts.items()}
      model.eval()
      allOutputs = torch.tensor([]).detach().to('cpu')
      allLabels = torch.tensor([]).detach().to('cpu')
      for inputs, labels in self.test_dl:
        seq_len = inputs.size(1)
        inputs = inputs.to(device)
        outputs = model(inputs, debug = debug)
        outputs = outputProcessing(outputs = outputs, seq_len = seq_len, clf = model.clf, framePred = model.framePred)
        if debug: print(f'Outputs processing size: {outputs.size()}')
        outputs = outputs.detach().to('cpu')
        allOutputs = torch.cat([allOutputs, outputs], dim = 0).detach().to('cpu')
        allLabels = torch.cat([allLabels, labels], dim = 0).detach().to('cpu')
        del inputs
        del labels
        del outputs
        torch.cuda.empty_cache()
      for name, metric in self.metrics.items():
        value = metric(allOutputs, allLabels).detach().to('cpu')
        print(f'{modelName} test {name}: {value.item():.4f}')
        del value
      print('='*50)
    
  def plot_metrics(self, types, colors, mapper = False, plot_train = True, plot_val = True, **kwargs):
    '''
    Plot the epoch metrics of the selected training setups

    Parameters:
    -----------------------------------
    types: list
      Names of the setups to plot (keys of mapper, e.g. 'CV', 'CS', 'RV', 'RS')
    colors:
      Forwarded unchanged to plot_epoch_metrics
    mapper: dict, optional
      Dictionary {setup name: epoch metrics}. If not given, it is built from the
      trainers of the setups listed in types.
    plot_train: bool, optional
      Forwarded to plot_epoch_metrics. Default: True
    plot_val: bool, optional
      Forwarded to plot_epoch_metrics. Default: True
    **kwargs:
      Extra keyword arguments forwarded to plot_epoch_metrics

    Raises:
    -----------------------------------
    ValueError
      If no mapper is given and a requested setup has no trainer with epoch metrics yet
    '''
    if not mapper:
      mapper = {}
      # Only the requested setups need a trainer; the others may be untrained
      for setup in ('CV', 'CS', 'RV', 'RS'):
        if setup not in types: continue
        try:
          mapper[setup] = getattr(self, f'trainer{setup}').epochMetrics
        except AttributeError as err:
          raise ValueError('Not all training setups in types have been trained yet!') from err
    listMetrics = [mapper[i] for i in types]
    plot_epoch_metrics(listEpochMetrics = listMetrics, metrics = self.metrics,
                       prefixs = types, colors = colors, plot_train = plot_train,
                       plot_val = plot_val, **kwargs)




## KFold Setup

In [None]:
class KFoldSetup(TrainingSetup):
  '''Setup KFold cross-validation training'''
  def __init__(self, n_splits, modelVidParams, modelSmplParams, dataParams, metrics, device, shuffle = True,
               start_fold = 0, end_fold = 10):
    '''
    Parameters:
    -----------------------------------
    n_splits: int
      Number of folds generated when the splits are not supplied
    modelVidParams: dict
      Parameters of the Transformer model for video prediction (CV and RV).
    modelSmplParams: dict
      Parameters of the Transformer model for sample prediction (CS and RS).
    dataParams: dict
      Parameters for dataloaders creation. Its 'test_size' entry is set to 0 in place
      if it is not 0 already.
    metrics: dict
      Dictionary {metric: metric class} for metrics used in evaluation
    device: str
      Device to train the model on. Either 'cuda' or 'cpu'
    shuffle: bool, optional
      If True (default), shuffle data when creating folds
    start_fold: int
      Zero-based index of the first fold to train (for training models on multiple devices concurrently)
    end_fold: int
      Zero-based index one past the last fold to train, i.e. the 1-based number of the
      last fold (for training models on multiple devices concurrently)
    '''
    # For KFold training, eliminating test data split, keeping only train and validation
    if dataParams['test_size'] != 0: 
      print('test_size should be 0 for KFold. Setting test_size to 0...')
      dataParams['test_size'] = 0
    super().__init__(modelVidParams, modelSmplParams, dataParams, metrics, device)
    self.n_splits = n_splits
    self.shuffle = shuffle
    self.loaders_created = False
    self.start_fold = start_fold
    self.end_fold = end_fold

  def generate_kfold_loaders(self, X, y, splitsPath, sampling_rate = 1, kfold_splits = None):
    '''
    Create data loaders for each fold

    Parameters:
    -----------------------------------
    X: array-like
      Features, indexable with an array of sample indexes
    y: array-like
      Labels, indexable with an array of sample indexes
    splitsPath: str
      Path to save KFold splits' indexes
    sampling_rate: int, optional
      Rate for downsampling (e.g. for sample_rate=30, only one sample is retained every 30 samples). Default: 1
    kfold_splits: list
      List of pre-determined (train indexes, validation indexes) pairs, one per fold.
      If None, generate KFold splits and save them in splitsPath.
    '''
    # Generate KFold splits
    train_size, val_size = self.dataParams['train_size'], self.dataParams['val_size']
    if not self.dataParams['allData']:
      n_samples = train_size + val_size
      # Draw distinct samples: with replacement, copies of one sample could land in both
      # the training and the validation part of a fold. Replacement is kept only when
      # more samples are requested than exist.
      idxs = np.random.choice(len(X), size = n_samples, replace = n_samples > len(X))
      Xset, yset = X[idxs], y[idxs]
    else: Xset, yset = copy(X), copy(y)
    if not kfold_splits:
      kf = KFold(n_splits = self.n_splits, shuffle = self.shuffle)
      kfold_splits = list(kf.split(Xset))
      save_object(kfold_splits, os.path.join(splitsPath, 'kfold_splits.pkl'))
    self.kfold_loaders = []
    self.kfold_splits = kfold_splits
    # For each split, create train and validation data loaders
    for train_index, val_index in kfold_splits[self.start_fold:self.end_fold]:
      # Shuffles the index arrays of kfold_splits in place
      random.shuffle(train_index)
      random.shuffle(val_index)
      fold_parts = [(Xset[train_index], yset[train_index]), (Xset[val_index], yset[val_index])]
      if self.dataParams['transform']:
        # NOTE(review): the transform is applied to the train and validation parts in
        # separate calls - confirm in applyTransform that this is intended
        (Xt, yt), (Xv, yv) = [applyTransform(X = i, y = j, transform = self.dataParams['transform'])
                              for i, j in fold_parts]
      else: 
        (Xt, yt), (Xv, yv) = [(torch.Tensor(i), torch.Tensor(j)) for i, j in fold_parts]
      train_val_test_data = [Xt, Xv, yt, yv]
      self.getData(X, y, train_val_test_data, sampling_rate, plot = False, figsize = None)
      # Data debugging output is only wanted for the first fold
      self.dataParams['debug'] = False
      self.kfold_loaders.append((self.train_dl, self.val_dl))
    self.loaders_created = True

  def kfold_train(self, epochs, savePath, debug = False, scheduler = None, schedulerParams = None,
                  optimizerLR = 1e-3, saveBest = 'PCC', highestBest = True, print_CUDA = False,
                  saveEpochMetrics = False, fold_metrics = None, train = True):
    '''
    Train models using KFold cross-validation

    Parameters:
    -----------------------------------
    epochs: int
      Number of epochs
    savePath: str
      Path to save results of training and validation
    debug: bool, optional
      If True, show debugging of models' layers output shapes. Default: False
    scheduler: learning rate scheduler class, optional
      Learning rate scheduler (e.g. torch.optim.lr_scheduler.StepLR)
    schedulerParams: dict, optional
      Parameters of the scheduler
    optimizerLR: float
      Initial learning rate of the optimizer
    saveBest: str, optional
      Metric to consider when evaluating best model. Either 'PCC' (default), 'SCC' or 'RMSE'.
    highestBest: bool, optional
      If True (default), model is considered best if metric (defined in saveBest) is the highest. If False, if its the lowest.
    print_CUDA: bool, optional
      If True, print CUDA memory usage at the end of the epoch. Default: False
    saveEpochMetrics: bool, optional
      If True, save metrics calculated every epoch to a pickle file (with path defined on savePath). Default: False
    fold_metrics: list, optional
      List of previously trained metrics dictionaries for each fold. New folds are appended to it.
    train: bool, optional
      If True (default), train the model. If False, only load metrics

    Raises:
    -----------------------------------
    ValueError
      If generate_kfold_loaders has not been called yet
    '''
    if not self.loaders_created:
      raise ValueError('KFold dataloaders were not created yet')
    self.fold_metrics = fold_metrics if fold_metrics is not None else []
    if train:
      # Folder of the most recent fold; falls back to savePath when there is no fold to train
      svPth = savePath
      for idx, (train_dl, val_dl) in enumerate(self.kfold_loaders):
        foldNumber = self.start_fold + idx + 1
        print('*'*200)
        print(f'Training fold {foldNumber}...')
        print('*'*200 + '\n')
        t0fold = time()
        svPth = os.path.join(savePath, f'fold{foldNumber}')
        os.makedirs(svPth, exist_ok = True)
        # Point the instance at this fold's loaders; otherwise they stay at the ones
        # created for the last fold in generate_kfold_loaders.
        # NOTE(review): only train_dl/val_dl are stored per fold - if getData creates
        # further loader attributes, they still hold the last fold's data. Confirm.
        self.train_dl, self.val_dl = train_dl, val_dl
        self.train(epochs, svPth, debug, scheduler, schedulerParams, optimizerLR, saveBest, 
                  highestBest, print_CUDA, saveEpochMetrics, show_progress = False)
        t1fold = time()
        print(f'Total time elapsed on fold {foldNumber}: {elapsedTime(t0fold, t1fold)}\n')
        bestMetrics = {
          'CV': self.trainerCV.best_state_dict['epoch_val_metrics'],
          'CS': self.trainerCS.best_state_dict['epoch_val_metrics'],
          'RV': self.trainerRV.best_state_dict['epoch_val_metrics'],
          'RS': self.trainerRS.best_state_dict['epoch_val_metrics'],
        }
        self.fold_metrics.append(bestMetrics)
        save_object(bestMetrics, os.path.join(svPth, f'fold{foldNumber}_bestMetrics.pkl'))
        # Running aggregate, rewritten after every fold
        save_object(self.fold_metrics, os.path.join(savePath, f'kfold_{self.start_fold+1}-{self.end_fold}-_bestMetrics.pkl'))
        # Start the next fold from freshly created models
        self.createModels()
      print('*'*100)
      print('Finished training KFold Cross Validation!')
      print('*'*100)
      # Uses the instance's fold window rather than same-named notebook globals
      save_object(self.fold_metrics, os.path.join(svPth, f'kfold_{self.start_fold+1}-{self.end_fold}_bestMetrics.pkl'))
      

# Train using audiovisual features

In [None]:
# Fold window for this run, forwarded to every KFoldSetup below.
# The splits are sliced as [start_fold:end_fold], so end_fold itself is excluded.
start_fold, end_fold = 0, 9

## Original data

### Params and data

In [None]:
# Transformer parameters for video prediction (CV and RV)
modelVidParams = {
'emb_dim': 8,
'seq_len': 1187,
'num_features': 115,
'nhead': 4,
'd_hid': 32,
'nlayers': 4,
'emb_activ': nn.GELU,
'linear_activ': nn.GELU,
'dropout': 0.1,
'batch_first': True
}

# Transformer parameters for sample prediction (CS and RS)
modelSmplParams = {
'emb_dim': 4,
'seq_len': 1187,
'num_features': 115,
'nhead': 4,
'd_hid': 32,
'nlayers': 2,
'emb_activ': nn.GELU,
'linear_activ': nn.GELU,
'dropout': 0.1,
'batch_first': True
}

# Parameters for dataloaders creation; test_size must be 0 for KFold
dataParams = {
'mosPath': '/content/drive/MyDrive/TCC Dados/Experiment_3/UnB-AVQ-2018-Experiment3.csv',
'matPath': '/content/drive/MyDrive/TCC Dados/Experiment_3/Features',
'featuresName': 'avFeatures',
'read_data': False,
'batch_size': 4,
'bs_smpl': 2,
'train_size': 720,
'val_size': 80,
'test_size': 0,
'allData': False,
'transform': MinMaxScaler,
'debug': True,
'shuffle': True,
'idxs': None
}
metrics = {'RMSE': RMSE(), 'PCC': PearsonCorrelation(), 'SCC': SpearmanCorrelation()}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_splits = 10
shuffle = True

splitsPath = '/content/drive/MyDrive/TCC Resultados/AVQ_KFold_1-2'
# Also creates missing parent folders and is a no-op when the folder already exists
os.makedirs(splitsPath, exist_ok = True)
sampling_rate = 1
# Pre-determined fold indexes, reused by every experiment below
kfold_splits = load_object('/content/drive/MyDrive/TCC Dados/kfold_splits.pkl')

NameError: ignored

In [None]:
# Build the audiovisual KFold setup and its per-fold dataloaders (full-rate data)
kfsetup = KFoldSetup(n_splits = n_splits, modelVidParams = modelVidParams,
                     modelSmplParams = modelSmplParams, dataParams = dataParams,
                     metrics = metrics, device = device, shuffle = shuffle,
                     start_fold = start_fold, end_fold = end_fold)
kfsetup.generate_kfold_loaders(X = X, y = y, splitsPath = splitsPath,
                               sampling_rate = sampling_rate, kfold_splits = kfold_splits)

Preparing data...
Train/val data shapes:
torch.Size([720, 1187, 115]), torch.Size([80, 1187, 115])
torch.Size([720]), torch.Size([80])

Creating dataloaders...
Dataloaders created!
Dataloaders sizes:
180, 20
Preparing data...
Train/val data shapes:
torch.Size([720, 1187, 115]), torch.Size([80, 1187, 115])
torch.Size([720]), torch.Size([80])

Creating dataloaders...
Dataloaders created!
Dataloaders sizes:
360, 40


### Training

In [None]:
# Training length and criterion used to pick the best epoch
epochs, saveBest, highestBest = 10, 'PCC', True
# Switches forwarded to kfold_train
debug = print_CUDA = saveEpochMetrics = False
scheduler = train = True
# No earlier fold results to extend; to continue a previous run, load them instead:
# fold_metrics = load_object('/content/drive/MyDrive/TCC Dados/KFold/kfold_bestMetrics.pkl')
fold_metrics = None

In [None]:
savePath = '/content/drive/MyDrive/TCC Resultados/AVQ_KFold_1-2'
# Also creates missing parent folders and is a no-op when the folder already exists
os.makedirs(savePath, exist_ok = True)
kfsetup.kfold_train(epochs, savePath, debug = debug, scheduler = scheduler,
                    saveBest = saveBest, highestBest = highestBest, print_CUDA = print_CUDA,
                    saveEpochMetrics = saveEpochMetrics, fold_metrics = fold_metrics, train = train)

********************************************************************************************************************************************************************************************************
Training fold 1...
********************************************************************************************************************************************************************************************************

Training CV
Time for 1st epoch: 0m11s
Estimated time for 10 epochs: 1m50s
Finished CV training with best metrics:
{'epoch': 10, 'epoch_val_metrics': {'RMSE': 1.0493, 'PCC': 0.1261, 'SCC': 0.0341}}
Time elapsed: 1m32s
Training CS
Time for 1st epoch: 0m47s
Estimated time for 10 epochs: 7m59s
Finished CS training with best metrics:
{'epoch': 3, 'epoch_val_metrics': {'RMSE': 1.6776, 'PCC': -0.0481, 'SCC': 0.0086}}
Time elapsed: 8m5s
Training RV
Time for 1st epoch: 0m8s
Estimated time for 10 epochs: 1m25s
Finished RV training with best metrics:
{'epoch': 9, 'epoch_val_metrics'

## Downsampled data

### Params and data

In [None]:
# Drop the reference to the previous setup before building the next one
del kfsetup

# Transformer parameters for video prediction (CV and RV), downsampled sequences
modelVidParams = {
'emb_dim': 8,
'seq_len': 40,
'num_features': 115,
'nhead': 4,
'd_hid': 32,
'nlayers': 4,
'emb_activ': nn.GELU,
'linear_activ': nn.GELU,
'dropout': 0.1,
'batch_first': True
}

# Transformer parameters for sample prediction (CS and RS), downsampled sequences
modelSmplParams = {
'emb_dim': 4,
'seq_len': 40,
'num_features': 115,
'nhead': 4,
'd_hid': 32,
'nlayers': 2,
'emb_activ': nn.GELU,
'linear_activ': nn.GELU,
'dropout': 0.1,
'batch_first': True
}

splitsPath = '/content/drive/MyDrive/TCC Resultados/AVQ_Sample_KFold_1-2'
# Also creates missing parent folders and is a no-op when the folder already exists
os.makedirs(splitsPath, exist_ok = True)
# One sample kept out of every 30
sampling_rate = 30

In [None]:
# generate_kfold_loaders switches 'debug' off after the first fold, so turn it back on
dataParams['debug'] = True
# Audiovisual KFold setup on downsampled data
kfsetup_smpl = KFoldSetup(n_splits = n_splits, modelVidParams = modelVidParams,
                          modelSmplParams = modelSmplParams, dataParams = dataParams,
                          metrics = metrics, device = device, shuffle = shuffle,
                          start_fold = start_fold, end_fold = end_fold)
kfsetup_smpl.generate_kfold_loaders(X = X, y = y, splitsPath = splitsPath,
                                    sampling_rate = sampling_rate, kfold_splits = kfold_splits)

Preparing data...
Train/val data shapes:
torch.Size([720, 40, 115]), torch.Size([80, 40, 115])
torch.Size([720]), torch.Size([80])

Creating dataloaders...
Dataloaders created!
Dataloaders sizes:
180, 20
Preparing data...
Train/val data shapes:
torch.Size([720, 40, 115]), torch.Size([80, 40, 115])
torch.Size([720]), torch.Size([80])

Creating dataloaders...
Dataloaders created!
Dataloaders sizes:
360, 40


### Training

In [None]:
# Training length and criterion used to pick the best epoch
epochs, saveBest, highestBest = 10, 'PCC', True
# Switches forwarded to kfold_train
debug = print_CUDA = saveEpochMetrics = False
scheduler = train = True
# No earlier fold results to extend; to continue a previous run, load them instead:
# fold_metrics = load_object('/content/drive/MyDrive/TCC Dados/KFold/kfold_bestMetrics.pkl')
fold_metrics = None

In [None]:
savePath = '/content/drive/MyDrive/TCC Resultados/AVQ_Sample_KFold_1-2'
# Also creates missing parent folders and is a no-op when the folder already exists
os.makedirs(savePath, exist_ok = True)
kfsetup_smpl.kfold_train(epochs, savePath, debug = debug, scheduler = scheduler,
                         saveBest = saveBest, highestBest = highestBest, print_CUDA = print_CUDA,
                         saveEpochMetrics = saveEpochMetrics, fold_metrics = fold_metrics, train = train)

********************************************************************************************************************************************************************************************************
Training fold 1...
********************************************************************************************************************************************************************************************************

Training CV
Time for 1st epoch: 0m2s
Estimated time for 10 epochs: 0m23s
Finished CV training with best metrics:
{'epoch': 7, 'epoch_val_metrics': {'RMSE': 1.0085, 'PCC': 0.1405, 'SCC': 0.1113}}
Time elapsed: 0m24s
Training CS
Time for 1st epoch: 0m3s
Estimated time for 10 epochs: 0m32s
Finished CS training with best metrics:
{'epoch': 8, 'epoch_val_metrics': {'RMSE': 1.5907, 'PCC': 0.1498, 'SCC': 0.2646}}
Time elapsed: 0m31s
Training RV
Time for 1st epoch: 0m2s
Estimated time for 10 epochs: 0m24s
Finished RV training with best metrics:
{'epoch': 1, 'epoch_val_metrics': {

# Train using only audio features

## Original data

### Params and data

In [None]:
# Drop the reference to the previous setup before building the next one
del kfsetup_smpl

# Transformer parameters for video prediction (CV and RV), 25 audio features
modelVidParams = {
'emb_dim': 8,
'seq_len': 1187,
'num_features': 25,
'nhead': 4,
'd_hid': 32,
'nlayers': 4,
'emb_activ': nn.GELU,
'linear_activ': nn.GELU,
'dropout': 0.1,
'batch_first': True
}

# Transformer parameters for sample prediction (CS and RS), 25 audio features
modelSmplParams = {
'emb_dim': 4,
'seq_len': 1187,
'num_features': 25,
'nhead': 4,
'd_hid': 32,
'nlayers': 2,
'emb_activ': nn.GELU,
'linear_activ': nn.GELU,
'dropout': 0.1,
'batch_first': True
}

splitsPath = '/content/drive/MyDrive/TCC Resultados/AQ_KFold_1-2'
# Also creates missing parent folders and is a no-op when the folder already exists
os.makedirs(splitsPath, exist_ok = True)
sampling_rate = 1

In [None]:
# generate_kfold_loaders switches 'debug' off after the first fold, so turn it back on
dataParams['debug'] = True
# Audio-only KFold setup: feature columns from index 90 onwards (25 of the 115)
kfsetup_AQ = KFoldSetup(n_splits = n_splits, modelVidParams = modelVidParams,
                        modelSmplParams = modelSmplParams, dataParams = dataParams,
                        metrics = metrics, device = device, shuffle = shuffle,
                        start_fold = start_fold, end_fold = end_fold)
kfsetup_AQ.generate_kfold_loaders(X = X[:, :, 90:], y = y, splitsPath = splitsPath,
                                  sampling_rate = sampling_rate, kfold_splits = kfold_splits)

Preparing data...
Train/val data shapes:
torch.Size([720, 1187, 25]), torch.Size([80, 1187, 25])
torch.Size([720]), torch.Size([80])

Creating dataloaders...
Dataloaders created!
Dataloaders sizes:
180, 20
Preparing data...
Train/val data shapes:
torch.Size([720, 1187, 25]), torch.Size([80, 1187, 25])
torch.Size([720]), torch.Size([80])

Creating dataloaders...
Dataloaders created!
Dataloaders sizes:
360, 40


### Training

In [None]:
savePath = '/content/drive/MyDrive/TCC Resultados/AQ_KFold_1-2'
# Also creates missing parent folders and is a no-op when the folder already exists
os.makedirs(savePath, exist_ok = True)
kfsetup_AQ.kfold_train(epochs, savePath, debug = debug, scheduler = scheduler,
                       saveBest = saveBest, highestBest = highestBest, print_CUDA = print_CUDA,
                       saveEpochMetrics = saveEpochMetrics, fold_metrics = fold_metrics, train = train)

********************************************************************************************************************************************************************************************************
Training fold 1...
********************************************************************************************************************************************************************************************************

Training CV
Time for 1st epoch: 0m8s
Estimated time for 10 epochs: 1m22s
Finished CV training with best metrics:
{'epoch': 6, 'epoch_val_metrics': {'RMSE': 1.3083, 'PCC': 0.2894, 'SCC': 0.3546}}
Time elapsed: 1m23s
Training CS
Time for 1st epoch: 0m5s
Estimated time for 10 epochs: 0m59s
Finished CS training with best metrics:
{'epoch': 6, 'epoch_val_metrics': {'RMSE': 1.9652, 'PCC': 0.1992, 'SCC': 0.1091}}
Time elapsed: 0m58s
Training RV
Time for 1st epoch: 0m8s
Estimated time for 10 epochs: 1m21s
Finished RV training with best metrics:
{'epoch': 10, 'epoch_val_metrics': 

## Downsampled data

### Params and data

In [None]:
# Drop the reference to the previous setup before building the next one
del kfsetup_AQ

# Transformer parameters for video prediction (CV and RV), 25 audio features, downsampled
modelVidParams = {
'emb_dim': 8,
'seq_len': 40,
'num_features': 25,
'nhead': 4,
'd_hid': 32,
'nlayers': 4,
'emb_activ': nn.GELU,
'linear_activ': nn.GELU,
'dropout': 0.1,
'batch_first': True
}

# Transformer parameters for sample prediction (CS and RS), 25 audio features, downsampled
modelSmplParams = {
'emb_dim': 4,
'seq_len': 40,
'num_features': 25,
'nhead': 4,
'd_hid': 32,
'nlayers': 2,
'emb_activ': nn.GELU,
'linear_activ': nn.GELU,
'dropout': 0.1,
'batch_first': True
}

splitsPath = '/content/drive/MyDrive/TCC Resultados/AQ_Sample_KFold_1-2'
# Also creates missing parent folders and is a no-op when the folder already exists
os.makedirs(splitsPath, exist_ok = True)
# One sample kept out of every 30
sampling_rate = 30

In [None]:
# generate_kfold_loaders switches 'debug' off after the first fold, so turn it back on
dataParams['debug'] = True
# Audio-only KFold setup on downsampled data: feature columns from index 90 onwards
kfsetup_AQ_smpl = KFoldSetup(n_splits = n_splits, modelVidParams = modelVidParams,
                             modelSmplParams = modelSmplParams, dataParams = dataParams,
                             metrics = metrics, device = device, shuffle = shuffle,
                             start_fold = start_fold, end_fold = end_fold)
kfsetup_AQ_smpl.generate_kfold_loaders(X = X[:, :, 90:], y = y, splitsPath = splitsPath,
                                       sampling_rate = sampling_rate, kfold_splits = kfold_splits)

Preparing data...
Train/val data shapes:
torch.Size([720, 40, 25]), torch.Size([80, 40, 25])
torch.Size([720]), torch.Size([80])

Creating dataloaders...
Dataloaders created!
Dataloaders sizes:
180, 20
Preparing data...
Train/val data shapes:
torch.Size([720, 40, 25]), torch.Size([80, 40, 25])
torch.Size([720]), torch.Size([80])

Creating dataloaders...
Dataloaders created!
Dataloaders sizes:
360, 40


### Training

In [None]:
savePath = '/content/drive/MyDrive/TCC Resultados/AQ_Sample_KFold_1-2'
# Also creates missing parent folders and is a no-op when the folder already exists
os.makedirs(savePath, exist_ok = True)
kfsetup_AQ_smpl.kfold_train(epochs, savePath, debug = debug, scheduler = scheduler,
                            saveBest = saveBest, highestBest = highestBest, print_CUDA = print_CUDA,
                            saveEpochMetrics = saveEpochMetrics, fold_metrics = fold_metrics, train = train)

********************************************************************************************************************************************************************************************************
Training fold 1...
********************************************************************************************************************************************************************************************************

Training CV
Time for 1st epoch: 0m2s
Estimated time for 10 epochs: 0m23s
Finished CV training with best metrics:
{'epoch': 1, 'epoch_val_metrics': {'RMSE': 1.08, 'PCC': 0.2646, 'SCC': 0.2391}}
Time elapsed: 0m24s
Training CS
Time for 1st epoch: 0m2s
Estimated time for 10 epochs: 0m29s
Finished CS training with best metrics:
{'epoch': 9, 'epoch_val_metrics': {'RMSE': 1.6669, 'PCC': 0.1836, 'SCC': 0.1908}}
Time elapsed: 0m30s
Training RV
Time for 1st epoch: 0m2s
Estimated time for 10 epochs: 0m22s
Finished RV training with best metrics:
{'epoch': 9, 'epoch_val_metrics': {'R

# Train using only video features

## Original data

### Params and data

In [None]:
# Drop the reference to the previous setup before building the next one
del kfsetup_AQ_smpl

# Transformer parameters for video prediction (CV and RV), 90 video features
modelVidParams = {
'emb_dim': 8,
'seq_len': 1187,
'num_features': 90,
'nhead': 4,
'd_hid': 32,
'nlayers': 4,
'emb_activ': nn.GELU,
'linear_activ': nn.GELU,
'dropout': 0.1,
'batch_first': True
}

# Transformer parameters for sample prediction (CS and RS), 90 video features
modelSmplParams = {
'emb_dim': 4,
'seq_len': 1187,
'num_features': 90,
'nhead': 4,
'd_hid': 32,
'nlayers': 2,
'emb_activ': nn.GELU,
'linear_activ': nn.GELU,
'dropout': 0.1,
'batch_first': True
}

splitsPath = '/content/drive/MyDrive/TCC Resultados/VQ_KFold_1-2'
# Also creates missing parent folders and is a no-op when the folder already exists
os.makedirs(splitsPath, exist_ok = True)
sampling_rate = 1

In [None]:
# generate_kfold_loaders switches 'debug' off after the first fold, so turn it back on
dataParams['debug'] = True
# Video-only KFold setup: the first 90 feature columns
kfsetup_VQ = KFoldSetup(n_splits = n_splits, modelVidParams = modelVidParams,
                        modelSmplParams = modelSmplParams, dataParams = dataParams,
                        metrics = metrics, device = device, shuffle = shuffle,
                        start_fold = start_fold, end_fold = end_fold)
kfsetup_VQ.generate_kfold_loaders(X = X[:, :, :90], y = y, splitsPath = splitsPath,
                                  sampling_rate = sampling_rate, kfold_splits = kfold_splits)

Preparing data...
Train/val data shapes:
torch.Size([720, 1187, 90]), torch.Size([80, 1187, 90])
torch.Size([720]), torch.Size([80])

Creating dataloaders...
Dataloaders created!
Dataloaders sizes:
180, 20
Preparing data...
Train/val data shapes:
torch.Size([720, 1187, 90]), torch.Size([80, 1187, 90])
torch.Size([720]), torch.Size([80])

Creating dataloaders...
Dataloaders created!
Dataloaders sizes:
360, 40


### Training

In [None]:
savePath = '/content/drive/MyDrive/TCC Resultados/VQ_KFold_1-2'
# Also creates missing parent folders and is a no-op when the folder already exists
os.makedirs(savePath, exist_ok = True)
kfsetup_VQ.kfold_train(epochs, savePath, debug = debug, scheduler = scheduler,
                       saveBest = saveBest, highestBest = highestBest, print_CUDA = print_CUDA,
                       saveEpochMetrics = saveEpochMetrics, fold_metrics = fold_metrics, train = train)

## Downsampled data

### Params and data

In [None]:
# Drop the reference to the previous setup before building the next one
del kfsetup_VQ

# Transformer parameters for video prediction (CV and RV), 90 video features, downsampled
modelVidParams = {
'emb_dim': 8,
'seq_len': 40,
'num_features': 90,
'nhead': 4,
'd_hid': 32,
'nlayers': 4,
'emb_activ': nn.GELU,
'linear_activ': nn.GELU,
'dropout': 0.1,
'batch_first': True
}

# Transformer parameters for sample prediction (CS and RS), 90 video features, downsampled
modelSmplParams = {
'emb_dim': 4,
'seq_len': 40,
'num_features': 90,
'nhead': 4,
'd_hid': 32,
'nlayers': 2,
'emb_activ': nn.GELU,
'linear_activ': nn.GELU,
'dropout': 0.1,
'batch_first': True
}

splitsPath = '/content/drive/MyDrive/TCC Resultados/VQ_Sample_KFold_1-2'
# Also creates missing parent folders and is a no-op when the folder already exists
os.makedirs(splitsPath, exist_ok = True)
# One sample kept out of every 30
sampling_rate = 30

In [None]:
# generate_kfold_loaders switches 'debug' off after the first fold, so turn it back on
dataParams['debug'] = True
# Video-only KFold setup on downsampled data: the first 90 feature columns
kfsetup_VQ_smpl = KFoldSetup(n_splits = n_splits, modelVidParams = modelVidParams,
                             modelSmplParams = modelSmplParams, dataParams = dataParams,
                             metrics = metrics, device = device, shuffle = shuffle,
                             start_fold = start_fold, end_fold = end_fold)
kfsetup_VQ_smpl.generate_kfold_loaders(X = X[:, :, :90], y = y, splitsPath = splitsPath,
                                       sampling_rate = sampling_rate, kfold_splits = kfold_splits)

### Training

In [None]:
savePath = '/content/drive/MyDrive/TCC Resultados/VQ_Sample_KFold_1-2'
# Also creates missing parent folders and is a no-op when the folder already exists
os.makedirs(savePath, exist_ok = True)
kfsetup_VQ_smpl.kfold_train(epochs, savePath, debug = debug, scheduler = scheduler,
                            saveBest = saveBest, highestBest = highestBest, print_CUDA = print_CUDA,
                            saveEpochMetrics = saveEpochMetrics, fold_metrics = fold_metrics, train = train)