### Hi :)

In [1]:
# imports
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch
import numpy as np
from tqdm import tqdm
from itertools import product # used for hyperparameter grid search, unused if not doing hyperparameter tuning
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
import warnings
warnings.filterwarnings('ignore', category=UserWarning)
import os
import shutil

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
seed = 42 # for reproducibility
torch.manual_seed(seed)
# torch.cuda.manual_seed_all(seed) # why did this break my predictions?!?

data = pd.read_csv('data.csv', delimiter=';')

In [2]:
class CustomDataset(Dataset):
    """
    CustomDataset is a class for creating a dataset in PyTorch, inheriting from the PyTorch Dataset class.
    This dataset is designed to handle tabular data provided as pandas DataFrames.

    Attributes:
        features (pd.DataFrame): A DataFrame containing the features of the dataset.
        labels (pd.Series or pd.DataFrame): A Series or DataFrame containing the labels of the dataset.
    Methods:
        __getitem__(self, index): Returns the features and label for a given index.
        __len__(self): Returns the total number of samples in the dataset.
    """
    def __init__(self, features, labels):
        """
        Parameters:
            features (pd.DataFrame): The features of the dataset.
            labels (pd.Series or pd.DataFrame): The labels of the dataset.
        """
        self.features = features
        self.labels = labels

    def __getitem__(self, index):
        """
        Parameters:
            index (int): The index of the item to retrieve.
        Returns:
            tuple: A tuple containing the features as a numpy array and the label.
        """
        features = self.features.iloc[index].to_numpy()
        label = self.labels.iloc[index]
        return features, label

    def __len__(self):
        """
        Returns:
            int: The total number of samples.
        """
        return len(self.features)

In [3]:
def split_data(data, batch_size, task, alpha):
    """
    Parameters:
        data (pd.DataFrame or dict): The dataset to split. If `task` is 0 or 1, `data` should be a tuple 
            of (features, labels) where both are pandas DataFrames. If `task` is 2 or 3, `data` should be 
            a dictionary where keys are subject or item IDs and values are pandas DataFrames corresponding 
            to data for each subject/item.
        batch_size (int): The size of each batch to be loaded by the DataLoader.
        task (int): An integer indicating the type of task to prepare data for. Valid values are:
            0 - Known subjects and items with k-fold cross-validation.
            1 - Known subjects and items with leave-one-out cross-validation.
            2 - Held-out subjects, known items.
            3 - Held-out items, known subjects.
        alpha (float): A scaling factor used in computing positive class weights for class imbalance handling in BCELoss.

    Returns:
        tuple: A tuple containing two elements:
            - A list of tuples, each containing DataLoader objects for the training, validation, 
              and testing sets in that order.
            - A list of torch.tensor objects representing the positive class weights computed for 
              each split to address class imbalance.
    """
    
    dataloaders = []
    pos_weights = []
    # global class_weights

    if task == 0 or task == 1: # Known subjects and items
        features, labels = data
        dataset = CustomDataset(features, labels)
        n = len(dataset)
        if task == 0:
            k = 10 # k-fold cross-validation
        elif task == 1:
            k = n # leave-one-out cross-validation
        fold_size = n // k
        folds = []
        for i in range(k):
            start = i * fold_size
            end = (i + 1) * fold_size if i < k - 1 else n
            folds.append(torch.utils.data.Subset(dataset, range(start, end)))

        for i in range(k):
            # splits for cross-validation, validation set = test set (since we're doing k-fold, we won't use a separate test set)
            validation_dataset = folds[i]
            t = i + 1 if i < k - 1 else 0
            test_dataset = folds[t]
            train_folds = [folds[j] for j in range(k) if j != i]# and j != t]
            train_dataset = torch.utils.data.ConcatDataset(train_folds)

            # class weights for weighted cross-entropy loss (to handle class imbalance)
            y = torch.tensor([label for _, label in train_dataset], dtype=torch.long)
            sum = y.sum().item()
            weight = alpha*(len(train_dataset)-sum) / sum
            pos_weights.append(torch.tensor(weight, dtype=torch.float))

            train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            validation_dataloader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=True)
            test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
            dataloaders.append((train_dataloader, validation_dataloader, test_dataloader))
            #dataloaders.append((train_dataloader, validation_dataloader))

        return dataloaders, pos_weights
    
    elif task == 2: # Held-out subjects, known items
        test_items_count = 0
        subject_ids = list(data.groups.keys())

        for i in range(0, len(subject_ids), 2):
            subject_indexes = [i, i+1]
            subjects = [subject_ids[subject_index] for subject_index in subject_indexes]

            others = [subject for j, subject in enumerate(subject_ids) if j != i and j != i+1]

            test = pd.concat([data.get_group(subject) for subject in subjects])
            test_items_count += len(test)
            train_eval = pd.concat([data.get_group(subject) for subject in others])
            shuffled = train_eval.sample(frac = 1, random_state=seed) # shuffle the data -> wrecked.
            
            # splitting data into features and labels for dataset creation
            test_labels = test["condition"].copy()
            test_features = test.copy().drop(["condition", "sentenceCondition", "RECORDING_SESSION_LABEL", "item"], axis=1)
            test_dataset = CustomDataset(test_features, test_labels)
            
            train_eval_labels = shuffled["condition"].copy()
            train_eval_features = shuffled.copy().drop(["condition", "sentenceCondition", "RECORDING_SESSION_LABEL", "item"], axis=1)
            train_eval_dataset = CustomDataset(train_eval_features, train_eval_labels)


            train_eval_split = 0.9
            train_size = int(train_eval_split * len(train_eval_dataset))
            validation_size = len(train_eval_dataset) - train_size
            train_dataset, validation_dataset = torch.utils.data.random_split(train_eval_dataset, [train_size, validation_size])

            # class weights for weighted cross-entropy loss (to handle class imbalance)
            y = torch.tensor([label for _, label in train_dataset], dtype=torch.long)
            sum = y.sum().item()
            weight = alpha*(len(train_dataset)-sum) / sum
            pos_weights.append(torch.tensor(weight, dtype=torch.float))

            train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            validation_dataloader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=True)
            test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
            dataloaders.append((train_dataloader, validation_dataloader, test_dataloader))
        return dataloaders, pos_weights
    
    elif task == 3: # Held-out items, known subjects
        test_items_count = 0
        for i, item in enumerate(data.groups.keys()):
            test = data.get_group(item)
            test_items_count += len(test)
            train_eval = pd.concat([data.get_group(i) for i in data.groups.keys() if i != item])
            shuffled = train_eval.sample(frac = 1, random_state=seed) # shuffle the data -> wrecked.
            
            # splitting data into features and labels for dataset creation
            test_labels = test["condition"].copy()
            test_features = test.copy().drop(["condition", "sentenceCondition", "RECORDING_SESSION_LABEL", "item"], axis=1)
            test_dataset = CustomDataset(test_features, test_labels)
            
            train_eval_labels = shuffled["condition"].copy()
            train_eval_features = shuffled.copy().drop(["condition", "sentenceCondition", "RECORDING_SESSION_LABEL", "item"], axis=1)
            train_eval_dataset = CustomDataset(train_eval_features, train_eval_labels)


            train_eval_split = 0.9
            train_size = int(train_eval_split * len(train_eval_dataset))
            validation_size = len(train_eval_dataset) - train_size
            train_dataset, validation_dataset = torch.utils.data.random_split(train_eval_dataset, [train_size, validation_size])

            # class weights for weighted cross-entropy loss (to handle class imbalance)
            y = torch.tensor([label for _, label in train_dataset], dtype=torch.long)
            sum = y.sum().item()
            weight = alpha*(len(train_dataset)-sum) / sum
            pos_weights.append(torch.tensor(weight, dtype=torch.float))

            train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            validation_dataloader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=True)
            test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
            dataloaders.append((train_dataloader, validation_dataloader, test_dataloader))
        return dataloaders, pos_weights
    else:
        raise ValueError("Task argument must be either 1, 2, or 3")

    


In [4]:
def preprocess_and_split_data(data, batch_size, task, alpha):
    """
    Preprocesses the provided data by normalizing features, mapping "condition" to {0,1},
    and then splits the data into training, validation, and testing sets based on the task. 
    This function also prepares DataLoader objects for each split.
    The preprocessing steps include selecting critical (non-filler) rows, and normalizing eye-movement features.
    After preprocessing, the data is split according to the task.

    Parameters:
        data (pd.DataFrame): The raw dataset to preprocess and split. Kindly provided by Iza+Rita.
        batch_size (int): The size of each batch to be loaded by the DataLoader.
        task (int): An integer indicating the type of task to prepare data for. Valid values are:
            0 - Known subjects and items with k-fold cross-validation.
            1 - Known subjects and items with leave-one-out cross-validation.
            2 - Held-out subjects, known items.
            3 - Held-out items, known subjects.
        alpha (float): A scaling factor used in computing positive class weights for imbalance handling with BCELoss.

    Returns:
        tuple: A tuple containing two elements:
            - A list of tuples, each containing DataLoader objects for the training, validation, 
              and testing sets in that order.
            - A list of torch.tensor objects representing the alpha-scaled positive class weights computed for 
              each split to address class imbalance.
    """

# all tasks
    data_copy = data.loc[data['is_critical'] == 1].copy()
    dropped = data_copy.drop(['composite', 'LF', 'HF', "IA_ID", "trial", "list", "IA_LABEL", "wordlength", "is_critical", 
                'is_spill1', 'is_spill2', 'is_spill3', 'filler', 'function_word', 'other_filler'], axis=1)

    # normalizing input features beforehand, increased performance vs adding batchnorm layer to model
    temp = dropped[['fixation_duration',
        'duration_firstpass', 'duration_firstfixation', 'fix_count',
        'avg_pupil', 'IA_REGRESSION_IN_COUNT', 'IA_REGRESSION_OUT_COUNT',
        'saccade_length', 'saccade_duration', 'go_past_time']]
    temp = (temp - temp.mean()) / temp.std()
    dropped[['fixation_duration',
        'duration_firstpass', 'duration_firstfixation', 'fix_count',
        'avg_pupil', 'IA_REGRESSION_IN_COUNT', 'IA_REGRESSION_OUT_COUNT',
        'saccade_length', 'saccade_duration', 'go_past_time']] = temp
    normalized = dropped
    # mapping condition and sentenceCondition to 0 and 1 for critical word classification
    normalized[["condition", "sentenceCondition"]] = normalized[["condition", "sentenceCondition"]].map(lambda x: x.replace("none", "0"))
    normalized[["condition", "sentenceCondition"]] = normalized[["condition", "sentenceCondition"]].map(lambda x: x.replace("control", "0"))
    normalized[["condition", "sentenceCondition"]] = normalized[["condition", "sentenceCondition"]].map(lambda x: x.replace("pseudo", "1"))
    normalized[["condition", "sentenceCondition"]] = normalized[["condition", "sentenceCondition"]].map(lambda x: x.replace("filler", "0"))
    normalized[["condition", "sentenceCondition"]] = normalized[["condition", "sentenceCondition"]].astype(int)
    mapped = normalized

# task specific steps
    if task == 0 or task == 1: # Known subjects and items
        shuffled = mapped.sample(frac = 1, random_state=seed) # shuffle the data -> wrecked.
        # splitting data into features and labels for dataset creation
        labels = shuffled["condition"].copy()
        features = shuffled.copy().drop(["condition", "sentenceCondition", "RECORDING_SESSION_LABEL", "item"], axis=1)
        data = (features, labels)
        return split_data(data, batch_size, task, alpha)
    
    elif task == 2: # Held-out subjects, known items
        subjects = mapped.groupby('RECORDING_SESSION_LABEL')
        return split_data(subjects, batch_size, task, alpha)
    elif task == 3: # Held-out items, known subjects
        items = mapped.groupby('item')
        return split_data(items, batch_size, task, alpha)
    else:
        raise ValueError("Task argument must be either 1, 2, or 3")

In [5]:
def train_test(model, dataloader, optimizer, pos_weight, training):
    """
    Performs a single epoch of training, validation, or testing on the given model using the specified DataLoader. 
    This function adapts its behavior based on the 'training' parameter to correctly handle the model's state and 
    perform necessary operations such as backpropagation and optimizer updates during training.

    Parameters:
        model (torch.nn.Module): The neural network model to be trained, validated, or tested.
        dataloader (DataLoader): A DataLoader providing batches of data (features and labels) for processing.
        optimizer (torch.optim.Optimizer): The optimizer (AdamW) to use for updating model parameters during training.
        pos_weight (torch.Tensor): A tensor specifying the weight for the positive class to handle class imbalance.
        training (str): A string specifying the mode of operation. Must be 'train', 'validation', or 'test'.

    Returns:
        None if training.
        Cumulative loss (float) if validation.
        A tuple (label_list, prediction_list) containing lists of true labels and predicted labels for 
        each sample if testing.
    """
    # BCEWithLogitsLoss combines sigmoid with BCELoss for better stability, and handles class imbalance via pos_weight
    loss_function = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    if training == "train":
        model.train()
    elif training == "validation":
        model.eval()
    elif training == "test":
        model.eval()
    else:
        raise ValueError("training argument must be either 'train', 'validation' or 'test'")
        
    cumulative_loss = 0
    prediction_list = [] # store predictions accross folds for calculating accuracy and f1
    label_list = [] # store labels accross folds for calculating accuracy and f1
    sigmoid = torch.nn.Sigmoid()

    for sample in dataloader: # iterate over batches in the DataLoader
        data, targets = sample[0].float().to(device), sample[1].type(torch.LongTensor).to(device)
        output = model(data) # forward pass
        loss_value = loss_function(output, targets.unsqueeze(1).float())
        cumulative_loss += loss_value.item()

        if training == "train":
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
        
        predictions = [round(x) for x in sigmoid(output).to('cpu').detach().squeeze(1).numpy().tolist()] # gets {0,1} predictions from 1d logits
        target_labels = targets.to('cpu').detach().numpy()
        prediction_list.extend(predictions)
        label_list.extend(target_labels)

    if training == "train":
        return
    elif training == "validation":
        return cumulative_loss
    elif training == "test":
        return label_list, prediction_list
    else:
        raise ValueError("Ya Done Fuck'd up, son!")

In [6]:
class TuneableModel(torch.nn.Module):
    """
    A customizable neural network model for binary classification tasks, with a variable number of layers 
    and dropout rate. The model architecture consists of an input layer followed by a configurable number 
    of hidden layers, and an output layer. Each hidden layer includes a linear transformation, an activation 
    function (LeakyReLU), and (optional) dropout for regularization. Batch normalization can be added as needed, 
    but ended up unused in the final version because normalizing the dataset before training resulted in better predictions.

    Parameters:
        input_size (int): The number of input features.
        layer_size (int): The size of each hidden layer.
        dropout_rate (float): The dropout rate for regularization, applied to each hidden layer.
        n_layers (int): The number of hidden layers in the network, ranging from 1 to 10.

    Attributes:
        input_layer (Linear): The input layer.
        linear2 to linear10 (Linear): Optional hidden layers, activated based on `n_layers`.
        output_layer (Linear): The output layer for binary classification output.
        dropout (Dropout): Dropout layer for regularization.
        activation (LeakyReLU): Activation function used after each linear layer.
    """
    def __init__(self, input_size, layer_size, dropout_rate, n_layers):
        super(TuneableModel, self).__init__()
        self.n_layers = n_layers
        self.input_size = input_size
        self.input_layer = torch.nn.Linear(input_size, layer_size)
        # is there a way to do this not manually? :-\
        self.linear2 = torch.nn.Linear(layer_size, layer_size)
        self.linear3 = torch.nn.Linear(layer_size, layer_size)
        self.linear4 = torch.nn.Linear(layer_size, layer_size)
        self.linear5 = torch.nn.Linear(layer_size, layer_size)
        self.linear6 = torch.nn.Linear(layer_size, layer_size)
        self.linear7 = torch.nn.Linear(layer_size, layer_size)
        self.linear8 = torch.nn.Linear(layer_size, layer_size)
        self.linear9 = torch.nn.Linear(layer_size, layer_size)
        self.linear10 = torch.nn.Linear(layer_size, layer_size)

        self.output_layer = torch.nn.Linear(layer_size, 1)
        self.dropout = torch.nn.Dropout(dropout_rate) # rarely useful...
        self.activation = torch.nn.LeakyReLU() # oh, I forgot I'd chosen LeakyReLU!! uh oh...

    def forward(self, x):
        """
        Forward pass of the model. Applies linear transformations, activation functions, and dropout 
        sequentially based on the configured number of layers (`n_layers`), and finally outputs the prediction.

        Parameters:
            x (torch.Tensor): Input tensor of shape (batch_size, input_size).

        Returns:
            torch.Tensor: The output predictions of shape (batch_size, 1).
        """
        x = self.input_layer(x)
        x = self.activation(x)
        x = self.dropout(x)
        if self.n_layers > 1:
            x = self.linear2(x)
            x = self.activation(x)
            x = self.dropout(x)
            if self.n_layers > 2:
                x = self.linear3(x)
                x = self.activation(x)
                x = self.dropout(x)
                if self.n_layers > 3:
                    x = self.linear4(x)
                    x = self.activation(x)
                    x = self.dropout(x)
                    if self.n_layers > 4:
                        x = self.linear5(x)
                        x = self.activation(x)
                        x = self.dropout(x)
                        if self.n_layers > 5:
                            x = self.linear6(x)
                            x = self.activation(x)
                            x = self.dropout(x)
                            if self.n_layers > 6:
                                x = self.linear7(x)
                                x = self.activation(x)
                                x = self.dropout(x)
                                if self.n_layers > 7:
                                    x = self.linear8(x)
                                    x = self.activation(x)
                                    x = self.dropout(x)
                                    if self.n_layers > 8:
                                        x = self.linear9(x)
                                        x = self.activation(x)
                                        x = self.dropout(x)
                                        if self.n_layers > 9:
                                            x = self.linear10(x)
                                            x = self.activation(x)
                                            x = self.dropout(x)
        x = self.output_layer(x)
        return x

In [7]:
# Training sample
def evaluate(data, parameters, task):
    """
    Evaluates neural model's performance on a given task using specified parameters. 
    The function preprocesses the data, splits it according to the task, initializes a TuneableModel, 
    and trains it. It then evaluates the model on a test set and returns performance metrics.

    The function asserts the task to be one of the predefined tasks and initializes the model based on 
    the provided parameters. It supports dynamic pos_weight handling and uses early stopping based on 
    validation loss to prevent overfitting.

    Parameters:
        data (pd.DataFrame): The dataset to evaluate the model on.
        parameters (dict): A dictionary containing model hyperparameters and training settings. Expected 
            keys include "pos_weight", "batch_size", "alpha", "hidden_size", "dropout", "n_hidden", 
            "learning_rate", "beta_1", and "beta_2".
        task (int): An integer indicating the task type. Valid values are 0, 1, 2, and 3, each representing 
            a different way of splitting the data for training and testing:
                0 - Known subjects and items with k-fold cross-validation.
                1 - Known subjects and items with leave-one-out cross-validation.
                2 - Held-out subjects, known items.
                3 - Held-out items, known subjects.

    Returns:
        tuple: A tuple containing the accuracy score, F1 score, and confusion matrix of the model evaluated 
            on a given test set.
    """
    assert task in [0, 1, 2, 3], "Task argument must be either 1, 2 or 3"
    
    weight = None if parameters["pos_weight"] == None else torch.tensor(parameters["pos_weight"], dtype=torch.float).to(device)

    max_epochs = 1000

    dataloaders, class_weights = preprocess_and_split_data(data, parameters["batch_size"], task, alpha=parameters["alpha"])

    input_size = 10 # number of features :( -> this is hardcoded for now, try to get it from the dataset
    predictions = []
    labels = []
    torch.manual_seed(seed)
    model = TuneableModel(input_size, parameters["hidden_size"], parameters["dropout"], parameters["n_hidden"])
    model.to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=parameters["learning_rate"], 
                                  betas=(parameters["beta_1"], parameters["beta_2"]), weight_decay=1e-2)

    for i, dataloader in tqdm(enumerate(dataloaders)):
        pos_weight = class_weights[i] if weight is None else weight
        max_patience = 10 if i < 35 else 2
        last_loss = 1000000
        PATH = f"./models/model_{i}.pt"
        train_dataloader, validation_dataloader, test_dataloader = dataloader[0], dataloader[1], dataloader[2]
        for epoch in range(max_epochs):
            # training
            train_test(model, train_dataloader, optimizer, pos_weight, training="train")
            # validation at end of epoch
            with torch.no_grad():
                validation_loss = train_test(model, validation_dataloader, optimizer, pos_weight, training="validation")
    
            if validation_loss < last_loss:
                last_loss = validation_loss
                current_patience = 0
            else:
                if current_patience == 0:
                    torch.save({
                        'epoch': epoch,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'loss': last_loss,
                        }, PATH)
                current_patience += 1
            if current_patience == max_patience:
                break   

        # Testing once patience is reached
        torch.manual_seed(seed)
        model = TuneableModel(input_size, parameters["hidden_size"], parameters["dropout"], parameters["n_hidden"])
        model.to(device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=parameters["learning_rate"], betas=(0.99, 0.99), weight_decay=1e-4)
        checkpoint = torch.load(PATH)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        with torch.no_grad():
            prediction_list, label_list = train_test(model, test_dataloader, optimizer, pos_weight, training="test")
        predictions.extend(prediction_list)
        labels.extend(label_list)

    return accuracy_score(labels, predictions), f1_score(labels, predictions), confusion_matrix(labels, predictions)


 ### Parameters:
    Patience = 10
    @ model_params(
        dropout: 0.0
        layer size: 500
        lr: 0.001
        batch_size: 16
        n_layers: 6)
    @ optimizer_AdamW(
        beta_1: 0.999,
        beta_2: 0.999, 
        weight_decay: 1e-2)
    @ BCEWithLogitsLoss(
        pos_weight: vairable)

### Performance:

    Task 1: Known subjects, known items

        Train:Eval:Test - 80:10:10 - 10-fold
            @ alpha = 0.21
        Acc: 98.25%
        F1: 0.9643
        Confusion:
            [[256   4]
            [  2  81]]

        Blind LOOCV: Train:Eval:Test - n-2:1:1
            @ pos_weight = 0.65
        Acc: 100.0%
        F1: 1.0
        Confusion:
            [[258   0]
            [  0  85]]
            
    Task 2: Left-out subjects, known items
    
        Train:eval:test - 90:10:(1 subject) <- sadface
            @ alpha = 0.21
        Acc: 98.54%
        F1: 0.9701
        Confusion:
            [[257   4]
            [  1  81]]
        
        Train:eval:test - 90:10:(2 subjects)
            @ alpha = 0.21
        Acc: 97.08%
        F1: 0.9405
        Confusion:
            [[254   6]
            [  4  79]]

    Task 3: Left-out items, known subjects, train:eval:test - 90:10:item
            @ pos_weight = 0.8, dropout =0.01
        Acc: 97.08%
        F1: 0.9419
        Confusion:
            [[252   4]
            [  6  81]]

In [8]:
# set up the directory for saving models
directory = "./models/"
if not os.path.exists(directory):
    os.makedirs(directory)

    
parameters = {
    "dropout": 0.0,
    "hidden_size": 500,
    "learning_rate": 0.001,
    "batch_size": 16,
    "n_hidden": 6,
    "beta_1": 0.999,
    "beta_2": 0.999,
    "alpha": 0.21
}


tasks = ["Known subjects and items, 10-fold CV", "Known subjects and items, LOOCV", "2 Held-out subjects, known items", "1 Held-out items, known subjects"]

for task in [0, 1, 2, 3]:
    if task == 0:
        parameters["alpha"] = 0.21
        parameters["pos_weight"] = 0.7
    elif task == 1:
        parameters["alpha"] = 1
        parameters["pos_weight"] = 0.65
    elif task == 2:
        parameters["alpha"] = 0.19
        parameters["pos_weight"] = None
    elif task == 3:
        parameters["alpha"] = 1
        parameters["pos_weight"] = .8
        parameters["dropout"] = 0.01 # dumb.
        parameters["beta_2"] = 0.99

    print(f"Task: {tasks[task]}")
    print(f'Training on {device} with parameters: \n@ model_params(\n\tdropout={parameters["dropout"]},\
      \n\thidden_size={parameters["hidden_size"]},\n\tlearning_rate={parameters["learning_rate"]},\
      \n\tbatch_size={parameters["batch_size"]},\n\tn_hidden={parameters["n_hidden"]}),')
    print(f'@ optimizer_AdamW(\n\tbeta_1={parameters["beta_1"]},\n\tbeta_2={parameters["beta_2"]})')
    print(f'@ BCEWithLogitsLoss(\n\talpha={parameters["alpha"]})\n')
    accuracy, f1, confusion = evaluate(data, parameters, task)
    print(f"Acc: {round(accuracy*100,2)}%\nF1: {round(f1,4)}")
    print("Confusion:\n", confusion, "\n\n")

    # remove all files in the models directory after training and evaluation
    shutil.rmtree(directory, ignore_errors=True)
    break

Task: Known subjects and items, 10-fold CV
Training on cuda with parameters: 
@ model_params(
	dropout=0.0,      
	hidden_size=500,
	learning_rate=0.001,      
	batch_size=16,
	n_hidden=6),
@ optimizer_AdamW(
	beta_1=0.999,
	beta_2=0.999)
@ BCEWithLogitsLoss(
	alpha=0.21)



10it [00:52,  5.23s/it]

Acc: 96.5%
F1: 0.9259
Confusion:
 [[256  10]
 [  2  75]] 





