# FAST: Feedforward-Augmented Sentence Transformers

## Imports & Setup

In [1]:
# REQUIRED IMPORTS & SETUP

import os
import pickle
import numpy as np 
import pandas as pd 
import warnings
import itertools
from tqdm import tqdm

import torch
from sentence_transformers import SentenceTransformer
from datasets import Dataset, load_dataset
from sklearn.metrics import accuracy_score

from utils.feed_forward import FeedForward
from utils.cls import extract_cls_embeddings
# Turn off parallelism in the Hugging Face tokenizers library for this process.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Set Device ##########################################################
# `device_name` is always a plain string: it is turned into a `torch.device`
# just below and is also reused verbatim as the 'device' hyperparameter, so its
# type must not depend on which backend happens to be available.
device_name = "cpu"  # default device is CPU
if torch.cuda.is_available():
    device_name = "cuda:0"  # CUDA for NVIDIA GPU
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    # The getattr guard keeps this cell working on PyTorch builds that do not
    # ship the MPS backend module at all.
    device_name = "mps"  # Metal Performance Shaders for Apple M-series GPU
device = torch.device(device_name)
#######################################################################

## Load Models
Load the sentence-transformer model from the Hugging Face Hub and move it to the selected device (GPU if one is available, otherwise CPU).

In [2]:
# Instantiate the all-mpnet-base-v2 sentence transformer, then move it onto `device`.
mpnetv2 = SentenceTransformer("all-mpnet-base-v2")
mpnetv2 = mpnetv2.to(device)

## Load Data

In [3]:
# Load the "mrpc" configuration of the "glue" dataset; the bare expression on
# the last line displays the resulting splits.
data = load_dataset(path="glue", name="mrpc")
data

DatasetDict({
    train: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 3668
    })
    validation: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 408
    })
    test: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 1725
    })
})

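## Encode Sentence Embeddings

Generate sentence embeddings with the sentence-transformer model. For sentence-pair tasks, the embeddings of the two sentences are concatenated into a single feature vector per example.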

In [4]:
def encode_sentence_pairs(split, first_column="sentence1", second_column="sentence2"):
    """Embed both text columns of a dataset split and join them feature-wise.

    Args:
        split: One split of `data` (e.g. `data["train"]`), indexable by column name.
        first_column: Name of the column holding the first text of each pair.
        second_column: Name of the column holding the second text of each pair.

    Returns:
        The `mpnetv2` encodings of `first_column` followed by those of
        `second_column`, concatenated along axis 1 (one row per example).
    """
    first_embeddings = mpnetv2.encode(split[first_column])
    second_embeddings = mpnetv2.encode(split[second_column])
    return np.concatenate((first_embeddings, second_embeddings), axis=1)


# MRPC uses the default columns ("sentence1", "sentence2").
# For QNLI, call with first_column="question", second_column="sentence".
X_train = encode_sentence_pairs(data["train"])
X_val = encode_sentence_pairs(data["validation"])
X_test = encode_sentence_pairs(data["test"])

Y_train = np.array(data["train"]["label"])
Y_val = np.array(data["validation"]["label"])
Y_test = np.array(data["test"]["label"])

In [21]:
# Single-sentence variant of the encoding step. It only applies to datasets
# whose sole text column is "sentence". The MRPC splits loaded in this notebook
# expose "sentence1"/"sentence2" instead, so indexing "sentence" would raise,
# and on any pair dataset this cell would overwrite the concatenated pair
# embeddings computed in the previous cell.
# NOTE(review): the hyperparameter grid sets input_size to 768*2, which
# presumably matches the concatenated pair embeddings -- adjust it before
# training on single-sentence embeddings.
text_columns = [
    column for column in data["train"].column_names if column not in ("label", "idx")
]
if text_columns == ["sentence"]:
    X_train = mpnetv2.encode(data["train"]["sentence"])
    X_val = mpnetv2.encode(data["validation"]["sentence"])
    X_test = mpnetv2.encode(data["test"]["sentence"])
else:
    print(f"Skipping single-sentence encoding; dataset text columns are {text_columns}")

Y_train = np.array(data["train"]["label"])
Y_val = np.array(data["validation"]["label"])
Y_test = np.array(data["test"]["label"])

Save the encodings and labels to disk for reuse. Computing the embeddings takes a significant amount of time, but they do not change during training, so we can cache them.

In [41]:
# Persist the embeddings (via torch.save, as .pt) and the labels (via np.save,
# as .npy) under ./output so later sessions can skip the encoding step.
output_dir = "./output"
# Opening a file for writing does not create missing parent directories, so
# make sure the cache directory exists before the first write.
os.makedirs(output_dir, exist_ok=True)

for cache_name, embeddings in (("X_train", X_train), ("X_val", X_val), ("X_test", X_test)):
    with open(os.path.join(output_dir, f"{cache_name}.pt"), "wb") as cache_file:
        torch.save(embeddings, cache_file)

for cache_name, labels in (("Y_train", Y_train), ("Y_val", Y_val), ("Y_test", Y_test)):
    with open(os.path.join(output_dir, f"{cache_name}.npy"), "wb") as cache_file:
        np.save(cache_file, labels)

Load saved encodings and labels from disk, if previously saved.

In [42]:
def _load_cached_embeddings(path):
    """Read one embeddings file that the save cell wrote with `torch.save`.

    `weights_only=False` is passed explicitly: the cached objects are the
    NumPy arrays produced by the encoding step, not tensors or state dicts,
    and recent PyTorch releases default to `weights_only=True`, which refuses
    to unpickle them.

    SECURITY: this performs a full unpickle, which can execute arbitrary code.
    Only load cache files that this notebook wrote itself.
    """
    with open(path, 'rb') as cache_file:
        return torch.load(cache_file, weights_only=False)


def _load_cached_labels(path):
    """Read one label array that the save cell wrote with `np.save`."""
    with open(path, 'rb') as cache_file:
        return np.load(cache_file)


X_train = _load_cached_embeddings('./output/X_train.pt')
X_val = _load_cached_embeddings('./output/X_val.pt')
X_test = _load_cached_embeddings('./output/X_test.pt')
Y_train = _load_cached_labels('./output/Y_train.npy')
Y_val = _load_cached_labels('./output/Y_val.npy')
Y_test = _load_cached_labels('./output/Y_test.npy')

## Define Hyperparameters
Defining hyperparameter grid for grid search

In [6]:
# Candidate values for every FeedForward hyperparameter. Entries with a single
# value are held fixed; the others are swept by the grid search.
param_grid = dict(
    num_epochs=[100],
    batch_size=[32, 128, 512],
    learning_rate=[1e-2, 1e-3, 1e-4, 1e-5],
    category=['C'],
    norm=[False],
    input_size=[768*2],
    layer_size=[768*2],
    num_layers=[1, 2, 3],
    weight_decay=[1e-2, 1e-3, 1e-4, 1e-5],
    patience=[3],
    min_delta=[0],
    device=[device_name],
)

# Expand the grid into one dict per combination (Cartesian product of the
# value lists, with keys kept in the order they are declared above).
grid_keys = list(param_grid)
all_params = []
for combo in itertools.product(*(param_grid[key] for key in grid_keys)):
    all_params.append(dict(zip(grid_keys, combo)))
print(f"{len(all_params)} hyperparameter combos")

# Running record of the best configuration found so far.
best_params, highest_val_accuracy = None, 0

144 hyperparameter combos


## Training Loop for Hyperparameter Grid Search

In [7]:
verbose = True

# Reset the running best at the start of every search. Without this, re-running
# the cell would compare new results against the best score left over from a
# previous run (possibly on different data) and could report stale parameters.
best_params = None
highest_val_accuracy = 0

# Iterate over all combinations of hyperparameters
bar = tqdm(all_params)
for params in bar:
    if verbose: print("\nTraining with parameters:\n", params)
    # Initialize the model with current set of hyperparameters
    feed_forward = FeedForward(**params)

    # `fit` returns five values, unpacked here as the stopping epoch and the
    # validation loss / accuracy / F1 / MCC; `val_loss` is not used below.
    epoch, val_loss, val_accuracy, val_f1, val_mcc = feed_forward.fit(X_train, Y_train, X_val, Y_val)

    if verbose:
        print("Early stopped on epoch:", epoch)
        print("Validation accuracy:", val_accuracy)
        print("Validation f1-score:", val_f1)
        print("Validation MCC     :", val_mcc)

    # Save the parameters if they provide a better accuracy
    # (strict comparison: on ties the earliest combination is kept)
    if val_accuracy > highest_val_accuracy:
        highest_val_accuracy = val_accuracy
        best_params = params

    bar.set_description(f"Current best val acc: {highest_val_accuracy:.5f}")

# Print the best parameters
print("\nBest Parameters:", best_params)
print("Highest Validation Accuracy:", highest_val_accuracy)

  0%|          | 0/144 [00:00<?, ?it/s]


Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.62745:   1%|          | 1/144 [00:00<01:28,  1.62it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6274509803921569
Validation f1-score: 0.742372881355932
Validation MCC     : 0.07839013055430459

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.65686:   1%|▏         | 2/144 [00:01<01:24,  1.68it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6568627450980392
Validation f1-score: 0.7741935483870968
Validation MCC     : 0.09698162152983697

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.65686:   2%|▏         | 3/144 [00:01<01:22,  1.70it/s]

Early stopped on epoch: 4
Validation accuracy: 0.5808823529411765
Validation f1-score: 0.6666666666666667
Validation MCC     : 0.11708259582604517

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.65686:   3%|▎         | 4/144 [00:02<01:21,  1.72it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6299019607843137
Validation f1-score: 0.7401032702237521
Validation MCC     : 0.10199378355454142

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.65686:   3%|▎         | 5/144 [00:03<01:36,  1.45it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6372549019607843
Validation f1-score: 0.7439446366782007
Validation MCC     : 0.1255225187286956

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.65686:   4%|▍         | 6/144 [00:04<01:44,  1.33it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6421568627450981
Validation f1-score: 0.745644599303136
Validation MCC     : 0.14455409692771182

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.65686:   5%|▍         | 7/144 [00:04<01:48,  1.26it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6397058823529411
Validation f1-score: 0.7434554973821991
Validation MCC     : 0.14044903135475256

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.65686:   6%|▌         | 8/144 [00:05<01:51,  1.22it/s]

Early stopped on epoch: 4
Validation accuracy: 0.5882352941176471
Validation f1-score: 0.6744186046511628
Validation MCC     : 0.12748837618352024

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.65686:   6%|▋         | 9/144 [00:06<02:02,  1.10it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6446078431372549
Validation f1-score: 0.7495682210708118
Validation MCC     : 0.1416174696288792

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.65686:   7%|▋         | 10/144 [00:08<02:09,  1.04it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6372549019607843
Validation f1-score: 0.7557755775577557
Validation MCC     : 0.07122162638502226

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.65686:   8%|▊         | 11/144 [00:09<02:24,  1.09s/it]

Early stopped on epoch: 5
Validation accuracy: 0.6274509803921569
Validation f1-score: 0.7351916376306621
Validation MCC     : 0.10921672631866063

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.65686:   8%|▊         | 12/144 [00:10<02:23,  1.09s/it]

Early stopped on epoch: 4
Validation accuracy: 0.6544117647058824
Validation f1-score: 0.7564766839378239
Validation MCC     : 0.1655132213280449

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.68137:   9%|▉         | 13/144 [00:11<02:02,  1.07it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6813725490196079
Validation f1-score: 0.7962382445141066
Validation MCC     : 0.13794704207879543

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.68137:  10%|▉         | 14/144 [00:11<01:47,  1.21it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6299019607843137
Validation f1-score: 0.747068676716918
Validation MCC     : 0.0704791778121475

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.68137:  10%|█         | 15/144 [00:12<01:37,  1.32it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6568627450980392
Validation f1-score: 0.773462783171521
Validation MCC     : 0.10101700011146234

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.68137:  11%|█         | 16/144 [00:12<01:30,  1.42it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6421568627450981
Validation f1-score: 0.7533783783783785
Validation MCC     : 0.11178770292773703

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.68137:  12%|█▏        | 17/144 [00:13<01:36,  1.32it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6200980392156863
Validation f1-score: 0.7322970639032815
Validation MCC     : 0.08187809038096489

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.68137:  12%|█▎        | 18/144 [00:14<01:39,  1.26it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6691176470588235
Validation f1-score: 0.7692307692307693
Validation MCC     : 0.19172686248267184

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.68137:  13%|█▎        | 19/144 [00:15<01:42,  1.22it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6740196078431373
Validation f1-score: 0.768695652173913
Validation MCC     : 0.21955660196955432

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.68137:  14%|█▍        | 20/144 [00:16<01:43,  1.19it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6642156862745098
Validation f1-score: 0.7658119658119658
Validation MCC     : 0.1795537283567879

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.68137:  15%|█▍        | 21/144 [00:17<01:52,  1.09it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6568627450980392
Validation f1-score: 0.7674418604651164
Validation MCC     : 0.13141577911512117

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.68137:  15%|█▌        | 22/144 [00:18<02:08,  1.05s/it]

Early stopped on epoch: 5
Validation accuracy: 0.678921568627451
Validation f1-score: 0.7841845140032948
Validation MCC     : 0.18196517966946543

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.68137:  16%|█▌        | 23/144 [00:20<02:18,  1.15s/it]

Early stopped on epoch: 5
Validation accuracy: 0.6740196078431373
Validation f1-score: 0.7757166947723438
Validation MCC     : 0.19126745989892166

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.68627:  17%|█▋        | 24/144 [00:21<02:25,  1.21s/it]

Early stopped on epoch: 5
Validation accuracy: 0.6862745098039216
Validation f1-score: 0.785953177257525
Validation MCC     : 0.21519788951608285

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.68873:  17%|█▋        | 25/144 [00:22<02:11,  1.11s/it]

Early stopped on epoch: 6
Validation accuracy: 0.6887254901960784
Validation f1-score: 0.804915514592934
Validation MCC     : 0.14156677317614555

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.68873:  18%|█▊        | 26/144 [00:23<02:01,  1.03s/it]

Early stopped on epoch: 6
Validation accuracy: 0.6813725490196079
Validation f1-score: 0.7962382445141066
Validation MCC     : 0.13794704207879543

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.68873:  19%|█▉        | 27/144 [00:24<01:54,  1.02it/s]

Early stopped on epoch: 6
Validation accuracy: 0.6764705882352942
Validation f1-score: 0.7943925233644861
Validation MCC     : 0.11395263148967148

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.69118:  19%|█▉        | 28/144 [00:25<01:49,  1.05it/s]

Early stopped on epoch: 6
Validation accuracy: 0.6911764705882353
Validation f1-score: 0.8061538461538462
Validation MCC     : 0.15238086095517642

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.69363:  20%|██        | 29/144 [00:26<01:54,  1.01it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6936274509803921
Validation f1-score: 0.7987117552334944
Validation MCC     : 0.2022984990145712

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.69363:  21%|██        | 30/144 [00:27<01:56,  1.03s/it]

Early stopped on epoch: 5
Validation accuracy: 0.6691176470588235
Validation f1-score: 0.7783251231527094
Validation MCC     : 0.15198628524239083

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.69363:  22%|██▏       | 31/144 [00:28<01:57,  1.04s/it]

Early stopped on epoch: 5
Validation accuracy: 0.6617647058823529
Validation f1-score: 0.77
Validation MCC     : 0.1479010078489271

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.69363:  22%|██▏       | 32/144 [00:29<01:58,  1.06s/it]

Early stopped on epoch: 5
Validation accuracy: 0.6470588235294118
Validation f1-score: 0.76
Validation MCC     : 0.10929345109757568

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.69363:  23%|██▎       | 33/144 [00:30<02:07,  1.15s/it]

Early stopped on epoch: 5
Validation accuracy: 0.6323529411764706
Validation f1-score: 0.743150684931507
Validation MCC     : 0.10233381945368783

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.69363:  24%|██▎       | 34/144 [00:31<02:05,  1.14s/it]

Early stopped on epoch: 4
Validation accuracy: 0.6053921568627451
Validation f1-score: 0.6979362101313322
Validation MCC     : 0.13384511736496305

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.69608:  24%|██▍       | 35/144 [00:33<02:11,  1.20s/it]

Early stopped on epoch: 5
Validation accuracy: 0.696078431372549
Validation f1-score: 0.7940199335548173
Validation MCC     : 0.23525046878632802

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.69608:  25%|██▌       | 36/144 [00:34<02:06,  1.17s/it]

Early stopped on epoch: 4
Validation accuracy: 0.6544117647058824
Validation f1-score: 0.7714748784440842
Validation MCC     : 0.09602046482612785

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70588:  26%|██▌       | 37/144 [00:38<03:52,  2.17s/it]

Early stopped on epoch: 32
Validation accuracy: 0.7058823529411765
Validation f1-score: 0.8187311178247733
Validation MCC     : 0.19990543703818464

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70588:  26%|██▋       | 38/144 [00:43<04:56,  2.80s/it]

Early stopped on epoch: 30
Validation accuracy: 0.7058823529411765
Validation f1-score: 0.8192771084337349
Validation MCC     : 0.19947144516088813

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70588:  27%|██▋       | 39/144 [00:47<05:42,  3.26s/it]

Early stopped on epoch: 31
Validation accuracy: 0.7034313725490197
Validation f1-score: 0.8174962292609351
Validation MCC     : 0.18844260071584729

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  28%|██▊       | 40/144 [00:51<06:18,  3.64s/it]

Early stopped on epoch: 32
Validation accuracy: 0.7083333333333334
Validation f1-score: 0.8199697428139183
Validation MCC     : 0.21103650383300465

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  28%|██▊       | 41/144 [00:55<06:16,  3.65s/it]

Early stopped on epoch: 17
Validation accuracy: 0.6985294117647058
Validation f1-score: 0.8133535660091046
Validation MCC     : 0.16986083096841023

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  29%|██▉       | 42/144 [00:59<06:19,  3.72s/it]

Early stopped on epoch: 18
Validation accuracy: 0.7009803921568627
Validation f1-score: 0.8157099697885196
Validation MCC     : 0.1779271513169291

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  30%|██▉       | 43/144 [01:03<06:26,  3.83s/it]

Early stopped on epoch: 19
Validation accuracy: 0.6838235294117647
Validation f1-score: 0.8024502297090353
Validation MCC     : 0.11919772430258552

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  31%|███       | 44/144 [01:07<06:17,  3.78s/it]

Early stopped on epoch: 17
Validation accuracy: 0.7009803921568627
Validation f1-score: 0.8140243902439025
Validation MCC     : 0.18299114964184818

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  31%|███▏      | 45/144 [01:10<05:49,  3.53s/it]

Early stopped on epoch: 11
Validation accuracy: 0.6887254901960784
Validation f1-score: 0.804915514592934
Validation MCC     : 0.14156677317614555

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  32%|███▏      | 46/144 [01:13<05:37,  3.44s/it]

Early stopped on epoch: 12
Validation accuracy: 0.6838235294117647
Validation f1-score: 0.7962085308056872
Validation MCC     : 0.15440435854986953

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  33%|███▎      | 47/144 [01:16<05:19,  3.29s/it]

Early stopped on epoch: 11
Validation accuracy: 0.7009803921568627
Validation f1-score: 0.8117283950617284
Validation MCC     : 0.1912697207374706

Training with parameters:
 {'num_epochs': 100, 'batch_size': 32, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  33%|███▎      | 48/144 [01:19<05:21,  3.34s/it]

Early stopped on epoch: 13
Validation accuracy: 0.6764705882352942
Validation f1-score: 0.7884615384615384
Validation MCC     : 0.1470557229164403

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  34%|███▍      | 49/144 [01:20<03:49,  2.42s/it]

Early stopped on epoch: 4
Validation accuracy: 0.6666666666666666
Validation f1-score: 0.7655172413793103
Validation MCC     : 0.1937645797079966

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  35%|███▍      | 50/144 [01:20<02:45,  1.77s/it]

Early stopped on epoch: 4
Validation accuracy: 0.6397058823529411
Validation f1-score: 0.7360861759425494
Validation MCC     : 0.16852706790792543

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  35%|███▌      | 51/144 [01:20<02:01,  1.31s/it]

Early stopped on epoch: 4
Validation accuracy: 0.6299019607843137
Validation f1-score: 0.7327433628318584
Validation MCC     : 0.13155652112452218

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  36%|███▌      | 52/144 [01:20<01:31,  1.01it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6813725490196079
Validation f1-score: 0.7804054054054054
Validation MCC     : 0.21156212522911347

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  37%|███▋      | 53/144 [01:21<01:11,  1.27it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6176470588235294
Validation f1-score: 0.7337883959044369
Validation MCC     : 0.061874776784440796

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  38%|███▊      | 54/144 [01:21<00:58,  1.55it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6642156862745098
Validation f1-score: 0.7750410509031198
Validation MCC     : 0.13858152856731615

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  38%|███▊      | 55/144 [01:21<00:48,  1.83it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6274509803921569
Validation f1-score: 0.7285714285714284
Validation MCC     : 0.13485430586346753

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  39%|███▉      | 56/144 [01:22<00:43,  2.00it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6299019607843137
Validation f1-score: 0.7364746945898778
Validation MCC     : 0.11695448244424045

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  40%|███▉      | 57/144 [01:22<00:40,  2.16it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6838235294117647
Validation f1-score: 0.7922705314009661
Validation MCC     : 0.1736693670000221

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  40%|████      | 58/144 [01:22<00:37,  2.28it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6740196078431373
Validation f1-score: 0.7808896210873146
Validation MCC     : 0.1686887502515216

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  41%|████      | 59/144 [01:23<00:35,  2.38it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6102941176470589
Validation f1-score: 0.7103825136612022
Validation MCC     : 0.11551063872038249

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  42%|████▏     | 60/144 [01:23<00:36,  2.31it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6593137254901961
Validation f1-score: 0.7725040916530278
Validation MCC     : 0.12145301226897084

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  42%|████▏     | 61/144 [01:24<00:32,  2.54it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6691176470588235
Validation f1-score: 0.7797716150081566
Validation MCC     : 0.14504423274473194

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  43%|████▎     | 62/144 [01:24<00:28,  2.87it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6764705882352942
Validation f1-score: 0.7931034482758621
Validation MCC     : 0.1217320051533709

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  44%|████▍     | 63/144 [01:24<00:25,  3.12it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6715686274509803
Validation f1-score: 0.7919254658385093
Validation MCC     : 0.0927750029861375

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  44%|████▍     | 64/144 [01:24<00:25,  3.15it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6691176470588235
Validation f1-score: 0.7819063004846526
Validation MCC     : 0.13436708536098824

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  45%|████▌     | 65/144 [01:25<00:25,  3.15it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6764705882352942
Validation f1-score: 0.777027027027027
Validation MCC     : 0.1990903224414414

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  46%|████▌     | 66/144 [01:25<00:24,  3.16it/s]

Early stopped on epoch: 4
Validation accuracy: 0.625
Validation f1-score: 0.725314183123878
Validation MCC     : 0.13458874426210432

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  47%|████▋     | 67/144 [01:25<00:24,  3.15it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6372549019607843
Validation f1-score: 0.7299270072992701
Validation MCC     : 0.17852313037231374

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  47%|████▋     | 68/144 [01:26<00:25,  2.95it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6642156862745098
Validation f1-score: 0.7658119658119658
Validation MCC     : 0.1795537283567879

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  48%|████▊     | 69/144 [01:26<00:26,  2.85it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6593137254901961
Validation f1-score: 0.7582608695652174
Validation MCC     : 0.1841218339301642

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  49%|████▊     | 70/144 [01:27<00:26,  2.79it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6495098039215687
Validation f1-score: 0.7432675044883303
Validation MCC     : 0.19115261700513952

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  49%|████▉     | 71/144 [01:27<00:26,  2.74it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6225490196078431
Validation f1-score: 0.7317073170731707
Validation MCC     : 0.09743760278231023

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  50%|█████     | 72/144 [01:27<00:28,  2.55it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6544117647058824
Validation f1-score: 0.7521968365553603
Validation MCC     : 0.18242262904601042

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  51%|█████     | 73/144 [01:28<00:32,  2.21it/s]

Early stopped on epoch: 10
Validation accuracy: 0.696078431372549
Validation f1-score: 0.8109756097560975
Validation MCC     : 0.16309762737862568

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  51%|█████▏    | 74/144 [01:29<00:34,  2.02it/s]

Early stopped on epoch: 10
Validation accuracy: 0.6985294117647058
Validation f1-score: 0.8122137404580153
Validation MCC     : 0.1741495355681114

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  52%|█████▏    | 75/144 [01:29<00:36,  1.90it/s]

Early stopped on epoch: 10
Validation accuracy: 0.696078431372549
Validation f1-score: 0.8109756097560975
Validation MCC     : 0.16309762737862568

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  53%|█████▎    | 76/144 [01:30<00:37,  1.83it/s]

Early stopped on epoch: 10
Validation accuracy: 0.696078431372549
Validation f1-score: 0.8109756097560975
Validation MCC     : 0.16309762737862568

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  53%|█████▎    | 77/144 [01:30<00:37,  1.77it/s]

Early stopped on epoch: 8
Validation accuracy: 0.6544117647058824
Validation f1-score: 0.7722132471728594
Validation MCC     : 0.09193537419436038

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  54%|█████▍    | 78/144 [01:31<00:36,  1.80it/s]

Early stopped on epoch: 7
Validation accuracy: 0.6764705882352942
Validation f1-score: 0.7931034482758621
Validation MCC     : 0.1217320051533709

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  55%|█████▍    | 79/144 [01:31<00:34,  1.90it/s]

Early stopped on epoch: 6
Validation accuracy: 0.6936274509803921
Validation f1-score: 0.8108925869894099
Validation MCC     : 0.14629748611581225

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  56%|█████▌    | 80/144 [01:32<00:35,  1.81it/s]

Early stopped on epoch: 8
Validation accuracy: 0.6666666666666666
Validation f1-score: 0.7854889589905362
Validation MCC     : 0.09787682587959345

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  56%|█████▋    | 81/144 [01:32<00:34,  1.81it/s]

Early stopped on epoch: 6
Validation accuracy: 0.6225490196078431
Validation f1-score: 0.7363013698630136
Validation MCC     : 0.07806634003659707

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  57%|█████▋    | 82/144 [01:33<00:32,  1.91it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6715686274509803
Validation f1-score: 0.7831715210355986
Validation MCC     : 0.14320219565442716

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  58%|█████▊    | 83/144 [01:33<00:32,  1.88it/s]

Early stopped on epoch: 6
Validation accuracy: 0.6348039215686274
Validation f1-score: 0.7408695652173914
Validation MCC     : 0.1250638871978474

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  58%|█████▊    | 84/144 [01:34<00:32,  1.87it/s]

Early stopped on epoch: 6
Validation accuracy: 0.6691176470588235
Validation f1-score: 0.78330658105939
Validation MCC     : 0.127037326384777

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  59%|█████▉    | 85/144 [01:38<01:28,  1.51s/it]

Early stopped on epoch: 65
Validation accuracy: 0.7034313725490197
Validation f1-score: 0.8169440242057489
Validation MCC     : 0.1894568312606072

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  60%|█████▉    | 86/144 [01:41<02:04,  2.15s/it]

Early stopped on epoch: 63
Validation accuracy: 0.7083333333333334
Validation f1-score: 0.8205128205128206
Validation MCC     : 0.21084486793381516

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  60%|██████    | 87/144 [01:45<02:30,  2.64s/it]

Early stopped on epoch: 65
Validation accuracy: 0.7058823529411765
Validation f1-score: 0.8187311178247733
Validation MCC     : 0.19990543703818464

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  61%|██████    | 88/144 [01:49<02:44,  2.94s/it]

Early stopped on epoch: 63
Validation accuracy: 0.7058823529411765
Validation f1-score: 0.8192771084337349
Validation MCC     : 0.19947144516088813

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  62%|██████▏   | 89/144 [01:51<02:33,  2.80s/it]

Early stopped on epoch: 34
Validation accuracy: 0.7034313725490197
Validation f1-score: 0.8185907046476761
Validation MCC     : 0.1874105071769544

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  62%|██████▎   | 90/144 [01:54<02:30,  2.79s/it]

Early stopped on epoch: 38
Validation accuracy: 0.7034313725490197
Validation f1-score: 0.8158295281582952
Validation MCC     : 0.19215515813765438

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  63%|██████▎   | 91/144 [01:56<02:21,  2.67s/it]

Early stopped on epoch: 33
Validation accuracy: 0.7058823529411765
Validation f1-score: 0.8192771084337349
Validation MCC     : 0.19947144516088813

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  64%|██████▍   | 92/144 [01:59<02:13,  2.57s/it]

Early stopped on epoch: 32
Validation accuracy: 0.7058823529411765
Validation f1-score: 0.8214285714285715
Validation MCC     : 0.2032822906724814

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  65%|██████▍   | 93/144 [02:01<02:02,  2.40s/it]

Early stopped on epoch: 23
Validation accuracy: 0.7058823529411765
Validation f1-score: 0.8159509202453988
Validation MCC     : 0.20580069088252695

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  65%|██████▌   | 94/144 [02:03<01:53,  2.27s/it]

Early stopped on epoch: 23
Validation accuracy: 0.6985294117647058
Validation f1-score: 0.8139183055975796
Validation MCC     : 0.1678771586882097

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  66%|██████▌   | 95/144 [02:05<01:45,  2.16s/it]

Early stopped on epoch: 22
Validation accuracy: 0.7009803921568627
Validation f1-score: 0.8162650602409639
Validation MCC     : 0.1766170925561023

Training with parameters:
 {'num_epochs': 100, 'batch_size': 128, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  67%|██████▋   | 97/144 [02:07<01:10,  1.50s/it]

Early stopped on epoch: 21
Validation accuracy: 0.7058823529411765
Validation f1-score: 0.8198198198198199
Validation MCC     : 0.19944338327248648

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}
Early stopped on epoch: 5
Validation accuracy: 0.6470588235294118
Validation f1-score: 0.7591973244147158
Validation MCC     : 0.11308916566370686

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  69%|██████▉   | 99/144 [02:07<00:38,  1.17it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6519607843137255
Validation f1-score: 0.7641196013289036
Validation MCC     : 0.11843644290622031

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}
Early stopped on epoch: 5
Validation accuracy: 0.6691176470588235
Validation f1-score: 0.7753743760399335
Validation MCC     : 0.16553707884319585

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  69%|██████▉   | 100/144 [02:07<00:28,  1.55it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6813725490196079
Validation f1-score: 0.7949526813880127
Validation MCC     : 0.14491303902314365

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  70%|███████   | 101/144 [02:08<00:22,  1.90it/s]

Early stopped on epoch: 6
Validation accuracy: 0.6568627450980392
Validation f1-score: 0.75
Validation MCC     : 0.20316039747401346

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  71%|███████   | 102/144 [02:08<00:18,  2.30it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6568627450980392
Validation f1-score: 0.765886287625418
Validation MCC     : 0.13861634662680086

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  72%|███████▏  | 103/144 [02:08<00:15,  2.72it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6666666666666666
Validation f1-score: 0.7663230240549829
Validation MCC     : 0.19052183232255254

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  72%|███████▏  | 104/144 [02:08<00:12,  3.11it/s]

Early stopped on epoch: 5
Validation accuracy: 0.5906862745098039
Validation f1-score: 0.6913123844731979
Validation MCC     : 0.08618951344938493

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  73%|███████▎  | 105/144 [02:09<00:11,  3.33it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6348039215686274
Validation f1-score: 0.7381370826010545
Validation MCC     : 0.135919390901198

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  74%|███████▎  | 106/144 [02:09<00:10,  3.50it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6666666666666666
Validation f1-score: 0.7740863787375415
Validation MCC     : 0.15737445153292287

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  74%|███████▍  | 107/144 [02:09<00:10,  3.47it/s]

Early stopped on epoch: 6
Validation accuracy: 0.6470588235294118
Validation f1-score: 0.7575757575757576
Validation MCC     : 0.1205696661658673

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.01, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  75%|███████▌  | 108/144 [02:09<00:09,  3.60it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6911764705882353
Validation f1-score: 0.801261829652997
Validation MCC     : 0.17627051445217715

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  76%|███████▋  | 110/144 [02:10<00:08,  4.12it/s]

Early stopped on epoch: 7
Validation accuracy: 0.6862745098039216
Validation f1-score: 0.8006230529595016
Validation MCC     : 0.14760639453548108

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}
Early stopped on epoch: 5
Validation accuracy: 0.7009803921568627
Validation f1-score: 0.8134556574923548
Validation MCC     : 0.18493920683163825

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  78%|███████▊  | 112/144 [02:10<00:07,  4.21it/s]

Early stopped on epoch: 5
Validation accuracy: 0.696078431372549
Validation f1-score: 0.8103975535168195
Validation MCC     : 0.1656065918207983

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}
Early stopped on epoch: 5
Validation accuracy: 0.696078431372549
Validation f1-score: 0.8115501519756839
Validation MCC     : 0.16064059415455423

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  78%|███████▊  | 113/144 [02:10<00:06,  4.56it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6813725490196079
Validation f1-score: 0.7999999999999999
Validation MCC     : 0.11566908577376987

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  80%|███████▉  | 115/144 [02:11<00:06,  4.69it/s]

Early stopped on epoch: 6
Validation accuracy: 0.6813725490196079
Validation f1-score: 0.7840531561461793
Validation MCC     : 0.19631246015962545

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}
Early stopped on epoch: 4
Validation accuracy: 0.6985294117647058
Validation f1-score: 0.8161434977578476
Validation MCC     : 0.16193585913035055

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  81%|████████  | 116/144 [02:11<00:05,  4.94it/s]

Early stopped on epoch: 4
Validation accuracy: 0.696078431372549
Validation f1-score: 0.812121212121212
Validation MCC     : 0.15825082044051875

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  81%|████████▏ | 117/144 [02:11<00:05,  4.90it/s]

Early stopped on epoch: 4
Validation accuracy: 0.6985294117647058
Validation f1-score: 0.8133535660091046
Validation MCC     : 0.16986083096841023

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  82%|████████▏ | 118/144 [02:11<00:05,  4.58it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6666666666666666
Validation f1-score: 0.7740863787375415
Validation MCC     : 0.15737445153292287

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  83%|████████▎ | 119/144 [02:12<00:06,  3.75it/s]

Early stopped on epoch: 5
Validation accuracy: 0.6838235294117647
Validation f1-score: 0.7860696517412935
Validation MCC     : 0.20129428483326567

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  83%|████████▎ | 120/144 [02:12<00:06,  3.64it/s]

Early stopped on epoch: 6
Validation accuracy: 0.6936274509803921
Validation f1-score: 0.7987117552334944
Validation MCC     : 0.2022984990145712

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  84%|████████▍ | 121/144 [02:13<00:09,  2.40it/s]

Early stopped on epoch: 22
Validation accuracy: 0.7034313725490197
Validation f1-score: 0.8163884673748103
Validation MCC     : 0.19071017484178912

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.70833:  85%|████████▍ | 122/144 [02:14<00:11,  1.95it/s]

Early stopped on epoch: 21
Validation accuracy: 0.7034313725490197
Validation f1-score: 0.8169440242057489
Validation MCC     : 0.1894568312606072

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  85%|████████▌ | 123/144 [02:14<00:11,  1.83it/s]

Early stopped on epoch: 18
Validation accuracy: 0.7107843137254902
Validation f1-score: 0.8206686930091186
Validation MCC     : 0.222181779520806

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  86%|████████▌ | 124/144 [02:15<00:12,  1.58it/s]

Early stopped on epoch: 21
Validation accuracy: 0.7058823529411765
Validation f1-score: 0.8187311178247733
Validation MCC     : 0.19990543703818464

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  87%|████████▋ | 125/144 [02:16<00:12,  1.55it/s]

Early stopped on epoch: 17
Validation accuracy: 0.6838235294117647
Validation f1-score: 0.8030534351145038
Validation MCC     : 0.11533081825702743

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  88%|████████▊ | 126/144 [02:16<00:11,  1.59it/s]

Early stopped on epoch: 11
Validation accuracy: 0.6985294117647058
Validation f1-score: 0.8183161004431315
Validation MCC     : 0.16493341660730046

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  88%|████████▊ | 127/144 [02:17<00:10,  1.64it/s]

Early stopped on epoch: 14
Validation accuracy: 0.7058823529411765
Validation f1-score: 0.8203592814371258
Validation MCC     : 0.1999411754004219

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  89%|████████▉ | 128/144 [02:17<00:09,  1.71it/s]

Early stopped on epoch: 13
Validation accuracy: 0.7034313725490197
Validation f1-score: 0.8207407407407408
Validation MCC     : 0.1936103315055931

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  90%|████████▉ | 129/144 [02:18<00:08,  1.81it/s]

Early stopped on epoch: 10
Validation accuracy: 0.6985294117647058
Validation f1-score: 0.8183161004431315
Validation MCC     : 0.16493341660730046

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  90%|█████████ | 130/144 [02:18<00:07,  1.90it/s]

Early stopped on epoch: 10
Validation accuracy: 0.6985294117647058
Validation f1-score: 0.8183161004431315
Validation MCC     : 0.16493341660730046

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  91%|█████████ | 131/144 [02:19<00:06,  1.96it/s]

Early stopped on epoch: 10
Validation accuracy: 0.7009803921568627
Validation f1-score: 0.8200589970501474
Validation MCC     : 0.18498419046491685

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  92%|█████████▏| 132/144 [02:20<00:06,  1.78it/s]

Early stopped on epoch: 12
Validation accuracy: 0.7034313725490197
Validation f1-score: 0.8191330343796712
Validation MCC     : 0.18760401395753898

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  92%|█████████▏| 133/144 [02:23<00:15,  1.45s/it]

Early stopped on epoch: 100
Validation accuracy: 0.7034313725490197
Validation f1-score: 0.8202080237741455
Validation MCC     : 0.19034860044670815

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  93%|█████████▎| 134/144 [02:27<00:20,  2.09s/it]

Early stopped on epoch: 100
Validation accuracy: 0.7009803921568627
Validation f1-score: 0.8189910979228486
Validation MCC     : 0.17676105706034642

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  94%|█████████▍| 135/144 [02:30<00:22,  2.55s/it]

Early stopped on epoch: 100
Validation accuracy: 0.7034313725490197
Validation f1-score: 0.8202080237741455
Validation MCC     : 0.19034860044670815

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  94%|█████████▍| 136/144 [02:34<00:22,  2.87s/it]

Early stopped on epoch: 100
Validation accuracy: 0.7034313725490197
Validation f1-score: 0.8202080237741455
Validation MCC     : 0.19034860044670815

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  95%|█████████▌| 137/144 [02:37<00:21,  3.08s/it]

Early stopped on epoch: 86
Validation accuracy: 0.7034313725490197
Validation f1-score: 0.8185907046476761
Validation MCC     : 0.1874105071769544

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  96%|█████████▌| 138/144 [02:41<00:19,  3.17s/it]

Early stopped on epoch: 85
Validation accuracy: 0.7058823529411765
Validation f1-score: 0.8214285714285715
Validation MCC     : 0.2032822906724814

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  97%|█████████▋| 139/144 [02:44<00:16,  3.23s/it]

Early stopped on epoch: 81
Validation accuracy: 0.7058823529411765
Validation f1-score: 0.8214285714285715
Validation MCC     : 0.2032822906724814

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 2, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  97%|█████████▋| 140/144 [02:48<00:13,  3.28s/it]

Early stopped on epoch: 84
Validation accuracy: 0.7083333333333334
Validation f1-score: 0.8226527570789866
Validation MCC     : 0.2156477983472322

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.01, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  98%|█████████▊| 141/144 [02:50<00:09,  3.00s/it]

Early stopped on epoch: 51
Validation accuracy: 0.6985294117647058
Validation f1-score: 0.8183161004431315
Validation MCC     : 0.16493341660730046

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  99%|█████████▊| 142/144 [02:52<00:05,  2.84s/it]

Early stopped on epoch: 53
Validation accuracy: 0.7009803921568627
Validation f1-score: 0.8195266272189349
Validation MCC     : 0.179710163362961

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078:  99%|█████████▉| 143/144 [02:55<00:02,  2.78s/it]

Early stopped on epoch: 57
Validation accuracy: 0.7058823529411765
Validation f1-score: 0.8219584569732937
Validation MCC     : 0.20677292192951885

Training with parameters:
 {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 1e-05, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 3, 'weight_decay': 1e-05, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}


Current best val acc: 0.71078: 100%|██████████| 144/144 [02:57<00:00,  1.24s/it]

Early stopped on epoch: 50
Validation accuracy: 0.6985294117647058
Validation f1-score: 0.8183161004431315
Validation MCC     : 0.16493341660730046

Best Parameters: {'num_epochs': 100, 'batch_size': 512, 'learning_rate': 0.0001, 'category': 'C', 'norm': False, 'input_size': 1536, 'layer_size': 1536, 'num_layers': 1, 'weight_decay': 0.0001, 'patience': 3, 'min_delta': 0, 'device': 'cuda:0'}
Highest Validation Accuracy: 0.7107843137254902





# Evaluate Best Hyperparameters

In [48]:
# Build a new FeedForward from the best grid-search parameters, fit it on the
# concatenation of the training and validation arrays, and predict on X_test.
best_feed_forward = FeedForward(**best_params)

X = np.concatenate((X_train, X_val), axis=0)
Y = np.concatenate((Y_train, Y_val), axis=0)

best_feed_forward.fit(X, Y)

# argmax over axis 1 yields one column index per row of predict_proba's output
# (presumably the class index — confirm against FeedForward.predict_proba).
preds = np.argmax(best_feed_forward.predict_proba(X_test), axis=1)
print(preds.shape)

# The 'index' column below is just 0..len(preds)-1, so it only lines up with
# the test examples if there is exactly one prediction per row of X_test.
assert len(preds) == len(X_test), "number of predictions does not match number of rows in X_test"

# NOTE(review): the destination is hard-coded to QNLI, while the data-loading
# cell at the top of the notebook reads GLUE "mrpc" (test split: 1725 rows).
# Confirm which task X_test belongs to before submitting this file.
submission_path = 'QNLI.tsv'

df = pd.DataFrame({
    'index': range(len(preds)),
    'prediction': preds
})

# Write the DataFrame to a tab-separated file: the header row
# ('index', 'prediction') is kept, the DataFrame's own row index is dropped.
df.to_csv(submission_path, sep='\t', index=False, header=True)

(5463,)
