<a href="https://colab.research.google.com/github/ashwinkd/ADReSSo2021/blob/master/FineTuneLOOCV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/d8/b2/57495b5309f09fa501866e225c84532d1fd89536ea62406b2181933fb418/transformers-4.5.1-py3-none-any.whl (2.1MB)
[K     |████████████████████████████████| 2.1MB 9.2MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)
[K     |████████████████████████████████| 901kB 32.5MB/s 
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/ae/04/5b870f26a858552025a62f1649c20d29d2672c02ff3c3fb4c688ca46467a/tokenizers-0.10.2-cp37-cp37m-manylinux2010_x86_64.whl (3.3MB)
[K     |████████████████████████████████| 3.3MB 50.9MB/s 
Installing collected packages: sacremoses, tokenizers, transformers
Successfully installed sacremoses-0.0.45 tokenizers-0.10.2 transformers-4.5.1


In [None]:
# models = ['Bert', 'Roberta', 'DistilBERT']
# _ = [print(f"[{i + 1}] {m}") for i, m in enumerate(models)]
# model_num = int(input("Choose model: ")) - 1
# if model_num not in range(len(models)):
#     raise Exception("Incorrect model chosen.")

# model_name = models[model_num]
#####################################################################
import random
import re

import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.data import TensorDataset, random_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from google.colab import drive
import datetime
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup
from sklearn.model_selection import LeaveOneOut
import time
import itertools
import matplotlib.pyplot as plt
% matplotlib inline
from sklearn.model_selection import KFold
import pickle
from sklearn.metrics import confusion_matrix
import seaborn as sns
# Mount Google Drive so the dataset (and later the pickled results) can be
# reached under /content/drive.
drive.mount('/content/drive')

#################### Global Variables ####################

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Pause marker number (the digit in "[P<n>]") -> vocabulary token that stands
# in for that pause when a transcript is encoded.
UNUSED_TOKEN = dict(zip((1, 2, 3), ('[unused0]', '[unused1]', '[unused2]')))

# Diagnosis string -> integer class label.
to_categorical = dict(cn=0, ad=1)

#################### Read Data ####################

df = pd.read_pickle('/content/drive/MyDrive/Research/ADReSSo/data.pickle')
# Replace the textual diagnosis with its integer label; an unexpected
# diagnosis string raises KeyError.
df['dx'] = df['dx'].apply(to_categorical.__getitem__)

#################### Train and Test Sets ####################

# train_text, temp_text, train_labels, temp_labels = train_test_split(df['transcript'], df['dx'],
#                                                                     random_state=seed_val,
#                                                                     test_size=0.3,
#                                                                     stratify=df['dx'])


#################### BERT features ####################


def flat_accuracy(preds, labels):
    """Return the fraction of rows whose arg-max class equals the label.

    `preds` is a 2-D array of per-class scores (one row per sample) and
    `labels` is an array of integer class ids; both are flattened before
    they are compared.
    """
    predicted_classes = np.asarray(preds).argmax(axis=1).ravel()
    expected_classes = labels.reshape(-1)
    hits = np.equal(predicted_classes, expected_classes)
    return hits.sum() / len(expected_classes)


def format_time(elapsed):
    """Render a duration given in seconds as an ``h:mm:ss`` string.

    The value is rounded to the nearest whole second first; the text is
    whatever ``str(datetime.timedelta)`` produces for that many seconds.
    """
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return f"{duration}"

def get_bert_tokenizer():
    """Load and return the pretrained `bert-base-uncased` tokenizer."""
    # Imported lazily so only the tokenizer class that is actually used is loaded.
    from transformers import BertTokenizer

    return BertTokenizer.from_pretrained('bert-base-uncased')


def get_roberta_tokenizer():
    """Load and return the pretrained `roberta-base` tokenizer."""
    # Imported lazily so only the tokenizer class that is actually used is loaded.
    from transformers import RobertaTokenizer

    return RobertaTokenizer.from_pretrained('roberta-base')

def get_distilbert_tokenizer():
    """Load and return the pretrained `distilbert-base-uncased` tokenizer."""
    # Imported lazily so only the tokenizer class that is actually used is loaded.
    from transformers import DistilBertTokenizer

    return DistilBertTokenizer.from_pretrained('distilbert-base-uncased')




def encode_sentence(transcript, tokenizer, pause_tokens=None):
    """Convert one transcript into a list of vocabulary ids.

    The transcript may contain pause markers of the form ``[P<digit>]``.
    The text between markers is tokenised with `tokenizer`, and every marker
    is replaced by the placeholder token registered for its digit. The result
    is wrapped in ``[CLS]`` ... ``[SEP]`` and mapped to ids.

    Args:
        transcript: Transcript text, optionally containing ``[P<digit>]`` markers.
        tokenizer: Object exposing ``tokenize(str)`` and
            ``convert_tokens_to_ids(list)``.
        pause_tokens: Optional mapping from pause number (int) to placeholder
            token. Defaults to the module-level ``UNUSED_TOKEN``.

    Returns:
        Whatever ``tokenizer.convert_tokens_to_ids`` returns for the token list.

    Raises:
        KeyError: If a marker's digit has no entry in `pause_tokens`.
    """
    if pause_tokens is None:
        pause_tokens = UNUSED_TOKEN

    # With a capture group in the pattern, re.split returns the pieces of
    # speech at even positions and the captured pause digits at odd positions.
    # Reading the digit straight from the split avoids rebuilding a regex from
    # raw (unescaped) speech text and avoids picking up digits that belong to
    # the speech itself.
    pieces = re.split(r'\[P(\d)\]', transcript)

    # NOTE(review): '[CLS]' / '[SEP]' and the '[unused*]' placeholders are
    # BERT-style token strings; confirm they exist in the vocabulary of every
    # tokenizer this is used with.
    tokens = ['[CLS]']
    for position, piece in enumerate(pieces):
        if position % 2:
            tokens.append(pause_tokens[int(piece)])
        else:
            tokens.extend(tokenizer.tokenize(piece))
    tokens.append('[SEP]')
    return tokenizer.convert_tokens_to_ids(tokens)


def add_padding(input_word_ids, max_seq_len, pad_id=0, sep_id=102):
    """Pad or truncate id sequences to `max_seq_len` and build attention masks.

    Sequences shorter than `max_seq_len` are right-padded with `pad_id`.
    Longer sequences are cut to ``max_seq_len - 1`` ids and terminated with
    `sep_id`, so a truncated sequence still ends with a separator.

    The input is not modified; new lists are returned.

    Args:
        input_word_ids: List of id lists, one per sequence.
        max_seq_len: Target length of every returned sequence.
        pad_id: Id used for padding positions (default 0).
        sep_id: Id appended after truncation (default 102, the value the
            original code hard-coded).

    Returns:
        Dict with ``'input_ids'`` and ``'attention_mask'``, each a list of
        tensors of shape ``(1, max_seq_len)``. The mask is 1 on real tokens
        and 0 on padding.
    """
    padded_ids = []
    attention_masks = []
    for sequence in input_word_ids:
        # Work on a copy so the caller's lists are left untouched.
        token_ids = list(sequence)
        if len(token_ids) > max_seq_len:
            token_ids = token_ids[:max_seq_len - 1] + [sep_id]
        real_len = len(token_ids)
        pad_len = max_seq_len - real_len
        padded_ids.append(torch.tensor([token_ids + [pad_id] * pad_len]))
        attention_masks.append(torch.tensor([[1] * real_len + [0] * pad_len]))
    return {'input_ids': padded_ids, 'attention_mask': attention_masks}


def bert_encode(transcripts, tokenizer, max_seq_len):
    """Encode every transcript to ids, then pad/truncate to `max_seq_len`.

    Returns the dict produced by `add_padding` (keys ``'input_ids'`` and
    ``'attention_mask'``).
    """
    encoded = []
    for transcript in transcripts:
        encoded.append(encode_sentence(transcript, tokenizer))
    return add_padding(encoded, max_seq_len)
############################# Models ##################################
def get_bert_model():
    """Load `bert-base-uncased` with a 2-label classification head.

    The model is moved to the module-level `device`, so it also works on
    hosts without CUDA (the previous unconditional ``.cuda()`` call did not).
    """
    from transformers import BertForSequenceClassification
    model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",  # Pretrained checkpoint to start from.
        num_labels=2,  # Two output labels: binary classification.
        output_attentions=False,  # Do not return attention weights.
        output_hidden_states=False,  # Do not return all hidden states.
    )
    model.to(device)
    return model


def get_roberta_model():
    """Load `roberta-base` with a 2-label classification head.

    The model is moved to the module-level `device`, so it also works on
    hosts without CUDA (the previous unconditional ``.cuda()`` call did not).
    """
    from transformers import RobertaForSequenceClassification
    model = RobertaForSequenceClassification.from_pretrained(
        "roberta-base",  # Pretrained checkpoint to start from.
        num_labels=2,  # Two output labels: binary classification.
        output_attentions=False,  # Do not return attention weights.
        output_hidden_states=False,  # Do not return all hidden states.
    )
    model.to(device)
    return model

def get_distilbert_model():
    """Load `distilbert-base-uncased` with a 2-label classification head.

    The model is moved to the module-level `device`, so it also works on
    hosts without CUDA (the previous unconditional ``.cuda()`` call did not).
    """
    from transformers import DistilBertForSequenceClassification
    model = DistilBertForSequenceClassification.from_pretrained(
        "distilbert-base-uncased",  # Pretrained checkpoint to start from.
        num_labels=2,  # Two output labels: binary classification.
        output_attentions=False,  # Do not return attention weights.
        output_hidden_states=False,  # Do not return all hidden states.
    )
    model.to(device)
    return model


Mounted at /content/drive


In [None]:
# Exhaustive grid search over model / optimisation hyper-parameters, scoring
# every configuration with leave-one-out cross-validation and remembering the
# configuration with the best mean F1.
random_states = [0, 42, 2018, 56, 271, 27]
models = ['Bert', 'Roberta', 'DistilBERT']
batch_sizes = [8, 16, 32, 64]
learning_rates = [5e-5, 3e-5, 2e-5]
max_seq_lens = [64, 128, 256, 512]
epochses = [1, 2, 3, 4]
loo = LeaveOneOut()
params = [models, learning_rates, max_seq_lens, epochses, batch_sizes, batch_sizes, random_states]
# Names of the grid axes, in exactly the same order as `params`; used to label
# the winning configuration when it is saved.
param_names = ["model_name", "lr", "max_seq_len", "epochs", "batch_size_tr", "batch_size_ts", "seed_val"]

X = df.transcript.to_numpy()
y = df.dx.to_numpy()
best_params = None
best_f1 = 0
# Stays None until some configuration beats `best_f1`, so the final print
# cannot fail with NameError.
bparams = None

for param_list in itertools.product(*params):
    try:
        model_name, lr, max_seq_len, epochs, batch_size_tr, batch_size_ts, seed_val = param_list
        pred = []
        target = []
        print(f"SEED: {seed_val}\nMODEL: {model_name}\nBATCH SIZE TR: {batch_size_tr}\nBATCH SIZE TS: {batch_size_ts}\nLEARNING RATE: {lr}\nEMBEDDING LEN: {max_seq_len}\nEPOCH: {epochs}")

        # Seed every RNG before any model is built so that the randomly
        # initialised classification head is reproducible as well.
        random.seed(seed_val)
        np.random.seed(seed_val)
        torch.manual_seed(seed_val)
        torch.cuda.manual_seed_all(seed_val)

        # The factories are module-level functions named get_<model>_tokenizer
        # and get_<model>_model.
        tokenizer = globals()[f"get_{model_name.lower()}_tokenizer"]()
        build_model = globals()[f"get_{model_name.lower()}_model"]

        # Tokenise the whole corpus once per configuration; each fold then
        # just selects rows instead of re-encoding every transcript.
        encoded = bert_encode(X.tolist(), tokenizer, max_seq_len)
        all_input_ids = torch.cat(encoded['input_ids'], dim=0)
        all_attention_masks = torch.cat(encoded['attention_mask'], dim=0)
        all_labels = torch.tensor(y.tolist())

        total_val_acc = []
        total_val_f1 = []
        model = optimizer = scheduler = None
        for train_index, test_index in loo.split(X):
            train_rows = torch.as_tensor(train_index)
            test_rows = torch.as_tensor(test_index)

            input_ids = all_input_ids[train_rows]
            attention_masks = all_attention_masks[train_rows]
            labels = all_labels[train_rows]

            input_ids_test = all_input_ids[test_rows]
            attention_masks_test = all_attention_masks[test_rows]
            labels_test = all_labels[test_rows]

            #################### Torch Dataset ####################
            dataset = TensorDataset(input_ids, attention_masks, labels)
            train_size = int(0.9 * len(dataset))
            val_size = len(dataset) - train_size
            train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

            train_dataloader = DataLoader(
                train_dataset,
                sampler=RandomSampler(train_dataset),  # Shuffle training batches.
                batch_size=batch_size_tr
            )
            validation_dataloader = DataLoader(
                val_dataset,
                sampler=SequentialSampler(val_dataset),  # Fixed order for evaluation.
                batch_size=batch_size_tr
            )

            #################### Model / optimiser ####################
            # Every fold must start from the pretrained weights: reusing the
            # model from the previous fold would mean the held-out sample had
            # already been trained on. Release the old objects first to keep
            # peak memory down.
            model = optimizer = scheduler = None
            model = build_model()

            total_steps = len(train_dataloader) * epochs
            optimizer = AdamW(model.parameters(),
                              lr=lr,
                              eps=1e-8
                              )
            scheduler = get_linear_schedule_with_warmup(optimizer,
                                                        num_warmup_steps=0,
                                                        num_training_steps=total_steps)

            training_stats = []

            for epoch_i in range(epochs):
                # ========================================
                #               Training
                # ========================================
                t0 = time.time()
                total_train_loss = 0

                # Switch dropout etc. to training behaviour.
                model.train()

                for batch in train_dataloader:
                    # `batch` is (input ids, attention masks, labels).
                    b_input_ids = batch[0].to(device)
                    b_input_mask = batch[1].to(device)
                    b_labels = batch[2].to(device)

                    # Gradients accumulate by default; clear them first.
                    model.zero_grad()

                    # token_type_ids is deliberately not passed: it was only
                    # ever None, and omitting it works for all three model
                    # families without a try/except fallback.
                    result = model(b_input_ids,
                                   attention_mask=b_input_mask,
                                   labels=b_labels,
                                   return_dict=True)

                    loss = result.loss
                    total_train_loss += loss.item()

                    loss.backward()

                    # Clip the gradient norm to 1.0 to guard against exploding gradients.
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

                    optimizer.step()
                    scheduler.step()

                avg_train_loss = total_train_loss / len(train_dataloader)
                training_time = format_time(time.time() - t0)

                # ========================================
                #               Validation
                # ========================================
                t0 = time.time()

                model.eval()

                total_eval_accuracy = 0
                total_eval_loss = 0

                for batch in validation_dataloader:
                    b_input_ids = batch[0].to(device)
                    b_input_mask = batch[1].to(device)
                    b_labels = batch[2].to(device)

                    # No graph is needed for evaluation.
                    with torch.no_grad():
                        result = model(b_input_ids,
                                       attention_mask=b_input_mask,
                                       labels=b_labels,
                                       return_dict=True)

                    total_eval_loss += result.loss.item()

                    logits = result.logits.detach().cpu().numpy()
                    label_ids = b_labels.to('cpu').numpy()
                    total_eval_accuracy += flat_accuracy(logits, label_ids)

                avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
                print("  Accuracy: {0:.2f}".format(avg_val_accuracy))

                avg_val_loss = total_eval_loss / len(validation_dataloader)
                validation_time = format_time(time.time() - t0)

                print("  Validation Loss: {0:.2f}".format(avg_val_loss))

                training_stats.append(
                    {
                        'epoch': epoch_i + 1,
                        'Training Loss': avg_train_loss,
                        'Valid. Loss': avg_val_loss,
                        'Valid. Accur.': avg_val_accuracy,
                        'Training Time': training_time,
                        'Validation Time': validation_time
                    }
                )

            # ========================================
            #      Prediction on the held-out sample
            # ========================================
            prediction_data = TensorDataset(input_ids_test, attention_masks_test, labels_test)
            prediction_sampler = SequentialSampler(prediction_data)
            prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size_ts)

            model.eval()

            predictions, true_labels = [], []

            for batch in prediction_dataloader:
                b_input_ids, b_input_mask, b_labels = tuple(t.to(device) for t in batch)

                with torch.no_grad():
                    result = model(b_input_ids,
                                   attention_mask=b_input_mask,
                                   return_dict=True)

                predictions.append(result.logits.detach().cpu().numpy())
                true_labels.append(b_labels.to('cpu').numpy())

            f1_set = []
            acc_set = []

            for batch_logits, batch_labels in zip(predictions, true_labels):
                # Logits have one column per class; the arg-max is the predicted label.
                pred_labels_i = np.argmax(batch_logits, axis=1).flatten()

                acc_set.append(accuracy_score(batch_labels, pred_labels_i))
                f1_set.append(f1_score(batch_labels, pred_labels_i))

                # Pool predictions across folds for the overall metrics below.
                pred.extend(pred_labels_i.tolist())
                target.extend(batch_labels.tolist())

            print(f"Acc: {np.mean(acc_set)}\tF1: {np.mean(f1_set)}")
            total_val_acc += acc_set
            total_val_f1 += f1_set

        mean_f1 = np.mean(total_val_f1)
        print("\n\n")
        print("AVG Acc: {}".format(np.mean(total_val_acc)))
        print("AVG F1: {}".format(mean_f1))
        print("Confusion Matrix: \n")
        print(confusion_matrix(target, pred))
        print("Total Acc: ", accuracy_score(target, pred))
        print("Total F1: ", f1_score(target, pred))
        # NOTE(review): with leave-one-out every fold holds a single sample,
        # so the per-fold F1 averaged here is degenerate; the pooled
        # "Total F1" printed above is probably the better selection
        # criterion -- confirm before relying on the chosen parameters.
        if mean_f1 > best_f1:
            print("#" * 40)
            print("Best F1: ", param_list)
            print(mean_f1)
            best_f1 = mean_f1
            best_params = param_list
            bparams = dict(zip(param_names, best_params))
            with open("/content/drive/MyDrive/Research/ADReSSo/best_params.pickle", "wb") as fptr:
                pickle.dump(bparams, fptr)
    except Exception as e:
        # Best effort: a failing configuration (e.g. out of memory) must not
        # abort the whole search, but say which one failed and why.
        print(f"Skipping {param_list}: {e!r}")
print(bparams)

SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 5e-05
EMBEDDING LEN: 64
EPOCH: 1


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=28.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=570.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.69
Acc: 0.78125	F1: 0.5952380952380952
  Accuracy: 0.70
  Validation Loss: 0.64
Acc: 0.59375	F1: 0.4807692307692307
  Accuracy: 0.80
  Validation Loss: 0.61
Acc: 0.78125	F1: 0.6565934065934066
  Accuracy: 0.78
  Validation Loss: 0.55
Acc: 0.75	F1: 0.6089743589743589
  Accuracy: 0.78
  Validation Loss: 0.59
Acc: 0.6785714285714286	F1: 0.358974358974359



AVG Acc: 0.7169642857142857
AVG F1: 0.5401098901098901
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 5e-05
EMBEDDING LEN: 64
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.66
  Accuracy: 0.51
  Validation Loss: 0.66
Acc: 0.59375	F1: 0.6006493506493507
  Accuracy: 0.90
  Validation Loss: 0.49
  Accuracy: 0.84
  Validation Loss: 0.39
Acc: 0.6875	F1: 0.32539682539682535


  average, "true nor predicted", 'F-score is', len(true_sum)


  Accuracy: 0.90
  Validation Loss: 0.45
  Accuracy: 0.90
  Validation Loss: 0.36
Acc: 0.9375	F1: 0.7166666666666667
  Accuracy: 0.84
  Validation Loss: 0.23
  Accuracy: 0.84
  Validation Loss: 0.18
Acc: 0.90625	F1: 0.7333333333333334
  Accuracy: 0.90
  Validation Loss: 0.33
  Accuracy: 1.00
  Validation Loss: 0.01
Acc: 0.9375	F1: 0.7166666666666667



AVG Acc: 0.8125
AVG F1: 0.6185425685425686
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 5e-05
EMBEDDING LEN: 64
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.70
  Accuracy: 0.61
  Validation Loss: 0.65
  Accuracy: 0.88
  Validation Loss: 0.51
Acc: 0.625	F1: 0.3333333333333333
  Accuracy: 0.94
  Validation Loss: 0.47
  Accuracy: 0.94
  Validation Loss: 0.30
  Accuracy: 0.84
  Validation Loss: 0.37
Acc: 0.71875	F1: 0.4606060606060606
  Accuracy: 0.71
  Validation Loss: 0.53
  Accuracy: 0.81
  Validation Loss: 0.72
  Accuracy: 0.71
  Validation Loss: 0.81
Acc: 0.9375	F1: 0.75
  Accuracy: 0.94
  Validation Loss: 0.12
  Accuracy: 0.94
  Validation Loss: 0.18
  Accuracy: 0.94
  Validation Loss: 0.20
Acc: 0.90625	F1: 0.7333333333333334
  Accuracy: 1.00
  Validation Loss: 0.01
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 0.9642857142857143	F1: 0.75



AVG Acc: 0.8303571428571429
AVG F1: 0.6054545454545455
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 5e-05
EMBEDDING LEN: 64
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.72
  Accuracy: 0.49
  Validation Loss: 0.69
  Accuracy: 0.51
  Validation Loss: 0.69
  Accuracy: 0.51
  Validation Loss: 0.69
Acc: 0.59375	F1: 0.6363636363636364
  Accuracy: 0.94
  Validation Loss: 0.43
  Accuracy: 0.90
  Validation Loss: 0.33
  Accuracy: 0.90
  Validation Loss: 0.29
  Accuracy: 0.90
  Validation Loss: 0.42
Acc: 0.6875	F1: 0.3392857142857143
  Accuracy: 0.78
  Validation Loss: 0.32
  Accuracy: 0.71
  Validation Loss: 0.52
  Accuracy: 0.71
  Validation Loss: 0.85
  Accuracy: 0.71
  Validation Loss: 0.83
Acc: 0.90625	F1: 0.7142857142857143
  Accuracy: 0.90
  Validation Loss: 0.35
  Accuracy: 0.90
  Validation Loss: 0.08
  Accuracy: 0.90
  Validation Loss: 0.12
  Accuracy: 0.90
  Validation Loss: 0.28
Acc: 1.0	F1: 0.75
  Accuracy: 0.80
  Validation Loss: 0.91
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 0.75



AVG Acc: 0.8375
AVG F1: 0.63798701

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.61
  Validation Loss: 0.66
Acc: 0.5	F1: 0.225
  Accuracy: 0.68
  Validation Loss: 0.57
Acc: 0.65625	F1: 0.35833333333333334
  Accuracy: 0.78
  Validation Loss: 0.58
Acc: 0.75	F1: 0.625
  Accuracy: 0.68
  Validation Loss: 0.57
Acc: 0.84375	F1: 0.6976190476190476
  Accuracy: 0.74
  Validation Loss: 0.73
Acc: 0.8392857142857143	F1: 0.4666666666666667



AVG Acc: 0.7178571428571429
AVG F1: 0.4745238095238095
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 5e-05
EMBEDDING LEN: 128
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.45
  Validation Loss: 0.70
  Accuracy: 0.51
  Validation Loss: 0.68
Acc: 0.5625	F1: 0.6196969696969696
  Accuracy: 0.78
  Validation Loss: 0.52
  Accuracy: 0.94
  Validation Loss: 0.38
Acc: 0.78125	F1: 0.4
  Accuracy: 0.90
  Validation Loss: 0.43
  Accuracy: 0.84
  Validation Loss: 0.36
Acc: 1.0	F1: 0.75
  Accuracy: 0.84
  Validation Loss: 0.25
  Accuracy: 0.84
  Validation Loss: 0.33
Acc: 0.9375	F1: 0.7333333333333334
  Accuracy: 0.90
  Validation Loss: 0.48
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 0.96875	F1: 0.7333333333333334



AVG Acc: 0.85
AVG F1: 0.6472727272727272
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 5e-05
EMBEDDING LEN: 128
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.71
  Accuracy: 0.84
  Validation Loss: 0.60
  Accuracy: 0.65
  Validation Loss: 0.60
Acc: 0.53125	F1: 0.2
  Accuracy: 0.88
  Validation Loss: 0.51
  Accuracy: 0.88
  Validation Loss: 0.33
  Accuracy: 0.94
  Validation Loss: 0.24
Acc: 0.8125	F1: 0.43333333333333335
  Accuracy: 0.78
  Validation Loss: 0.55
  Accuracy: 0.75
  Validation Loss: 0.51
  Accuracy: 0.88
  Validation Loss: 0.45
Acc: 1.0	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.11
  Accuracy: 0.94
  Validation Loss: 0.36
  Accuracy: 0.94
  Validation Loss: 0.11
Acc: 0.90625	F1: 0.7166666666666667
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 0.75



AVG Acc: 0.85
AVG F1: 0.57
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 5e-05
EMBEDDING LEN: 128
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.72
  Accuracy: 0.78
  Validation Loss: 0.61
  Accuracy: 0.84
  Validation Loss: 0.54
  Accuracy: 0.84
  Validation Loss: 0.52
Acc: 0.8125	F1: 0.6089743589743589
  Accuracy: 1.00
  Validation Loss: 0.41
  Accuracy: 0.94
  Validation Loss: 0.21
  Accuracy: 0.68
  Validation Loss: 0.63
  Accuracy: 0.84
  Validation Loss: 0.45
Acc: 0.8125	F1: 0.4606060606060606
  Accuracy: 0.78
  Validation Loss: 0.37
  Accuracy: 0.94
  Validation Loss: 0.16
  Accuracy: 0.94
  Validation Loss: 0.20
  Accuracy: 0.94
  Validation Loss: 0.15
Acc: 1.0	F1: 0.75
  Accuracy: 0.90
  Validation Loss: 0.47
  Accuracy: 0.80
  Validation Loss: 0.67
  Accuracy: 0.90
  Validation Loss: 0.56
  Accuracy: 0.90
  Validation Loss: 0.41
Acc: 0.96875	F1: 0.7333333333333334
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 0.75



AVG Acc: 0.91875
AVG F1: 0.66058275

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.61
  Validation Loss: 0.68
Acc: 0.59375	F1: 0.6196969696969696
  Accuracy: 0.84
  Validation Loss: 0.63
Acc: 0.6875	F1: 0.375
  Accuracy: 0.49
  Validation Loss: 0.65
Acc: 0.5625	F1: 0.6
  Accuracy: 0.51
  Validation Loss: 0.65
Acc: 0.8125	F1: 0.5923076923076923
  Accuracy: 0.57
  Validation Loss: 0.68
Acc: 0.7991071428571428	F1: 0.6333333333333333



AVG Acc: 0.6910714285714286
AVG F1: 0.5640675990675991
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 5e-05
EMBEDDING LEN: 256
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.57
  Validation Loss: 0.72
  Accuracy: 0.68
  Validation Loss: 0.65
Acc: 0.59375	F1: 0.5506410256410257
  Accuracy: 0.78
  Validation Loss: 0.61
  Accuracy: 0.88
  Validation Loss: 0.51
Acc: 0.75	F1: 0.41515151515151516
  Accuracy: 0.84
  Validation Loss: 0.53
  Accuracy: 0.78
  Validation Loss: 0.56
Acc: 0.8125	F1: 0.6666666666666666
  Accuracy: 0.90
  Validation Loss: 0.24
  Accuracy: 0.84
  Validation Loss: 0.21
Acc: 0.875	F1: 0.7166666666666667
  Accuracy: 0.84
  Validation Loss: 0.39
  Accuracy: 0.88
  Validation Loss: 0.28
Acc: 0.8348214285714286	F1: 0.6083333333333334



AVG Acc: 0.7732142857142856
AVG F1: 0.5914918414918415
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 5e-05
EMBEDDING LEN: 256
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.61
  Validation Loss: 0.69
  Accuracy: 0.69
  Validation Loss: 0.61
  Accuracy: 0.88
  Validation Loss: 0.45
Acc: 0.78125	F1: 0.5535714285714286
  Accuracy: 0.88
  Validation Loss: 0.49
  Accuracy: 0.88
  Validation Loss: 0.39
  Accuracy: 0.94
  Validation Loss: 0.32
Acc: 0.75	F1: 0.45833333333333337
  Accuracy: 0.81
  Validation Loss: 0.65
  Accuracy: 0.81
  Validation Loss: 0.79
  Accuracy: 0.71
  Validation Loss: 0.91
Acc: 0.96875	F1: 0.7333333333333334
  Accuracy: 1.00
  Validation Loss: 0.05
  Accuracy: 0.84
  Validation Loss: 0.40
  Accuracy: 1.00
  Validation Loss: 0.03
Acc: 0.96875	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 0.75



AVG Acc: 0.89375
AVG F1: 0.6490476190476191
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 5e-05
EMBEDDING LEN: 256
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.55
  Validation Loss: 0.67
  Accuracy: 0.61
  Validation Loss: 0.66
  Accuracy: 0.71
  Validation Loss: 0.56
  Accuracy: 0.65
  Validation Loss: 0.60
Acc: 0.75	F1: 0.5256410256410257
  Accuracy: 0.94
  Validation Loss: 0.53
  Accuracy: 0.68
  Validation Loss: 0.69
  Accuracy: 0.94
  Validation Loss: 0.61
  Accuracy: 0.94
  Validation Loss: 0.57
Acc: 0.4375	F1: 0.4642857142857143
  Accuracy: 0.49
  Validation Loss: 0.73
  Accuracy: 0.55
  Validation Loss: 0.69
  Accuracy: 0.71
  Validation Loss: 0.69
  Accuracy: 0.71
  Validation Loss: 0.72
Acc: 0.75	F1: 0.5506410256410257
  Accuracy: 0.84
  Validation Loss: 0.45
  Accuracy: 0.78
  Validation Loss: 0.51
  Accuracy: 0.81
  Validation Loss: 0.39
  Accuracy: 0.84
  Validation Loss: 0.30
Acc: 0.84375	F1: 0.6642857142857144
  Accuracy: 1.00
  Validation Loss: 0.05
  Accuracy: 0.94
  Validation Loss: 0.28
  Accuracy: 1.00
  Validation Loss: 0.03
  Accuracy: 0.94
  Validation Loss: 0.20
Acc: 0.90625	F1: 0.48333333333333334



AVG

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.78
  Validation Loss: 0.66
Acc: 0.65625	F1: 0.4583333333333333
  Accuracy: 0.57
  Validation Loss: 0.65
Acc: 0.4375	F1: 0.4642857142857143
  Accuracy: 0.90
  Validation Loss: 0.64
Acc: 0.78125	F1: 0.5666666666666667
  Accuracy: 0.57
  Validation Loss: 0.70
Acc: 0.8125	F1: 0.4476190476190476
  Accuracy: 0.68
  Validation Loss: 0.61
Acc: 0.7098214285714286	F1: 0.38461538461538464



AVG Acc: 0.6794642857142856
AVG F1: 0.46430402930402936
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 5e-05
EMBEDDING LEN: 512
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.68
  Accuracy: 0.68
  Validation Loss: 0.65
Acc: 0.71875	F1: 0.48461538461538467
  Accuracy: 0.71
  Validation Loss: 0.57
  Accuracy: 0.71
  Validation Loss: 0.52
Acc: 0.71875	F1: 0.2857142857142857
  Accuracy: 0.68
  Validation Loss: 0.54
  Accuracy: 0.90
  Validation Loss: 0.38
Acc: 1.0	F1: 0.75
  Accuracy: 0.78
  Validation Loss: 0.49
  Accuracy: 0.78
  Validation Loss: 0.49
Acc: 0.8125	F1: 0.6976190476190476
  Accuracy: 0.84
  Validation Loss: 0.52
  Accuracy: 0.84
  Validation Loss: 0.77
Acc: 0.9375	F1: 0.7166666666666667



AVG Acc: 0.8375
AVG F1: 0.586923076923077
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 5e-05
EMBEDDING LEN: 512
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.61
  Validation Loss: 0.67
  Accuracy: 0.59
  Validation Loss: 0.61
  Accuracy: 0.71
  Validation Loss: 0.52
Acc: 0.8125	F1: 0.6089743589743589
  Accuracy: 0.94
  Validation Loss: 0.39
  Accuracy: 0.94
  Validation Loss: 0.29
  Accuracy: 0.94
  Validation Loss: 0.28
Acc: 0.84375	F1: 0.45833333333333337
  Accuracy: 0.75
  Validation Loss: 0.45
  Accuracy: 0.81
  Validation Loss: 0.63
  Accuracy: 0.75
  Validation Loss: 0.83
Acc: 1.0	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.03
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 1.00
  Validation Loss: 0.02
Acc: 0.96875	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 0.75



AVG Acc: 0.925
AVG F1: 0.6634615384615385
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 5e-05
EMBEDDING LEN: 512
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.78
  Accuracy: 0.61
  Validation Loss: 0.67
  Accuracy: 0.78
  Validation Loss: 0.60
  Accuracy: 0.71
  Validation Loss: 0.64
Acc: 0.71875	F1: 0.5714285714285714
  Accuracy: 0.84
  Validation Loss: 0.56
  Accuracy: 0.84
  Validation Loss: 0.55
  Accuracy: 0.64
  Validation Loss: 0.97
  Accuracy: 0.84
  Validation Loss: 0.48
Acc: 0.65625	F1: 0.4
  Accuracy: 0.78
  Validation Loss: 0.64
  Accuracy: 0.71
  Validation Loss: 0.54
  Accuracy: 0.88
  Validation Loss: 0.49
  Accuracy: 0.78
  Validation Loss: 0.46
Acc: 0.9375	F1: 0.75
  Accuracy: 0.75
  Validation Loss: 0.58
  Accuracy: 0.88
  Validation Loss: 0.46
  Accuracy: 0.94
  Validation Loss: 0.31
  Accuracy: 0.94
  Validation Loss: 0.34
Acc: 0.90625	F1: 0.7166666666666667
  Accuracy: 0.90
  Validation Loss: 0.34
  Accuracy: 0.90
  Validation Loss: 0.23
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 0.90
  Validation Loss: 0.26
Acc: 0.9375	F1: 0.7166666666666667



AVG Acc: 0.83125
AVG F1: 0.6

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.81
  Validation Loss: 0.62
Acc: 0.71875	F1: 0.483974358974359
  Accuracy: 0.70
  Validation Loss: 0.64
Acc: 0.65625	F1: 0.4772727272727273
  Accuracy: 0.84
  Validation Loss: 0.60
Acc: 0.84375	F1: 0.6833333333333333
  Accuracy: 0.68
  Validation Loss: 0.58
Acc: 0.78125	F1: 0.5666666666666667
  Accuracy: 0.68
  Validation Loss: 0.56
Acc: 0.7991071428571428	F1: 0.6976190476190476



AVG Acc: 0.7598214285714285
AVG F1: 0.5817732267732267
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 3e-05
EMBEDDING LEN: 64
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.68
  Accuracy: 0.51
  Validation Loss: 0.68
Acc: 0.625	F1: 0.6363636363636364
  Accuracy: 0.71
  Validation Loss: 0.58
  Accuracy: 0.90
  Validation Loss: 0.56
Acc: 0.6875	F1: 0.41515151515151516
  Accuracy: 0.90
  Validation Loss: 0.44
  Accuracy: 0.84
  Validation Loss: 0.39
Acc: 0.875	F1: 0.6833333333333333
  Accuracy: 1.00
  Validation Loss: 0.22
  Accuracy: 1.00
  Validation Loss: 0.20
Acc: 0.96875	F1: 0.75
  Accuracy: 0.94
  Validation Loss: 0.13
  Accuracy: 1.00
  Validation Loss: 0.10
Acc: 1.0	F1: 0.75



AVG Acc: 0.83125
AVG F1: 0.646969696969697
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 3e-05
EMBEDDING LEN: 64
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.61
  Validation Loss: 0.66
  Accuracy: 0.75
  Validation Loss: 0.54
  Accuracy: 0.78
  Validation Loss: 0.51
Acc: 0.8125	F1: 0.6309523809523809
  Accuracy: 0.88
  Validation Loss: 0.43
  Accuracy: 0.94
  Validation Loss: 0.30
  Accuracy: 0.94
  Validation Loss: 0.27
Acc: 0.84375	F1: 0.48333333333333334
  Accuracy: 0.88
  Validation Loss: 0.48
  Accuracy: 0.81
  Validation Loss: 0.74
  Accuracy: 0.81
  Validation Loss: 0.71
Acc: 0.9375	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.04
  Accuracy: 1.00
  Validation Loss: 0.03
  Accuracy: 1.00
  Validation Loss: 0.03
Acc: 0.96875	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 0.9642857142857143	F1: 0.75



AVG Acc: 0.9053571428571429
AVG F1: 0.6728571428571428
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 3e-05
EMBEDDING LEN: 64
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.57
  Validation Loss: 0.66
  Accuracy: 0.65
  Validation Loss: 0.59
  Accuracy: 0.78
  Validation Loss: 0.51
  Accuracy: 0.88
  Validation Loss: 0.46
Acc: 0.8125	F1: 0.6517857142857143
  Accuracy: 1.00
  Validation Loss: 0.24
  Accuracy: 0.90
  Validation Loss: 0.35
  Accuracy: 0.90
  Validation Loss: 0.27
  Accuracy: 0.90
  Validation Loss: 0.25
Acc: 0.875	F1: 0.45833333333333337
  Accuracy: 0.94
  Validation Loss: 0.15
  Accuracy: 1.00
  Validation Loss: 0.11
  Accuracy: 0.84
  Validation Loss: 0.39
  Accuracy: 0.84
  Validation Loss: 0.28
Acc: 1.0	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 0.75



AVG Acc: 0.9375
AVG F1: 0.6720238095238096
SEED: 0
MO

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.71
  Validation Loss: 0.65
Acc: 0.65625	F1: 0.458974358974359
  Accuracy: 0.90
  Validation Loss: 0.58
Acc: 0.625	F1: 0.41666666666666663
  Accuracy: 0.84
  Validation Loss: 0.55
Acc: 0.8125	F1: 0.6666666666666666
  Accuracy: 0.68
  Validation Loss: 0.62
Acc: 0.84375	F1: 0.6976190476190476
  Accuracy: 0.57
  Validation Loss: 0.70
Acc: 0.8035714285714286	F1: 0.4666666666666667



AVG Acc: 0.7482142857142857
AVG F1: 0.5413186813186813
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 3e-05
EMBEDDING LEN: 128
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.74
  Validation Loss: 0.68
  Accuracy: 0.68
  Validation Loss: 0.68
Acc: 0.6875	F1: 0.5423076923076923
  Accuracy: 0.78
  Validation Loss: 0.55
  Accuracy: 0.78
  Validation Loss: 0.50
Acc: 0.71875	F1: 0.3392857142857143
  Accuracy: 0.90
  Validation Loss: 0.48
  Accuracy: 0.94
  Validation Loss: 0.35
Acc: 0.9375	F1: 0.625
  Accuracy: 0.84
  Validation Loss: 0.27
  Accuracy: 0.84
  Validation Loss: 0.29
Acc: 0.875	F1: 0.7166666666666667
  Accuracy: 0.64
  Validation Loss: 0.65
  Accuracy: 0.94
  Validation Loss: 0.25
Acc: 0.9375	F1: 0.48333333333333334



AVG Acc: 0.83125
AVG F1: 0.5413186813186813
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 3e-05
EMBEDDING LEN: 128
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.68
  Validation Loss: 0.64
  Accuracy: 0.71
  Validation Loss: 0.58
  Accuracy: 0.61
  Validation Loss: 0.57
Acc: 0.8125	F1: 0.6065934065934067
  Accuracy: 0.88
  Validation Loss: 0.40
  Accuracy: 0.94
  Validation Loss: 0.30
  Accuracy: 0.94
  Validation Loss: 0.26
Acc: 0.875	F1: 0.45833333333333337
  Accuracy: 0.65
  Validation Loss: 0.70
  Accuracy: 0.78
  Validation Loss: 0.50
  Accuracy: 0.84
  Validation Loss: 0.45
Acc: 0.96875	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.07
  Accuracy: 1.00
  Validation Loss: 0.03
  Accuracy: 1.00
  Validation Loss: 0.03
Acc: 0.96875	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.01
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 0.75



AVG Acc: 0.925
AVG F1: 0.662985347985348
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 3e-05
EMBEDDING LEN: 128
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.59
  Validation Loss: 0.69
  Accuracy: 0.51
  Validation Loss: 0.69
  Accuracy: 0.64
  Validation Loss: 0.64
  Accuracy: 0.74
  Validation Loss: 0.65
Acc: 0.6875	F1: 0.5904761904761905
  Accuracy: 0.68
  Validation Loss: 0.65
  Accuracy: 0.68
  Validation Loss: 0.62
  Accuracy: 0.78
  Validation Loss: 0.43
  Accuracy: 0.78
  Validation Loss: 0.46
Acc: 0.875	F1: 0.45
  Accuracy: 0.94
  Validation Loss: 0.38
  Accuracy: 0.94
  Validation Loss: 0.17
  Accuracy: 0.88
  Validation Loss: 0.27
  Accuracy: 0.94
  Validation Loss: 0.19
Acc: 0.96875	F1: 0.6666666666666666
  Accuracy: 1.00
  Validation Loss: 0.04
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 1.00
  Validation Loss: 0.01
  Accuracy: 1.00
  Validation Loss: 0.02
Acc: 0.96875	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 0.9375	F1: 0.7166666666666667



AVG Acc: 0.8875
AVG F1: 0.634

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.69
Acc: 0.59375	F1: 0.6363636363636364
  Accuracy: 0.57
  Validation Loss: 0.66
Acc: 0.4375	F1: 0.4642857142857143
  Accuracy: 0.90
  Validation Loss: 0.59
Acc: 0.9375	F1: 0.6666666666666666
  Accuracy: 0.68
  Validation Loss: 0.63
Acc: 0.8125	F1: 0.5952380952380952
  Accuracy: 0.68
  Validation Loss: 0.60
Acc: 0.8660714285714286	F1: 0.48333333333333334



AVG Acc: 0.7294642857142857
AVG F1: 0.5691774891774892
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 3e-05
EMBEDDING LEN: 256
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.57
  Validation Loss: 0.76
  Accuracy: 0.57
  Validation Loss: 0.66
Acc: 0.5625	F1: 0.5367521367521368
  Accuracy: 0.84
  Validation Loss: 0.62
  Accuracy: 0.78
  Validation Loss: 0.54
Acc: 0.625	F1: 0.38095238095238093
  Accuracy: 0.90
  Validation Loss: 0.56
  Accuracy: 0.74
  Validation Loss: 0.56
Acc: 0.71875	F1: 0.6309523809523809
  Accuracy: 0.74
  Validation Loss: 0.45
  Accuracy: 0.78
  Validation Loss: 0.44
Acc: 0.8125	F1: 0.6142857142857142
  Accuracy: 0.84
  Validation Loss: 0.39
  Accuracy: 0.84
  Validation Loss: 0.33
Acc: 0.8392857142857143	F1: 0.4642857142857143



AVG Acc: 0.7116071428571429
AVG F1: 0.5254456654456654
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 3e-05
EMBEDDING LEN: 256
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.68
  Validation Loss: 0.66
  Accuracy: 0.81
  Validation Loss: 0.62
  Accuracy: 0.88
  Validation Loss: 0.54
Acc: 0.75	F1: 0.5285714285714286
  Accuracy: 0.88
  Validation Loss: 0.46
  Accuracy: 0.88
  Validation Loss: 0.32
  Accuracy: 0.94
  Validation Loss: 0.27
Acc: 0.8125	F1: 0.4226190476190476
  Accuracy: 0.81
  Validation Loss: 0.48
  Accuracy: 0.88
  Validation Loss: 0.48
  Accuracy: 0.88
  Validation Loss: 0.41
Acc: 1.0	F1: 0.75
  Accuracy: 0.94
  Validation Loss: 0.10
  Accuracy: 0.94
  Validation Loss: 0.07
  Accuracy: 0.94
  Validation Loss: 0.06
Acc: 0.96875	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 0.75



AVG Acc: 0.90625
AVG F1: 0.6402380952380952
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 3e-05
EMBEDDING LEN: 256
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.68
  Accuracy: 0.71
  Validation Loss: 0.64
  Accuracy: 0.74
  Validation Loss: 0.59
  Accuracy: 0.74
  Validation Loss: 0.56
Acc: 0.8125	F1: 0.6565934065934066
  Accuracy: 0.84
  Validation Loss: 0.34
  Accuracy: 0.84
  Validation Loss: 0.31
  Accuracy: 0.68
  Validation Loss: 1.07
  Accuracy: 0.84
  Validation Loss: 0.68
Acc: 0.875	F1: 0.4772727272727273
  Accuracy: 0.84
  Validation Loss: 0.30
  Accuracy: 1.00
  Validation Loss: 0.04
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 1.00
  Validation Loss: 0.02
Acc: 0.96875	F1: 0.6666666666666666
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 0.96875	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 0.75



AVG Acc: 0.925
AVG F1: 0.6601065

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.61
  Validation Loss: 0.67
Acc: 0.59375	F1: 0.6142857142857143
  Accuracy: 0.64
  Validation Loss: 0.62
Acc: 0.53125	F1: 0.45833333333333337
  Accuracy: 0.94
  Validation Loss: 0.52
Acc: 0.8125	F1: 0.6833333333333333
  Accuracy: 0.78
  Validation Loss: 0.49
Acc: 0.875	F1: 0.7166666666666667
  Accuracy: 0.68
  Validation Loss: 0.67
Acc: 0.7410714285714286	F1: 0.4065934065934066



AVG Acc: 0.7107142857142856
AVG F1: 0.5758424908424908
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 3e-05
EMBEDDING LEN: 512
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.57
  Validation Loss: 0.70
  Accuracy: 0.68
  Validation Loss: 0.65
Acc: 0.65625	F1: 0.5892857142857143
  Accuracy: 0.71
  Validation Loss: 0.57
  Accuracy: 0.78
  Validation Loss: 0.54
Acc: 0.5	F1: 0.4642857142857143
  Accuracy: 0.68
  Validation Loss: 0.57
  Accuracy: 0.90
  Validation Loss: 0.51
Acc: 0.84375	F1: 0.7333333333333334
  Accuracy: 0.84
  Validation Loss: 0.41
  Accuracy: 0.78
  Validation Loss: 0.39
Acc: 0.8125	F1: 0.6785714285714286
  Accuracy: 0.94
  Validation Loss: 0.25
  Accuracy: 0.74
  Validation Loss: 0.60
Acc: 0.8705357142857143	F1: 0.6976190476190476



AVG Acc: 0.7366071428571429
AVG F1: 0.6326190476190476
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 3e-05
EMBEDDING LEN: 512
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.70
  Accuracy: 0.68
  Validation Loss: 0.60
  Accuracy: 0.61
  Validation Loss: 0.60
Acc: 0.65625	F1: 0.32867132867132864
  Accuracy: 0.88
  Validation Loss: 0.41
  Accuracy: 0.94
  Validation Loss: 0.28
  Accuracy: 0.94
  Validation Loss: 0.27
Acc: 0.75	F1: 0.39610389610389607
  Accuracy: 0.65
  Validation Loss: 0.78
  Accuracy: 0.75
  Validation Loss: 0.65
  Accuracy: 0.75
  Validation Loss: 0.59
Acc: 0.96875	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.06
  Accuracy: 1.00
  Validation Loss: 0.05
  Accuracy: 1.00
  Validation Loss: 0.04
Acc: 0.96875	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 1.00
  Validation Loss: 0.01
  Accuracy: 1.00
  Validation Loss: 0.01
Acc: 1.0	F1: 0.75



AVG Acc: 0.86875
AVG F1: 0.5949550449550449
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 3e-05
EMBEDDING LEN: 512
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.57
  Validation Loss: 0.67
  Accuracy: 0.61
  Validation Loss: 0.63
  Accuracy: 0.84
  Validation Loss: 0.49
  Accuracy: 0.94
  Validation Loss: 0.46
Acc: 0.71875	F1: 0.5065934065934066
  Accuracy: 0.78
  Validation Loss: 0.50
  Accuracy: 0.78
  Validation Loss: 0.56
  Accuracy: 0.78
  Validation Loss: 0.45
  Accuracy: 0.78
  Validation Loss: 0.45
Acc: 0.84375	F1: 0.44166666666666665
  Accuracy: 0.61
  Validation Loss: 0.87
  Accuracy: 0.88
  Validation Loss: 0.33
  Accuracy: 0.94
  Validation Loss: 0.23
  Accuracy: 0.94
  Validation Loss: 0.25
Acc: 0.9375	F1: 0.6666666666666666
  Accuracy: 0.94
  Validation Loss: 0.24
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 1.00
  Validation Loss: 0.01
  Accuracy: 1.00
  Validation Loss: 0.02
Acc: 0.96875	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.01
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 0.75



AVG Acc: 0.89375
AVG F1: 0.62

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.71
  Validation Loss: 0.62
Acc: 0.53125	F1: 0.2919191919191919
  Accuracy: 0.60
  Validation Loss: 0.64
Acc: 0.625	F1: 0.41666666666666663
  Accuracy: 0.90
  Validation Loss: 0.57
Acc: 0.875	F1: 0.65
  Accuracy: 0.68
  Validation Loss: 0.56
Acc: 0.875	F1: 0.7142857142857143
  Accuracy: 0.68
  Validation Loss: 0.63
Acc: 0.7991071428571428	F1: 0.4642857142857143



AVG Acc: 0.7410714285714286
AVG F1: 0.5074314574314573
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 2e-05
EMBEDDING LEN: 64
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.71
  Validation Loss: 0.69
  Accuracy: 0.51
  Validation Loss: 0.68
Acc: 0.65625	F1: 0.6363636363636364
  Accuracy: 0.90
  Validation Loss: 0.55
  Accuracy: 0.90
  Validation Loss: 0.52
Acc: 0.65625	F1: 0.4
  Accuracy: 0.84
  Validation Loss: 0.48
  Accuracy: 0.94
  Validation Loss: 0.42
Acc: 0.84375	F1: 0.7333333333333334
  Accuracy: 0.84
  Validation Loss: 0.32
  Accuracy: 1.00
  Validation Loss: 0.21
Acc: 0.875	F1: 0.7166666666666667
  Accuracy: 0.94
  Validation Loss: 0.21
  Accuracy: 0.94
  Validation Loss: 0.25
Acc: 0.96875	F1: 0.7333333333333334



AVG Acc: 0.8
AVG F1: 0.6439393939393939
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 2e-05
EMBEDDING LEN: 64
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.49
  Validation Loss: 0.67
  Accuracy: 0.75
  Validation Loss: 0.58
  Accuracy: 0.65
  Validation Loss: 0.57
Acc: 0.6875	F1: 0.48461538461538467
  Accuracy: 0.84
  Validation Loss: 0.47
  Accuracy: 0.94
  Validation Loss: 0.38
  Accuracy: 0.94
  Validation Loss: 0.38
Acc: 0.78125	F1: 0.4226190476190476
  Accuracy: 0.49
  Validation Loss: 0.75
  Accuracy: 0.75
  Validation Loss: 0.68
  Accuracy: 0.69
  Validation Loss: 0.75
Acc: 0.90625	F1: 0.7166666666666667
  Accuracy: 0.94
  Validation Loss: 0.14
  Accuracy: 0.94
  Validation Loss: 0.14
  Accuracy: 0.94
  Validation Loss: 0.12
Acc: 0.9375	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.04
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 1.00
  Validation Loss: 0.02
Acc: 0.9642857142857143	F1: 0.75



AVG Acc: 0.8553571428571429
AVG F1: 0.6247802197802198
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 2e-05
EMBEDDING LEN: 64
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.57
  Validation Loss: 0.66
  Accuracy: 0.81
  Validation Loss: 0.60
  Accuracy: 0.81
  Validation Loss: 0.54
  Accuracy: 0.78
  Validation Loss: 0.51
Acc: 0.84375	F1: 0.6708333333333333
  Accuracy: 0.90
  Validation Loss: 0.41
  Accuracy: 0.90
  Validation Loss: 0.40
  Accuracy: 0.90
  Validation Loss: 0.38
  Accuracy: 0.90
  Validation Loss: 0.38
Acc: 0.84375	F1: 0.45833333333333337
  Accuracy: 0.94
  Validation Loss: 0.21
  Accuracy: 0.94
  Validation Loss: 0.22
  Accuracy: 0.88
  Validation Loss: 0.27
  Accuracy: 0.88
  Validation Loss: 0.21
Acc: 1.0	F1: 0.75
  Accuracy: 0.94
  Validation Loss: 0.10
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 1.00
  Validation Loss: 0.01
  Accuracy: 1.00
  Validation Loss: 0.01
Acc: 1.0	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.01
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 0.75



AVG Acc: 0.9375
AVG F1: 0.6758333333333334
SEED: 0

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.67
Acc: 0.59375	F1: 0.6333333333333333
  Accuracy: 0.78
  Validation Loss: 0.62
Acc: 0.6875	F1: 0.41666666666666663
  Accuracy: 0.68
  Validation Loss: 0.57
Acc: 0.75	F1: 0.5916666666666667
  Accuracy: 0.68
  Validation Loss: 0.63
Acc: 0.875	F1: 0.7166666666666667
  Accuracy: 0.57
  Validation Loss: 0.63
Acc: 0.7723214285714286	F1: 0.42857142857142855



AVG Acc: 0.7357142857142857
AVG F1: 0.5573809523809524
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 2e-05
EMBEDDING LEN: 128
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.74
  Validation Loss: 0.68
  Accuracy: 0.74
  Validation Loss: 0.67
Acc: 0.75	F1: 0.6089743589743589
  Accuracy: 0.84
  Validation Loss: 0.56
  Accuracy: 0.71
  Validation Loss: 0.56
Acc: 0.6875	F1: 0.3392857142857143
  Accuracy: 0.90
  Validation Loss: 0.39
  Accuracy: 0.90
  Validation Loss: 0.36
Acc: 0.9375	F1: 0.65
  Accuracy: 0.90
  Validation Loss: 0.32
  Accuracy: 0.90
  Validation Loss: 0.29
Acc: 0.84375	F1: 0.6785714285714286
  Accuracy: 0.84
  Validation Loss: 0.39
  Accuracy: 0.84
  Validation Loss: 0.39
Acc: 0.96875	F1: 0.7333333333333334



AVG Acc: 0.8375
AVG F1: 0.602032967032967
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 2e-05
EMBEDDING LEN: 128
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.68
  Validation Loss: 0.66
  Accuracy: 0.71
  Validation Loss: 0.63
  Accuracy: 0.61
  Validation Loss: 0.60
Acc: 0.71875	F1: 0.483974358974359
  Accuracy: 0.88
  Validation Loss: 0.42
  Accuracy: 0.94
  Validation Loss: 0.30
  Accuracy: 0.94
  Validation Loss: 0.32
Acc: 0.75	F1: 0.41515151515151516
  Accuracy: 0.81
  Validation Loss: 0.45
  Accuracy: 0.75
  Validation Loss: 0.55
  Accuracy: 0.81
  Validation Loss: 0.52
Acc: 0.96875	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.09
  Accuracy: 1.00
  Validation Loss: 0.09
  Accuracy: 1.00
  Validation Loss: 0.08
Acc: 0.96875	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.03
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 1.00
  Validation Loss: 0.02
Acc: 0.9642857142857143	F1: 0.75



AVG Acc: 0.8741071428571429
AVG F1: 0.6298251748251749
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 2e-05
EMBEDDING LEN: 128
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.71
  Validation Loss: 0.68
  Accuracy: 0.51
  Validation Loss: 0.67
  Accuracy: 0.74
  Validation Loss: 0.65
  Accuracy: 0.74
  Validation Loss: 0.64
Acc: 0.78125	F1: 0.6619047619047619
  Accuracy: 0.78
  Validation Loss: 0.52
  Accuracy: 0.84
  Validation Loss: 0.44
  Accuracy: 0.78
  Validation Loss: 0.54
  Accuracy: 0.78
  Validation Loss: 0.55
Acc: 0.84375	F1: 0.45
  Accuracy: 1.00
  Validation Loss: 0.19
  Accuracy: 0.90
  Validation Loss: 0.25
  Accuracy: 1.00
  Validation Loss: 0.14
  Accuracy: 0.90
  Validation Loss: 0.19
Acc: 0.9375	F1: 0.625
  Accuracy: 1.00
  Validation Loss: 0.04
  Accuracy: 0.94
  Validation Loss: 0.28
  Accuracy: 0.94
  Validation Loss: 0.24
  Accuracy: 0.94
  Validation Loss: 0.20
Acc: 0.96875	F1: 0.7333333333333334
  Accuracy: 1.00
  Validation Loss: 0.01
  Accuracy: 1.00
  Validation Loss: 0.01
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 0.75



AVG Acc: 0.90625
AVG F1: 0.644047619047619


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.69
Acc: 0.59375	F1: 0.6363636363636364
  Accuracy: 0.57
  Validation Loss: 0.67
Acc: 0.4375	F1: 0.4642857142857143
  Accuracy: 0.68
  Validation Loss: 0.65
Acc: 0.8125	F1: 0.5732600732600732
  Accuracy: 0.71
  Validation Loss: 0.61
Acc: 0.75	F1: 0.6666666666666666
  Accuracy: 0.57
  Validation Loss: 0.60
Acc: 0.7410714285714286	F1: 0.4065934065934066



AVG Acc: 0.6669642857142857
AVG F1: 0.5494338994338995
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 2e-05
EMBEDDING LEN: 256
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.61
  Validation Loss: 0.68
  Accuracy: 0.51
  Validation Loss: 0.68
Acc: 0.59375	F1: 0.6333333333333333
  Accuracy: 0.68
  Validation Loss: 0.63
  Accuracy: 0.81
  Validation Loss: 0.61
Acc: 0.6875	F1: 0.3392857142857143
  Accuracy: 1.00
  Validation Loss: 0.60
  Accuracy: 0.94
  Validation Loss: 0.57
Acc: 0.8125	F1: 0.6833333333333333
  Accuracy: 0.54
  Validation Loss: 0.62
  Accuracy: 0.74
  Validation Loss: 0.51
Acc: 0.78125	F1: 0.5923076923076923
  Accuracy: 0.84
  Validation Loss: 0.33
  Accuracy: 0.94
  Validation Loss: 0.26
Acc: 0.8705357142857143	F1: 0.48333333333333334



AVG Acc: 0.7491071428571429
AVG F1: 0.5463186813186813
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 2e-05
EMBEDDING LEN: 256
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.65
  Validation Loss: 0.66
  Accuracy: 0.65
  Validation Loss: 0.63
  Accuracy: 0.71
  Validation Loss: 0.62
Acc: 0.6875	F1: 0.45367132867132864
  Accuracy: 0.88
  Validation Loss: 0.44
  Accuracy: 0.88
  Validation Loss: 0.35
  Accuracy: 0.88
  Validation Loss: 0.36
Acc: 0.78125	F1: 0.39610389610389607
  Accuracy: 0.88
  Validation Loss: 0.44
  Accuracy: 0.81
  Validation Loss: 0.57
  Accuracy: 0.81
  Validation Loss: 0.49
Acc: 1.0	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.09
  Accuracy: 1.00
  Validation Loss: 0.08
  Accuracy: 1.00
  Validation Loss: 0.07
Acc: 0.90625	F1: 0.7142857142857143
  Accuracy: 0.94
  Validation Loss: 0.14
  Accuracy: 1.00
  Validation Loss: 0.01
  Accuracy: 1.00
  Validation Loss: 0.01
Acc: 0.9330357142857143	F1: 0.5



AVG Acc: 0.8616071428571429
AVG F1: 0.5628121878121878
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 2e-05
EMBEDDING LEN: 256
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.61
  Validation Loss: 0.65
  Accuracy: 0.74
  Validation Loss: 0.64
  Accuracy: 0.74
  Validation Loss: 0.61
  Accuracy: 0.74
  Validation Loss: 0.60
Acc: 0.71875	F1: 0.6131410256410257
  Accuracy: 0.78
  Validation Loss: 0.47
  Accuracy: 0.90
  Validation Loss: 0.33
  Accuracy: 0.78
  Validation Loss: 0.65
  Accuracy: 0.78
  Validation Loss: 0.62
Acc: 0.84375	F1: 0.45
  Accuracy: 0.88
  Validation Loss: 0.27
  Accuracy: 0.94
  Validation Loss: 0.16
  Accuracy: 0.88
  Validation Loss: 0.26
  Accuracy: 1.00
  Validation Loss: 0.09
Acc: 0.96875	F1: 0.6666666666666666
  Accuracy: 1.00
  Validation Loss: 0.07
  Accuracy: 1.00
  Validation Loss: 0.03
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 1.00
  Validation Loss: 0.01
Acc: 0.96875	F1: 0.75
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 0.75



AVG Acc: 0.9
AVG F1: 0.6459615384615385
SEE

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.67
Acc: 0.59375	F1: 0.6363636363636364
  Accuracy: 0.64
  Validation Loss: 0.63
Acc: 0.59375	F1: 0.4807692307692307
  Accuracy: 0.78
  Validation Loss: 0.54
Acc: 0.78125	F1: 0.5732600732600732
  Accuracy: 0.61
  Validation Loss: 0.60
Acc: 0.84375	F1: 0.6976190476190476
  Accuracy: 0.61
  Validation Loss: 0.64
Acc: 0.6741071428571428	F1: 0.4065934065934066



AVG Acc: 0.6973214285714285
AVG F1: 0.558921078921079
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 2e-05
EMBEDDING LEN: 512
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.78
  Validation Loss: 0.68
  Accuracy: 0.74
  Validation Loss: 0.68
Acc: 0.75	F1: 0.6309523809523809
  Accuracy: 0.71
  Validation Loss: 0.57
  Accuracy: 0.71
  Validation Loss: 0.52
Acc: 0.6875	F1: 0.43181818181818177
  Accuracy: 0.90
  Validation Loss: 0.52
  Accuracy: 1.00
  Validation Loss: 0.46
Acc: 0.8125	F1: 0.6666666666666666
  Accuracy: 1.00
  Validation Loss: 0.28
  Accuracy: 1.00
  Validation Loss: 0.23
Acc: 0.875	F1: 0.7166666666666667
  Accuracy: 0.64
  Validation Loss: 0.74
  Accuracy: 0.88
  Validation Loss: 0.32
Acc: 0.9642857142857143	F1: 0.75



AVG Acc: 0.8178571428571428
AVG F1: 0.6392207792207791
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 2e-05
EMBEDDING LEN: 512
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.68
  Validation Loss: 0.67
  Accuracy: 0.75
  Validation Loss: 0.62
  Accuracy: 0.71
  Validation Loss: 0.61
Acc: 0.75	F1: 0.558974358974359
  Accuracy: 0.88
  Validation Loss: 0.44
  Accuracy: 0.94
  Validation Loss: 0.36
  Accuracy: 0.94
  Validation Loss: 0.35
Acc: 0.71875	F1: 0.39610389610389607
  Accuracy: 0.81
  Validation Loss: 0.52
  Accuracy: 0.65
  Validation Loss: 0.72
  Accuracy: 0.75
  Validation Loss: 0.63
Acc: 0.9375	F1: 0.75
  Accuracy: 0.84
  Validation Loss: 0.35
  Accuracy: 1.00
  Validation Loss: 0.09
  Accuracy: 1.00
  Validation Loss: 0.07
Acc: 0.9375	F1: 0.7333333333333334
  Accuracy: 1.00
  Validation Loss: 0.03
  Accuracy: 1.00
  Validation Loss: 0.01
  Accuracy: 1.00
  Validation Loss: 0.01
Acc: 0.9642857142857143	F1: 0.75



AVG Acc: 0.8616071428571429
AVG F1: 0.6376823176823176
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 8
LEARNING RATE: 2e-05
EMBEDDING LEN: 512
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.74
  Validation Loss: 0.63
  Accuracy: 0.84
  Validation Loss: 0.63
  Accuracy: 0.84
  Validation Loss: 0.61
  Accuracy: 0.74
  Validation Loss: 0.60
Acc: 0.65625	F1: 0.5923076923076923
  Accuracy: 0.78
  Validation Loss: 0.47
  Accuracy: 0.78
  Validation Loss: 0.60
  Accuracy: 0.78
  Validation Loss: 0.71
  Accuracy: 0.78
  Validation Loss: 0.71
Acc: 0.75	F1: 0.43333333333333335
  Accuracy: 0.78
  Validation Loss: 0.44
  Accuracy: 0.94
  Validation Loss: 0.19
  Accuracy: 0.78
  Validation Loss: 0.31
  Accuracy: 0.84
  Validation Loss: 0.27
Acc: 0.96875	F1: 0.6666666666666666
  Accuracy: 1.00
  Validation Loss: 0.04
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 1.00
  Validation Loss: 0.04
  Accuracy: 1.00
  Validation Loss: 0.04
Acc: 0.96875	F1: 0.75
  Accuracy: 0.94
  Validation Loss: 0.25
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 0.75



AVG Acc: 0.86875
AVG F1: 0.6384

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.66
Acc: 0.75	F1: 0.7142857142857143
  Accuracy: 0.74
  Validation Loss: 0.60
Acc: 0.625	F1: 0.36363636363636365
  Accuracy: 0.55
  Validation Loss: 0.64
Acc: 0.65625	F1: 0.6333333333333333
  Accuracy: 1.00
  Validation Loss: 0.36
Acc: 0.8125	F1: 0.7060931899641577
  Accuracy: 0.64
  Validation Loss: 0.67
Acc: 0.8333333333333333	F1: 0.6428571428571428



AVG Acc: 0.7354166666666667
AVG F1: 0.6120411488153423
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 5e-05
EMBEDDING LEN: 64
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.66
  Accuracy: 0.51
  Validation Loss: 0.66
Acc: 0.59375	F1: 0.6431372549019608
  Accuracy: 0.90
  Validation Loss: 0.49
  Accuracy: 0.84
  Validation Loss: 0.39
Acc: 0.6875	F1: 0.34782608695652173
  Accuracy: 0.90
  Validation Loss: 0.45
  Accuracy: 0.90
  Validation Loss: 0.36
Acc: 0.9375	F1: 0.9666666666666667
  Accuracy: 0.84
  Validation Loss: 0.23
  Accuracy: 0.84
  Validation Loss: 0.18
Acc: 0.90625	F1: 0.8172043010752688
  Accuracy: 0.90
  Validation Loss: 0.33
  Accuracy: 1.00
  Validation Loss: 0.01
Acc: 0.9375	F1: 0.9666666666666667



AVG Acc: 0.8125
AVG F1: 0.748300195253417
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 5e-05
EMBEDDING LEN: 64
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.70
  Accuracy: 0.61
  Validation Loss: 0.65
  Accuracy: 0.88
  Validation Loss: 0.51
Acc: 0.625	F1: 0.3333333333333333
  Accuracy: 0.94
  Validation Loss: 0.47
  Accuracy: 0.94
  Validation Loss: 0.30
  Accuracy: 0.84
  Validation Loss: 0.37
Acc: 0.71875	F1: 0.4615384615384615
  Accuracy: 0.71
  Validation Loss: 0.53
  Accuracy: 0.81
  Validation Loss: 0.72
  Accuracy: 0.71
  Validation Loss: 0.81
Acc: 0.9375	F1: 0.8333333333333333
  Accuracy: 0.94
  Validation Loss: 0.12
  Accuracy: 0.94
  Validation Loss: 0.18
  Accuracy: 0.94
  Validation Loss: 0.20
Acc: 0.90625	F1: 0.8172043010752688
  Accuracy: 1.00
  Validation Loss: 0.01
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 0.9666666666666667	F1: 0.8333333333333333



AVG Acc: 0.8308333333333333
AVG F1: 0.655748552522746
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 5e-05
EMBEDDING LEN: 64
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.72
  Accuracy: 0.49
  Validation Loss: 0.69
  Accuracy: 0.51
  Validation Loss: 0.69
  Accuracy: 0.51
  Validation Loss: 0.69
Acc: 0.59375	F1: 0.6578947368421053
  Accuracy: 0.94
  Validation Loss: 0.43
  Accuracy: 0.90
  Validation Loss: 0.33
  Accuracy: 0.90
  Validation Loss: 0.29
  Accuracy: 0.90
  Validation Loss: 0.42
Acc: 0.6875	F1: 0.36363636363636365
  Accuracy: 0.78
  Validation Loss: 0.32
  Accuracy: 0.71
  Validation Loss: 0.52
  Accuracy: 0.71
  Validation Loss: 0.85
  Accuracy: 0.71
  Validation Loss: 0.83
Acc: 0.90625	F1: 0.8666666666666667
  Accuracy: 0.90
  Validation Loss: 0.35
  Accuracy: 0.90
  Validation Loss: 0.08
  Accuracy: 0.90
  Validation Loss: 0.12
  Accuracy: 0.90
  Validation Loss: 0.28
Acc: 1.0	F1: 1.0
  Accuracy: 0.80
  Validation Loss: 0.91
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 1.0



AVG Acc: 0.8375
AVG F1: 0.777639553

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.61
  Validation Loss: 0.66
Acc: 0.5	F1: 0.3611111111111111
  Accuracy: 0.68
  Validation Loss: 0.57
Acc: 0.65625	F1: 0.391304347826087
  Accuracy: 0.78
  Validation Loss: 0.58
Acc: 0.75	F1: 0.6666666666666667
  Accuracy: 0.68
  Validation Loss: 0.57
Acc: 0.84375	F1: 0.7816091954022988
  Accuracy: 0.74
  Validation Loss: 0.73
Acc: 0.8375	F1: 0.4666666666666667



AVG Acc: 0.7175
AVG F1: 0.5334715975345661
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 5e-05
EMBEDDING LEN: 128
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.45
  Validation Loss: 0.70
  Accuracy: 0.51
  Validation Loss: 0.68
Acc: 0.5625	F1: 0.6417657045840408
  Accuracy: 0.78
  Validation Loss: 0.52
  Accuracy: 0.94
  Validation Loss: 0.38
Acc: 0.78125	F1: 0.41666666666666663
  Accuracy: 0.90
  Validation Loss: 0.43
  Accuracy: 0.84
  Validation Loss: 0.36
Acc: 1.0	F1: 1.0
  Accuracy: 0.84
  Validation Loss: 0.25
  Accuracy: 0.84
  Validation Loss: 0.33
Acc: 0.9375	F1: 0.8838709677419355
  Accuracy: 0.90
  Validation Loss: 0.48
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 0.96875	F1: 0.9838709677419355



AVG Acc: 0.85
AVG F1: 0.7852348613469157
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 5e-05
EMBEDDING LEN: 128
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.71
  Accuracy: 0.84
  Validation Loss: 0.60
  Accuracy: 0.65
  Validation Loss: 0.60
Acc: 0.53125	F1: 0.2
  Accuracy: 0.88
  Validation Loss: 0.51
  Accuracy: 0.88
  Validation Loss: 0.33
  Accuracy: 0.94
  Validation Loss: 0.24
Acc: 0.8125	F1: 0.44
  Accuracy: 0.78
  Validation Loss: 0.55
  Accuracy: 0.75
  Validation Loss: 0.51
  Accuracy: 0.88
  Validation Loss: 0.45
Acc: 1.0	F1: 1.0
  Accuracy: 1.00
  Validation Loss: 0.11
  Accuracy: 0.94
  Validation Loss: 0.36
  Accuracy: 0.94
  Validation Loss: 0.11
Acc: 0.90625	F1: 0.8666666666666667
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 1.0



AVG Acc: 0.85
AVG F1: 0.7013333333333334
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 5e-05
EMBEDDING LEN: 128
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.72
  Accuracy: 0.78
  Validation Loss: 0.61
  Accuracy: 0.84
  Validation Loss: 0.54
  Accuracy: 0.84
  Validation Loss: 0.52
Acc: 0.8125	F1: 0.7339901477832513
  Accuracy: 1.00
  Validation Loss: 0.41
  Accuracy: 0.94
  Validation Loss: 0.21
  Accuracy: 0.68
  Validation Loss: 0.63
  Accuracy: 0.84
  Validation Loss: 0.45
Acc: 0.8125	F1: 0.4615384615384615
  Accuracy: 0.78
  Validation Loss: 0.37
  Accuracy: 0.94
  Validation Loss: 0.16
  Accuracy: 0.94
  Validation Loss: 0.20
  Accuracy: 0.94
  Validation Loss: 0.15
Acc: 1.0	F1: 1.0
  Accuracy: 0.90
  Validation Loss: 0.47
  Accuracy: 0.80
  Validation Loss: 0.67
  Accuracy: 0.90
  Validation Loss: 0.56
  Accuracy: 0.90
  Validation Loss: 0.41
Acc: 0.96875	F1: 0.9838709677419355
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 1.0



AVG Acc: 0.91875
AVG F1: 0.8358799154

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.61
  Validation Loss: 0.68
Acc: 0.59375	F1: 0.6505376344086022
  Accuracy: 0.84
  Validation Loss: 0.63
Acc: 0.6875	F1: 0.41666666666666663
  Accuracy: 0.49
  Validation Loss: 0.65
Acc: 0.5625	F1: 0.6111111111111112
  Accuracy: 0.51
  Validation Loss: 0.65
Acc: 0.8125	F1: 0.6785714285714286
  Accuracy: 0.57
  Validation Loss: 0.68
Acc: 0.8041666666666667	F1: 0.6333333333333333



AVG Acc: 0.6920833333333334
AVG F1: 0.5980440348182283
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 5e-05
EMBEDDING LEN: 256
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.57
  Validation Loss: 0.72
  Accuracy: 0.68
  Validation Loss: 0.65
Acc: 0.59375	F1: 0.5824175824175823
  Accuracy: 0.78
  Validation Loss: 0.61
  Accuracy: 0.88
  Validation Loss: 0.51
Acc: 0.75	F1: 0.4230769230769231
  Accuracy: 0.84
  Validation Loss: 0.53
  Accuracy: 0.78
  Validation Loss: 0.56
Acc: 0.8125	F1: 0.7
  Accuracy: 0.90
  Validation Loss: 0.24
  Accuracy: 0.84
  Validation Loss: 0.21
Acc: 0.875	F1: 0.8
  Accuracy: 0.84
  Validation Loss: 0.39
  Accuracy: 0.88
  Validation Loss: 0.28
Acc: 0.8354166666666667	F1: 0.6505376344086022



AVG Acc: 0.7733333333333333
AVG F1: 0.6312064279806215
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 5e-05
EMBEDDING LEN: 256
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.61
  Validation Loss: 0.69
  Accuracy: 0.69
  Validation Loss: 0.61
  Accuracy: 0.88
  Validation Loss: 0.45
Acc: 0.78125	F1: 0.6285714285714286
  Accuracy: 0.88
  Validation Loss: 0.49
  Accuracy: 0.88
  Validation Loss: 0.39
  Accuracy: 0.94
  Validation Loss: 0.32
Acc: 0.75	F1: 0.4642857142857143
  Accuracy: 0.81
  Validation Loss: 0.65
  Accuracy: 0.81
  Validation Loss: 0.79
  Accuracy: 0.71
  Validation Loss: 0.91
Acc: 0.96875	F1: 0.9838709677419355
  Accuracy: 1.00
  Validation Loss: 0.05
  Accuracy: 0.84
  Validation Loss: 0.40
  Accuracy: 1.00
  Validation Loss: 0.03
Acc: 0.96875	F1: 0.9
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 1.0



AVG Acc: 0.89375
AVG F1: 0.7953456221198156
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 5e-05
EMBEDDING LEN: 256
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.55
  Validation Loss: 0.67
  Accuracy: 0.61
  Validation Loss: 0.66
  Accuracy: 0.71
  Validation Loss: 0.56
  Accuracy: 0.65
  Validation Loss: 0.60
Acc: 0.75	F1: 0.5952380952380952
  Accuracy: 0.94
  Validation Loss: 0.53
  Accuracy: 0.68
  Validation Loss: 0.69
  Accuracy: 0.94
  Validation Loss: 0.61
  Accuracy: 0.94
  Validation Loss: 0.57
Acc: 0.4375	F1: 0.4666666666666667
  Accuracy: 0.49
  Validation Loss: 0.73
  Accuracy: 0.55
  Validation Loss: 0.69
  Accuracy: 0.71
  Validation Loss: 0.69
  Accuracy: 0.71
  Validation Loss: 0.72
Acc: 0.75	F1: 0.5952380952380952
  Accuracy: 0.84
  Validation Loss: 0.45
  Accuracy: 0.78
  Validation Loss: 0.51
  Accuracy: 0.81
  Validation Loss: 0.39
  Accuracy: 0.84
  Validation Loss: 0.30
Acc: 0.84375	F1: 0.7523809523809524
  Accuracy: 1.00
  Validation Loss: 0.05
  Accuracy: 0.94
  Validation Loss: 0.28
  Accuracy: 1.00
  Validation Loss: 0.03
  Accuracy: 0.94
  Validation Loss: 0.20
Acc: 0.9020833333333333	F1: 0.4838709677419

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.78
  Validation Loss: 0.66
Acc: 0.65625	F1: 0.5333333333333333
  Accuracy: 0.57
  Validation Loss: 0.65
Acc: 0.4375	F1: 0.4666666666666667
  Accuracy: 0.90
  Validation Loss: 0.64
Acc: 0.78125	F1: 0.6574074074074074
  Accuracy: 0.57
  Validation Loss: 0.70
Acc: 0.8125	F1: 0.4482758620689655
  Accuracy: 0.68
  Validation Loss: 0.61
Acc: 0.7125	F1: 0.38461538461538464



AVG Acc: 0.6799999999999999
AVG F1: 0.4980597308183515
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 5e-05
EMBEDDING LEN: 512
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.68
  Accuracy: 0.68
  Validation Loss: 0.65
Acc: 0.71875	F1: 0.5846153846153846
  Accuracy: 0.71
  Validation Loss: 0.57
  Accuracy: 0.71
  Validation Loss: 0.52
Acc: 0.71875	F1: 0.3333333333333333
  Accuracy: 0.68
  Validation Loss: 0.54
  Accuracy: 0.90
  Validation Loss: 0.38
Acc: 1.0	F1: 1.0
  Accuracy: 0.78
  Validation Loss: 0.49
  Accuracy: 0.78
  Validation Loss: 0.49
Acc: 0.8125	F1: 0.7339901477832513
  Accuracy: 0.84
  Validation Loss: 0.52
  Accuracy: 0.84
  Validation Loss: 0.77
Acc: 0.9375	F1: 0.9666666666666667



AVG Acc: 0.8375
AVG F1: 0.7237211064797272
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 5e-05
EMBEDDING LEN: 512
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.61
  Validation Loss: 0.67
  Accuracy: 0.59
  Validation Loss: 0.61
  Accuracy: 0.71
  Validation Loss: 0.52
Acc: 0.8125	F1: 0.7339901477832513
  Accuracy: 0.94
  Validation Loss: 0.39
  Accuracy: 0.94
  Validation Loss: 0.29
  Accuracy: 0.94
  Validation Loss: 0.28
Acc: 0.84375	F1: 0.4642857142857143
  Accuracy: 0.75
  Validation Loss: 0.45
  Accuracy: 0.81
  Validation Loss: 0.63
  Accuracy: 0.75
  Validation Loss: 0.83
Acc: 1.0	F1: 1.0
  Accuracy: 1.00
  Validation Loss: 0.03
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 1.00
  Validation Loss: 0.02
Acc: 0.96875	F1: 0.9
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 1.0



AVG Acc: 0.925
AVG F1: 0.8196551724137932
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 5e-05
EMBEDDING LEN: 512
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.78
  Accuracy: 0.61
  Validation Loss: 0.67
  Accuracy: 0.78
  Validation Loss: 0.60
  Accuracy: 0.71
  Validation Loss: 0.64
Acc: 0.71875	F1: 0.6507936507936507
  Accuracy: 0.84
  Validation Loss: 0.56
  Accuracy: 0.84
  Validation Loss: 0.55
  Accuracy: 0.64
  Validation Loss: 0.97
  Accuracy: 0.84
  Validation Loss: 0.48
Acc: 0.65625	F1: 0.4230769230769231
  Accuracy: 0.78
  Validation Loss: 0.64
  Accuracy: 0.71
  Validation Loss: 0.54
  Accuracy: 0.88
  Validation Loss: 0.49
  Accuracy: 0.78
  Validation Loss: 0.46
Acc: 0.9375	F1: 0.8333333333333333
  Accuracy: 0.75
  Validation Loss: 0.58
  Accuracy: 0.88
  Validation Loss: 0.46
  Accuracy: 0.94
  Validation Loss: 0.31
  Accuracy: 0.94
  Validation Loss: 0.34
Acc: 0.90625	F1: 0.8666666666666667
  Accuracy: 0.90
  Validation Loss: 0.34
  Accuracy: 0.90
  Validation Loss: 0.23
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 0.90
  Validation Loss: 0.26
Acc: 0.9375	F1: 0.9666666666666667




Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.81
  Validation Loss: 0.62
Acc: 0.71875	F1: 0.61
  Accuracy: 0.70
  Validation Loss: 0.64
Acc: 0.65625	F1: 0.4814814814814815
  Accuracy: 0.84
  Validation Loss: 0.60
Acc: 0.84375	F1: 0.7338709677419355
  Accuracy: 0.68
  Validation Loss: 0.58
Acc: 0.78125	F1: 0.6574074074074074
  Accuracy: 0.68
  Validation Loss: 0.56
Acc: 0.80625	F1: 0.6482758620689655



AVG Acc: 0.76125
AVG F1: 0.626207143739958
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 3e-05
EMBEDDING LEN: 64
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.51
  Validation Loss: 0.68
  Accuracy: 0.51
  Validation Loss: 0.68
Acc: 0.625	F1: 0.6666666666666667
  Accuracy: 0.71
  Validation Loss: 0.58
  Accuracy: 0.90
  Validation Loss: 0.56
Acc: 0.6875	F1: 0.4230769230769231
  Accuracy: 0.90
  Validation Loss: 0.44
  Accuracy: 0.84
  Validation Loss: 0.39
Acc: 0.875	F1: 0.7695852534562213
  Accuracy: 1.00
  Validation Loss: 0.22
  Accuracy: 1.00
  Validation Loss: 0.20
Acc: 0.96875	F1: 0.9
  Accuracy: 0.94
  Validation Loss: 0.13
  Accuracy: 1.00
  Validation Loss: 0.10
Acc: 1.0	F1: 1.0



AVG Acc: 0.83125
AVG F1: 0.7518657686399622
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 3e-05
EMBEDDING LEN: 64
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.61
  Validation Loss: 0.66
  Accuracy: 0.75
  Validation Loss: 0.54
  Accuracy: 0.78
  Validation Loss: 0.51
Acc: 0.8125	F1: 0.7166666666666667
  Accuracy: 0.88
  Validation Loss: 0.43
  Accuracy: 0.94
  Validation Loss: 0.30
  Accuracy: 0.94
  Validation Loss: 0.27
Acc: 0.84375	F1: 0.4814814814814815
  Accuracy: 0.88
  Validation Loss: 0.48
  Accuracy: 0.81
  Validation Loss: 0.74
  Accuracy: 0.81
  Validation Loss: 0.71
Acc: 0.9375	F1: 0.8333333333333333
  Accuracy: 1.00
  Validation Loss: 0.04
  Accuracy: 1.00
  Validation Loss: 0.03
  Accuracy: 1.00
  Validation Loss: 0.03
Acc: 0.96875	F1: 0.9
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 0.9666666666666667	F1: 0.8333333333333333



AVG Acc: 0.9058333333333334
AVG F1: 0.7529629629629631
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 3e-05
EMBEDDING LEN: 64
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.57
  Validation Loss: 0.66
  Accuracy: 0.65
  Validation Loss: 0.59
  Accuracy: 0.78
  Validation Loss: 0.51
  Accuracy: 0.88
  Validation Loss: 0.46
Acc: 0.8125	F1: 0.7666666666666666
  Accuracy: 1.00
  Validation Loss: 0.24
  Accuracy: 0.90
  Validation Loss: 0.35
  Accuracy: 0.90
  Validation Loss: 0.27
  Accuracy: 0.90
  Validation Loss: 0.25
Acc: 0.875	F1: 0.4642857142857143
  Accuracy: 0.94
  Validation Loss: 0.15
  Accuracy: 1.00
  Validation Loss: 0.11
  Accuracy: 0.84
  Validation Loss: 0.39
  Accuracy: 0.84
  Validation Loss: 0.28
Acc: 1.0	F1: 1.0
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 1.0
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 1.0



AVG Acc: 0.9375
AVG F1: 0.8461904761904762
SEED: 0
MODEL:

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.71
  Validation Loss: 0.65
Acc: 0.65625	F1: 0.5266666666666666
  Accuracy: 0.90
  Validation Loss: 0.58
Acc: 0.625	F1: 0.44
  Accuracy: 0.84
  Validation Loss: 0.55
Acc: 0.8125	F1: 0.7
  Accuracy: 0.68
  Validation Loss: 0.62
Acc: 0.84375	F1: 0.7816091954022988
  Accuracy: 0.57
  Validation Loss: 0.70
Acc: 0.8041666666666667	F1: 0.4666666666666667



AVG Acc: 0.7483333333333333
AVG F1: 0.5829885057471265
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 3e-05
EMBEDDING LEN: 128
EPOCH: 2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.74
  Validation Loss: 0.68
  Accuracy: 0.68
  Validation Loss: 0.68
Acc: 0.6875	F1: 0.5593869731800767
  Accuracy: 0.78
  Validation Loss: 0.55
  Accuracy: 0.78
  Validation Loss: 0.50
Acc: 0.71875	F1: 0.36363636363636365
  Accuracy: 0.90
  Validation Loss: 0.48
  Accuracy: 0.94
  Validation Loss: 0.35
Acc: 0.9375	F1: 0.75
  Accuracy: 0.84
  Validation Loss: 0.27
  Accuracy: 0.84
  Validation Loss: 0.29
Acc: 0.875	F1: 0.8
  Accuracy: 0.64
  Validation Loss: 0.65
  Accuracy: 0.94
  Validation Loss: 0.25
Acc: 0.9354166666666667	F1: 0.4838709677419355



AVG Acc: 0.8308333333333333
AVG F1: 0.5913788609116752
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 3e-05
EMBEDDING LEN: 128
EPOCH: 3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.68
  Validation Loss: 0.64
  Accuracy: 0.71
  Validation Loss: 0.58
  Accuracy: 0.61
  Validation Loss: 0.57
Acc: 0.8125	F1: 0.8074074074074074
  Accuracy: 0.88
  Validation Loss: 0.40
  Accuracy: 0.94
  Validation Loss: 0.30
  Accuracy: 0.94
  Validation Loss: 0.26
Acc: 0.875	F1: 0.4642857142857143
  Accuracy: 0.65
  Validation Loss: 0.70
  Accuracy: 0.78
  Validation Loss: 0.50
  Accuracy: 0.84
  Validation Loss: 0.45
Acc: 0.96875	F1: 0.9
  Accuracy: 1.00
  Validation Loss: 0.07
  Accuracy: 1.00
  Validation Loss: 0.03
  Accuracy: 1.00
  Validation Loss: 0.03
Acc: 0.96875	F1: 0.9
  Accuracy: 1.00
  Validation Loss: 0.01
  Accuracy: 1.00
  Validation Loss: 0.00
  Accuracy: 1.00
  Validation Loss: 0.00
Acc: 1.0	F1: 1.0



AVG Acc: 0.925
AVG F1: 0.8143386243386244
SEED: 0
MODEL: Bert
BATCH SIZE TR: 8
BATCH SIZE TS: 16
LEARNING RATE: 3e-05
EMBEDDING LEN: 128
EPOCH: 4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Accuracy: 0.59
  Validation Loss: 0.69
  Accuracy: 0.51
  Validation Loss: 0.69
  Accuracy: 0.64
  Validation Loss: 0.64
  Accuracy: 0.74
  Validation Loss: 0.65
Acc: 0.6875	F1: 0.6300940438871474
  Accuracy: 0.68
  Validation Loss: 0.65
  Accuracy: 0.68
  Validation Loss: 0.62
  Accuracy: 0.78
  Validation Loss: 0.43
  Accuracy: 0.78
  Validation Loss: 0.46
Acc: 0.875	F1: 0.4615384615384615
  Accuracy: 0.94
  Validation Loss: 0.38
  Accuracy: 0.94
  Validation Loss: 0.17
  Accuracy: 0.88
  Validation Loss: 0.27
  Accuracy: 0.94
  Validation Loss: 0.19
Acc: 0.96875	F1: 0.8333333333333333
  Accuracy: 1.00
  Validation Loss: 0.04
  Accuracy: 1.00
  Validation Loss: 0.02
  Accuracy: 1.00
  Validation Loss: 0.01
