In [9]:
import sys
relative_path = "../../"
sys.path.append(relative_path)
import os
from lookup import LookupCreator
from sensepolar.polarity import WordPolarity
from sensepolar.embed.bertEmbed import BERTWordEmbeddings
from sensepolar.embed.albertEmbed import ALBERTWordEmbeddings
from sensepolar.embed.robertaEmbed import RoBERTaWordEmbeddings
from sensepolar.polarDim import PolarDimensions
from sensepolar.oracle.dictionaryapi import Dictionary
import nltk
import pandas as pd
from nltk.stem import PorterStemmer
from datasets import load_dataset
from datasets import Dataset as Data
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import numpy as np
from transformers import BertTokenizer, BertModel
from IPython.utils import io
import re
import torch.nn.utils.rnn as rnn_utils

In [10]:
class PoemSentimentDataset(Dataset):
    def __init__(self, verse_text, labels, word_polarity_model, method='cls', dimension=39):
        self.verse_text = verse_text
        self.labels = labels
        self.word_polarity_model = word_polarity_model
        self.polar_embeddings_cache = {}
        self.dimension = dimension
        self.method = method

    def __len__(self):
        return len(self.verse_text)

    def get_sense_polar_embedding(self, word, context):
        if (word, context) not in self.polar_embeddings_cache:
            with io.capture_output() as captured:
                polar_embedding = self.word_polarity_model.analyze_word(word, context)
            antonym_dict = {}
            for pair in polar_embedding:
                antonym_dict[(pair[0], pair[1])] = antonym_dict.get((pair[0], pair[1]), []) + [pair[2]]
            sorted_antonym_dict = dict(sorted(antonym_dict.items(), key=lambda item: item[0]))
            self.polar_embeddings_cache[(word, context)] = list(sorted_antonym_dict.values())
        return self.polar_embeddings_cache[(word, context)]

    def __getitem__(self, idx):
        verse = self.verse_text[idx]
        labels = self.labels[idx]
        verse_polar_embeddings = None

        if self.method == 'cls':
            verse += ' [CLS]'
            cls_polar_embedding = self.get_sense_polar_embedding('[CLS]', verse)
            verse_polar_embeddings = torch.tensor(cls_polar_embedding, dtype=torch.float)
        else:
            polar_embeddings_list = []
            for word in verse.split():
                polar_embedding = self.get_sense_polar_embedding(word, verse)
                polar_embeddings_list.append(polar_embedding)
            verse_polar_embeddings = torch.tensor(polar_embeddings_list, dtype=torch.float)
            verse_polar_embeddings = torch.mean(verse_polar_embeddings, dim=0)
            
        verse_polar_embeddings = verse_polar_embeddings.long()
        verse_polar_embeddings = verse_polar_embeddings.squeeze(dim=1)
        verse_polar_embeddings = verse_polar_embeddings[:self.dimension]
        label = torch.tensor(labels, dtype=torch.long)

        return {
            'polar_embeddings': verse_polar_embeddings,
            'label': label
        }


In [11]:

class PolarEmbeddingClassifier(nn.Module):
    def __init__(self, num_classes, polar_dimension, model_name='sense_polar_model'):
        super(PolarEmbeddingClassifier, self).__init__()
        self.dropout = nn.Dropout(0.1)
        self.fc = nn.Linear(polar_dimension, num_classes)
        self.model_name = model_name

    def forward(self, polar_embeddings):
        output = self.dropout(polar_embeddings.float())
        logits = self.fc(output)
        return logits

    def train_model(self, train_loader, valid_loader, num_epochs, patience, optimizer, loss_fn, device):
        best_valid_loss = float('inf')
        epochs_without_improvement = 0
        train_losses = []
        valid_losses = [] 

        for epoch in range(num_epochs):
            self.train()
            train_loss = 0.0
            for batch in train_loader:
                polar_embeddings = batch['polar_embeddings'].to(device)
                labels = batch['label'].to(device)

                optimizer.zero_grad()

                outputs = self(polar_embeddings)  
                logits = outputs
                loss = loss_fn(logits, labels)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()

            avg_train_loss = train_loss / len(train_loader)
            train_losses.append(avg_train_loss)
            print(f"Epoch {epoch+1}/{num_epochs} - Training Loss: {avg_train_loss}")

            # Validation
            self.eval()
            valid_loss = 0.0
            with torch.no_grad():
                for batch in valid_loader:
                    polar_embeddings = batch['polar_embeddings'].to(device)  
                    labels = batch['label'].to(device)

                    outputs = self(polar_embeddings)
                    logits = outputs
                    loss = loss_fn(logits, labels)

                    valid_loss += loss.item()

            avg_valid_loss = valid_loss / len(valid_loader)
            valid_losses.append(avg_valid_loss)
            print(f"Epoch {epoch+1}/{num_epochs} - Validation Loss: {avg_valid_loss}")

            if avg_valid_loss < best_valid_loss:
                best_valid_loss = avg_valid_loss
                epochs_without_improvement = 0
                torch.save(self.state_dict(), "model/" + self.model_name + ".pth")
            else:
                epochs_without_improvement += 1
                if epochs_without_improvement >= patience:
                    print(f"Early stopping. No improvement in {patience} epochs.")
                    break

        self.load_state_dict(torch.load("model/" + self.model_name +".pth"))
        # Save results in a file
        with open('results/' + self.model_name+'_train.txt', 'w') as f:
            for epoch, train_loss, valid_loss in zip(range(1, num_epochs + 1), train_losses, valid_losses):
                f.write(f"Epoch {epoch} - Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}\n")


    def test_model(self, test_loader, loss_fn, device):
        self.eval()
        test_loss = 0.0
        correct_predictions = 0
        total_samples = 0
        all_predictions = []  # Initialize list to collect predictions
        all_labels = [] 

        with torch.no_grad():
            for batch in test_loader:
                polar_embeddings = batch['polar_embeddings'].to(device) 
                labels = batch['label'].to(device)

                outputs = self(polar_embeddings) 
                logits = outputs
                loss = loss_fn(logits, labels)
                test_loss += loss.item()

                _, predicted = torch.max(logits, dim=1)
                correct_predictions += (predicted == labels).sum().item()
                total_samples += labels.size(0)
                all_predictions.extend(predicted.tolist())
                all_labels.extend(labels.tolist())

        avg_test_loss = test_loss / len(test_loader)
        accuracy = correct_predictions / total_samples
        print(f"Test Loss: {avg_test_loss}, Accuracy: {accuracy}")
        classification_rep = classification_report(all_labels, all_predictions, digits=4)
        with open('results/' + self.model_name+'_test.txt', 'w') as f:
            f.write(f"Test Loss: {avg_test_loss:.4f}, Accuracy: {accuracy:.4f}\n")
            f.write("\nClassification Report:\n")
            f.write(classification_rep)

    def predict(self, polar_embeddings, device): 
        polar_embeddings = polar_embeddings.to(device)
        
        self.eval()
        with torch.no_grad():
            outputs = self(polar_embeddings)
            logits = outputs

        _, predicted = torch.max(logits, dim=1)
        return predicted.item()


In [15]:
from itertools import product
from sklearn.metrics import classification_report

experiment_settings = {
        "embed_model": [ALBERTWordEmbeddings],  
        "polar_dimension": [ 10], 
        "WordPolarity_method": ["base-change", "projection"], 
        "PoemSentimentDataset_method": ["avg", "cls"], 
        "layer": [2, 3, 4, 5]  
    }
# Create a list of lists containing values for each setting
setting_values = [values for values in experiment_settings.values()]

# Iterate through all combinations of experiment settings
for setting_combination in product(*setting_values):
    setting = {
        key: value for key, value in zip(experiment_settings.keys(), setting_combination)
    }
    print(setting)
    with io.capture_output() as captured:
        # Extract the values from the current setting
        embed_model = setting["embed_model"]
        polar_dimension = setting["polar_dimension"]
        WordPolarity_method = setting["WordPolarity_method"]
        PoemSentimentDataset_method = setting["PoemSentimentDataset_method"]
        layer = setting["layer"]
        
        print('Setting', embed_model, polar_dimension, WordPolarity_method, PoemSentimentDataset_method, layer)
        dataset = load_dataset("poem_sentiment")
        out_path = './antonyms/'
        antonym_path = "data/polars_all_combined.xlsx"
        embed_model = embed_model(layer=layer)
        
        dictionary = Dictionary('wordnet', api_key='')    
        lookupSpace = LookupCreator(dictionary, out_path, antonyms_file_path=antonym_path, is_path=True)
        lookupSpace.create_lookup_files()
        antonym_path = out_path + "polar_dimensions.pkl"

        pdc = PolarDimensions(embed_model, antonym_path=out_path + "antonym_wordnet_example_sentences_readable_extended.txt")
        pdc.create_polar_dimensions(out_path,"/polar_dimensions.pkl" )

        wp = WordPolarity(embed_model, antonym_path=antonym_path, method=WordPolarity_method)
        num_classes = 4

    # Define your model
    sensepolar_model = PolarEmbeddingClassifier(num_classes=num_classes, polar_dimension=polar_dimension, model_name=f'sense_polar_{embed_model.model_name}_dim{polar_dimension}_{WordPolarity_method}_{PoemSentimentDataset_method}_layer{layer}')


    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    sensepolar_model.to(device)

    optimizer = torch.optim.AdamW(sensepolar_model.parameters(), lr=1e-3)
    loss_fn = torch.nn.CrossEntropyLoss()

    num_epochs = 1000
    patience = 50

    preprocess_text = lambda verse: re.sub(r'\W+', ' ', re.sub(r'_([^_]+)_', r'\1', verse))
    train_texts = [preprocess_text(verse) for verse in dataset["train"]["verse_text"]]
    test_texts = [preprocess_text(verse) for verse in dataset["test"]["verse_text"]]
    valid_texts = [preprocess_text(verse) for verse in dataset["validation"]["verse_text"]]
    train_labels = dataset["train"]["label"]
    test_labels = dataset["test"]["label"]
    valid_labels = dataset["validation"]["label"]

    # train_texts_filtered = []
    # train_labels_filtered = []
    # for text, label in zip(train_texts, train_labels):
    #     if label != 3:
    #         train_texts_filtered.append(text)
    #         train_labels_filtered.append(label)`

    train_dataset = PoemSentimentDataset(train_texts, train_labels, wp, method=PoemSentimentDataset_method, dimension=polar_dimension)
    valid_dataset = PoemSentimentDataset(valid_texts, valid_labels, wp, dimension=polar_dimension, method=PoemSentimentDataset_method)
    test_dataset = PoemSentimentDataset(test_texts, test_labels, wp, dimension=polar_dimension, method=PoemSentimentDataset_method)

    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

    sensepolar_model.train_model(train_loader, valid_loader, num_epochs, patience, optimizer, loss_fn, device)

    sensepolar_model.test_model(test_loader, loss_fn, device)

{'embed_model': <class 'sensepolar.embed.albertEmbed.ALBERTWordEmbeddings'>, 'polar_dimension': 10, 'WordPolarity_method': 'base-change', 'PoemSentimentDataset_method': 'avg', 'layer': 2}


Some weights of the model checkpoint at albert-base-v1 were not used when initializing AlbertModel: ['predictions.bias', 'predictions.dense.weight', 'predictions.dense.bias', 'predictions.decoder.bias', 'predictions.LayerNorm.weight', 'predictions.decoder.weight', 'predictions.LayerNorm.bias']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 2.2934774977207992e+18
Epoch 1/1000 - Validation Loss: 1.0702493431191868e+18
Epoch 2/1000 - Training Loss: 1.359855860845492e+18
Epoch 2/1000 - Validation Loss: 2.397095010893075e+17
Epoch 3/1000 - Training Loss: 1.0088591513779662e+18
Epoch 3/1000 - Validation Loss: 1.95768856460769e+17
Epoch 4/1000 - Training Loss: 1.0200755186010124e+18
Epoch 4/1000 - Validation Loss: 2.890717136139247e+17
Epoch 5/1000 - Training Loss: 8.759815537992663e+17
Epoch 5/1000 - Validation Loss: 1.461137676619706e+17
Epoch 6/1000 - Training Loss: 7.963574692449016e+17
Epoch 6/1000 - Validation Loss: 1.903411013575171e+17
Epoch 7/1000 - Training Loss: 6.807617651906595e+17
Epoch 7/1000 - Validation Loss: 1.1147599888103131e+17
Epoch 8/1000 - Training Loss: 6.938457763937929e+17
Epoch 8/1000 - Validation Loss: 2.0879510572248035e+17
Epoch 9/1000 - Training Loss: 6.125154479751823e+17
Epoch 9/1000 - Validation Loss: 1.449287984563393e+17
Epoch 10/1000 - Training Loss: 4.90090952

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'embed_model': <class 'sensepolar.embed.albertEmbed.ALBERTWordEmbeddings'>, 'polar_dimension': 10, 'WordPolarity_method': 'base-change', 'PoemSentimentDataset_method': 'avg', 'layer': 3}


Some weights of the model checkpoint at albert-base-v1 were not used when initializing AlbertModel: ['predictions.bias', 'predictions.dense.weight', 'predictions.dense.bias', 'predictions.decoder.bias', 'predictions.LayerNorm.weight', 'predictions.decoder.weight', 'predictions.LayerNorm.bias']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 2.761709329864046e+18
Epoch 1/1000 - Validation Loss: 1.5784540432035817e+18
Epoch 2/1000 - Training Loss: 1.882211883930803e+18
Epoch 2/1000 - Validation Loss: 7.595994880175917e+17
Epoch 3/1000 - Training Loss: 1.4103476365772718e+18
Epoch 3/1000 - Validation Loss: 5.965114954328872e+17
Epoch 4/1000 - Training Loss: 1.3094386831735427e+18
Epoch 4/1000 - Validation Loss: 5.343906837371102e+17
Epoch 5/1000 - Training Loss: 1.2170381759473068e+18
Epoch 5/1000 - Validation Loss: 5.90238030590715e+17
Epoch 6/1000 - Training Loss: 1.0375266928927328e+18
Epoch 6/1000 - Validation Loss: 4.723342769166371e+17
Epoch 7/1000 - Training Loss: 9.937621799644692e+17
Epoch 7/1000 - Validation Loss: 4.6422661315810336e+17
Epoch 8/1000 - Training Loss: 8.617744345714244e+17
Epoch 8/1000 - Validation Loss: 3.749635372054891e+17
Epoch 9/1000 - Training Loss: 7.754728441028842e+17
Epoch 9/1000 - Validation Loss: 3.828144060965285e+17
Epoch 10/1000 - Training Loss: 6.84051407

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at albert-base-v1 were not used when initializing AlbertModel: ['predictions.bias', 'predictions.dense.weight', 'predictions.dense.bias', 'predictions.decoder.bias', 'predictions.LayerNorm.weight', 'predictions.decoder.weight', 'predictions.LayerNorm.bias']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


KeyboardInterrupt: 

## Roberta Runns

In [None]:
from itertools import product
from sklearn.metrics import classification_report

experiment_settings = {
        "embed_model": [RoBERTaWordEmbeddings, BERTWordEmbeddings, ALBERTWordEmbeddings],  
        "polar_dimension": [ 786, 1586], 
        "WordPolarity_method": ["base-change", "projection"], 
        "PoemSentimentDataset_method": ["avg", "cls"], 
        "layer": [2, 3, 4, 5]  
    }
# Create a list of lists containing values for each setting
setting_values = [values for values in experiment_settings.values()]

# Iterate through all combinations of experiment settings
for setting_combination in product(*setting_values):
    setting = {
        key: value for key, value in zip(experiment_settings.keys(), setting_combination)
    }
    print(setting)
    with io.capture_output() as captured:
        # Extract the values from the current setting
        embed_model = setting["embed_model"]
        polar_dimension = setting["polar_dimension"]
        WordPolarity_method = setting["WordPolarity_method"]
        PoemSentimentDataset_method = setting["PoemSentimentDataset_method"]
        layer = setting["layer"]
        
        print('Setting', embed_model, polar_dimension, WordPolarity_method, PoemSentimentDataset_method, layer)
        dataset = load_dataset("poem_sentiment")
        out_path = './antonyms/'
        antonym_path = "data/polars_all_combined.xlsx"
        embed_model = embed_model(layer=layer)
        
        dictionary = Dictionary('wordnet', api_key='')    
        lookupSpace = LookupCreator(dictionary, out_path, antonyms_file_path=antonym_path)
        lookupSpace.create_lookup_files()
        antonym_path = out_path + "polar_dimensions.pkl"

        pdc = PolarDimensions(embed_model, antonym_path=out_path + "antonym_wordnet_example_sentences_readable_extended.txt")
        pdc.create_polar_dimensions(out_path,"/polar_dimensions.pkl" )

        wp = WordPolarity(embed_model, antonym_path=antonym_path, method=WordPolarity_method)
        num_classes = 4

    # Define your model
    sensepolar_model = PolarEmbeddingClassifier(num_classes=num_classes, polar_dimension=polar_dimension, model_name=f'sense_polar_{embed_model.model_name}_dim{polar_dimension}_{WordPolarity_method}_{PoemSentimentDataset_method}_layer{layer}')


    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    sensepolar_model.to(device)

    optimizer = torch.optim.AdamW(sensepolar_model.parameters(), lr=1e-3)
    loss_fn = torch.nn.CrossEntropyLoss()

    num_epochs = 1000
    patience = 50

    preprocess_text = lambda verse: re.sub(r'\W+', ' ', re.sub(r'_([^_]+)_', r'\1', verse))
    train_texts = [preprocess_text(verse) for verse in dataset["train"]["verse_text"]]
    test_texts = [preprocess_text(verse) for verse in dataset["test"]["verse_text"]]
    valid_texts = [preprocess_text(verse) for verse in dataset["validation"]["verse_text"]]
    train_labels = dataset["train"]["label"]
    test_labels = dataset["test"]["label"]
    valid_labels = dataset["validation"]["label"]

    # train_texts_filtered = []
    # train_labels_filtered = []
    # for text, label in zip(train_texts, train_labels):
    #     if label != 3:
    #         train_texts_filtered.append(text)
    #         train_labels_filtered.append(label)`

    train_dataset = PoemSentimentDataset(train_texts, train_labels, wp, method=PoemSentimentDataset_method, dimension=polar_dimension)
    valid_dataset = PoemSentimentDataset(valid_texts, valid_labels, wp, dimension=polar_dimension, method=PoemSentimentDataset_method)
    test_dataset = PoemSentimentDataset(test_texts, test_labels, wp, dimension=polar_dimension, method=PoemSentimentDataset_method)

    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

    sensepolar_model.train_model(train_loader, valid_loader, num_epochs, patience, optimizer, loss_fn, device)

    sensepolar_model.test_model(test_loader, loss_fn, device)

{'embed_model': <class 'sensepolar.embed.robertaEmbed.RoBERTaWordEmbeddings'>, 'polar_dimension': 786, 'WordPolarity_method': 'base-change', 'PoemSentimentDataset_method': 'avg', 'layer': 2}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3567408961909158
Epoch 1/1000 - Validation Loss: 1.3329007455280848
Epoch 2/1000 - Training Loss: 1.3236245619399207
Epoch 2/1000 - Validation Loss: 1.2962783915655953
Epoch 3/1000 - Training Loss: 1.292531070964677
Epoch 3/1000 - Validation Loss: 1.2629247052328927
Epoch 4/1000 - Training Loss: 1.263562730380467
Epoch 4/1000 - Validation Loss: 1.2321505546569824
Epoch 5/1000 - Training Loss: 1.2383650988340378
Epoch 5/1000 - Validation Loss: 1.2041877167565482
Epoch 6/1000 - Training Loss: 1.2154263917888914
Epoch 6/1000 - Validation Loss: 1.1778766768319267
Epoch 7/1000 - Training Loss: 1.1941535025835037
Epoch 7/1000 - Validation Loss: 1.1551572935921806
Epoch 8/1000 - Training Loss: 1.1751979451094354
Epoch 8/1000 - Validation Loss: 1.134286301476615
Epoch 9/1000 - Training Loss: 1.1585263652460915
Epoch 9/1000 - Validation Loss: 1.1156844411577498
Epoch 10/1000 - Training Loss: 1.143482655286789
Epoch 10/1000 - Validation Loss: 1.0989598802157812
Ep

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3638626549925124
Epoch 1/1000 - Validation Loss: 1.3418146882738387
Epoch 2/1000 - Training Loss: 1.3297476917505264
Epoch 2/1000 - Validation Loss: 1.3045054503849574
Epoch 3/1000 - Training Loss: 1.2981198813234056
Epoch 3/1000 - Validation Loss: 1.2705982412610735
Epoch 4/1000 - Training Loss: 1.269454572881971
Epoch 4/1000 - Validation Loss: 1.2396946123668127
Epoch 5/1000 - Training Loss: 1.243673371417182
Epoch 5/1000 - Validation Loss: 1.2112106084823608
Epoch 6/1000 - Training Loss: 1.2190798159156526
Epoch 6/1000 - Validation Loss: 1.1857751096997942
Epoch 7/1000 - Training Loss: 1.1982983989374978
Epoch 7/1000 - Validation Loss: 1.1622588804789953
Epoch 8/1000 - Training Loss: 1.1793300647820746
Epoch 8/1000 - Validation Loss: 1.1407230922154017
Epoch 9/1000 - Training Loss: 1.1622239585433687
Epoch 9/1000 - Validation Loss: 1.1218615514891488
Epoch 10/1000 - Training Loss: 1.1475104444793292
Epoch 10/1000 - Validation Loss: 1.1049675260271346


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3643860263483865
Epoch 1/1000 - Validation Loss: 1.342516575540815
Epoch 2/1000 - Training Loss: 1.3310836915458952
Epoch 2/1000 - Validation Loss: 1.306193573134286
Epoch 3/1000 - Training Loss: 1.2996404894760676
Epoch 3/1000 - Validation Loss: 1.2727602549961634
Epoch 4/1000 - Training Loss: 1.2712829176868712
Epoch 4/1000 - Validation Loss: 1.2415341649736678
Epoch 5/1000 - Training Loss: 1.2456201378788267
Epoch 5/1000 - Validation Loss: 1.2124700035367693
Epoch 6/1000 - Training Loss: 1.2212269540343965
Epoch 6/1000 - Validation Loss: 1.1871498482567924
Epoch 7/1000 - Training Loss: 1.1993093746049064
Epoch 7/1000 - Validation Loss: 1.1638385568346297
Epoch 8/1000 - Training Loss: 1.180642687848636
Epoch 8/1000 - Validation Loss: 1.1428444385528564
Epoch 9/1000 - Training Loss: 1.1642013213464193
Epoch 9/1000 - Validation Loss: 1.123539115701403
Epoch 10/1000 - Training Loss: 1.149217585367816
Epoch 10/1000 - Validation Loss: 1.1060790760176522
Epo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.373610109090805
Epoch 1/1000 - Validation Loss: 1.349826455116272
Epoch 2/1000 - Training Loss: 1.3384694755077362
Epoch 2/1000 - Validation Loss: 1.3119771991457259
Epoch 3/1000 - Training Loss: 1.3058359303644724
Epoch 3/1000 - Validation Loss: 1.277057136808123
Epoch 4/1000 - Training Loss: 1.2764033866780145
Epoch 4/1000 - Validation Loss: 1.2448042631149292
Epoch 5/1000 - Training Loss: 1.2493629136255808
Epoch 5/1000 - Validation Loss: 1.2158811262675695
Epoch 6/1000 - Training Loss: 1.2249542389597212
Epoch 6/1000 - Validation Loss: 1.1897444214139665
Epoch 7/1000 - Training Loss: 1.2022812174899238
Epoch 7/1000 - Validation Loss: 1.166090454374041
Epoch 8/1000 - Training Loss: 1.1834130138158798
Epoch 8/1000 - Validation Loss: 1.1436855282102312
Epoch 9/1000 - Training Loss: 1.1660307443567686
Epoch 9/1000 - Validation Loss: 1.1241319860730852
Epoch 10/1000 - Training Loss: 1.1515182075755936
Epoch 10/1000 - Validation Loss: 1.1066714525222778
Ep

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.380842183317457
Epoch 1/1000 - Validation Loss: 1.3611123732158117
Epoch 2/1000 - Training Loss: 1.3452046236821584
Epoch 2/1000 - Validation Loss: 1.32277546610151
Epoch 3/1000 - Training Loss: 1.3127725358520235
Epoch 3/1000 - Validation Loss: 1.2877433129719325
Epoch 4/1000 - Training Loss: 1.2824976252658027
Epoch 4/1000 - Validation Loss: 1.2558278696877616
Epoch 5/1000 - Training Loss: 1.2556170182568687
Epoch 5/1000 - Validation Loss: 1.2264503751482283
Epoch 6/1000 - Training Loss: 1.2312092504331045
Epoch 6/1000 - Validation Loss: 1.1996503046580724
Epoch 7/1000 - Training Loss: 1.2090918251446314
Epoch 7/1000 - Validation Loss: 1.1748663868222917
Epoch 8/1000 - Training Loss: 1.1897920072078705
Epoch 8/1000 - Validation Loss: 1.153255615915571
Epoch 9/1000 - Training Loss: 1.1709028535655566
Epoch 9/1000 - Validation Loss: 1.1335526619638716
Epoch 10/1000 - Training Loss: 1.1566236476813043
Epoch 10/1000 - Validation Loss: 1.1155361703463964
Ep

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3722646406718664
Epoch 1/1000 - Validation Loss: 1.3485551561628069
Epoch 2/1000 - Training Loss: 1.337193563580513
Epoch 2/1000 - Validation Loss: 1.311285410608564
Epoch 3/1000 - Training Loss: 1.3047968681369508
Epoch 3/1000 - Validation Loss: 1.2772410086223058
Epoch 4/1000 - Training Loss: 1.2758550026587077
Epoch 4/1000 - Validation Loss: 1.245028325489589
Epoch 5/1000 - Training Loss: 1.2487770978893553
Epoch 5/1000 - Validation Loss: 1.216102157320295
Epoch 6/1000 - Training Loss: 1.2254018698419844
Epoch 6/1000 - Validation Loss: 1.1890113013131278
Epoch 7/1000 - Training Loss: 1.2032281926700048
Epoch 7/1000 - Validation Loss: 1.166209033557347
Epoch 8/1000 - Training Loss: 1.1847653069666453
Epoch 8/1000 - Validation Loss: 1.1440445695604597
Epoch 9/1000 - Training Loss: 1.1665094537394387
Epoch 9/1000 - Validation Loss: 1.1252822961126054
Epoch 10/1000 - Training Loss: 1.151860081723758
Epoch 10/1000 - Validation Loss: 1.1075485774448939
Epoc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3598448698009764
Epoch 1/1000 - Validation Loss: 1.3362733295985632
Epoch 2/1000 - Training Loss: 1.3254971419061934
Epoch 2/1000 - Validation Loss: 1.2989355666296822
Epoch 3/1000 - Training Loss: 1.293812700680324
Epoch 3/1000 - Validation Loss: 1.2653305530548096
Epoch 4/1000 - Training Loss: 1.2654223016330175
Epoch 4/1000 - Validation Loss: 1.2342780147280012
Epoch 5/1000 - Training Loss: 1.239226947937693
Epoch 5/1000 - Validation Loss: 1.2060819183077132
Epoch 6/1000 - Training Loss: 1.2158947267702647
Epoch 6/1000 - Validation Loss: 1.1805240256445748
Epoch 7/1000 - Training Loss: 1.1951779595443182
Epoch 7/1000 - Validation Loss: 1.1576963322503226
Epoch 8/1000 - Training Loss: 1.176956295967102
Epoch 8/1000 - Validation Loss: 1.1364226256098067
Epoch 9/1000 - Training Loss: 1.1605024678366525
Epoch 9/1000 - Validation Loss: 1.1181334853172302
Epoch 10/1000 - Training Loss: 1.1455708178026336
Epoch 10/1000 - Validation Loss: 1.1013128502028329
E

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.379895186850003
Epoch 1/1000 - Validation Loss: 1.3584547894341605
Epoch 2/1000 - Training Loss: 1.3444534476314272
Epoch 2/1000 - Validation Loss: 1.3203200612749373
Epoch 3/1000 - Training Loss: 1.3113804374422346
Epoch 3/1000 - Validation Loss: 1.2862822157996041
Epoch 4/1000 - Training Loss: 1.2820225464446204
Epoch 4/1000 - Validation Loss: 1.2539571864264352
Epoch 5/1000 - Training Loss: 1.2544575397457396
Epoch 5/1000 - Validation Loss: 1.2246602433068412
Epoch 6/1000 - Training Loss: 1.2302534324782235
Epoch 6/1000 - Validation Loss: 1.1977106843675887
Epoch 7/1000 - Training Loss: 1.2079755450998033
Epoch 7/1000 - Validation Loss: 1.173436267035348
Epoch 8/1000 - Training Loss: 1.1891655049153738
Epoch 8/1000 - Validation Loss: 1.1512774229049683
Epoch 9/1000 - Training Loss: 1.170649813754218
Epoch 9/1000 - Validation Loss: 1.1318271330424718
Epoch 10/1000 - Training Loss: 1.1556253795112883
Epoch 10/1000 - Validation Loss: 1.1139609728540694
E

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.1849441326090269
Epoch 1/1000 - Validation Loss: 0.9664770535060337
Epoch 2/1000 - Training Loss: 0.9361118120806557
Epoch 2/1000 - Validation Loss: 0.7938647525651115
Epoch 3/1000 - Training Loss: 0.8805408924818039
Epoch 3/1000 - Validation Loss: 0.7878887483051845
Epoch 4/1000 - Training Loss: 0.7896870459829058
Epoch 4/1000 - Validation Loss: 0.6915316326277596
Epoch 5/1000 - Training Loss: 0.7922473634992327
Epoch 5/1000 - Validation Loss: 0.7145029178687504
Epoch 6/1000 - Training Loss: 0.7219370970768588
Epoch 6/1000 - Validation Loss: 0.6601780312401908
Epoch 7/1000 - Training Loss: 0.6909092244293008
Epoch 7/1000 - Validation Loss: 0.5980163940361568
Epoch 8/1000 - Training Loss: 0.6782357075384685
Epoch 8/1000 - Validation Loss: 0.6346790449959892
Epoch 9/1000 - Training Loss: 0.638629175722599
Epoch 9/1000 - Validation Loss: 0.6824811356408256
Epoch 10/1000 - Training Loss: 0.6353087568921703
Epoch 10/1000 - Validation Loss: 0.5922635453087943

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.0323669878499848
Epoch 1/1000 - Validation Loss: 0.7874704131058284
Epoch 2/1000 - Training Loss: 0.85826383211783
Epoch 2/1000 - Validation Loss: 0.7288290815693992
Epoch 3/1000 - Training Loss: 0.7708276462342057
Epoch 3/1000 - Validation Loss: 0.7493299416133335
Epoch 4/1000 - Training Loss: 0.7428749868912357
Epoch 4/1000 - Validation Loss: 0.6887485214642116
Epoch 5/1000 - Training Loss: 0.6558865638715881
Epoch 5/1000 - Validation Loss: 0.6504276948315757
Epoch 6/1000 - Training Loss: 0.6429899638252599
Epoch 6/1000 - Validation Loss: 0.6332338111741203
Epoch 7/1000 - Training Loss: 0.6488842956189599
Epoch 7/1000 - Validation Loss: 0.7236865886620113
Epoch 8/1000 - Training Loss: 0.5957271714827844
Epoch 8/1000 - Validation Loss: 0.6151528315884727
Epoch 9/1000 - Training Loss: 0.5805781659271035
Epoch 9/1000 - Validation Loss: 0.6315192026751382
Epoch 10/1000 - Training Loss: 0.558299431843417
Epoch 10/1000 - Validation Loss: 0.5506537471498761
E

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.051587579505784
Epoch 1/1000 - Validation Loss: 0.8020910705838885
Epoch 2/1000 - Training Loss: 0.837311429636819
Epoch 2/1000 - Validation Loss: 0.7180712393351963
Epoch 3/1000 - Training Loss: 0.768748397805861
Epoch 3/1000 - Validation Loss: 0.6548326781817845
Epoch 4/1000 - Training Loss: 0.6861097408192498
Epoch 4/1000 - Validation Loss: 0.5937772733824593
Epoch 5/1000 - Training Loss: 0.6631878332367965
Epoch 5/1000 - Validation Loss: 0.5711275296551841
Epoch 6/1000 - Training Loss: 0.6081528663635254
Epoch 6/1000 - Validation Loss: 0.6142855371747699
Epoch 7/1000 - Training Loss: 0.5933816087033067
Epoch 7/1000 - Validation Loss: 0.6602850471224103
Epoch 8/1000 - Training Loss: 0.6077530405351094
Epoch 8/1000 - Validation Loss: 0.6398281412465232
Epoch 9/1000 - Training Loss: 0.5457311421632767
Epoch 9/1000 - Validation Loss: 0.5667873833860669
Epoch 10/1000 - Training Loss: 0.510433987315212
Epoch 10/1000 - Validation Loss: 0.5221426784992218
Ep

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.0542860861335481
Epoch 1/1000 - Validation Loss: 0.8563401784215655
Epoch 2/1000 - Training Loss: 0.8761236241885594
Epoch 2/1000 - Validation Loss: 0.7476335423333305
Epoch 3/1000 - Training Loss: 0.7571182788482734
Epoch 3/1000 - Validation Loss: 0.7753863547529493
Epoch 4/1000 - Training Loss: 0.7020361811986991
Epoch 4/1000 - Validation Loss: 0.6366834470203945
Epoch 5/1000 - Training Loss: 0.681667931910072
Epoch 5/1000 - Validation Loss: 0.631602440561567
Epoch 6/1000 - Training Loss: 0.6191753638642175
Epoch 6/1000 - Validation Loss: 0.6137988184179578
Epoch 7/1000 - Training Loss: 0.5919903051108122
Epoch 7/1000 - Validation Loss: 0.5974563530513218
Epoch 8/1000 - Training Loss: 0.5714126851941858
Epoch 8/1000 - Validation Loss: 0.5795849391392299
Epoch 9/1000 - Training Loss: 0.5537087438361985
Epoch 9/1000 - Validation Loss: 0.5931948125362396
Epoch 10/1000 - Training Loss: 0.5228365372334208
Epoch 10/1000 - Validation Loss: 0.5651213782174247


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.2635481687528747
Epoch 1/1000 - Validation Loss: 1.0647529704230172
Epoch 2/1000 - Training Loss: 1.096366252217974
Epoch 2/1000 - Validation Loss: 1.0355603524616785
Epoch 3/1000 - Training Loss: 1.0265067049435206
Epoch 3/1000 - Validation Loss: 0.9285933034760612
Epoch 4/1000 - Training Loss: 1.0450676174036093
Epoch 4/1000 - Validation Loss: 0.8817232251167297
Epoch 5/1000 - Training Loss: 0.9818045603377479
Epoch 5/1000 - Validation Loss: 0.9514793668474469
Epoch 6/1000 - Training Loss: 0.9596205960427012
Epoch 6/1000 - Validation Loss: 0.8984198995998928
Epoch 7/1000 - Training Loss: 0.9551167190074921
Epoch 7/1000 - Validation Loss: 0.9880500520978656
Epoch 8/1000 - Training Loss: 0.9548438877931663
Epoch 8/1000 - Validation Loss: 0.9322214041437421
Epoch 9/1000 - Training Loss: 0.8898267511810575
Epoch 9/1000 - Validation Loss: 0.9451623899596078
Epoch 10/1000 - Training Loss: 0.8778922110795975
Epoch 10/1000 - Validation Loss: 0.9355454189436776

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.1298284232616425
Epoch 1/1000 - Validation Loss: 0.914680540561676
Epoch 2/1000 - Training Loss: 1.0541068496448653
Epoch 2/1000 - Validation Loss: 1.002405651978084
Epoch 3/1000 - Training Loss: 1.0164495999259608
Epoch 3/1000 - Validation Loss: 0.9518441472734723
Epoch 4/1000 - Training Loss: 0.989949176886252
Epoch 4/1000 - Validation Loss: 1.0127951758248466
Epoch 5/1000 - Training Loss: 0.9646887119327273
Epoch 5/1000 - Validation Loss: 0.9033933877944946
Epoch 6/1000 - Training Loss: 0.9494414776563644
Epoch 6/1000 - Validation Loss: 0.9126300896917071
Epoch 7/1000 - Training Loss: 0.926964921610696
Epoch 7/1000 - Validation Loss: 0.958184906414577
Epoch 8/1000 - Training Loss: 0.9069884823901313
Epoch 8/1000 - Validation Loss: 0.8788452063288007
Epoch 9/1000 - Training Loss: 0.8843036581362996
Epoch 9/1000 - Validation Loss: 1.0043329766818456
Epoch 10/1000 - Training Loss: 0.8485745537493911
Epoch 10/1000 - Validation Loss: 0.9189850347382682
Epo

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.1440598666667938
Epoch 1/1000 - Validation Loss: 0.9211930121694293
Epoch 2/1000 - Training Loss: 1.024538589907544
Epoch 2/1000 - Validation Loss: 0.9624067204339164
Epoch 3/1000 - Training Loss: 1.0033554072890962
Epoch 3/1000 - Validation Loss: 0.9395474961825779
Epoch 4/1000 - Training Loss: 0.9386761390737125
Epoch 4/1000 - Validation Loss: 0.8966086081096104
Epoch 5/1000 - Training Loss: 0.921940744987556
Epoch 5/1000 - Validation Loss: 0.8417593921933856
Epoch 6/1000 - Training Loss: 0.9107905775308609
Epoch 6/1000 - Validation Loss: 0.943104760987418
Epoch 7/1000 - Training Loss: 0.8789604061416217
Epoch 7/1000 - Validation Loss: 0.8616675223623004
Epoch 8/1000 - Training Loss: 0.8415634094604424
Epoch 8/1000 - Validation Loss: 0.8626234914575305
Epoch 9/1000 - Training Loss: 0.8480237706431321
Epoch 9/1000 - Validation Loss: 0.9150002896785736
Epoch 10/1000 - Training Loss: 0.8184077840830598
Epoch 10/1000 - Validation Loss: 0.9110452107020787
E

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.0970724554998534
Epoch 1/1000 - Validation Loss: 0.9532423104558673
Epoch 2/1000 - Training Loss: 1.0562191158533096
Epoch 2/1000 - Validation Loss: 0.916460462978908
Epoch 3/1000 - Training Loss: 0.9837230231080737
Epoch 3/1000 - Validation Loss: 0.8580821922847203
Epoch 4/1000 - Training Loss: 0.9575408037219729
Epoch 4/1000 - Validation Loss: 0.9244179470198495
Epoch 5/1000 - Training Loss: 0.9423223010131291
Epoch 5/1000 - Validation Loss: 0.8729137778282166
Epoch 6/1000 - Training Loss: 0.8843986370733806
Epoch 6/1000 - Validation Loss: 0.8767215439251491
Epoch 7/1000 - Training Loss: 0.872304153229509
Epoch 7/1000 - Validation Loss: 0.8682350601468768
Epoch 8/1000 - Training Loss: 0.8560011578457696
Epoch 8/1000 - Validation Loss: 0.8658067584037781
Epoch 9/1000 - Training Loss: 0.8772160352340767
Epoch 9/1000 - Validation Loss: 0.8285836151668003
Epoch 10/1000 - Training Loss: 0.855169764054673
Epoch 10/1000 - Validation Loss: 0.9152255654335022
E

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3585534351212638
Epoch 1/1000 - Validation Loss: 1.3367353507450648
Epoch 2/1000 - Training Loss: 1.3251013032027654
Epoch 2/1000 - Validation Loss: 1.3006278106144495
Epoch 3/1000 - Training Loss: 1.2942335967506682
Epoch 3/1000 - Validation Loss: 1.267218095915658
Epoch 4/1000 - Training Loss: 1.2660235464572906
Epoch 4/1000 - Validation Loss: 1.2368999889918737
Epoch 5/1000 - Training Loss: 1.2405609680073602
Epoch 5/1000 - Validation Loss: 1.2089802707944597
Epoch 6/1000 - Training Loss: 1.216703006199428
Epoch 6/1000 - Validation Loss: 1.1838266338620866
Epoch 7/1000 - Training Loss: 1.196689443928855
Epoch 7/1000 - Validation Loss: 1.1607964549745833
Epoch 8/1000 - Training Loss: 1.1782279749001776
Epoch 8/1000 - Validation Loss: 1.1396982840129308
Epoch 9/1000 - Training Loss: 1.1615831053682737
Epoch 9/1000 - Validation Loss: 1.1211587531226022
Epoch 10/1000 - Training Loss: 1.1464546684707915
Epoch 10/1000 - Validation Loss: 1.1039842196873255
E

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3794440648385458
Epoch 1/1000 - Validation Loss: 1.3581103937966483
Epoch 2/1000 - Training Loss: 1.3433084722076143
Epoch 2/1000 - Validation Loss: 1.3197443996156966
Epoch 3/1000 - Training Loss: 1.3104079536029272
Epoch 3/1000 - Validation Loss: 1.2845472948891776
Epoch 4/1000 - Training Loss: 1.280512266925403
Epoch 4/1000 - Validation Loss: 1.2519143649509974
Epoch 5/1000 - Training Loss: 1.2536482512950897
Epoch 5/1000 - Validation Loss: 1.2228583948952811
Epoch 6/1000 - Training Loss: 1.2291574627161026
Epoch 6/1000 - Validation Loss: 1.1959068264280046
Epoch 7/1000 - Training Loss: 1.206687141742025
Epoch 7/1000 - Validation Loss: 1.1717633860451835
Epoch 8/1000 - Training Loss: 1.1873823212725776
Epoch 8/1000 - Validation Loss: 1.1499539443424769
Epoch 9/1000 - Training Loss: 1.1694557539054327
Epoch 9/1000 - Validation Loss: 1.1303206597055708
Epoch 10/1000 - Training Loss: 1.1553408333233424
Epoch 10/1000 - Validation Loss: 1.112393626144954
E

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3658891362803323
Epoch 1/1000 - Validation Loss: 1.3436054161616735
Epoch 2/1000 - Training Loss: 1.3312005826405116
Epoch 2/1000 - Validation Loss: 1.3063409669058663
Epoch 3/1000 - Training Loss: 1.2992056970085417
Epoch 3/1000 - Validation Loss: 1.2716624396187919
Epoch 4/1000 - Training Loss: 1.2698781873498644
Epoch 4/1000 - Validation Loss: 1.2402118955339705
Epoch 5/1000 - Training Loss: 1.243851091180529
Epoch 5/1000 - Validation Loss: 1.211460096495492
Epoch 6/1000 - Training Loss: 1.2198568497385298
Epoch 6/1000 - Validation Loss: 1.1858391761779785
Epoch 7/1000 - Training Loss: 1.198690537895475
Epoch 7/1000 - Validation Loss: 1.1620609419686454
Epoch 8/1000 - Training Loss: 1.179412063743387
Epoch 8/1000 - Validation Loss: 1.1404588392802648
Epoch 9/1000 - Training Loss: 1.1623161339334078
Epoch 9/1000 - Validation Loss: 1.1223532727786474
Epoch 10/1000 - Training Loss: 1.147179545036384
Epoch 10/1000 - Validation Loss: 1.1046013832092285
Epo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.375941248876708
Epoch 1/1000 - Validation Loss: 1.3535885299955095
Epoch 2/1000 - Training Loss: 1.3406185997383935
Epoch 2/1000 - Validation Loss: 1.316032086099897
Epoch 3/1000 - Training Loss: 1.3079821786710195
Epoch 3/1000 - Validation Loss: 1.2818691730499268
Epoch 4/1000 - Training Loss: 1.2788184540612357
Epoch 4/1000 - Validation Loss: 1.2497251544679915
Epoch 5/1000 - Training Loss: 1.2517596185207367
Epoch 5/1000 - Validation Loss: 1.2202575206756592
Epoch 6/1000 - Training Loss: 1.2274013140371867
Epoch 6/1000 - Validation Loss: 1.1946886266980852
Epoch 7/1000 - Training Loss: 1.2066420380558287
Epoch 7/1000 - Validation Loss: 1.169985328401838
Epoch 8/1000 - Training Loss: 1.186055873121534
Epoch 8/1000 - Validation Loss: 1.148883921759469
Epoch 9/1000 - Training Loss: 1.1687358447483607
Epoch 9/1000 - Validation Loss: 1.1289367420332772
Epoch 10/1000 - Training Loss: 1.153509256030832
Epoch 10/1000 - Validation Loss: 1.1112173114504134
Epoc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3659609449761254
Epoch 1/1000 - Validation Loss: 1.3444992474147253
Epoch 2/1000 - Training Loss: 1.3318037326846803
Epoch 2/1000 - Validation Loss: 1.3081292935780116
Epoch 3/1000 - Training Loss: 1.3002259603568487
Epoch 3/1000 - Validation Loss: 1.2742630754198347
Epoch 4/1000 - Training Loss: 1.2711897662707738
Epoch 4/1000 - Validation Loss: 1.2429493835994176
Epoch 5/1000 - Training Loss: 1.2454947935683387
Epoch 5/1000 - Validation Loss: 1.21402074609484
Epoch 6/1000 - Training Loss: 1.2211054073912757
Epoch 6/1000 - Validation Loss: 1.1879316398075648
Epoch 7/1000 - Training Loss: 1.2002766323941094
Epoch 7/1000 - Validation Loss: 1.1648819276264735
Epoch 8/1000 - Training Loss: 1.1813874755586897
Epoch 8/1000 - Validation Loss: 1.1431044510432653
Epoch 9/1000 - Training Loss: 1.1638125234416552
Epoch 9/1000 - Validation Loss: 1.1240008132798331
Epoch 10/1000 - Training Loss: 1.14946677003588
Epoch 10/1000 - Validation Loss: 1.1068178926195418
Ep

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3630794052566801
Epoch 1/1000 - Validation Loss: 1.3407847370420183
Epoch 2/1000 - Training Loss: 1.3289004393986292
Epoch 2/1000 - Validation Loss: 1.3057108776909965
Epoch 3/1000 - Training Loss: 1.2977245726755686
Epoch 3/1000 - Validation Loss: 1.27177665914808
Epoch 4/1000 - Training Loss: 1.2696074332509721
Epoch 4/1000 - Validation Loss: 1.2407132387161255
Epoch 5/1000 - Training Loss: 1.243731330548014
Epoch 5/1000 - Validation Loss: 1.2127595458711897
Epoch 6/1000 - Training Loss: 1.2208637914487295
Epoch 6/1000 - Validation Loss: 1.1871076651981898
Epoch 7/1000 - Training Loss: 1.1999495391334807
Epoch 7/1000 - Validation Loss: 1.1638297183173043
Epoch 8/1000 - Training Loss: 1.181384529386248
Epoch 8/1000 - Validation Loss: 1.1428711244038172
Epoch 9/1000 - Training Loss: 1.1643656747681754
Epoch 9/1000 - Validation Loss: 1.1236389705113001
Epoch 10/1000 - Training Loss: 1.1491133709039008
Epoch 10/1000 - Validation Loss: 1.1071210929325648
Ep

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3658377741064345
Epoch 1/1000 - Validation Loss: 1.3439428125108992
Epoch 2/1000 - Training Loss: 1.330524010317666
Epoch 2/1000 - Validation Loss: 1.3069974524634225
Epoch 3/1000 - Training Loss: 1.2988293021917343
Epoch 3/1000 - Validation Loss: 1.2719788210732597
Epoch 4/1000 - Training Loss: 1.2699120342731476
Epoch 4/1000 - Validation Loss: 1.2403791291373116
Epoch 5/1000 - Training Loss: 1.2434955899204527
Epoch 5/1000 - Validation Loss: 1.2123005901064192
Epoch 6/1000 - Training Loss: 1.2200403362512589
Epoch 6/1000 - Validation Loss: 1.1861134767532349
Epoch 7/1000 - Training Loss: 1.1984609514474869
Epoch 7/1000 - Validation Loss: 1.1630204916000366
Epoch 8/1000 - Training Loss: 1.1804217717477254
Epoch 8/1000 - Validation Loss: 1.1415442058018275
Epoch 9/1000 - Training Loss: 1.1636174938508443
Epoch 9/1000 - Validation Loss: 1.1226873908724104
Epoch 10/1000 - Training Loss: 1.1487277139510428
Epoch 10/1000 - Validation Loss: 1.1056872180530004

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'embed_model': <class 'sensepolar.embed.robertaEmbed.RoBERTaWordEmbeddings'>, 'polar_dimension': 1586, 'WordPolarity_method': 'base-change', 'PoemSentimentDataset_method': 'cls', 'layer': 5}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3715872083391463
Epoch 1/1000 - Validation Loss: 1.3502173594066076
Epoch 2/1000 - Training Loss: 1.3368514797517232
Epoch 2/1000 - Validation Loss: 1.3128285578319006
Epoch 3/1000 - Training Loss: 1.3050034961530141
Epoch 3/1000 - Validation Loss: 1.2776361874171667
Epoch 4/1000 - Training Loss: 1.2757145230259215
Epoch 4/1000 - Validation Loss: 1.2466139793395996
Epoch 5/1000 - Training Loss: 1.2488668795142854
Epoch 5/1000 - Validation Loss: 1.2172068698065621
Epoch 6/1000 - Training Loss: 1.225264413016183
Epoch 6/1000 - Validation Loss: 1.1910554000309534
Epoch 7/1000 - Training Loss: 1.2035302498510905
Epoch 7/1000 - Validation Loss: 1.1671369416373116
Epoch 8/1000 - Training Loss: 1.1845035616840636
Epoch 8/1000 - Validation Loss: 1.1453875984464372
Epoch 9/1000 - Training Loss: 1.1667929302368845
Epoch 9/1000 - Validation Loss: 1.1261882100786482
Epoch 10/1000 - Training Loss: 1.1521484447377068
Epoch 10/1000 - Validation Loss: 1.1088595986366272

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.161803868732282
Epoch 1/1000 - Validation Loss: 0.8376140551907676
Epoch 2/1000 - Training Loss: 0.9155922446932111
Epoch 2/1000 - Validation Loss: 0.8065783594335828
Epoch 3/1000 - Training Loss: 0.8125607834330627
Epoch 3/1000 - Validation Loss: 0.6141500047274998
Epoch 4/1000 - Training Loss: 0.7467357915427003
Epoch 4/1000 - Validation Loss: 0.7487258634396962
Epoch 5/1000 - Training Loss: 0.6821735235197204
Epoch 5/1000 - Validation Loss: 0.7090850983347211
Epoch 6/1000 - Training Loss: 0.6502201722136566
Epoch 6/1000 - Validation Loss: 0.6857027603047234
Epoch 7/1000 - Training Loss: 0.6357936140682016
Epoch 7/1000 - Validation Loss: 0.9809778928756714
Epoch 8/1000 - Training Loss: 0.6063671556434461
Epoch 8/1000 - Validation Loss: 0.5491847438471658
Epoch 9/1000 - Training Loss: 0.5223693123885563
Epoch 9/1000 - Validation Loss: 0.556173541716167
Epoch 10/1000 - Training Loss: 0.46218416866447243
Epoch 10/1000 - Validation Loss: 0.8199672400951385

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.1106024182268552
Epoch 1/1000 - Validation Loss: 0.7697214526789529
Epoch 2/1000 - Training Loss: 0.8019023058669907
Epoch 2/1000 - Validation Loss: 0.8024495371750423
Epoch 3/1000 - Training Loss: 0.7350497522524425
Epoch 3/1000 - Validation Loss: 0.715839364698955
Epoch 4/1000 - Training Loss: 0.6343584145818438
Epoch 4/1000 - Validation Loss: 0.595405753169741
Epoch 5/1000 - Training Loss: 0.5956469236740044
Epoch 5/1000 - Validation Loss: 0.5354346718106952
Epoch 6/1000 - Training Loss: 0.5644277834466526
Epoch 6/1000 - Validation Loss: 0.5472104464258466
Epoch 7/1000 - Training Loss: 0.5205613566296441
Epoch 7/1000 - Validation Loss: 0.5777252252612796
Epoch 8/1000 - Training Loss: 0.526318120104926
Epoch 8/1000 - Validation Loss: 0.6086256482771465
Epoch 9/1000 - Training Loss: 0.4750209330980267
Epoch 9/1000 - Validation Loss: 0.6157790550163814
Epoch 10/1000 - Training Loss: 0.4729181985769953
Epoch 10/1000 - Validation Loss: 0.5308287463017872
E

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.039438534528017
Epoch 1/1000 - Validation Loss: 0.7796028341565814
Epoch 2/1000 - Training Loss: 0.7650223544665745
Epoch 2/1000 - Validation Loss: 0.7249113619327545
Epoch 3/1000 - Training Loss: 0.6604277161615235
Epoch 3/1000 - Validation Loss: 0.6127871743270329
Epoch 4/1000 - Training Loss: 0.6014422875429902
Epoch 4/1000 - Validation Loss: 0.6601505109242031
Epoch 5/1000 - Training Loss: 0.5378483954284873
Epoch 5/1000 - Validation Loss: 0.6710330418178013
Epoch 6/1000 - Training Loss: 0.5162842587700912
Epoch 6/1000 - Validation Loss: 0.593217304774693
Epoch 7/1000 - Training Loss: 0.4684189012540238
Epoch 7/1000 - Validation Loss: 0.592927988086428
Epoch 8/1000 - Training Loss: 0.470581236162356
Epoch 8/1000 - Validation Loss: 0.6045399946825845
Epoch 9/1000 - Training Loss: 0.4105213023722172
Epoch 9/1000 - Validation Loss: 0.599400384085519
Epoch 10/1000 - Training Loss: 0.394376320099192
Epoch 10/1000 - Validation Loss: 0.6329952733857291
Epoc

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.0217379991497313
Epoch 1/1000 - Validation Loss: 0.8007192526544843
Epoch 2/1000 - Training Loss: 0.756335474550724
Epoch 2/1000 - Validation Loss: 0.5758395195007324
Epoch 3/1000 - Training Loss: 0.7156306936272553
Epoch 3/1000 - Validation Loss: 0.6207643832479205
Epoch 4/1000 - Training Loss: 0.6833796075412205
Epoch 4/1000 - Validation Loss: 0.5331417364733559
Epoch 5/1000 - Training Loss: 0.5572151616215706
Epoch 5/1000 - Validation Loss: 0.563443511724472
Epoch 6/1000 - Training Loss: 0.5382092698876347
Epoch 6/1000 - Validation Loss: 0.6031956587518964
Epoch 7/1000 - Training Loss: 0.5052953750959465
Epoch 7/1000 - Validation Loss: 0.4840484346662249
Epoch 8/1000 - Training Loss: 0.5030766680304494
Epoch 8/1000 - Validation Loss: 0.5373040437698364
Epoch 9/1000 - Training Loss: 0.42274060419627596
Epoch 9/1000 - Validation Loss: 0.49755625639642986
Epoch 10/1000 - Training Loss: 0.40933707915246487
Epoch 10/1000 - Validation Loss: 0.49168192488806

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.2328036459428924
Epoch 1/1000 - Validation Loss: 1.0658349309648787
Epoch 2/1000 - Training Loss: 1.0992273784109525
Epoch 2/1000 - Validation Loss: 0.9324067575590951
Epoch 3/1000 - Training Loss: 1.082480595580169
Epoch 3/1000 - Validation Loss: 1.360661404473441
Epoch 4/1000 - Training Loss: 1.102247956608023
Epoch 4/1000 - Validation Loss: 0.8717398388045174
Epoch 5/1000 - Training Loss: 0.9119470598442214
Epoch 5/1000 - Validation Loss: 1.019109023468835
Epoch 6/1000 - Training Loss: 0.930028994168554
Epoch 6/1000 - Validation Loss: 1.0755460688046046
Epoch 7/1000 - Training Loss: 0.8526818134954998
Epoch 7/1000 - Validation Loss: 0.8092166270528521
Epoch 8/1000 - Training Loss: 0.8390483318695
Epoch 8/1000 - Validation Loss: 0.9363368919917515
Epoch 9/1000 - Training Loss: 0.8337097263761929
Epoch 9/1000 - Validation Loss: 0.8706862926483154
Epoch 10/1000 - Training Loss: 0.773197132561888
Epoch 10/1000 - Validation Loss: 0.9531203082629612
Epoch 1

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.1774129292794637
Epoch 1/1000 - Validation Loss: 0.9813429372651237
Epoch 2/1000 - Training Loss: 1.0320106904421533
Epoch 2/1000 - Validation Loss: 0.8623613885470799
Epoch 3/1000 - Training Loss: 1.030546105333737
Epoch 3/1000 - Validation Loss: 1.2236370359148299
Epoch 4/1000 - Training Loss: 0.9405485306467328
Epoch 4/1000 - Validation Loss: 1.1477070195334298
Epoch 5/1000 - Training Loss: 0.9668005577155522
Epoch 5/1000 - Validation Loss: 0.8466177156993321
Epoch 6/1000 - Training Loss: 0.8840026828859534
Epoch 6/1000 - Validation Loss: 1.084950132029397
Epoch 7/1000 - Training Loss: 0.8435124382376671
Epoch 7/1000 - Validation Loss: 0.9231223378862653
Epoch 8/1000 - Training Loss: 0.7799953115837914
Epoch 8/1000 - Validation Loss: 0.9104200686727252
Epoch 9/1000 - Training Loss: 0.8761089843298707
Epoch 9/1000 - Validation Loss: 1.0075414180755615
Epoch 10/1000 - Training Loss: 0.8568246518926961
Epoch 10/1000 - Validation Loss: 0.8730392626353672


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.1121575864298003
Epoch 1/1000 - Validation Loss: 0.9712903584752764
Epoch 2/1000 - Training Loss: 1.0241868011653423
Epoch 2/1000 - Validation Loss: 0.8934207899229867
Epoch 3/1000 - Training Loss: 0.9668517485260963
Epoch 3/1000 - Validation Loss: 0.8955487012863159
Epoch 4/1000 - Training Loss: 0.9130771069654396
Epoch 4/1000 - Validation Loss: 0.8534871850694928
Epoch 5/1000 - Training Loss: 0.862749773476805
Epoch 5/1000 - Validation Loss: 1.196879574230739
Epoch 6/1000 - Training Loss: 0.8776254451700619
Epoch 6/1000 - Validation Loss: 1.0412908451897758
Epoch 7/1000 - Training Loss: 0.8161740944321666
Epoch 7/1000 - Validation Loss: 0.8250643355505807
Epoch 8/1000 - Training Loss: 0.7839062181966645
Epoch 8/1000 - Validation Loss: 1.0204917618206568
Epoch 9/1000 - Training Loss: 0.7755508321736541
Epoch 9/1000 - Validation Loss: 0.971109824521201
Epoch 10/1000 - Training Loss: 0.7191337517329625
Epoch 10/1000 - Validation Loss: 0.8911210553986686
E

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.2424718945154123
Epoch 1/1000 - Validation Loss: 0.9767152752195086
Epoch 2/1000 - Training Loss: 1.0439146235585213
Epoch 2/1000 - Validation Loss: 0.9116264837128776
Epoch 3/1000 - Training Loss: 0.9723465283002172
Epoch 3/1000 - Validation Loss: 1.061555802822113
Epoch 4/1000 - Training Loss: 0.9116665188755307
Epoch 4/1000 - Validation Loss: 0.8999708635466439
Epoch 5/1000 - Training Loss: 0.8638418221047947
Epoch 5/1000 - Validation Loss: 0.8822856886046273
Epoch 6/1000 - Training Loss: 0.8106073471052306
Epoch 6/1000 - Validation Loss: 0.8444322092192513
Epoch 7/1000 - Training Loss: 0.7881485016218254
Epoch 7/1000 - Validation Loss: 0.9458974940436227
Epoch 8/1000 - Training Loss: 0.7639348943318639
Epoch 8/1000 - Validation Loss: 0.7708748493875776
Epoch 9/1000 - Training Loss: 0.7454659901559353
Epoch 9/1000 - Validation Loss: 0.8217512539454869
Epoch 10/1000 - Training Loss: 0.7215355506965092
Epoch 10/1000 - Validation Loss: 0.8807088903018406

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3678675826106752
Epoch 1/1000 - Validation Loss: 1.3457399266106742
Epoch 2/1000 - Training Loss: 1.3329213155167443
Epoch 2/1000 - Validation Loss: 1.30826279095241
Epoch 3/1000 - Training Loss: 1.3007274184908186
Epoch 3/1000 - Validation Loss: 1.2735490117754256
Epoch 4/1000 - Training Loss: 1.2712097764015198
Epoch 4/1000 - Validation Loss: 1.2420534917286463
Epoch 5/1000 - Training Loss: 1.244443757193429
Epoch 5/1000 - Validation Loss: 1.2134545019694738
Epoch 6/1000 - Training Loss: 1.220761435372489
Epoch 6/1000 - Validation Loss: 1.1867001397269112
Epoch 7/1000 - Training Loss: 1.19906171304839
Epoch 7/1000 - Validation Loss: 1.1633864300591605
Epoch 8/1000 - Training Loss: 1.1802071247782027
Epoch 8/1000 - Validation Loss: 1.1418646574020386
Epoch 9/1000 - Training Loss: 1.1626462627734457
Epoch 9/1000 - Validation Loss: 1.1227311491966248
Epoch 10/1000 - Training Loss: 1.147869742342404
Epoch 10/1000 - Validation Loss: 1.1059716854776656
Epoch

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.3694627348865782
Epoch 1/1000 - Validation Loss: 1.346559797014509
Epoch 2/1000 - Training Loss: 1.3348701851708549
Epoch 2/1000 - Validation Loss: 1.3091272967202323
Epoch 3/1000 - Training Loss: 1.302840686270169
Epoch 3/1000 - Validation Loss: 1.2745740073067802
Epoch 4/1000 - Training Loss: 1.2733882133449828
Epoch 4/1000 - Validation Loss: 1.2433036736079626
Epoch 5/1000 - Training Loss: 1.2475782930850983
Epoch 5/1000 - Validation Loss: 1.2143546342849731
Epoch 6/1000 - Training Loss: 1.2234615428107125
Epoch 6/1000 - Validation Loss: 1.188168968473162
Epoch 7/1000 - Training Loss: 1.2015619554689951
Epoch 7/1000 - Validation Loss: 1.164895841053554
Epoch 8/1000 - Training Loss: 1.1818956200565611
Epoch 8/1000 - Validation Loss: 1.1429025956562586
Epoch 9/1000 - Training Loss: 1.164649756891387
Epoch 9/1000 - Validation Loss: 1.1238336733409338
Epoch 10/1000 - Training Loss: 1.1494029760360718
Epoch 10/1000 - Validation Loss: 1.1061903323446
Epoch 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.3719541771071297
Epoch 1/1000 - Validation Loss: 1.3489059039524622
Epoch 2/1000 - Training Loss: 1.3369958294289452
Epoch 2/1000 - Validation Loss: 1.3116373334612166
Epoch 3/1000 - Training Loss: 1.3049004908118929
Epoch 3/1000 - Validation Loss: 1.2772554670061385
Epoch 4/1000 - Training Loss: 1.2757017655032021
Epoch 4/1000 - Validation Loss: 1.2460966791425432
Epoch 5/1000 - Training Loss: 1.249575287103653
Epoch 5/1000 - Validation Loss: 1.2171969243458338
Epoch 6/1000 - Training Loss: 1.225519597530365
Epoch 6/1000 - Validation Loss: 1.1910980258669173
Epoch 7/1000 - Training Loss: 1.2040148419993264
Epoch 7/1000 - Validation Loss: 1.1668195554188319
Epoch 8/1000 - Training Loss: 1.1839404659611839
Epoch 8/1000 - Validation Loss: 1.146304658481053
Epoch 9/1000 - Training Loss: 1.1668378338217735
Epoch 9/1000 - Validation Loss: 1.1264843429837907
Epoch 10/1000 - Training Loss: 1.1519391675080572
Epoch 10/1000 - Validation Loss: 1.1087499856948853
E

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.367193334868976
Epoch 1/1000 - Validation Loss: 1.3451264245169503
Epoch 2/1000 - Training Loss: 1.332215170775141
Epoch 2/1000 - Validation Loss: 1.3079540048326765
Epoch 3/1000 - Training Loss: 1.3005724868604116
Epoch 3/1000 - Validation Loss: 1.273998430797032
Epoch 4/1000 - Training Loss: 1.271752574614116
Epoch 4/1000 - Validation Loss: 1.241897668157305
Epoch 5/1000 - Training Loss: 1.2452910074165888
Epoch 5/1000 - Validation Loss: 1.2137842178344727
Epoch 6/1000 - Training Loss: 1.2221065951245171
Epoch 6/1000 - Validation Loss: 1.1870623145784651
Epoch 7/1000 - Training Loss: 1.1999733043568475
Epoch 7/1000 - Validation Loss: 1.1640618188040597
Epoch 8/1000 - Training Loss: 1.1814669136490141
Epoch 8/1000 - Validation Loss: 1.1425334044865199
Epoch 9/1000 - Training Loss: 1.1639749705791473
Epoch 9/1000 - Validation Loss: 1.123564030442919
Epoch 10/1000 - Training Loss: 1.1487741385187422
Epoch 10/1000 - Validation Loss: 1.1061015639986311
Epoc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.367909461259842
Epoch 1/1000 - Validation Loss: 1.3468393768583025
Epoch 2/1000 - Training Loss: 1.3333286323717661
Epoch 2/1000 - Validation Loss: 1.3102714163916451
Epoch 3/1000 - Training Loss: 1.3014954520123345
Epoch 3/1000 - Validation Loss: 1.2758963789258684
Epoch 4/1000 - Training Loss: 1.2733687588146754
Epoch 4/1000 - Validation Loss: 1.2444442680903844
Epoch 5/1000 - Training Loss: 1.2466811324868883
Epoch 5/1000 - Validation Loss: 1.2166331154959542
Epoch 6/1000 - Training Loss: 1.2231871911457606
Epoch 6/1000 - Validation Loss: 1.190434353692191
Epoch 7/1000 - Training Loss: 1.202137759753636
Epoch 7/1000 - Validation Loss: 1.1667632375444685
Epoch 8/1000 - Training Loss: 1.1831863863127572
Epoch 8/1000 - Validation Loss: 1.1453818423407418
Epoch 9/1000 - Training Loss: 1.1654801272920199
Epoch 9/1000 - Validation Loss: 1.1262806398527963
Epoch 10/1000 - Training Loss: 1.1507030172007424
Epoch 10/1000 - Validation Loss: 1.1090389319828577
E

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.3542797714471817
Epoch 1/1000 - Validation Loss: 1.3304969412939889
Epoch 2/1000 - Training Loss: 1.3200593463012151
Epoch 2/1000 - Validation Loss: 1.294840931892395
Epoch 3/1000 - Training Loss: 1.289409081850733
Epoch 3/1000 - Validation Loss: 1.2609183277402605
Epoch 4/1000 - Training Loss: 1.2616502791643143
Epoch 4/1000 - Validation Loss: 1.2297469547816686
Epoch 5/1000 - Training Loss: 1.2358872592449188
Epoch 5/1000 - Validation Loss: 1.2026441267558508
Epoch 6/1000 - Training Loss: 1.212911810193743
Epoch 6/1000 - Validation Loss: 1.1775644506726946
Epoch 7/1000 - Training Loss: 1.1923219689301081
Epoch 7/1000 - Validation Loss: 1.1547014032091414
Epoch 8/1000 - Training Loss: 1.1737427860498428
Epoch 8/1000 - Validation Loss: 1.1339802145957947
Epoch 9/1000 - Training Loss: 1.1587145839418684
Epoch 9/1000 - Validation Loss: 1.115404256752559
Epoch 10/1000 - Training Loss: 1.143590013895716
Epoch 10/1000 - Validation Loss: 1.0993403792381287
Epo

## Bert Runs

In [8]:
from itertools import product
from sklearn.metrics import classification_report

experiment_settings = {
        "embed_model": [RoBERTaWordEmbeddings, BERTWordEmbeddings, ALBERTWordEmbeddings],  
        "polar_dimension": [ 786, 1586], 
        "WordPolarity_method": ["base-change", "projection"], 
        "PoemSentimentDataset_method": ["avg", "cls"], 
        "layer": [2, 3, 4, 5], 
        "avg_embed": [True]
    }
# Create a list of lists containing values for each setting
setting_values = [values for values in experiment_settings.values()]

# Iterate through all combinations of experiment settings
for setting_combination in product(*setting_values):
    setting = {
        key: value for key, value in zip(experiment_settings.keys(), setting_combination)
    }
    print(setting)
    with io.capture_output() as captured:
        # Extract the values from the current setting
        embed_model = setting["embed_model"]
        polar_dimension = setting["polar_dimension"]
        WordPolarity_method = setting["WordPolarity_method"]
        PoemSentimentDataset_method = setting["PoemSentimentDataset_method"]
        layer = setting["layer"]
        avg_embed = setting["avg_embed"]
        
        print('Setting', embed_model, polar_dimension, WordPolarity_method, PoemSentimentDataset_method, layer)
        dataset = load_dataset("poem_sentiment")
        out_path = './antonyms/'
        antonym_path = "data/polars_all_combined.xlsx"
        embed_model = embed_model(layer=layer, avg_layers=avg_embed)
        
        dictionary = Dictionary('wordnet', api_key='')    
        lookupSpace = LookupCreator(dictionary, out_path, antonyms_file_path=antonym_path)
        lookupSpace.create_lookup_files()
        antonym_path = out_path + "polar_dimensions.pkl"

        pdc = PolarDimensions(embed_model, antonym_path=out_path + "antonym_wordnet_example_sentences_readable_extended.txt")
        pdc.create_polar_dimensions(out_path,"/polar_dimensions.pkl" )

        wp = WordPolarity(embed_model, antonym_path=antonym_path, method=WordPolarity_method)
        num_classes = 4

    # Define your model
    sensepolar_model = PolarEmbeddingClassifier(num_classes=num_classes, polar_dimension=polar_dimension, model_name=f'sense_polar_{embed_model.model_name}_dim{polar_dimension}_{WordPolarity_method}_{PoemSentimentDataset_method}_layer{layer}')


    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    sensepolar_model.to(device)

    optimizer = torch.optim.AdamW(sensepolar_model.parameters(), lr=1e-3)
    loss_fn = torch.nn.CrossEntropyLoss()

    num_epochs = 1000
    patience = 50

    preprocess_text = lambda verse: re.sub(r'\W+', ' ', re.sub(r'_([^_]+)_', r'\1', verse))
    train_texts = [preprocess_text(verse) for verse in dataset["train"]["verse_text"]]
    test_texts = [preprocess_text(verse) for verse in dataset["test"]["verse_text"]]
    valid_texts = [preprocess_text(verse) for verse in dataset["validation"]["verse_text"]]
    train_labels = dataset["train"]["label"]
    test_labels = dataset["test"]["label"]
    valid_labels = dataset["validation"]["label"]

    # train_texts_filtered = []
    # train_labels_filtered = []
    # for text, label in zip(train_texts, train_labels):
    #     if label != 3:
    #         train_texts_filtered.append(text)
    #         train_labels_filtered.append(label)`

    train_dataset = PoemSentimentDataset(train_texts, train_labels, wp, method=PoemSentimentDataset_method, dimension=polar_dimension)
    valid_dataset = PoemSentimentDataset(valid_texts, valid_labels, wp, dimension=polar_dimension, method=PoemSentimentDataset_method)
    test_dataset = PoemSentimentDataset(test_texts, test_labels, wp, dimension=polar_dimension, method=PoemSentimentDataset_method)

    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

    sensepolar_model.train_model(train_loader, valid_loader, num_epochs, patience, optimizer, loss_fn, device)

    sensepolar_model.test_model(test_loader, loss_fn, device)

{'embed_model': <class 'sensepolar.embed.bertEmbed.BERTWordEmbeddings'>, 'polar_dimension': 786, 'WordPolarity_method': 'base-change', 'PoemSentimentDataset_method': 'avg', 'layer': 2}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3687065882342202
Epoch 1/1000 - Validation Loss: 1.346116542816162
Epoch 2/1000 - Training Loss: 1.3339994528463908
Epoch 2/1000 - Validation Loss: 1.3089594841003418
Epoch 3/1000 - Training Loss: 1.30201789523874
Epoch 3/1000 - Validation Loss: 1.2746562106268746
Epoch 4/1000 - Training Loss: 1.2721277879817146
Epoch 4/1000 - Validation Loss: 1.2432794741221838
Epoch 5/1000 - Training Loss: 1.2460773821387972
Epoch 5/1000 - Validation Loss: 1.2142359529222762
Epoch 6/1000 - Training Loss: 1.221897570150239
Epoch 6/1000 - Validation Loss: 1.187694753919329
Epoch 7/1000 - Training Loss: 1.2009116794381822
Epoch 7/1000 - Validation Loss: 1.1636891875948225
Epoch 8/1000 - Training Loss: 1.1808439024857111
Epoch 8/1000 - Validation Loss: 1.1427658115114485
Epoch 9/1000 - Training Loss: 1.1638422438076563
Epoch 9/1000 - Validation Loss: 1.123573797089713
Epoch 10/1000 - Training Loss: 1.1499734286751067
Epoch 10/1000 - Validation Loss: 1.1060173681804113
Epoc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.3711503722837992
Epoch 1/1000 - Validation Loss: 1.3481003727231706
Epoch 2/1000 - Training Loss: 1.3358537554740906
Epoch 2/1000 - Validation Loss: 1.310742906161717
Epoch 3/1000 - Training Loss: 1.3033654178891863
Epoch 3/1000 - Validation Loss: 1.2763084173202515
Epoch 4/1000 - Training Loss: 1.2742787897586823
Epoch 4/1000 - Validation Loss: 1.244286230632237
Epoch 5/1000 - Training Loss: 1.2470363633973258
Epoch 5/1000 - Validation Loss: 1.215685316494533
Epoch 6/1000 - Training Loss: 1.2231273182800837
Epoch 6/1000 - Validation Loss: 1.1892551353999548
Epoch 7/1000 - Training Loss: 1.2021798023155756
Epoch 7/1000 - Validation Loss: 1.1650667360850744
Epoch 8/1000 - Training Loss: 1.1826469153165817
Epoch 8/1000 - Validation Loss: 1.1441071374075753
Epoch 9/1000 - Training Loss: 1.1656023902552468
Epoch 9/1000 - Validation Loss: 1.124387868813106
Epoch 10/1000 - Training Loss: 1.1502561505351747
Epoch 10/1000 - Validation Loss: 1.107170752116612
Epo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.3772491259234292
Epoch 1/1000 - Validation Loss: 1.3563461303710938
Epoch 2/1000 - Training Loss: 1.341656744480133
Epoch 2/1000 - Validation Loss: 1.3178029230662756
Epoch 3/1000 - Training Loss: 1.3090022844927651
Epoch 3/1000 - Validation Loss: 1.2832272733960832
Epoch 4/1000 - Training Loss: 1.2793166658708028
Epoch 4/1000 - Validation Loss: 1.2510940006801061
Epoch 5/1000 - Training Loss: 1.252024463244847
Epoch 5/1000 - Validation Loss: 1.221992083958217
Epoch 6/1000 - Training Loss: 1.2279319699321474
Epoch 6/1000 - Validation Loss: 1.1951057059424264
Epoch 7/1000 - Training Loss: 1.2058564977986472
Epoch 7/1000 - Validation Loss: 1.1710830926895142
Epoch 8/1000 - Training Loss: 1.185962286378656
Epoch 8/1000 - Validation Loss: 1.1493454660688127
Epoch 9/1000 - Training Loss: 1.1692494515861784
Epoch 9/1000 - Validation Loss: 1.1298035723822457
Epoch 10/1000 - Training Loss: 1.153059075985636
Epoch 10/1000 - Validation Loss: 1.1118129066058569
Epo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.3760203740426473
Epoch 1/1000 - Validation Loss: 1.3545490503311157
Epoch 2/1000 - Training Loss: 1.3405693237270628
Epoch 2/1000 - Validation Loss: 1.3168202468327113
Epoch 3/1000 - Training Loss: 1.3086130108152116
Epoch 3/1000 - Validation Loss: 1.2820715393338884
Epoch 4/1000 - Training Loss: 1.2794420314686639
Epoch 4/1000 - Validation Loss: 1.2502095018114363
Epoch 5/1000 - Training Loss: 1.2518829724618368
Epoch 5/1000 - Validation Loss: 1.221221685409546
Epoch 6/1000 - Training Loss: 1.228557220527104
Epoch 6/1000 - Validation Loss: 1.1944846425737654
Epoch 7/1000 - Training Loss: 1.2062441600220544
Epoch 7/1000 - Validation Loss: 1.1709914548056466
Epoch 8/1000 - Training Loss: 1.1860726433140891
Epoch 8/1000 - Validation Loss: 1.1489599432264055
Epoch 9/1000 - Training Loss: 1.1692285218409129
Epoch 9/1000 - Validation Loss: 1.1295266321727209
Epoch 10/1000 - Training Loss: 1.15421134020601
Epoch 10/1000 - Validation Loss: 1.1115660241671972
Ep

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.3714849863733565
Epoch 1/1000 - Validation Loss: 1.3501717022487096
Epoch 2/1000 - Training Loss: 1.3369027950934
Epoch 2/1000 - Validation Loss: 1.3130714212145125
Epoch 3/1000 - Training Loss: 1.304731628724507
Epoch 3/1000 - Validation Loss: 1.2791233914239066
Epoch 4/1000 - Training Loss: 1.2758101778370994
Epoch 4/1000 - Validation Loss: 1.24747257573264
Epoch 5/1000 - Training Loss: 1.249598509499005
Epoch 5/1000 - Validation Loss: 1.2187639134270805
Epoch 6/1000 - Training Loss: 1.225027124796595
Epoch 6/1000 - Validation Loss: 1.1929534673690796
Epoch 7/1000 - Training Loss: 1.203946413738387
Epoch 7/1000 - Validation Loss: 1.1689576591764177
Epoch 8/1000 - Training Loss: 1.1842839462416512
Epoch 8/1000 - Validation Loss: 1.147429176739284
Epoch 9/1000 - Training Loss: 1.1671507986528533
Epoch 9/1000 - Validation Loss: 1.1276258145059859
Epoch 10/1000 - Training Loss: 1.1518396266869135
Epoch 10/1000 - Validation Loss: 1.1100743838718958
Epoch 11

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.3628105138029372
Epoch 1/1000 - Validation Loss: 1.3380260637828283
Epoch 2/1000 - Training Loss: 1.3283655835049493
Epoch 2/1000 - Validation Loss: 1.3016379560743059
Epoch 3/1000 - Training Loss: 1.2970311450106757
Epoch 3/1000 - Validation Loss: 1.2673460756029402
Epoch 4/1000 - Training Loss: 1.2682268981422697
Epoch 4/1000 - Validation Loss: 1.23636794090271
Epoch 5/1000 - Training Loss: 1.2417782566377096
Epoch 5/1000 - Validation Loss: 1.2081843784877233
Epoch 6/1000 - Training Loss: 1.2182704678603582
Epoch 6/1000 - Validation Loss: 1.1824253797531128
Epoch 7/1000 - Training Loss: 1.1970142147370748
Epoch 7/1000 - Validation Loss: 1.158861449786595
Epoch 8/1000 - Training Loss: 1.1783440560102463
Epoch 8/1000 - Validation Loss: 1.1376943247658866
Epoch 9/1000 - Training Loss: 1.161132570888315
Epoch 9/1000 - Validation Loss: 1.1189123903002058
Epoch 10/1000 - Training Loss: 1.1471991805093629
Epoch 10/1000 - Validation Loss: 1.101686247757503
Epo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.3586206968341554
Epoch 1/1000 - Validation Loss: 1.3364608798708235
Epoch 2/1000 - Training Loss: 1.3243921654564994
Epoch 2/1000 - Validation Loss: 1.2998400926589966
Epoch 3/1000 - Training Loss: 1.2933091606412614
Epoch 3/1000 - Validation Loss: 1.266099521092006
Epoch 4/1000 - Training Loss: 1.2649491131305695
Epoch 4/1000 - Validation Loss: 1.2354732751846313
Epoch 5/1000 - Training Loss: 1.2393004489796502
Epoch 5/1000 - Validation Loss: 1.2079133306230818
Epoch 6/1000 - Training Loss: 1.2164417654275894
Epoch 6/1000 - Validation Loss: 1.1826144286564417
Epoch 7/1000 - Training Loss: 1.1952595987490244
Epoch 7/1000 - Validation Loss: 1.1595665557043893
Epoch 8/1000 - Training Loss: 1.1776208749839239
Epoch 8/1000 - Validation Loss: 1.1388361198561532
Epoch 9/1000 - Training Loss: 1.160638420709542
Epoch 9/1000 - Validation Loss: 1.1201676385743278
Epoch 10/1000 - Training Loss: 1.1457339716809136
Epoch 10/1000 - Validation Loss: 1.1032529132706779


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.3764674599681581
Epoch 1/1000 - Validation Loss: 1.3556444474628992
Epoch 2/1000 - Training Loss: 1.3411293178796768
Epoch 2/1000 - Validation Loss: 1.317342962537493
Epoch 3/1000 - Training Loss: 1.3084409194333213
Epoch 3/1000 - Validation Loss: 1.282970939363752
Epoch 4/1000 - Training Loss: 1.278976559638977
Epoch 4/1000 - Validation Loss: 1.2506360156195504
Epoch 5/1000 - Training Loss: 1.252054968050548
Epoch 5/1000 - Validation Loss: 1.2214389869144984
Epoch 6/1000 - Training Loss: 1.2279598606484277
Epoch 6/1000 - Validation Loss: 1.1950511932373047
Epoch 7/1000 - Training Loss: 1.2058266869613103
Epoch 7/1000 - Validation Loss: 1.1709001404898507
Epoch 8/1000 - Training Loss: 1.186614751815796
Epoch 8/1000 - Validation Loss: 1.1490411758422852
Epoch 9/1000 - Training Loss: 1.168304582791669
Epoch 9/1000 - Validation Loss: 1.1297619427953447
Epoch 10/1000 - Training Loss: 1.1533816999622755
Epoch 10/1000 - Validation Loss: 1.1119587932314192
Epoc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.0382470062800817
Epoch 1/1000 - Validation Loss: 0.7931533839021411
Epoch 2/1000 - Training Loss: 0.8074067575590951
Epoch 2/1000 - Validation Loss: 0.7544539741107396
Epoch 3/1000 - Training Loss: 0.7109145208128861
Epoch 3/1000 - Validation Loss: 0.7494803071022034
Epoch 4/1000 - Training Loss: 0.6512489968112537
Epoch 4/1000 - Validation Loss: 0.7039219396454948
Epoch 5/1000 - Training Loss: 0.596740755119494
Epoch 5/1000 - Validation Loss: 0.7050164597375053
Epoch 6/1000 - Training Loss: 0.567360972453441
Epoch 6/1000 - Validation Loss: 0.6851641280310494
Epoch 7/1000 - Training Loss: 0.5184374140309436
Epoch 7/1000 - Validation Loss: 0.7141699280057635
Epoch 8/1000 - Training Loss: 0.4936973298234599
Epoch 8/1000 - Validation Loss: 0.69862471308027
Epoch 9/1000 - Training Loss: 0.46538667167936054
Epoch 9/1000 - Validation Loss: 0.7051998036248344
Epoch 10/1000 - Training Loss: 0.4539184056754623
Epoch 10/1000 - Validation Loss: 0.7267585694789886
E

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 0.9606169749583516
Epoch 1/1000 - Validation Loss: 0.7861336043902806
Epoch 2/1000 - Training Loss: 0.74026577600411
Epoch 2/1000 - Validation Loss: 0.7359270623752049
Epoch 3/1000 - Training Loss: 0.6456727321658816
Epoch 3/1000 - Validation Loss: 0.7089455681187766
Epoch 4/1000 - Training Loss: 0.5860293763024467
Epoch 4/1000 - Validation Loss: 0.7135340017931802
Epoch 5/1000 - Training Loss: 0.5470098698777812
Epoch 5/1000 - Validation Loss: 0.6991671281201499
Epoch 6/1000 - Training Loss: 0.49553373402782847
Epoch 6/1000 - Validation Loss: 0.7039084008761815
Epoch 7/1000 - Training Loss: 0.4712852691965444
Epoch 7/1000 - Validation Loss: 0.7078481273991721
Epoch 8/1000 - Training Loss: 0.4394949860870838
Epoch 8/1000 - Validation Loss: 0.6818208183561053
Epoch 9/1000 - Training Loss: 0.42370227059083326
Epoch 9/1000 - Validation Loss: 0.6983143091201782
Epoch 10/1000 - Training Loss: 0.3869033860308783
Epoch 10/1000 - Validation Loss: 0.699707648583820

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 0.9880476295948029
Epoch 1/1000 - Validation Loss: 0.7634026748793465
Epoch 2/1000 - Training Loss: 0.7707823057259832
Epoch 2/1000 - Validation Loss: 0.6987759726388114
Epoch 3/1000 - Training Loss: 0.680271645209619
Epoch 3/1000 - Validation Loss: 0.6748430132865906
Epoch 4/1000 - Training Loss: 0.6158411242067814
Epoch 4/1000 - Validation Loss: 0.6528174153396061
Epoch 5/1000 - Training Loss: 0.5672959094601018
Epoch 5/1000 - Validation Loss: 0.6518493677888598
Epoch 6/1000 - Training Loss: 0.5313744827040604
Epoch 6/1000 - Validation Loss: 0.6643095782824925
Epoch 7/1000 - Training Loss: 0.49948458799294065
Epoch 7/1000 - Validation Loss: 0.6138473664011274
Epoch 8/1000 - Training Loss: 0.46168446594050955
Epoch 8/1000 - Validation Loss: 0.6409060401575906
Epoch 9/1000 - Training Loss: 0.4334505461156368
Epoch 9/1000 - Validation Loss: 0.6222259657723563
Epoch 10/1000 - Training Loss: 0.4236058435801949
Epoch 10/1000 - Validation Loss: 0.63178712555340

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.0231694355607033
Epoch 1/1000 - Validation Loss: 0.8259134633200509
Epoch 2/1000 - Training Loss: 0.7986924307686942
Epoch 2/1000 - Validation Loss: 0.7536244903291974
Epoch 3/1000 - Training Loss: 0.6998245274381978
Epoch 3/1000 - Validation Loss: 0.7098367639950344
Epoch 4/1000 - Training Loss: 0.6274982967547008
Epoch 4/1000 - Validation Loss: 0.6812210466180529
Epoch 5/1000 - Training Loss: 0.5908688386636121
Epoch 5/1000 - Validation Loss: 0.6680071907384055
Epoch 6/1000 - Training Loss: 0.5433304073022944
Epoch 6/1000 - Validation Loss: 0.6729491353034973
Epoch 7/1000 - Training Loss: 0.5104285773954221
Epoch 7/1000 - Validation Loss: 0.660273517881121
Epoch 8/1000 - Training Loss: 0.48945227212139536
Epoch 8/1000 - Validation Loss: 0.6560392592634473
Epoch 9/1000 - Training Loss: 0.45648388138839174
Epoch 9/1000 - Validation Loss: 0.6554496969495501
Epoch 10/1000 - Training Loss: 0.4310090424759047
Epoch 10/1000 - Validation Loss: 0.67495166403906

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.0378480681351252
Epoch 1/1000 - Validation Loss: 0.8275419516222817
Epoch 2/1000 - Training Loss: 0.9012069489274707
Epoch 2/1000 - Validation Loss: 0.8055880623204368
Epoch 3/1000 - Training Loss: 0.8451766499451229
Epoch 3/1000 - Validation Loss: 0.7993462256022862
Epoch 4/1000 - Training Loss: 0.7839318716100284
Epoch 4/1000 - Validation Loss: 0.7773549471582685
Epoch 5/1000 - Training Loss: 0.7473664986235755
Epoch 5/1000 - Validation Loss: 0.736797673361642
Epoch 6/1000 - Training Loss: 0.7135540752538613
Epoch 6/1000 - Validation Loss: 0.7775297335215977
Epoch 7/1000 - Training Loss: 0.661503440035241
Epoch 7/1000 - Validation Loss: 0.7513765820435115
Epoch 8/1000 - Training Loss: 0.6427839643188885
Epoch 8/1000 - Validation Loss: 0.7836622510637555
Epoch 9/1000 - Training Loss: 0.6189150677195617
Epoch 9/1000 - Validation Loss: 0.7569208059992109
Epoch 10/1000 - Training Loss: 0.596300653048924
Epoch 10/1000 - Validation Loss: 0.7541131121771676
E

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.04338995047978
Epoch 1/1000 - Validation Loss: 0.8590771811349052
Epoch 2/1000 - Training Loss: 0.923254560147013
Epoch 2/1000 - Validation Loss: 0.8539381197520665
Epoch 3/1000 - Training Loss: 0.8483611504946437
Epoch 3/1000 - Validation Loss: 0.8181122967175075
Epoch 4/1000 - Training Loss: 0.791712256946734
Epoch 4/1000 - Validation Loss: 0.7514433179582868
Epoch 5/1000 - Training Loss: 0.7601525565343243
Epoch 5/1000 - Validation Loss: 0.7364406500543866
Epoch 6/1000 - Training Loss: 0.7194481220628534
Epoch 6/1000 - Validation Loss: 0.7298445275851658
Epoch 7/1000 - Training Loss: 0.6851806427751269
Epoch 7/1000 - Validation Loss: 0.7392476711954389
Epoch 8/1000 - Training Loss: 0.6756464945418494
Epoch 8/1000 - Validation Loss: 0.7039381521088737
Epoch 9/1000 - Training Loss: 0.6440280031945024
Epoch 9/1000 - Validation Loss: 0.718907756464822
Epoch 10/1000 - Training Loss: 0.6076678426137992
Epoch 10/1000 - Validation Loss: 0.6900216851915631
Epo

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.120212649660451
Epoch 1/1000 - Validation Loss: 0.8871706042970929
Epoch 2/1000 - Training Loss: 0.9762188792228699
Epoch 2/1000 - Validation Loss: 0.8370033587728228
Epoch 3/1000 - Training Loss: 0.9167240251387868
Epoch 3/1000 - Validation Loss: 0.8210515975952148
Epoch 4/1000 - Training Loss: 0.8614382179720061
Epoch 4/1000 - Validation Loss: 0.818173348903656
Epoch 5/1000 - Training Loss: 0.8194118929760796
Epoch 5/1000 - Validation Loss: 0.8071478009223938
Epoch 6/1000 - Training Loss: 0.7876670206231731
Epoch 6/1000 - Validation Loss: 0.7949567777769906
Epoch 7/1000 - Training Loss: 0.7451818010636738
Epoch 7/1000 - Validation Loss: 0.8250374623707363
Epoch 8/1000 - Training Loss: 0.7298719877643245
Epoch 8/1000 - Validation Loss: 0.8189375741141183
Epoch 9/1000 - Training Loss: 0.7010337593300002
Epoch 9/1000 - Validation Loss: 0.78679906470435
Epoch 10/1000 - Training Loss: 0.687350661626884
Epoch 10/1000 - Validation Loss: 0.7969193884304592
Epo

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.0730523339339666
Epoch 1/1000 - Validation Loss: 0.8745658567973545
Epoch 2/1000 - Training Loss: 0.9658876487186977
Epoch 2/1000 - Validation Loss: 0.8308116112436567
Epoch 3/1000 - Training Loss: 0.9028242900967598
Epoch 3/1000 - Validation Loss: 0.792981241430555
Epoch 4/1000 - Training Loss: 0.8679277268903596
Epoch 4/1000 - Validation Loss: 0.7968008688517979
Epoch 5/1000 - Training Loss: 0.8381660133600235
Epoch 5/1000 - Validation Loss: 0.8087129933493478
Epoch 6/1000 - Training Loss: 0.8033350918974195
Epoch 6/1000 - Validation Loss: 0.8079553927694049
Epoch 7/1000 - Training Loss: 0.7862485429005963
Epoch 7/1000 - Validation Loss: 0.7776675394603184
Epoch 8/1000 - Training Loss: 0.7514584091092859
Epoch 8/1000 - Validation Loss: 0.7826827849660601
Epoch 9/1000 - Training Loss: 0.7324927992054394
Epoch 9/1000 - Validation Loss: 0.8239034925188337
Epoch 10/1000 - Training Loss: 0.7164599943373885
Epoch 10/1000 - Validation Loss: 0.7862372313226972

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3659771233797073
Epoch 1/1000 - Validation Loss: 1.3442609650748116
Epoch 2/1000 - Training Loss: 1.3310929238796234
Epoch 2/1000 - Validation Loss: 1.3064171757016863
Epoch 3/1000 - Training Loss: 1.299623280763626
Epoch 3/1000 - Validation Loss: 1.27255083833422
Epoch 4/1000 - Training Loss: 1.2707820875304086
Epoch 4/1000 - Validation Loss: 1.2416625704084123
Epoch 5/1000 - Training Loss: 1.2444057336875372
Epoch 5/1000 - Validation Loss: 1.2133270502090454
Epoch 6/1000 - Training Loss: 1.2209517785481043
Epoch 6/1000 - Validation Loss: 1.1873480081558228
Epoch 7/1000 - Training Loss: 1.1997679727418082
Epoch 7/1000 - Validation Loss: 1.1637520960399084
Epoch 8/1000 - Training Loss: 1.180942177772522
Epoch 8/1000 - Validation Loss: 1.1431191648755754
Epoch 9/1000 - Training Loss: 1.164514313851084
Epoch 9/1000 - Validation Loss: 1.1237244776317052
Epoch 10/1000 - Training Loss: 1.1495375782251358
Epoch 10/1000 - Validation Loss: 1.1061605981418066
Epo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'embed_model': <class 'sensepolar.embed.bertEmbed.BERTWordEmbeddings'>, 'polar_dimension': 1586, 'WordPolarity_method': 'base-change', 'PoemSentimentDataset_method': 'avg', 'layer': 3}


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.3609769025019236
Epoch 1/1000 - Validation Loss: 1.3375920908791679
Epoch 2/1000 - Training Loss: 1.3263332737343652
Epoch 2/1000 - Validation Loss: 1.301573497908456
Epoch 3/1000 - Training Loss: 1.2951959648302622
Epoch 3/1000 - Validation Loss: 1.2674963474273682
Epoch 4/1000 - Training Loss: 1.2665612527302332
Epoch 4/1000 - Validation Loss: 1.236663818359375
Epoch 5/1000 - Training Loss: 1.241139646087374
Epoch 5/1000 - Validation Loss: 1.2077793223517281
Epoch 6/1000 - Training Loss: 1.2176805308886938
Epoch 6/1000 - Validation Loss: 1.1825347798211234
Epoch 7/1000 - Training Loss: 1.1959597042628698
Epoch 7/1000 - Validation Loss: 1.159471869468689
Epoch 8/1000 - Training Loss: 1.1776783019304276
Epoch 8/1000 - Validation Loss: 1.1381065249443054
Epoch 9/1000 - Training Loss: 1.1607348769903183
Epoch 9/1000 - Validation Loss: 1.1194628732545036
Epoch 10/1000 - Training Loss: 1.146326435463769
Epoch 10/1000 - Validation Loss: 1.1025542191096716
Epo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.372242386851992
Epoch 1/1000 - Validation Loss: 1.3496106692722865
Epoch 2/1000 - Training Loss: 1.3374986520835332
Epoch 2/1000 - Validation Loss: 1.3128595692770821
Epoch 3/1000 - Training Loss: 1.3057778179645538
Epoch 3/1000 - Validation Loss: 1.2776364428656442
Epoch 4/1000 - Training Loss: 1.276171169110707
Epoch 4/1000 - Validation Loss: 1.2463322026388985
Epoch 5/1000 - Training Loss: 1.249722182750702
Epoch 5/1000 - Validation Loss: 1.2173843724387032
Epoch 6/1000 - Training Loss: 1.2256313796554292
Epoch 6/1000 - Validation Loss: 1.1908128261566162
Epoch 7/1000 - Training Loss: 1.2039764395781927
Epoch 7/1000 - Validation Loss: 1.1668746982301985
Epoch 8/1000 - Training Loss: 1.184906440121787
Epoch 8/1000 - Validation Loss: 1.1448585646493095
Epoch 9/1000 - Training Loss: 1.1677313478929656
Epoch 9/1000 - Validation Loss: 1.1257383993693761
Epoch 10/1000 - Training Loss: 1.1509630967463766
Epoch 10/1000 - Validation Loss: 1.1091159241540092
Ep

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.3710067293473653
Epoch 1/1000 - Validation Loss: 1.348744000707354
Epoch 2/1000 - Training Loss: 1.3363110848835535
Epoch 2/1000 - Validation Loss: 1.3109921557562692
Epoch 3/1000 - Training Loss: 1.3040281300033842
Epoch 3/1000 - Validation Loss: 1.2771785599844796
Epoch 4/1000 - Training Loss: 1.2752427863223212
Epoch 4/1000 - Validation Loss: 1.245803679738726
Epoch 5/1000 - Training Loss: 1.249636682016509
Epoch 5/1000 - Validation Loss: 1.2166661024093628
Epoch 6/1000 - Training Loss: 1.2248375373227256
Epoch 6/1000 - Validation Loss: 1.191241707120623
Epoch 7/1000 - Training Loss: 1.203796767762729
Epoch 7/1000 - Validation Loss: 1.1671167612075806
Epoch 8/1000 - Training Loss: 1.1840578934976034
Epoch 8/1000 - Validation Loss: 1.1456591061183385
Epoch 9/1000 - Training Loss: 1.1671413949557714
Epoch 9/1000 - Validation Loss: 1.1259691630090987
Epoch 10/1000 - Training Loss: 1.1509361330951964
Epoch 10/1000 - Validation Loss: 1.1087910107203893
Epo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.3812647823776518
Epoch 1/1000 - Validation Loss: 1.3604627847671509
Epoch 2/1000 - Training Loss: 1.345740258693695
Epoch 2/1000 - Validation Loss: 1.3225714479173933
Epoch 3/1000 - Training Loss: 1.3131550465311324
Epoch 3/1000 - Validation Loss: 1.287287882396153
Epoch 4/1000 - Training Loss: 1.283017247915268
Epoch 4/1000 - Validation Loss: 1.2552849224635534
Epoch 5/1000 - Training Loss: 1.2559571244886942
Epoch 5/1000 - Validation Loss: 1.2256871632167272
Epoch 6/1000 - Training Loss: 1.231267633182662
Epoch 6/1000 - Validation Loss: 1.1989167077200753
Epoch 7/1000 - Training Loss: 1.208970274244036
Epoch 7/1000 - Validation Loss: 1.174629534993853
Epoch 8/1000 - Training Loss: 1.189609210406031
Epoch 8/1000 - Validation Loss: 1.1519135066441126
Epoch 9/1000 - Training Loss: 1.1722806032214845
Epoch 9/1000 - Validation Loss: 1.1321195278848921
Epoch 10/1000 - Training Loss: 1.1562604765806879
Epoch 10/1000 - Validation Loss: 1.1141405701637268
Epoch

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.3699802458286285
Epoch 1/1000 - Validation Loss: 1.3479522466659546
Epoch 2/1000 - Training Loss: 1.3347691467830114
Epoch 2/1000 - Validation Loss: 1.310598509652274
Epoch 3/1000 - Training Loss: 1.3023328014782496
Epoch 3/1000 - Validation Loss: 1.2761931419372559
Epoch 4/1000 - Training Loss: 1.273036886538778
Epoch 4/1000 - Validation Loss: 1.2442835058484758
Epoch 5/1000 - Training Loss: 1.2469928477491652
Epoch 5/1000 - Validation Loss: 1.215238298688616
Epoch 6/1000 - Training Loss: 1.223044944661004
Epoch 6/1000 - Validation Loss: 1.189346364566258
Epoch 7/1000 - Training Loss: 1.2014516038554055
Epoch 7/1000 - Validation Loss: 1.1657204117093767
Epoch 8/1000 - Training Loss: 1.1822593935898371
Epoch 8/1000 - Validation Loss: 1.1443942785263062
Epoch 9/1000 - Training Loss: 1.1652023898703712
Epoch 9/1000 - Validation Loss: 1.1247795820236206
Epoch 10/1000 - Training Loss: 1.150557549936431
Epoch 10/1000 - Validation Loss: 1.1073185546057565
Epoc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.3642444057124001
Epoch 1/1000 - Validation Loss: 1.3421077387673515
Epoch 2/1000 - Training Loss: 1.3290919129337584
Epoch 2/1000 - Validation Loss: 1.3054141317095076
Epoch 3/1000 - Training Loss: 1.297680982521602
Epoch 3/1000 - Validation Loss: 1.270716769354684
Epoch 4/1000 - Training Loss: 1.2691161568675722
Epoch 4/1000 - Validation Loss: 1.2401903356824602
Epoch 5/1000 - Training Loss: 1.2436000896351678
Epoch 5/1000 - Validation Loss: 1.211896436555045
Epoch 6/1000 - Training Loss: 1.2199372627905436
Epoch 6/1000 - Validation Loss: 1.1866851363863264
Epoch 7/1000 - Training Loss: 1.1990229615143366
Epoch 7/1000 - Validation Loss: 1.1630372149603707
Epoch 8/1000 - Training Loss: 1.1804213396140508
Epoch 8/1000 - Validation Loss: 1.141869374683925
Epoch 9/1000 - Training Loss: 1.1639222417558943
Epoch 9/1000 - Validation Loss: 1.1228298459734236
Epoch 10/1000 - Training Loss: 1.1479669830628805
Epoch 10/1000 - Validation Loss: 1.10601829630988
Epoc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.377624694790159
Epoch 1/1000 - Validation Loss: 1.3560027735573905
Epoch 2/1000 - Training Loss: 1.3423981730427061
Epoch 2/1000 - Validation Loss: 1.3185411521366663
Epoch 3/1000 - Training Loss: 1.309772408434323
Epoch 3/1000 - Validation Loss: 1.2833996500287737
Epoch 4/1000 - Training Loss: 1.2801901676825114
Epoch 4/1000 - Validation Loss: 1.2513629027775355
Epoch 5/1000 - Training Loss: 1.2531108898775918
Epoch 5/1000 - Validation Loss: 1.222085782459804
Epoch 6/1000 - Training Loss: 1.2287897361176354
Epoch 6/1000 - Validation Loss: 1.194657598223005
Epoch 7/1000 - Training Loss: 1.2066729643515177
Epoch 7/1000 - Validation Loss: 1.1712632519858224
Epoch 8/1000 - Training Loss: 1.1878730899521284
Epoch 8/1000 - Validation Loss: 1.1486207927976335
Epoch 9/1000 - Training Loss: 1.1695979599441801
Epoch 9/1000 - Validation Loss: 1.1297603590147836
Epoch 10/1000 - Training Loss: 1.1546799881117684
Epoch 10/1000 - Validation Loss: 1.1117132476397924
Ep

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 0.9786119471703257
Epoch 1/1000 - Validation Loss: 0.727473531450544
Epoch 2/1000 - Training Loss: 0.6991606769817216
Epoch 2/1000 - Validation Loss: 0.702954867056438
Epoch 3/1000 - Training Loss: 0.5812978909483978
Epoch 3/1000 - Validation Loss: 0.7374644577503204
Epoch 4/1000 - Training Loss: 0.5095998288265297
Epoch 4/1000 - Validation Loss: 0.6497335263660976
Epoch 5/1000 - Training Loss: 0.44529449274497374
Epoch 5/1000 - Validation Loss: 0.6589489238602775
Epoch 6/1000 - Training Loss: 0.4097842693861042
Epoch 6/1000 - Validation Loss: 0.6867408411843436
Epoch 7/1000 - Training Loss: 0.3661049905100039
Epoch 7/1000 - Validation Loss: 0.6387209040778024
Epoch 8/1000 - Training Loss: 0.33040323853492737
Epoch 8/1000 - Validation Loss: 0.6716400044304984
Epoch 9/1000 - Training Loss: 0.2977165201944964
Epoch 9/1000 - Validation Loss: 0.6878876686096191
Epoch 10/1000 - Training Loss: 0.2673964856990746
Epoch 10/1000 - Validation Loss: 0.682243049144744

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 0.9441700535161155
Epoch 1/1000 - Validation Loss: 0.7295813730784825
Epoch 2/1000 - Training Loss: 0.6680793410965374
Epoch 2/1000 - Validation Loss: 0.6756511926651001
Epoch 3/1000 - Training Loss: 0.5545043461024761
Epoch 3/1000 - Validation Loss: 0.6316660557474408
Epoch 4/1000 - Training Loss: 0.4836376418492624
Epoch 4/1000 - Validation Loss: 0.6571028275149209
Epoch 5/1000 - Training Loss: 0.41514281556010246
Epoch 5/1000 - Validation Loss: 0.6979772363390241
Epoch 6/1000 - Training Loss: 0.3672490260962929
Epoch 6/1000 - Validation Loss: 0.6580258139542171
Epoch 7/1000 - Training Loss: 0.32718194888106417
Epoch 7/1000 - Validation Loss: 0.686872307743345
Epoch 8/1000 - Training Loss: 0.2924465736640351
Epoch 8/1000 - Validation Loss: 0.6579795437199729
Epoch 9/1000 - Training Loss: 0.2712215900953327
Epoch 9/1000 - Validation Loss: 0.6610850947243827
Epoch 10/1000 - Training Loss: 0.24540862933333432
Epoch 10/1000 - Validation Loss: 0.6568746055875

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 0.9153036400675774
Epoch 1/1000 - Validation Loss: 0.673694406236921
Epoch 2/1000 - Training Loss: 0.6737674405532224
Epoch 2/1000 - Validation Loss: 0.6545232662132808
Epoch 3/1000 - Training Loss: 0.5695433127028602
Epoch 3/1000 - Validation Loss: 0.5932386687823704
Epoch 4/1000 - Training Loss: 0.4820867361766951
Epoch 4/1000 - Validation Loss: 0.6029486954212189
Epoch 5/1000 - Training Loss: 0.42662680548216614
Epoch 5/1000 - Validation Loss: 0.6144248119422367
Epoch 6/1000 - Training Loss: 0.38431171061737196
Epoch 6/1000 - Validation Loss: 0.6285028500216348
Epoch 7/1000 - Training Loss: 0.35573353671601843
Epoch 7/1000 - Validation Loss: 0.5821008639676231
Epoch 8/1000 - Training Loss: 0.31878881954721044
Epoch 8/1000 - Validation Loss: 0.5991185562951225
Epoch 9/1000 - Training Loss: 0.28510601366204874
Epoch 9/1000 - Validation Loss: 0.598476094858987
Epoch 10/1000 - Training Loss: 0.2691254989643182
Epoch 10/1000 - Validation Loss: 0.549652470009

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 0.950575624193464
Epoch 1/1000 - Validation Loss: 0.7662766064916339
Epoch 2/1000 - Training Loss: 0.680343568857227
Epoch 2/1000 - Validation Loss: 0.6668565315859658
Epoch 3/1000 - Training Loss: 0.5814039733793054
Epoch 3/1000 - Validation Loss: 0.6361481462206159
Epoch 4/1000 - Training Loss: 0.5057833112244096
Epoch 4/1000 - Validation Loss: 0.6295201012066433
Epoch 5/1000 - Training Loss: 0.45428397453257013
Epoch 5/1000 - Validation Loss: 0.6251155265739986
Epoch 6/1000 - Training Loss: 0.40961288208408014
Epoch 6/1000 - Validation Loss: 0.6202857834952218
Epoch 7/1000 - Training Loss: 0.3680616707674095
Epoch 7/1000 - Validation Loss: 0.611472168139049
Epoch 8/1000 - Training Loss: 0.3269783359553133
Epoch 8/1000 - Validation Loss: 0.6144480236939022
Epoch 9/1000 - Training Loss: 0.3016606838043247
Epoch 9/1000 - Validation Loss: 0.6278603630406516
Epoch 10/1000 - Training Loss: 0.27651751121240004
Epoch 10/1000 - Validation Loss: 0.610282314675194

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificatio

Epoch 1/1000 - Training Loss: 1.1200057129774774
Epoch 1/1000 - Validation Loss: 0.8207135541098458
Epoch 2/1000 - Training Loss: 0.8215120940336159
Epoch 2/1000 - Validation Loss: 0.7594207780701774
Epoch 3/1000 - Training Loss: 0.7383281482117516
Epoch 3/1000 - Validation Loss: 0.6778750249317714
Epoch 4/1000 - Training Loss: 0.6643748565443924
Epoch 4/1000 - Validation Loss: 0.7090329825878143
Epoch 5/1000 - Training Loss: 0.6163646898099354
Epoch 5/1000 - Validation Loss: 0.7158729732036591
Epoch 6/1000 - Training Loss: 0.5556150775934968
Epoch 6/1000 - Validation Loss: 0.673275317464556
Epoch 7/1000 - Training Loss: 0.5199338011443615
Epoch 7/1000 - Validation Loss: 0.7202696204185486
Epoch 8/1000 - Training Loss: 0.4973530332956995
Epoch 8/1000 - Validation Loss: 0.7284176094191415
Epoch 9/1000 - Training Loss: 0.46451878840369837
Epoch 9/1000 - Validation Loss: 0.6922540536948613
Epoch 10/1000 - Training Loss: 0.430939026975206
Epoch 10/1000 - Validation Loss: 0.7519956827163696

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.0587313590305192
Epoch 1/1000 - Validation Loss: 0.7873591695513044
Epoch 2/1000 - Training Loss: 0.8530951599989619
Epoch 2/1000 - Validation Loss: 0.7379601001739502
Epoch 3/1000 - Training Loss: 0.752700462937355
Epoch 3/1000 - Validation Loss: 0.7311263510159084
Epoch 4/1000 - Training Loss: 0.6970396212169102
Epoch 4/1000 - Validation Loss: 0.6638266316481999
Epoch 5/1000 - Training Loss: 0.6163423066692693
Epoch 5/1000 - Validation Loss: 0.7349078144345965
Epoch 6/1000 - Training Loss: 0.5765228063932487
Epoch 6/1000 - Validation Loss: 0.7559653222560883
Epoch 7/1000 - Training Loss: 0.5338163535509791
Epoch 7/1000 - Validation Loss: 0.7089369169303349
Epoch 8/1000 - Training Loss: 0.504579208791256
Epoch 8/1000 - Validation Loss: 0.7046127830232892
Epoch 9/1000 - Training Loss: 0.4752896193947111
Epoch 9/1000 - Validation Loss: 0.725817084312439
Epoch 10/1000 - Training Loss: 0.4279073016451938
Epoch 10/1000 - Validation Loss: 0.7053222869123731
E

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.0864843438778604
Epoch 1/1000 - Validation Loss: 0.995547924722944
Epoch 2/1000 - Training Loss: 0.9125387610069343
Epoch 2/1000 - Validation Loss: 0.7762636329446521
Epoch 3/1000 - Training Loss: 0.8117560589952129
Epoch 3/1000 - Validation Loss: 0.7721799526895795
Epoch 4/1000 - Training Loss: 0.7475337338234697
Epoch 4/1000 - Validation Loss: 0.8134214111736843
Epoch 5/1000 - Training Loss: 0.713051511240857
Epoch 5/1000 - Validation Loss: 0.7567971902234214
Epoch 6/1000 - Training Loss: 0.6397800158177104
Epoch 6/1000 - Validation Loss: 0.8132691723959786
Epoch 7/1000 - Training Loss: 0.6170674982879844
Epoch 7/1000 - Validation Loss: 0.7425303374017987
Epoch 8/1000 - Training Loss: 0.5720739439129829
Epoch 8/1000 - Validation Loss: 0.7364918334143502
Epoch 9/1000 - Training Loss: 0.5297591539898089
Epoch 9/1000 - Validation Loss: 0.7455977712358747
Epoch 10/1000 - Training Loss: 0.5120332123977798
Epoch 10/1000 - Validation Loss: 0.7511399814060756


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 1.0607959681323595
Epoch 1/1000 - Validation Loss: 0.8752250713961465
Epoch 2/1000 - Training Loss: 0.9206630523715701
Epoch 2/1000 - Validation Loss: 0.8307085037231445
Epoch 3/1000 - Training Loss: 0.8543457766728741
Epoch 3/1000 - Validation Loss: 0.8235612767083305
Epoch 4/1000 - Training Loss: 0.7836490837591035
Epoch 4/1000 - Validation Loss: 0.7535832950047084
Epoch 5/1000 - Training Loss: 0.7304515253220286
Epoch 5/1000 - Validation Loss: 0.7318317506994519
Epoch 6/1000 - Training Loss: 0.68983219191432
Epoch 6/1000 - Validation Loss: 0.6986076491219657
Epoch 7/1000 - Training Loss: 0.6472510312284742
Epoch 7/1000 - Validation Loss: 0.6933721899986267
Epoch 8/1000 - Training Loss: 0.616870461830071
Epoch 8/1000 - Validation Loss: 0.6696051614625114
Epoch 9/1000 - Training Loss: 0.575433760881424
Epoch 9/1000 - Validation Loss: 0.7069386669567653
Epoch 10/1000 - Training Loss: 0.565249580357756
Epoch 10/1000 - Validation Loss: 0.7072554656437465
Epo

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.decoder.bias', 'predictions.dense.weight', 'predictions.LayerNorm.bias', 'predictions.dense.bias', 'predictions.decoder.weight', 'predictions.LayerNorm.weight', 'predictions.bias']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 2.734238574237076e+18
Epoch 1/1000 - Validation Loss: 1.464945665798083e+18
Epoch 2/1000 - Training Loss: 1.6499779660801452e+18
Epoch 2/1000 - Validation Loss: 9.56994069363575e+17
Epoch 3/1000 - Training Loss: 1.904047686642228e+18
Epoch 3/1000 - Validation Loss: 7.110598194054337e+17
Epoch 4/1000 - Training Loss: 1.8446086316648038e+18
Epoch 4/1000 - Validation Loss: 1.2736033069389204e+18
Epoch 5/1000 - Training Loss: 1.6852344242865065e+18
Epoch 5/1000 - Validation Loss: 8.391497285856695e+17
Epoch 6/1000 - Training Loss: 1.8866624940923758e+18
Epoch 6/1000 - Validation Loss: 3.0683391262549233e+18
Epoch 7/1000 - Training Loss: 2.6716165435392563e+18
Epoch 7/1000 - Validation Loss: 6.149556851840123e+17
Epoch 8/1000 - Training Loss: 1.7251687037872e+18
Epoch 8/1000 - Validation Loss: 1.0262230273915617e+18
Epoch 9/1000 - Training Loss: 1.7984167811552005e+18
Epoch 9/1000 - Validation Loss: 1.369980551995027e+18
Epoch 10/1000 - Training Loss: 1.6944444

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.decoder.bias', 'predictions.dense.weight', 'predictions.LayerNorm.bias', 'predictions.dense.bias', 'predictions.decoder.weight', 'predictions.LayerNorm.weight', 'predictions.bias']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 3.0166276537458734e+18
Epoch 1/1000 - Validation Loss: 9.547970782411209e+17
Epoch 2/1000 - Training Loss: 1.9509700564814397e+18
Epoch 2/1000 - Validation Loss: 2.5226010849279063e+18
Epoch 3/1000 - Training Loss: 2.2543075702342382e+18
Epoch 3/1000 - Validation Loss: 1.3333023755550438e+18
Epoch 4/1000 - Training Loss: 1.8021328218330048e+18
Epoch 4/1000 - Validation Loss: 6.166886922166131e+17
Epoch 5/1000 - Training Loss: 2.1705161826053466e+18
Epoch 5/1000 - Validation Loss: 1.3398958031894328e+18
Epoch 6/1000 - Training Loss: 1.8744101218551258e+18
Epoch 6/1000 - Validation Loss: 6.952012834115795e+17
Epoch 7/1000 - Training Loss: 1.652169521001136e+18
Epoch 7/1000 - Validation Loss: 5.945719520129563e+17
Epoch 8/1000 - Training Loss: 1.816926409641614e+18
Epoch 8/1000 - Validation Loss: 1.1795032388175508e+18
Epoch 9/1000 - Training Loss: 1.9806694755468388e+18
Epoch 9/1000 - Validation Loss: 1.313113143045821e+18
Epoch 10/1000 - Training Loss: 1.44

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.decoder.bias', 'predictions.dense.weight', 'predictions.LayerNorm.bias', 'predictions.dense.bias', 'predictions.decoder.weight', 'predictions.LayerNorm.weight', 'predictions.bias']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/1000 - Training Loss: 2.781031205999177e+18
Epoch 1/1000 - Validation Loss: 9.227327520118632e+17
Epoch 2/1000 - Training Loss: 2.0535024547563156e+18
Epoch 2/1000 - Validation Loss: 1.1592588442026452e+18
Epoch 3/1000 - Training Loss: 2.471496901160375e+18
Epoch 3/1000 - Validation Loss: 6.931527803527496e+17
Epoch 4/1000 - Training Loss: 1.6846566045427397e+18
Epoch 4/1000 - Validation Loss: 1.2006998595874511e+18
Epoch 5/1000 - Training Loss: 1.8077715481841032e+18
Epoch 5/1000 - Validation Loss: 1.0551769235794541e+18
Epoch 6/1000 - Training Loss: 1.639883783003664e+18
Epoch 6/1000 - Validation Loss: 1.6847118286186647e+18
Epoch 7/1000 - Training Loss: 1.6510504764756646e+18
Epoch 7/1000 - Validation Loss: 1.8483721709995013e+18
Epoch 8/1000 - Training Loss: 1.5654443234302922e+18
Epoch 8/1000 - Validation Loss: 6.568443294232084e+17
Epoch 9/1000 - Training Loss: 1.8730843942998694e+18
Epoch 9/1000 - Validation Loss: 1.2238850747580698e+18
Epoch 10/1000 - Training Loss: 1.8

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.decoder.bias', 'predictions.dense.weight', 'predictions.LayerNorm.bias', 'predictions.dense.bias', 'predictions.decoder.weight', 'predictions.LayerNorm.weight', 'predictions.bias']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


KeyboardInterrupt: 